diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000..4aa4e9f
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+filterwarnings =
+    ignore:'audioop' is deprecated and slated for removal in Python 3.13:DeprecationWarning
diff --git a/requirements.txt b/requirements.txt
index 71a1aa2..dce5b56 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,30 @@
+aiohappyeyeballs==2.4.3
+aiohttp==3.10.10
+aiosignal==1.3.1
+anyio==4.6.2.post1
+attrs==24.2.0
 beautifulsoup4==4.12.3
 bump2version==1.0.1
+certifi==2024.8.30
 discord.py==2.3.2
 feedparser==6.0.11
+frozenlist==1.5.0
+h11==0.14.0
+httpcore==1.0.6
 httpx==0.27.2
+idna==3.10
+iniconfig==2.0.0
 markdownify==0.11.6
+multidict==6.1.0
+packaging==24.2
+pluggy==1.5.0
+propcache==0.2.0
+pytest==8.3.3
 python-dotenv==1.0.0
 rapidfuzz==3.9.4
+sgmllib3k==1.0.0
+six==1.16.0
+sniffio==1.3.1
+soupsieve==2.6
 uwuipy==0.1.9
+yarl==1.17.0
diff --git a/src/tests.py b/src/tests.py
deleted file mode 100644
index 41e596b..0000000
--- a/src/tests.py
+++ /dev/null
@@ -1,137 +0,0 @@
-
-def test_content_filters():
-    """
-    In this test, a content filter is created and used to filter some data.
-    """
-
-    from models import ContentFilter, MatchingAlgorithm
-
-    content_filter = ContentFilter(
-        id=0,
-        server_id=0,
-        name="Test Filter",
-        matching_pattern="postcode lottery",
-        matching_algorithm=MatchingAlgorithm.LITERAL,
-        is_insensitive=True,
-        is_whitelist=False
-    )
-
-    entry = {
-        "title": "This is the Title of the test Entry",
-        "description": "This is the description for the postcode lottery"
-    }
-
-    # Should match 'Test' in entry title with the pattern
-    assert content_filter.matches(entry) == True
-
-    print("success")
-
-
-def test_content_duplicates():
-    """
-    In this test, two almost but not quite identical instances of `Content` are created, and
-    checked against each other as duplicates.
-
-    They should be considered duplicate, because not all fields need to match in order to be considered a duplicate.
-
-    The provided data is from a real world used example, where what should have been considered a duplicate was missed.
-    """
-
-    from models import Content
-    from datetime import datetime
-
-    datetime_now = datetime.now()
-
-    first_content = Content(
-        id=0,
-        subscription_id=38,
-        item_id="https://www.bbc.com/news/articles/ced9l7799w9o#0",
-        item_guid="https://www.bbc.com/news/articles/ced9l7799w9o#0",
-        item_url="https://www.bbc.com/news/articles/ced9l7799w9o",
-        item_title="Spain's PM orders 10,000 troops and police to Valencia",
-        item_description="",
-        item_content_hash="4a2ba8429a0584ce08f521db3f6d3000c248467f2cc6fa4b0458808169247ad8",
-        item_image_url="",
-        item_thumbnail_url="",
-        item_published=datetime_now,
-        item_author="",
-        item_author_url="",
-        item_feed_title="",
-        item_feed_url="",
-        blocked=False,
-    )
-
-    second_content = Content(
-        id=1,
-        subscription_id=38,
-        item_id="https://www.bbc.com/news/articles/ced9l7799w9o#0",
-        item_guid="https://www.bbc.com/news/articles/ced9l7799w9o#0",
-        item_url="https://www.bbc.com/news/articles/ced9l7799w9o",
-        item_title="Spain's PM orders 10,000 troops and police to flood-hit Valencia",
-        item_description="",
-        item_content_hash="4a2ba8429a0584ce08f521db3f6d3000c248467f2cc6fa4b0458808169247ad8",
-        item_image_url="",
-        item_thumbnail_url="",
-        item_published=datetime_now,
-        item_author="",
-        item_author_url="",
-        item_feed_title="",
-        item_feed_url="",
-        blocked=False,
-    )
-
-    assert first_content.is_duplicate(second_content), "Content is not considered a duplicate"
-
-    print("1 success")
-
-    # BUG: This one is identified but still gets processed...
-
-    third_content = Content(
-        id=0,
-        subscription_id=38,
-        item_id="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o#8",
-        item_guid="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o#8",
-        item_url="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o",
-        item_title="Sao Paulo GP qualifying set for Sunday after rain postponement",
-        item_description="",
-        item_content_hash="b6c78de554a183cfeca88decf987401719d431647523f038a86fd7d972e4e799",
-        item_image_url="",
-        item_thumbnail_url="",
-        item_published=datetime_now,
-        item_author="",
-        item_author_url="",
-        item_feed_title="",
-        item_feed_url="",
-        blocked=False,
-    )
-
-    fourth_content = Content(
-        id=0,
-        subscription_id=38,
-        item_id="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o#8",
-        item_guid="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o#8",
-        item_url="https://www.bbc.com/sport/formula1/articles/cdd0ey1v5j9o",
-        item_title="Sao Paulo GP qualifying set for Sunday after rain postponement",
-        item_description="",
-        item_content_hash="6ddd15d7d9626f2d63ba5631056fda9bcaf920e8c82ec5c23fa824b02ce690d0",
-        item_image_url="",
-        item_thumbnail_url="",
-        item_published=datetime_now,
-        item_author="",
-        item_author_url="",
-        item_feed_title="",
-        item_feed_url="",
-        blocked=False,
-    )
-
-    assert third_content.is_duplicate(fourth_content)
-
-    print("2 success")
-
-def main():
-    # test_content_filters()
-    test_content_duplicates()
-
-if __name__ == "__main__":
-    main()
-    
\ No newline at end of file
diff --git a/src/tests/__init__.py b/src/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tests/test_content.py b/src/tests/test_content.py
new file mode 100644
index 0000000..918aaf3
--- /dev/null
+++ b/src/tests/test_content.py
@@ -0,0 +1,51 @@
+import pytest
+
+from models import MatchingAlgorithm, ContentFilter, Content
+
+
+@pytest.fixture
+def content() -> Content:
+    return Content(
+        id=0,
+        subscription_id=0,
+        item_id="",
+        item_guid="",
+        item_url="",
+        item_title="This week in the papers:",
+        item_description="The price of petrol has risen by over 2% since the previous financial report. Read the full article here.",
+        item_content_hash="",
+        item_image_url=None,
+        item_thumbnail_url=None,
+        item_published=None,
+        item_author="",
+        item_author_url=None,
+        item_feed_title="",
+        item_feed_url=""
+    )
+
+@pytest.fixture
+def content_filter() -> ContentFilter:
+    return ContentFilter(
+        id=0,
+        server_id=0,
+        name="Test Content Filter",
+        matching_pattern="",
+        matching_algorithm=MatchingAlgorithm.NONE,
+        is_insensitive=True,
+        is_whitelist=False
+    )
+
+def test_content_filter_any(content: Content, content_filter: ContentFilter):
+    content_filter.matching_pattern = "france twenty report grass lately"
+    content_filter.matching_algorithm = MatchingAlgorithm.ANY
+    assert content_filter.matches(content) is True
+
+def test_content_filter_all(content: Content, content_filter: ContentFilter):
+    content_filter.matching_pattern = "week petrol risen"
+    content_filter.matching_algorithm = MatchingAlgorithm.ALL
+    assert content_filter.matches(content) is True
+
+def test_content_filter_regex(content: Content, content_filter: ContentFilter):
+    content_filter.matching_pattern = r"\b(The Papers|weekly quiz)\b"
+    content_filter.matching_algorithm = MatchingAlgorithm.REGEX
+    assert content_filter.matches(content) is True