working on tasks rewrite

This commit is contained in:
Corban-Lee Jones 2024-07-03 16:40:35 +01:00
parent e9c805f8ed
commit bc93b37b9a

src/extensions/new_tasks.py Normal file (+152)

@@ -0,0 +1,152 @@
"""
task flow
=-=-=-=-=
1. every 10 minutes
2. get all bot guild_ids
3. get all subscriptions against known guild ids (account for pagination of 25)
4. iterate through and process each subscription
5. get all articles from subscription
6. iterate through and process each article
"""
import logging
import re

import aiohttp
from discord.ext import commands, tasks

# Project-internal helpers; exact import paths assumed, matching usage below.
from api import API
from feed import RSSFeed, RSSItem, get_unparsed_feed, parse

log = logging.getLogger(__name__)


class Cog:
    """Planning sketch of the task flow above; superseded by TaskCog below."""

    async def subscription_task(self):
        subscriptions = await self.get_subscriptions()
        for subscription in subscriptions:
            await self.process_subscription(subscription)

    async def get_subscriptions(self):
        # return subs from the API, handling pagination
        ...

    async def process_subscription(self, subscription):
        articles = subscription.get_articles()
        for article in articles:
            await self.process_article(article)

    async def process_article(self, article):
        # validate the article, then:
        if await self.track_article(article):
            await self.send_article(article)

    async def track_article(self, article):
        ...

    async def send_article(self, article):
        ...


class TaskCog(commands.Cog):
    """
    Tasks cog for PYRSS.
    """

    def __init__(self, bot):
        super().__init__()
        self.bot = bot
        self.subscription_task.start()

    @tasks.loop(minutes=10)  # "every 10 minutes", per the task flow above
    async def subscription_task(self):
        async with aiohttp.ClientSession() as session:
            api = API(self.bot.api_token, session)
            subscriptions = await self.get_subscriptions(api)
            await self.process_subscriptions(api, subscriptions)

        # articles = [*(await self.get_articles(api, sub)) for sub in subscriptions]
        # await self.send_articles(api, articles)

    async def get_subscriptions(self, api) -> list:
        """Fetch subscriptions for all of the bot's guilds, walking the paginated API."""

        guild_ids = [guild.id for guild in self.bot.guilds]
        subscriptions = []

        page = 1
        while True:
            try:
                page_data = (await api.get_subscriptions(server__in=guild_ids, page=page))[0]
            except aiohttp.ClientResponseError as error:
                if error.status == 404:
                    break  # past the last page
                log.error("HTTP error while gathering page data: %s", error)
                break
            except Exception as error:
                log.error("Exception while gathering page data: %s", error)
                break

            subscriptions.extend(page_data)
            page += 1

        return subscriptions
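
    # NOTE: the `[0]` above assumes api.get_subscriptions returns a
    # (results, total_count) pair for each page of 25 items; that shape is
    # inferred from the indexing, not from an API spec.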

    async def process_subscriptions(self, api, subscriptions):
        """Fetch and parse the feed for each active subscription, then process its items."""

        for sub in subscriptions:
            if not sub.active or not sub.channel_count:
                continue

            unparsed_feed = await get_unparsed_feed(sub.url, api.session)
            parsed_feed = await parse(unparsed_feed)
            rss_feed = RSSFeed.from_parsed_feed(parsed_feed)
            await self.process_items(api, sub, rss_feed)

    async def process_items(self, api, sub, feed):
        """Track every item in the feed, and post items that pass the filters."""

        channels = [self.bot.get_channel(channel.channel_id) for channel in await sub.get_channels()]
        filters = [await api.get_filter(filter_id) for filter_id in sub.filters]

        for item in feed.items:
            blocked = any(self.filter_item(_filter, item) for _filter in filters)
            mutated_item = item.create_mutated_copy(sub.mutators)

            for channel in channels:
                if channel is None:
                    continue  # channel was deleted, or the bot can't see it

                await self.mark_tracked_item(api, sub, item, channel.id, blocked)
                if not blocked:
                    await channel.send(embed=mutated_item.to_embed(sub, feed))

    def filter_item(self, _filter: dict, item: RSSItem) -> bool:
        """
        Returns True if the item should be ignored due to this filter.

        Deliberately synchronous: it does no I/O, so `process_items` can call
        it directly inside `any(...)`.
        """
        keywords = [word.strip() for word in _filter["keywords"].split(",") if word.strip()]
        regex_pattern = _filter["regex"]
        is_whitelist = _filter["whitelist"]

        log.debug(
            "trying filter '%s', keywords '%s', regex '%s', is whitelist: '%s'",
            _filter["name"], keywords, regex_pattern, is_whitelist
        )

        assert not (keywords and regex_pattern), "A filter may use keywords or a regex, not both."

        match_found = any(word in item.title or word in item.description for word in keywords)

        if regex_pattern:
            regex = re.compile(regex_pattern)
            match_found = bool(regex.search(item.title) or regex.search(item.description))

        # A whitelist filter blocks items that do NOT match; a blacklist blocks those that do.
        return not match_found if is_whitelist else match_found
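
    # Illustrative filter shape assumed by filter_item (field names taken from
    # its lookups; the values are made up):
    #
    #   {
    #       "name": "no politics",
    #       "keywords": "election,senate",
    #       "regex": "",
    #       "whitelist": False,
    #   }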

    async def mark_tracked_item(self, api, sub, item, channel_id, blocked):
        """Record the item as tracked so it isn't re-sent; a 409 means it's already tracked."""
        try:
            await api.create_tracked_content(
                guid=item.guid,
                title=item.title,
                url=item.url,
                subscription=sub.id,
                channel_id=channel_id,
                blocked=blocked
            )
        except aiohttp.ClientResponseError as error:
            if error.status == 409:  # already tracked, safe to ignore
                log.debug(error)
            else:
                log.error(error)
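

# Extension entry point — a minimal sketch assuming discord.py 2.x's async
# extension loading; PYRSS may wire its cogs up differently.
async def setup(bot):
    await bot.add_cog(TaskCog(bot))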