diff --git a/src/extensions/new_tasks.py b/src/extensions/new_tasks.py
new file mode 100644
index 0000000..998cace
--- /dev/null
+++ b/src/extensions/new_tasks.py
@@ -0,0 +1,184 @@
+"""
+
+task flow
+
+=-=-=-=-=
+
+1. every 10 minutes
+2. get all bot guild_ids
+3. get all subscriptions for the known guild ids (accounting for pagination of 25)
+4. iterate through and process each subscription
+5. get all articles from each subscription
+6. iterate through and process each article
+
+"""
+
+import itertools
+import logging
+import re
+
+import aiohttp
+from discord.ext import commands, tasks
+
+# Project-internal helpers; these import paths are assumed.
+from ..api import API
+from ..feed import RSSFeed, RSSItem, get_unparsed_feed, parse
+
+log = logging.getLogger(__name__)
+
+
+class Cog:
+    """Planning sketch of the task flow above; not loaded as an extension."""
+
+    async def subscription_task(self):
+        subscriptions = await self.get_subscriptions()
+
+        for subscription in subscriptions:
+            await self.process_subscription(subscription)
+
+    async def get_subscriptions(self):
+        # return subs from the api, handling pagination
+        ...
+
+    async def process_subscription(self, subscription):
+        articles = subscription.get_articles()
+
+        for article in articles:
+            await self.process_article(article)
+
+    async def process_article(self, article):
+        # validate the article, then:
+        if await self.track_article(article):
+            await self.send_article(article)
+
+    async def track_article(self, article):
+        ...
+
+    async def send_article(self, article):
+        ...
+
+
+class TaskCog(commands.Cog):
+    """
+    Tasks cog for PYRSS.
+    """
+
+    def __init__(self, bot):
+        super().__init__()
+        self.bot = bot
+        self.subscription_task.start()
+
+    def cog_unload(self):
+        self.subscription_task.cancel()
+
+    @tasks.loop(minutes=10)
+    async def subscription_task(self):
+        async with aiohttp.ClientSession() as session:
+            api = API(self.bot.api_token, session)
+            subscriptions = await self.get_subscriptions(api)
+            await self.process_subscriptions(api, subscriptions)
+
+    async def get_subscriptions(self, api) -> list:
+        guild_ids = [guild.id for guild in self.bot.guilds]
+        subscriptions = []
+
+        # The API paginates at 25 subscriptions per page; keep requesting
+        # pages until a 404 signals there are no more.
+        for page in itertools.count(1):
+            try:
+                page_data = (await api.get_subscriptions(server__in=guild_ids, page=page))[0]
+            except aiohttp.ClientResponseError as error:
+                if error.status != 404:
+                    log.error("HTTP error while gathering page data: %s", error)
+                break
+            except Exception as error:
+                log.error("Exception while gathering page data: %s", error)
+                break
+
+            subscriptions.extend(page_data)
+
+        return subscriptions
+
+    async def process_subscriptions(self, api, subscriptions):
+        for sub in subscriptions:
+            # Skip inactive subscriptions and ones with no target channels.
+            if not sub.active or not sub.channel_count:
+                continue
+
+            unparsed_feed = await get_unparsed_feed(sub.url, api.session)
+            parsed_feed = await parse(unparsed_feed)
+
+            rss_feed = RSSFeed.from_parsed_feed(parsed_feed)
+            await self.process_items(api, sub, rss_feed)
+
+    async def process_items(self, api, sub, feed):
+        # get_channel returns None for unknown ids, so drop those entries.
+        channels = [self.bot.get_channel(channel.channel_id) for channel in await sub.get_channels()]
+        channels = [channel for channel in channels if channel is not None]
+        filters = [await api.get_filter(filter_id) for filter_id in sub.filters]
+
+        for item in feed.items:
+            # filter_item is a coroutine, so each call must be awaited
+            # before any() can evaluate the results.
+            blocked = any([await self.filter_item(_filter, item) for _filter in filters])
+            mutated_item = item.create_mutated_copy(sub.mutators)
+
+            for channel in channels:
+                await self.mark_tracked_item(api, sub, item, channel.id, blocked)
+
+                if not blocked:
+                    await channel.send(embed=mutated_item.to_embed(sub, feed))
+
+    async def filter_item(self, _filter: dict, item: RSSItem) -> bool:
+        """
+        Returns True if the item should be ignored due to this filter.
+        """
+
+        # Flag marking whether the filter matched the item's content.
+        match_found = False
+
+        # "".split(",") yields [""], so drop empty entries.
+        keywords = [word for word in _filter["keywords"].split(",") if word]
+        regex_pattern = _filter["regex"]
+        is_whitelist = _filter["whitelist"]
+
+        log.debug(
+            "trying filter '%s', keywords '%s', regex '%s', is whitelist: '%s'",
+            _filter["name"], keywords, regex_pattern, is_whitelist
+        )
+
+        assert not (keywords and regex_pattern), "Keywords and regex are mutually exclusive; use only one."
+
+        if any(word in item.title or word in item.description for word in keywords):
+            match_found = True
+
+        if regex_pattern:
+            regex = re.compile(regex_pattern)
+            match_found = bool(regex.search(item.title) or regex.search(item.description))
+
+        # A whitelist filter blocks items that do NOT match; a blacklist
+        # filter blocks items that DO match.
+        return not match_found if is_whitelist else match_found
+
+    async def mark_tracked_item(self, api, sub, item, channel_id, blocked):
+        try:
+            await api.create_tracked_content(
+                guid=item.guid,
+                title=item.title,
+                url=item.url,
+                subscription=sub.id,
+                channel_id=channel_id,
+                blocked=blocked
+            )
+        except aiohttp.ClientResponseError as error:
+            # 409 means the item is already tracked, which is expected.
+            if error.status == 409:
+                log.debug(error)
+            else:
+                log.error(error)