diff --git a/src/extensions/tasks.py b/src/extensions/tasks.py
index 03b7a2b..cc8bf11 100644
--- a/src/extensions/tasks.py
+++ b/src/extensions/tasks.py
@@ -3,6 +3,7 @@ Extension for the `TaskCog`.
 
 Loading this file via `commands.Bot.load_extension` will add `TaskCog` to the bot.
 """
+import re
 import json
 import logging
 import datetime
@@ -102,7 +103,7 @@ class TaskCog(commands.Cog):
             for sub in subscriptions:
                 await self.process_subscription(api, session, sub)
 
-        log.info("Finished subscription task, time elapsed: %s", process_time() - time) 
+        log.info("Finished subscription task, time elapsed: %s", process_time() - time)
 
 
     async def get_subscriptions(self, api, guild_ids: list[int], page: int):
@@ -129,112 +130,66 @@ class TaskCog(commands.Cog):
             log.warning("No channels to send this to")
             return
 
+        filters = [await api.get_filter(filter_id) for filter_id in sub.filters]
+        log.debug("found %s filter(s)", len(filters))
+
         unparsed_content = await get_unparsed_feed(sub.url, session)
         parsed_content = parse(unparsed_content)
         source = Source.from_parsed(parsed_content)
-        articles = source.get_latest_articles(3)
+        articles = source.get_latest_articles(10)
 
         if not articles:
             log.debug("No articles found")
 
         for article in articles:
-            await self.process_article(session, channels, article)
+            await self.process_article(api, session, sub.id, filters, channels, article)
 
-    async def process_article(self, session, channels: list[SubChannel], article: Article):
-        embed = await article.to_embed(session)
+    async def process_article(self, api, session, sub_id: int, filters: list[dict], channels: list[SubChannel], article: Article):
+        log.debug("processing article '%s' '%s'", article.guid, article.title)
+
+        for _filter in filters:
+            if self.filter_article(_filter, article):
+                log.debug("filter matched, skipping article")
+                return
+
+        try:
+            await api.create_tracked_content(guid=article.guid, title=article.title, url=article.url, subscription=sub_id)
+            log.debug("successfully tracked %s", article.guid)
+        except aiohttp.ClientResponseError as error:
+            log.error(error)
+
+            if error.status == 400:
+                log.debug("It looks like this article already exists, skipping")
+
+            return
 
         log.debug("attempting to send embed to %s channel(s)", len(channels))
+        embed = await article.to_embed(session)
 
         for channel in channels:
             await channel.send(embed=embed)
 
-    # async def batch_process_subs(self, data: list):
-
-    #     log.debug("batch process subs, count '%s'", len(data))
-    #     subscriptions = Subscription.from_list(data)
-
-    #     for sub in subscriptions:
-    #         log.info(sub.name)
-
-    # async def process_feed(self, feed: FeedChannelModel, database: DatabaseManager):
-    #     """Process the passed feed. Will also call process for each article found in the feed.
-
-    #     Parameters
-    #     ----------
-    #     feed : FeedChannelModel
-    #         Database model for the feed.
-    #     database : DatabaseManager
-    #         Database connection handler, must be open.
-    #     """
-
-    #     log.debug("Processing feed: %s", feed.id)
-
-    #     channel = self.bot.get_channel(feed.discord_channel_id)
-
-    #     # TODO: integrate the `validate_feed` code into here, also do on list command and show errors.
-
-    #     async with aiohttp.ClientSession() as session:
-
-    #         unparsed_content = await get_unparsed_feed(feed.rss_source.rss_url)
-    #         parsed_feed = parse(unparsed_content)
-    #         source = Source.from_parsed(parsed_feed)
-    #         articles = source.get_latest_articles(5)
-
-    #         if not articles:
-    #             log.info("No articles to process for %s in ", feed.rss_source.nick, feed.discord_server_id)
-    #             return
-
-    #         for article in articles:
-    #             await self.process_article(feed.id, article, channel, database, session)
-
-    # async def process_article(
-    #     self, feed_id: int, article: Article, channel: TextChannel, database: DatabaseManager,
-    #     session: aiohttp.ClientSession
-    # ):
-    #     """Process the passed article. Will send the embed to a channel if all is valid.
-
-    #     Parameters
-    #     ----------
-    #     feed_id : int
-    #         The feed model ID, used to log the sent article.
-    #     article : Article
-    #         Database model for the article.
-    #     channel : TextChannel
-    #         Where the article will be sent to.
-    #     database : DatabaseManager
-    #         Database connection handler, must be open.
-    #     """
-
-    #     log.debug("Processing article: %s", article.url)
-
-    #     query = select(SentArticleModel).where(and_(
-    #         SentArticleModel.article_url == article.url,
-    #         SentArticleModel.discord_channel_id == channel.id,
-    #     ))
-    #     result = await database.session.execute(query)
-
-    #     if result.scalars().all():
-    #         log.debug("Article already processed: %s", article.url)
-    #         return
-
-    #     embed = await article.to_embed(session)
-    #     try:
-    #         await channel.send(embed=embed)
-    #     except Forbidden as error: # TODO: find some way of informing the user about this error.
-    #         log.error("Can't send article to channel: %s · %s · %s", channel.name, channel.id, error)
-    #         return
-
-    #     query = insert(SentArticleModel).values(
-    #         article_url = article.url,
-    #         discord_channel_id = channel.id,
-    #         discord_server_id = channel.guild.id,
-    #         discord_message_id = -1,
-    #         feed_channel_id = feed_id
-    #     )
-    #     await database.session.execute(query)
-
-    #     log.debug("new Article processed: %s", article.url)
+    def filter_article(self, _filter: dict, article: Article) -> bool:
+        """
+        Return True if the article should be ignored because of this filter.
+        """
+        log.debug("trying filter '%s'", _filter["name"])
+
+        keywords = [word for word in _filter["keywords"].split(",") if word]  # drop "" — it is a substring of everything
+        regex_pattern = _filter["regex"]
+
+        if keywords and regex_pattern: raise ValueError("Keywords and Regex used, only 1 can be used.")  # assert would vanish under -O
+
+        for word in keywords:
+            if word in article.title or word in article.description:
+                log.debug("keyword '%s' found in title or description", word)
+                return True
+
+        if regex_pattern:
+            regex = re.compile(regex_pattern)
+            return bool(regex.search(article.title) or regex.search(article.description))  # annotated -> bool, not Match | None
+        return False
 
 
 async def setup(bot):
     """