Corban-Lee Jones 2024-06-11 20:47:16 +01:00
parent 864f299d9b
commit b97e671c8b


@@ -3,6 +3,7 @@ Extension for the `TaskCog`.
Loading this file via `commands.Bot.load_extension` will add `TaskCog` to the bot.
"""
+import re
import json
import logging
import datetime
@@ -55,8 +56,8 @@ class TaskCog(commands.Cog):
    async def on_ready(self):
        """Instructions to execute when the cog is ready."""
-        if not self.bot.developing:
-            self.rss_task.start()
+        # if not self.bot.developing:
+        self.rss_task.start()
        log.info("%s cog is ready", self.__class__.__name__)
@@ -102,7 +103,7 @@ class TaskCog(commands.Cog):
        for sub in subscriptions:
            await self.process_subscription(api, session, sub)
-            log.info("Finished subscription task, time elapsed: %s", process_time() - time)
+        log.info("Finished subscription task, time elapsed: %s", process_time() - time)

    async def get_subscriptions(self, api, guild_ids: list[int], page: int):
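One caveat with the timing in the hunk above: `time.process_time()` counts CPU time only, so the seconds spent awaiting HTTP requests inside `process_subscription` never appear in the logged figure. If wall-clock duration is the intent, `time.perf_counter()` is the usual choice. A small illustrative sketch, with made-up names standing in for the real task:

```python
import asyncio
from time import perf_counter, process_time


async def fake_subscription_work():
    await asyncio.sleep(0.2)  # stands in for awaiting the RSS/API calls


async def main():
    cpu_start, wall_start = process_time(), perf_counter()
    await fake_subscription_work()
    # CPU time barely moves while the coroutine is suspended on I/O,
    # but the wall clock reflects the real 0.2 s wait.
    print(f"process_time: {process_time() - cpu_start:.3f}s")
    print(f"perf_counter: {perf_counter() - wall_start:.3f}s")


asyncio.run(main())
```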
@@ -129,114 +130,73 @@ class TaskCog(commands.Cog):
            log.warning("No channels to send this to")
            return

+        filters = [await api.get_filter(filter_id) for filter_id in sub.filters]
+        log.debug("found %s filter(s)", len(filters))
        unparsed_content = await get_unparsed_feed(sub.url, session)
        parsed_content = parse(unparsed_content)
        source = Source.from_parsed(parsed_content)
-        articles = source.get_latest_articles(3)
+        articles = source.get_latest_articles(10)
        if not articles:
            log.debug("No articles found")

        for article in articles:
-            await self.process_article(session, channels, article)
+            await self.process_article(api, session, sub.id, filters, channels, article)
-    async def process_article(self, session, channels: list[SubChannel], article: Article):
-        embed = await article.to_embed(session)
+    async def process_article(self, api, session, sub_id: int, filters: list[dict], channels: list[SubChannel], article: Article):
+        log.debug("processing article '%s' '%s'", article.guid, article.title)
+        for _filter in filters:
+            if self.filter_article(_filter, article):
+                log.debug("filter matched, skipping article")
+                return
+        # if any(self.filter_article(_filter, article) for _filter in filters):
+        #     return
+        try:
+            await api.create_tracked_content(guid=article.guid, title=article.title, url=article.url, subscription=sub_id)
+            log.debug("successfully tracked %s", article.guid)
+        except aiohttp.ClientResponseError as error:
+            log.error(error)
+            if error.status == 400:
+                log.debug("It looks like this article already exists, skipping")
+            return
+        log.debug("attempting to send embed to %s channel(s)", len(channels))
+        embed = await article.to_embed(session)
        for channel in channels:
            await channel.send(embed=embed)

+    def filter_article(self, _filter: dict, article: Article) -> bool:
+        """
+        Returns True if article should be ignored due to filters.
+        """
+        log.debug("trying filter '%s'", _filter["name"])
+        keywords = _filter["keywords"].split(",")
+        regex_pattern = _filter["regex"]
+        assert not (keywords and regex_pattern), "Keywords and Regex used, only 1 can be used."
+        for word in keywords:
+            if word in article.title or word in article.description:
+                log.debug("keyword '%s' found in title or description", word)
+                return True
+        # if any(word in article.title or word in article.description for word in keywords):
+        #     return True
+        if regex_pattern:
+            regex = re.compile(regex_pattern)
+            return regex.search(article.title) or regex.search(article.description)
+        return False

-    # async def batch_process_subs(self, data: list):
-    #     log.debug("batch process subs, count '%s'", len(data))
-    #     subscriptions = Subscription.from_list(data)
-    #     for sub in subscriptions:
-    #         log.info(sub.name)
-    # async def process_feed(self, feed: FeedChannelModel, database: DatabaseManager):
-    #     """Process the passed feed. Will also call process for each article found in the feed.
-    #     Parameters
-    #     ----------
-    #     feed : FeedChannelModel
-    #         Database model for the feed.
-    #     database : DatabaseManager
-    #         Database connection handler, must be open.
-    #     """
-    #     log.debug("Processing feed: %s", feed.id)
-    #     channel = self.bot.get_channel(feed.discord_channel_id)
-    #     # TODO: integrate the `validate_feed` code into here, also do on list command and show errors.
-    #     async with aiohttp.ClientSession() as session:
-    #         unparsed_content = await get_unparsed_feed(feed.rss_source.rss_url)
-    #         parsed_feed = parse(unparsed_content)
-    #         source = Source.from_parsed(parsed_feed)
-    #         articles = source.get_latest_articles(5)
-    #         if not articles:
-    #             log.info("No articles to process for %s in ", feed.rss_source.nick, feed.discord_server_id)
-    #             return
-    #         for article in articles:
-    #             await self.process_article(feed.id, article, channel, database, session)
-    # async def process_article(
-    #     self, feed_id: int, article: Article, channel: TextChannel, database: DatabaseManager,
-    #     session: aiohttp.ClientSession
-    # ):
-    #     """Process the passed article. Will send the embed to a channel if all is valid.
-    #     Parameters
-    #     ----------
-    #     feed_id : int
-    #         The feed model ID, used to log the sent article.
-    #     article : Article
-    #         Database model for the article.
-    #     channel : TextChannel
-    #         Where the article will be sent to.
-    #     database : DatabaseManager
-    #         Database connection handler, must be open.
-    #     """
-    #     log.debug("Processing article: %s", article.url)
-    #     query = select(SentArticleModel).where(and_(
-    #         SentArticleModel.article_url == article.url,
-    #         SentArticleModel.discord_channel_id == channel.id,
-    #     ))
-    #     result = await database.session.execute(query)
-    #     if result.scalars().all():
-    #         log.debug("Article already processed: %s", article.url)
-    #         return
-    #     embed = await article.to_embed(session)
-    #     try:
-    #         await channel.send(embed=embed)
-    #     except Forbidden as error: # TODO: find some way of informing the user about this error.
-    #         log.error("Can't send article to channel: %s · %s · %s", channel.name, channel.id, error)
-    #         return
-    #     query = insert(SentArticleModel).values(
-    #         article_url = article.url,
-    #         discord_channel_id = channel.id,
-    #         discord_server_id = channel.guild.id,
-    #         discord_message_id = -1,
-    #         feed_channel_id = feed_id
-    #     )
-    #     await database.session.execute(query)
-    #     log.debug("new Article processed: %s", article.url)
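The new `process_article` treats an `aiohttp.ClientResponseError` with status 400 from `create_tracked_content` as "this GUID is already tracked" and skips sending the embed. A rough sketch of that dedup pattern using plain aiohttp; the endpoint URL and the `track_or_skip` helper are hypothetical stand-ins for the project's own API wrapper:

```python
import aiohttp


async def track_or_skip(session: aiohttp.ClientSession, guid: str, title: str, url: str, sub_id: int) -> bool:
    """Return True if the article was newly tracked, False if the API reports it already exists."""
    payload = {"guid": guid, "title": title, "url": url, "subscription": sub_id}
    try:
        # Hypothetical endpoint; the cog calls its own API client instead of posting directly.
        async with session.post("https://example.invalid/api/tracked-content/", json=payload) as resp:
            resp.raise_for_status()
    except aiohttp.ClientResponseError as error:
        if error.status == 400:
            return False  # duplicate GUID, so the embed has already been posted before
        raise
    return True
```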
async def setup(bot):
"""