filters
This commit is contained in:
parent
864f299d9b
commit
b97e671c8b
@ -3,6 +3,7 @@ Extension for the `TaskCog`.
|
||||
Loading this file via `commands.Bot.load_extension` will add `TaskCog` to the bot.
|
||||
"""
|
||||
|
||||
import re
|
||||
import json
|
||||
import logging
|
||||
import datetime
|
||||
@ -55,8 +56,8 @@ class TaskCog(commands.Cog):
|
||||
async def on_ready(self):
|
||||
"""Instructions to execute when the cog is ready."""
|
||||
|
||||
if not self.bot.developing:
|
||||
self.rss_task.start()
|
||||
# if not self.bot.developing:
|
||||
self.rss_task.start()
|
||||
|
||||
log.info("%s cog is ready", self.__class__.__name__)
|
||||
|
||||
@ -102,7 +103,7 @@ class TaskCog(commands.Cog):
|
||||
for sub in subscriptions:
|
||||
await self.process_subscription(api, session, sub)
|
||||
|
||||
log.info("Finished subscription task, time elapsed: %s", process_time() - time)
|
||||
log.info("Finished subscription task, time elapsed: %s", process_time() - time)
|
||||
|
||||
async def get_subscriptions(self, api, guild_ids: list[int], page: int):
|
||||
|
||||
@ -129,114 +130,73 @@ class TaskCog(commands.Cog):
|
||||
log.warning("No channels to send this to")
|
||||
return
|
||||
|
||||
filters = [await api.get_filter(filter_id) for filter_id in sub.filters]
|
||||
log.debug("found %s filter(s)", len(filters))
|
||||
|
||||
unparsed_content = await get_unparsed_feed(sub.url, session)
|
||||
parsed_content = parse(unparsed_content)
|
||||
source = Source.from_parsed(parsed_content)
|
||||
articles = source.get_latest_articles(3)
|
||||
articles = source.get_latest_articles(10)
|
||||
|
||||
if not articles:
|
||||
log.debug("No articles found")
|
||||
|
||||
for article in articles:
|
||||
await self.process_article(session, channels, article)
|
||||
await self.process_article(api, session, sub.id, filters, channels, article)
|
||||
|
||||
async def process_article(self, session, channels: list[SubChannel], article: Article):
|
||||
embed = await article.to_embed(session)
|
||||
async def process_article(self, api, session, sub_id: int, filters: list[dict], channels: list[SubChannel], article: Article):
    """
    Filter, track and publish a single article.

    The article is dropped when any filter matches it, or when the API call that
    records it as tracked content fails. Otherwise its embed is sent to every channel.

    Parameters
    ----------
    api
        Client object providing `create_tracked_content`.
    session
        Passed through to `article.to_embed`.
    sub_id : int
        Subscription ID stored alongside the tracked content.
    filters : list[dict]
        Filter definitions, each one checked through `filter_article`.
    channels : list[SubChannel]
        Destinations that receive the embed.
    article : Article
        The article being processed.
    """

    log.debug("processing article '%s' '%s'", article.guid, article.title)

    # `any` short-circuits on the first matching filter, so no further filters are tried.
    if any(self.filter_article(candidate, article) for candidate in filters):
        log.debug("filter matched, skipping article")
        return

    tracked_fields = {
        "guid": article.guid,
        "title": article.title,
        "url": article.url,
        "subscription": sub_id,
    }

    try:
        await api.create_tracked_content(**tracked_fields)
    except aiohttp.ClientResponseError as error:
        log.error(error)

        if error.status == 400:
            log.debug("It looks like this article already exists, skipping")

        # Any failed tracking request stops here, so the article is not sent.
        return
    else:
        log.debug("successfully tracked %s", article.guid)

    log.debug("attempting to send embed to %s channel(s)", len(channels))

    # Build the embed once and reuse it for every destination.
    embed = await article.to_embed(session)
    for destination in channels:
        await destination.send(embed=embed)
|
||||
|
||||
def filter_article(self, _filter: dict, article: "Article") -> bool:
    """
    Returns True if article should be ignored due to filters.

    Parameters
    ----------
    _filter : dict
        Filter definition. The "name", "keywords" (comma-separated string)
        and "regex" keys are read.
    article : Article
        Article whose `title` and `description` are tested.

    Returns
    -------
    bool
        True when a keyword occurs in, or the regex matches, the title or
        the description; False otherwise.

    Raises
    ------
    AssertionError
        If the filter defines both keywords and a regex.
    """

    log.debug("trying filter '%s'", _filter["name"])

    # "".split(",") yields [""], and "" is a substring of every string, so blank
    # entries must be dropped or a filter without keywords would match every
    # article. Whitespace inside the remaining keywords is left untouched.
    raw_keywords = _filter["keywords"] or ""
    keywords = [word for word in raw_keywords.split(",") if word.strip()]
    regex_pattern = _filter["regex"]

    # Raised explicitly because a bare `assert` is stripped under `python -O`.
    if keywords and regex_pattern:
        raise AssertionError("Keywords and Regex used, only 1 can be used.")

    for word in keywords:
        if word in article.title or word in article.description:
            log.debug("keyword '%s' found in title or description", word)
            return True

    if regex_pattern:
        regex = re.compile(regex_pattern)
        # `search` returns a Match object or None; coerce to honour `-> bool`.
        return bool(regex.search(article.title) or regex.search(article.description))

    return False
|
||||
|
||||
async def setup(bot):
|
||||
"""
|
||||
|
Loading…
x
Reference in New Issue
Block a user