working on tasks rewrite

This commit is contained in:
Corban-Lee Jones 2024-07-03 16:40:35 +01:00
parent e9c805f8ed
commit bc93b37b9a

src/extensions/new_tasks.py Normal file (+152)

@@ -0,0 +1,152 @@
"""
task flow
=-=-=-=-=
1. every 10 minutes
2. get all bot guild_ids
3. get all subscriptions against known guild ids (account for pagination of 25)
4. iterate through and process each subscription
5. get all articles from subscription
6. iterate through and process each article
"""
import logging
import re

import aiohttp
from discord.ext import commands, tasks

# Project-internal helpers; exact import paths assumed, matching usage below.
from api import API
from feed import RSSFeed, RSSItem, get_unparsed_feed, parse

log = logging.getLogger(__name__)


class Cog:
    """Planning sketch of the task flow above; superseded by TaskCog below."""

    async def subscription_task(self):
        subscriptions = await self.get_subscriptions()
        for subscription in subscriptions:
            await self.process_subscription(subscription)

    async def get_subscriptions(self):
        # return subs from the API, handling pagination
        ...

    async def process_subscription(self, subscription):
        articles = subscription.get_articles()
        for article in articles:
            await self.process_article(article)

    async def process_article(self, article):
        # validate the article, then:
        if await self.track_article(article):
            await self.send_article(article)

    async def track_article(self, article):
        ...

    async def send_article(self, article):
        ...


class TaskCog(commands.Cog):
    """
    Tasks cog for PYRSS.
    """

    def __init__(self, bot):
        super().__init__()
        self.bot = bot
        self.subscription_task.start()

    @tasks.loop(minutes=10)  # "every 10 minutes", per the task flow above
    async def subscription_task(self):
        async with aiohttp.ClientSession() as session:
            api = API(self.bot.api_token, session)
            subscriptions = await self.get_subscriptions(api)
            await self.process_subscriptions(api, subscriptions)

        # articles = [*(await self.get_articles(api, sub)) for sub in subscriptions]
        # await self.send_articles(api, articles)

    async def get_subscriptions(self, api) -> list:
        """Fetch subscriptions for all of the bot's guilds, walking the paginated API."""

        guild_ids = [guild.id for guild in self.bot.guilds]
        subscriptions = []

        page = 1
        while True:
            try:
                page_data = (await api.get_subscriptions(server__in=guild_ids, page=page))[0]
            except aiohttp.ClientResponseError as error:
                if error.status == 404:
                    break  # past the last page
                log.error("HTTP error while gathering page data: %s", error)
                break
            except Exception as error:
                log.error("Exception while gathering page data: %s", error)
                break

            subscriptions.extend(page_data)
            page += 1

        return subscriptions
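
    # NOTE: the `[0]` above assumes api.get_subscriptions returns a
    # (results, total_count) pair for each page of 25 items; that shape is
    # inferred from the indexing, not from an API spec.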

    async def process_subscriptions(self, api, subscriptions):
        """Fetch and parse the feed for each active subscription, then process its items."""

        for sub in subscriptions:
            if not sub.active or not sub.channel_count:
                continue

            unparsed_feed = await get_unparsed_feed(sub.url, api.session)
            parsed_feed = await parse(unparsed_feed)
            rss_feed = RSSFeed.from_parsed_feed(parsed_feed)
            await self.process_items(api, sub, rss_feed)

    async def process_items(self, api, sub, feed):
        """Track every item in the feed, and post items that pass the filters."""

        channels = [self.bot.get_channel(channel.channel_id) for channel in await sub.get_channels()]
        filters = [await api.get_filter(filter_id) for filter_id in sub.filters]

        for item in feed.items:
            blocked = any(self.filter_item(_filter, item) for _filter in filters)
            mutated_item = item.create_mutated_copy(sub.mutators)

            for channel in channels:
                if channel is None:
                    continue  # channel was deleted, or the bot can't see it

                await self.mark_tracked_item(api, sub, item, channel.id, blocked)
                if not blocked:
                    await channel.send(embed=mutated_item.to_embed(sub, feed))

    def filter_item(self, _filter: dict, item: RSSItem) -> bool:
        """
        Returns True if the item should be ignored due to this filter.

        Deliberately synchronous: it does no I/O, so `process_items` can call
        it directly inside `any(...)`.
        """
        keywords = [word.strip() for word in _filter["keywords"].split(",") if word.strip()]
        regex_pattern = _filter["regex"]
        is_whitelist = _filter["whitelist"]

        log.debug(
            "trying filter '%s', keywords '%s', regex '%s', is whitelist: '%s'",
            _filter["name"], keywords, regex_pattern, is_whitelist
        )

        assert not (keywords and regex_pattern), "A filter may use keywords or a regex, not both."

        match_found = any(word in item.title or word in item.description for word in keywords)

        if regex_pattern:
            regex = re.compile(regex_pattern)
            match_found = bool(regex.search(item.title) or regex.search(item.description))

        # A whitelist filter blocks items that do NOT match; a blacklist blocks those that do.
        return not match_found if is_whitelist else match_found
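
    # Illustrative filter shape assumed by filter_item (field names taken from
    # its lookups; the values are made up):
    #
    #   {
    #       "name": "no politics",
    #       "keywords": "election,senate",
    #       "regex": "",
    #       "whitelist": False,
    #   }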

    async def mark_tracked_item(self, api, sub, item, channel_id, blocked):
        """Record the item as tracked so it isn't re-sent; a 409 means it's already tracked."""
        try:
            await api.create_tracked_content(
                guid=item.guid,
                title=item.title,
                url=item.url,
                subscription=sub.id,
                channel_id=channel_id,
                blocked=blocked
            )
        except aiohttp.ClientResponseError as error:
            if error.status == 409:  # already tracked, safe to ignore
                log.debug(error)
            else:
                log.error(error)
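

# Extension entry point — a minimal sketch assuming discord.py 2.x's async
# extension loading; PYRSS may wire its cogs up differently.
async def setup(bot):
    await bot.add_cog(TaskCog(bot))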