diff --git a/src/extensions/rss.py b/src/extensions/rss.py
index b133795..a4e9904 100644
--- a/src/extensions/rss.py
+++ b/src/extensions/rss.py
@@ -104,7 +104,7 @@ class RssCog(commands.Cog):
                 RssSourceModel.discord_server_id == inter.guild_id,
                 RssSourceModel.nick.ilike(f"%{nickname}%")
             )
-            query = select(RssSourceModel).where(whereclause)
+            query = select(RssSourceModel).where(whereclause).order_by(RssSourceModel.nick)
             result = await database.session.execute(query)
             sources = [
                 Choice(name=rss.nick, value=rss.rss_url)
@@ -313,32 +313,7 @@ class RssCog(commands.Cog):
         # 8. The title cannot hyperlink URLs
         # 10. The embed and author URL must be valid, these will be checked.
 
-        embeds = []
-        for article in articles:
-            md_description = markdownify(article.description, strip=("img",))
-            article_description = textwrap.shorten(md_description, 4096)
-
-            md_title = markdownify(article.title, strip=("img", "a"))
-            article_title = textwrap.shorten(md_title, 256)
-
-            embed = Embed(
-                title=article_title,
-                description=article_description,
-                url=article.url if validators.url(article.url) else None,
-                timestamp=article.published,
-                colour=Colour.brand_red()
-            )
-
-            thumbail_url = await article.get_thumbnail_url()
-            thumbail_url = thumbail_url if validators.url(thumbail_url) else None
-            embed.set_thumbnail(url=source.icon_url if validators.url(source.icon_url) else None)
-            embed.set_image(url=thumbail_url)
-            embed.set_footer(text=article.author)
-            embed.set_author(
-                name=source.name,
-                url=source.url if validators.url(source.url) else None,
-            )
-            embeds.append(embed)
+        embeds = [await article.to_embed() for article in articles]
 
         async with DatabaseManager() as database:
             query = insert(SentArticleModel).values([
diff --git a/src/feed.py b/src/feed.py
index c08011d..b003ccb 100644
--- a/src/feed.py
+++ b/src/feed.py
@@ -8,6 +8,10 @@ from dataclasses import dataclass
 from datetime import datetime
+from textwrap import shorten
 
 import aiohttp
+import validators
 from bs4 import BeautifulSoup as bs4
+from discord import Colour, Embed
 from feedparser import FeedParserDict, parse
+from markdownify import markdownify
 
@@ -24,9 +28,10 @@ class Article:
     url: str | None
     published: datetime | None
     author: str | None
+    source: "Source"
 
     @classmethod
-    def from_entry(cls, entry:FeedParserDict):
+    def from_entry(cls, source: "Source", entry: FeedParserDict):
         """Create an Article from an RSS feed entry.
 
         Parameters
@@ -50,7 +55,8 @@ class Article:
             description=entry.get("description"),
             url=entry.get("link"),
             published=published,
-            author = entry.get("author")
+            author=entry.get("author"),
+            source=source,
         )
 
     async def get_thumbnail_url(self) -> str | None:
@@ -74,7 +80,56 @@ class Article:
 
         soup = bs4(html, "html.parser")
         image_element = soup.select_one("meta[property='og:image']")
-        return image_element.get("content") if image_element else None
+        if not image_element:
+            return None
+
+        image_content = image_element.get("content")
+        return image_content if validators.url(image_content) else None
+
+    async def to_embed(self) -> Embed:
+        """Create and return a Discord Embed object from the article.
+
+        HTML in the title and description is converted to Markdown and
+        the text is shortened; URLs that fail validation are left out.
+
+        Returns
+        -------
+        Embed
+            A Discord Embed object representing the article.
+        """
+
+        log.debug("Creating embed from article: %s", self)
+
+        # Replace HTML with Markdown, and shorten text. An entry may lack a
+        # title or description, so those are left unset rather than passing
+        # None to markdownify.
+        title = desc = None
+        if self.title:
+            title = shorten(markdownify(self.title, strip=["img", "a"]), 256)
+        if self.description:
+            desc = shorten(markdownify(self.description, strip=["img"]), 4096)
+        author = shorten(self.source.name, 256)
+
+        # Validate URLs; any that fail are replaced with None.
+        embed_url = self.url if validators.url(self.url) else None
+        author_url = self.source.url if validators.url(self.source.url) else None
+        icon_url = self.source.icon_url if validators.url(self.source.icon_url) else None
+        thumb_url = await self.get_thumbnail_url()  # validation done inside func
+
+        embed = Embed(
+            title=title,
+            description=desc,
+            timestamp=self.published,
+            url=embed_url,
+            colour=Colour.from_str("#3498DB")
+        )
+
+        embed.set_thumbnail(url=icon_url)
+        embed.set_image(url=thumb_url)
+        embed.set_author(url=author_url, name=author)
+        embed.set_footer(text=self.author)
+
+        return embed
 
 
 @dataclass
@@ -127,7 +182,7 @@ class Source:
         log.debug("Fetching latest articles from %s, max=%s", self, max)
 
         return [
-            Article.from_entry(entry)
+            Article.from_entry(self, entry)
             for i, entry in enumerate(self.feed.entries)
             if i < max
         ]