# PYRSS-Bot/src/feed.py


import json
import logging
from enum import Enum
from dataclasses import dataclass
from datetime import datetime

import aiohttp
from bs4 import BeautifulSoup as bs4
from feedparser import FeedParserDict, parse

log = logging.getLogger(__name__)


class Feeds(Enum):
    """RSS feed URLs, one member per publication."""

    THE_UPPER_LIP = "https://theupperlip.co.uk/rss"
    THE_BABYLON_BEE = "https://babylonbee.com/feed"
    BBC_NEWS = "https://feeds.bbci.co.uk/news/rss.xml"


@dataclass
class Source:
    """A parsed RSS feed together with its channel-level metadata.

    Attributes:
        name: Channel title.
        url: Channel link.
        icon_url: ``href`` of the channel image, or ``None`` when the feed
            does not declare an image.
        feed: The parsed feed the other fields were read from.
    """

    name: str
    url: str
    icon_url: str | None
    feed: FeedParserDict

    @classmethod
    def from_parsed(cls, feed: FeedParserDict):
        """Build a ``Source`` from an already parsed feed.

        Args:
            feed: Parsed feed exposing ``channel.title`` and ``channel.link``.

        Returns:
            A new ``Source`` that keeps a reference to ``feed``.

        Raises:
            AttributeError: If the channel title or link is missing.
        """
        # The channel image is optional; look it up defensively so that a
        # feed without one yields ``icon_url=None`` instead of raising.
        image = feed.feed.get("image") or {}

        return cls(
            name=feed.channel.title,
            url=feed.channel.link,
            icon_url=image.get("href"),
            feed=feed,
        )

    def get_latest_article(self):
        """Return the ``Article`` that ``Article.from_parsed`` builds from this feed."""
        return Article.from_parsed(self.feed)


@dataclass
class Article:
    """A single entry taken from a parsed feed.

    Attributes:
        title: Entry title.
        description: Entry description.
        url: Link to the entry.
        published: Publication time built from the entry's
            ``published_parsed`` time tuple (naive ``datetime``).
        author: Entry author, or ``None`` when the entry has none.
    """

    title: str
    description: str
    url: str
    published: datetime
    author: str | None

    @classmethod
    def from_parsed(cls, feed: FeedParserDict):
        """Build an ``Article`` from the first entry of a parsed feed.

        Args:
            feed: Parsed feed whose ``entries`` list is read.

        Returns:
            An ``Article`` describing ``feed.entries[0]``.

        Raises:
            IndexError: If ``feed.entries`` is empty.
        """
        entry = feed.entries[0]

        # Only the first six fields of the time tuple (year .. second) are
        # datetime arguments. Slicing any further would pass the weekday as
        # the ``microsecond`` argument and corrupt the timestamp.
        published = datetime(*entry.published_parsed[:6])

        return cls(
            title=entry.title,
            description=entry.description,
            url=entry.link,
            published=published,
            author=entry.get("author"),
        )

    async def get_thumbnail_url(self):
        """Fetch the article page and return its Open Graph image URL.

        Downloads ``self.url`` and looks for a
        ``<meta property='og:image'>`` element in the returned HTML.

        Returns:
            The element's ``content`` attribute, or ``None`` when the page
            has no such element.
        """

        async with aiohttp.ClientSession() as session:
            async with session.get(self.url) as response:
                html = await response.text()

        # Parse the thumbnail for the news story
        soup = bs4(html, "html.parser")
        image_element = soup.select_one("meta[property='og:image']")
        return image_element.get("content") if image_element else None


def get_source(feed: Feeds) -> Source:
    """Parse the given feed and wrap the result in a ``Source``.

    Args:
        feed: Member of ``Feeds`` whose value is the feed URL to parse.

    Returns:
        The ``Source`` built from the parsed feed.
    """
    # Parse the URL of the requested feed rather than a fixed address, so
    # that the ``feed`` argument actually selects what is loaded.
    parsed_feed = parse(feed.value)
    return Source.from_parsed(parsed_feed)


def get_test():
    """Parse the ``THE_UPPER_LIP`` feed, print it as JSON and return it.

    Returns:
        The parsed feed exactly as returned by ``parse``.
    """
    result = parse(Feeds.THE_UPPER_LIP.value)
    dumped = json.dumps(result, indent=4)
    print(dumped)
    return result