PYRSS-Bot/src/feed.py

95 lines
2.1 KiB
Python

import json
import logging
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
import aiohttp
from bs4 import BeautifulSoup as bs4
from feedparser import FeedParserDict, parse
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
class Feeds(Enum):
    """RSS feeds known to this module.

    Each member's ``value`` is the URL of the feed as a string.
    """

    THE_UPPER_LIP = "https://theupperlip.co.uk/rss"
    THE_BABYLON_BEE = "https://babylonbee.com/feed"
    BBC_NEWS = "https://feeds.bbci.co.uk/news/rss.xml"
@dataclass
class Source:
    """A parsed feed together with its channel-level metadata."""

    name: str
    url: str
    # None when the feed does not provide a channel image.
    icon_url: str | None
    # The complete parse result, kept so entries can be read from it later.
    feed: FeedParserDict

    @classmethod
    def from_parsed(cls, feed: FeedParserDict):
        """Build a ``Source`` from an already-parsed feed.

        Args:
            feed: A parse result exposing channel metadata under ``feed``.

        Returns:
            A ``Source`` holding the channel title, link and image URL
            (``None`` if the channel has no image or the image has no
            ``href``), plus the parse result itself.

        Raises:
            Whatever attribute-lookup error the parse result raises when
            the channel has no ``title`` or ``link``.
        """
        channel = feed.feed
        # Not every feed carries a channel image; treat it as optional
        # instead of failing the whole construction on its absence.
        image = channel.get("image")
        return cls(
            name=channel.title,
            url=channel.link,
            icon_url=image.get("href") if image else None,
            feed=feed,
        )

    def get_latest_article(self):
        """Return an ``Article`` built from the first entry of the feed.

        The first entry is used as-is; no sorting by date is performed here.
        """
        return Article.from_parsed(self.feed)
@dataclass
class Article:
    """A single feed entry reduced to the fields this module uses."""

    title: str
    description: str
    url: str
    published: datetime
    author: str | None

    @classmethod
    def from_parsed(cls, feed: FeedParserDict):
        """Build an ``Article`` from the first entry of a parsed feed.

        Args:
            feed: A parse result exposing a non-empty ``entries`` sequence.

        Returns:
            An ``Article`` for ``feed.entries[0]``. ``author`` is ``None``
            when the entry has no author. ``published`` is a naive
            ``datetime`` built from the entry's ``published_parsed`` fields.
            NOTE(review): feedparser documents parsed dates as UTC; confirm
            callers treat ``published`` accordingly.

        Raises:
            IndexError: If the feed has no entries.
        """
        entry = feed.entries[0]
        # published_parsed is a 9-field time tuple; only the first six
        # (year, month, day, hour, minute, second) are datetime arguments.
        # Taking a seventh would pass the weekday as microseconds.
        published = datetime(*entry.published_parsed[:6])
        return cls(
            title=entry.title,
            description=entry.description,
            url=entry.link,
            published=published,
            author=entry.get("author", None),
        )

    async def get_thumbnail_url(self):
        """Fetch the article page and return its Open Graph image URL.

        Downloads ``self.url`` and looks for a
        ``<meta property="og:image">`` element in the HTML.

        Returns:
            The element's ``content`` attribute, or ``None`` when the page
            has no such element.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(self.url) as response:
                html = await response.text()

        # Parse the thumbnail for the news story
        soup = bs4(html, "html.parser")
        image_element = soup.select_one("meta[property='og:image']")
        return image_element.get("content") if image_element else None
def get_source(feed: Feeds) -> Source:
    """Parse the given feed and wrap the result in a ``Source``.

    Args:
        feed: The ``Feeds`` member whose URL (its ``value``) is parsed.

    Returns:
        The ``Source`` built from the parse result.

    NOTE(review): ``parse`` is called synchronously here; confirm that is
    acceptable if this is invoked from async code.
    """
    # Use the requested feed's URL; previously a fixed URL was parsed and
    # the ``feed`` argument was ignored.
    parsed_feed = parse(feed.value)
    return Source.from_parsed(parsed_feed)
def get_test():
    """Debug helper: parse The Upper Lip feed, print it as JSON, return it.

    Returns:
        The parse result for ``Feeds.THE_UPPER_LIP``.
    """
    parsed = parse(Feeds.THE_UPPER_LIP.value)
    # default=str: a parse result can hold values json cannot encode (for
    # example an exception object recorded for a malformed feed). This is a
    # diagnostic dump, so stringify those rather than abort with TypeError.
    print(json.dumps(parsed, indent=4, default=str))
    return parsed