PYRSS-Bot/src/feed.py
2023-12-15 23:13:39 +00:00

123 lines
2.9 KiB
Python

import json
import logging
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
import aiohttp
from bs4 import BeautifulSoup as bs4
from feedparser import FeedParserDict, parse
# Module-level logger, named after this module via ``__name__``.
log = logging.getLogger(__name__)
class Feeds(Enum):
    """RSS feed URLs known to this module; each member's value is the URL."""

    THE_UPPER_LIP = "https://theupperlip.co.uk/rss"
    THE_BABYLON_BEE = "https://babylonbee.com/feed"
    BBC_NEWS = "https://feeds.bbci.co.uk/news/rss.xml"
@dataclass
class Source:
    """Descriptive metadata for a feed, bundled with the parsed feed itself.

    ``name``, ``url`` and ``icon_url`` are ``None`` when the corresponding
    key is absent from the parsed feed.
    """

    name: str | None
    url: str | None
    icon_url: str | None
    feed: FeedParserDict

    @classmethod
    def from_parsed(cls, feed: FeedParserDict):
        """Build a ``Source`` from a parsed feed.

        Args:
            feed: Mapping-like parse result. ``channel.title`` and
                ``channel.link`` supply the name and URL;
                ``feed.image.href`` supplies the icon URL.

        Returns:
            A ``Source`` that keeps a reference to ``feed``.
        """
        channel = feed.get("channel", {})
        image = feed.get("feed", {}).get("image", {})
        return cls(
            name=channel.get("title"),
            url=channel.get("link"),
            icon_url=image.get("href"),
            feed=feed,
        )

    def get_latest_articles(self, max: int) -> list:
        """Return at most ``max`` articles, taken from the front of the entries.

        Entries are used in the order they appear in ``self.feed.entries``;
        no sorting is performed here.

        Args:
            max: Upper bound on the number of articles. Zero or a negative
                value yields an empty list.

        Returns:
            A list of ``Article`` objects built with ``Article.from_entry``.
        """
        entries = self.feed.entries
        if max <= 0:
            return []
        return [Article.from_entry(entry) for entry in entries[:max]]
@dataclass
class Article:
    """A single item taken from a parsed feed.

    Every field is optional: a key missing from the feed entry is stored as
    ``None``.
    """

    title: str | None
    description: str | None
    url: str | None
    published: datetime | None
    author: str | None

    @classmethod
    def from_parsed(cls, feed: FeedParserDict) -> "Article":
        """Build an article from the first entry of a parsed feed.

        Args:
            feed: Parse result exposing an ``entries`` sequence.

        Returns:
            The ``Article`` built from ``feed.entries[0]``.

        Raises:
            IndexError: If ``feed.entries`` is empty.
        """
        return cls.from_entry(feed.entries[0])

    @classmethod
    def from_entry(cls, entry: FeedParserDict) -> "Article":
        """Build an article from one feed entry.

        Args:
            entry: Mapping-like entry; the keys ``title``, ``description``,
                ``link``, ``author`` and ``published_parsed`` are read.

        Returns:
            An ``Article`` whose ``published`` is a naive ``datetime`` built
            from ``published_parsed``, or ``None`` when that key is missing
            or empty.
        """
        published_parsed = entry.get("published_parsed")
        # feedparser documents ``*_parsed`` dates as 9-field time tuples
        # (year, month, day, hour, minute, second, weekday, yearday, isdst).
        # Only the first six line up with datetime's positional arguments; a
        # seventh value would be misread as microseconds.
        published = datetime(*published_parsed[:6]) if published_parsed else None
        return cls(
            title=entry.get("title"),
            description=entry.get("description"),
            url=entry.get("link"),
            published=published,
            author=entry.get("author"),
        )

    async def get_thumbnail_url(self) -> str | None:
        """Fetch the article page and return its Open Graph image URL.

        Returns:
            The ``content`` attribute of the page's
            ``<meta property="og:image">`` element, or ``None`` when the
            article has no URL or the page has no such element.
        """
        if not self.url:
            # Nothing to fetch; report "no thumbnail" rather than handing
            # an empty URL to the HTTP client.
            return None

        async with aiohttp.ClientSession() as session:
            async with session.get(self.url) as response:
                html = await response.text()

        # Parse the thumbnail for the news story
        soup = bs4(html, "html.parser")
        image_element = soup.select_one("meta[property='og:image']")
        return image_element.get("content") if image_element else None
def get_source(rss_url: str) -> Source:
    """Parse the feed at ``rss_url`` and wrap the result in a ``Source``.

    Args:
        rss_url: Location of the feed, passed straight to ``parse``.

    Returns:
        The ``Source`` built by ``Source.from_parsed``.
    """
    # NOTE(review): ``parse`` is called synchronously here; if this runs on
    # an asyncio event loop it presumably blocks during the fetch - confirm.
    return Source.from_parsed(parse(rss_url))
def get_test():
    """Debug helper: parse the ``THE_UPPER_LIP`` feed, print it, return it.

    Returns:
        The parse result for ``Feeds.THE_UPPER_LIP``, after printing it to
        stdout as indented JSON.
    """
    parsed = parse(Feeds.THE_UPPER_LIP.value)
    # ``default=str``: a failed or malformed fetch leaves an exception object
    # under ``bozo_exception``, which ``json`` cannot encode. Stringify such
    # values so the dump still prints instead of raising TypeError.
    print(json.dumps(parsed, indent=4, default=str))
    return parsed