123 lines
2.9 KiB
Python
123 lines
2.9 KiB
Python
|
|
import json
|
|
import logging
|
|
from enum import Enum
|
|
from dataclasses import dataclass
|
|
from datetime import datetime
|
|
|
|
import aiohttp
|
|
from bs4 import BeautifulSoup as bs4
|
|
from feedparser import FeedParserDict, parse
|
|
|
|
log = logging.getLogger(__name__)
|
|
|
|
|
|
class Feeds(Enum):
    """RSS feed URLs for the publications this module knows about."""

    THE_UPPER_LIP = "https://theupperlip.co.uk/rss"
    THE_BABYLON_BEE = "https://babylonbee.com/feed"
    BBC_NEWS = "https://feeds.bbci.co.uk/news/rss.xml"
|
|
|
|
|
|
@dataclass
class Source:
    """A parsed RSS/Atom feed plus the channel metadata needed to present it.

    All metadata fields come from the parsed feed and are None when the
    feed omits them.
    """

    name: str | None      # channel title
    url: str | None       # channel homepage link
    icon_url: str | None  # channel image/icon URL, if the feed publishes one
    feed: FeedParserDict  # the full parsed feed; entries live here

    @classmethod
    def from_parsed(cls, feed: FeedParserDict) -> "Source":
        """Build a Source from a feedparser result.

        Missing channel fields resolve to None rather than raising.
        """
        channel = feed.get("channel", {})
        return cls(
            name=channel.get("title"),
            url=channel.get("link"),
            # NOTE(review): icon is read via the "feed" key while title/link
            # use "channel"; feedparser aliases those two keys, so this is
            # equivalent for real parsed feeds — confirm if plain dicts are
            # ever passed in.
            icon_url=feed.get("feed", {}).get("image", {}).get("href"),
            feed=feed,
        )

    def get_latest_articles(self, max: int) -> list["Article"]:
        """Return up to `max` of the feed's newest entries as Articles.

        A non-positive `max` yields an empty list. (`max` keeps its
        builtin-shadowing name for keyword-caller compatibility.)
        """
        limit = max if max > 0 else 0
        return [Article.from_entry(entry) for entry in self.feed.entries[:limit]]
|
|
|
|
|
|
@dataclass
class Article:
    """One feed entry, plus helpers to build it from feedparser data."""

    title: str | None
    description: str | None
    url: str | None
    published: datetime | None  # naive datetime from published_parsed, or None
    author: str | None

    @classmethod
    def from_parsed(cls, feed: FeedParserDict) -> "Article":
        """Build an Article from the first entry of a parsed feed.

        Raises IndexError if the feed has no entries. Delegates to
        from_entry so the field mapping lives in exactly one place.
        """
        return cls.from_entry(feed.entries[0])

    @classmethod
    def from_entry(cls, entry: FeedParserDict) -> "Article":
        """Build an Article from a single feed entry.

        Missing fields become None. `published_parsed` is a
        time.struct_time; only its first six fields (year..second) are
        valid datetime arguments — the previous `[0:-2]` slice passed
        tm_wday through as the microsecond.
        """
        published_parsed = entry.get("published_parsed")
        published = datetime(*published_parsed[:6]) if published_parsed else None

        return cls(
            title=entry.get("title"),
            description=entry.get("description"),
            url=entry.get("link"),
            published=published,
            author=entry.get("author"),
        )

    async def get_thumbnail_url(self):
        """Fetch the article page and return its og:image URL, or None.

        Performs network I/O via aiohttp; parsing happens after the
        session is closed since the HTML is already in memory.
        """
        async with aiohttp.ClientSession() as session:
            async with session.get(self.url) as response:
                html = await response.text()

        # Parse the thumbnail for the news story
        soup = bs4(html, "html.parser")
        image_element = soup.select_one("meta[property='og:image']")
        return image_element.get("content") if image_element else None
|
|
|
|
|
|
def get_source(rss_url: str) -> Source:
    """Fetch and parse the RSS feed at `rss_url` and wrap it in a Source.

    feedparser's `parse` accepts a URL and downloads it (network I/O).
    Channel fields the feed omits are None on the returned Source.
    """

    parsed_feed = parse(rss_url)
    return Source.from_parsed(parsed_feed)
|
|
|
|
|
|
def get_test():
    """Debug helper: fetch THE_UPPER_LIP, pretty-print it, and return it.

    Performs network I/O via feedparser.
    """
    parsed_feed = parse(Feeds.THE_UPPER_LIP.value)
    print(json.dumps(parsed_feed, indent=4))
    return parsed_feed
|