Working articles

This commit is contained in:
Corban-Lee Jones 2023-12-13 23:22:33 +00:00
parent 1259fe9bc4
commit de3fd84a9e
7 changed files with 53 additions and 84 deletions

View File

@ -1,10 +1,12 @@
aiohttp==3.9.1
aiopg==1.4.0
aiosignal==1.3.1
aiosqlite==0.19.0
async-timeout==4.0.3
asyncpg==0.29.0
attrs==23.1.0
beautifulsoup4==4.12.2
bs4==0.0.1
discord.py==2.3.2
feedparser==6.0.11
frozenlist==1.4.0
@ -12,6 +14,7 @@ greenlet==3.0.2
idna==3.6
markdownify==0.11.6
multidict==6.0.4
psycopg2==2.9.9
psycopg2-binary==2.9.9
python-dotenv==1.0.0
sgmllib3k==1.0.0

View File

@ -7,6 +7,9 @@ from pathlib import Path
from discord import Intents
from discord.ext import commands
from sqlalchemy import insert
from db import DatabaseManager, AuditModel
log = logging.getLogger(__name__)

View File

@ -12,7 +12,7 @@ from discord.ext import commands, tasks
from sqlalchemy import insert, select
from db import DatabaseManager, AuditModel
from feed import Article, Source, Parser, Feeds, get_source
from feed import Feeds, get_source
log = logging.getLogger(__name__)
@ -31,55 +31,44 @@ class Test(commands.Cog):
# discord.py listener: fires once the gateway connection is ready; used here
# only to log that this cog finished loading.
async def on_ready(self):
log.info(f"{self.__class__.__name__} cog is ready")
@app_commands.command(name="test-latest-article")
# @app_commands.choices(source=[
#     app_commands.Choice(name="The Babylon Bee", value=Feeds.THE_BABYLON_BEE),
#     app_commands.Choice(name="The Upper Lip", value=Feeds.THE_UPPER_LIP),
#     app_commands.Choice(name="BBC News", value=Feeds.BBC_NEWS),
# ])
async def test_bee(self, inter: Interaction, source: Feeds):
    """Reply with an embed showing the latest article from the chosen feed.

    Args:
        inter: the slash-command interaction to respond to.
        source: which feed to pull from (see the Feeds enum).
    """
    # Defer first: fetching and parsing the feed can exceed the 3s ack window.
    await inter.response.defer()
    await self.bot.audit("Requesting latest article.", inter.user.id)
    source = get_source(source)
    article = source.get_latest_article()
    # Strip <img> tags so the embed description stays text-only markdown.
    md_description = markdownify(article.description, strip=("img",))
    # Discord caps embed descriptions at 4096 characters.
    article_description = textwrap.shorten(md_description, 4096)
    embed = Embed(
        title=article.title,
        description=article_description,
        url=article.url,
        timestamp=article.published,
    )
    embed.set_thumbnail(url=source.icon_url)
    embed.set_image(url=await article.get_thumbnail_url())
    embed.set_footer(text=article.author)
    embed.set_author(
        name=source.name,
        url=source.url,
        icon_url=source.icon_url,
    )
    log.debug(article)
    log.debug(article_description)
    await inter.followup.send(embed=embed)
@app_commands.command(name="test-upperlip")
async def test_command(self, inter: Interaction):
    """Reply with an embed showing the latest article from The Upper Lip.

    Args:
        inter: the slash-command interaction to respond to.
    """
    # Defer first: fetching and parsing the feed can exceed the 3s ack window.
    await inter.response.defer()
    # BUG FIX: Interaction has no `user_id` attribute — the member id is
    # `inter.user.id` (matches the corrected call in the sibling command).
    await self.bot.audit("Requesting latest article.", inter.user.id)
    source = get_source(Feeds.THE_UPPER_LIP)
    article = source.get_latest_article()
    md_description = markdownify(article.description)
    # Discord caps embed descriptions at 4096 characters.
    article_description = textwrap.shorten(md_description, 4096)
    embed = Embed(
        title=article.title,
        description=article_description,
        url=article.url,
    )
    embed.set_author(
        name=source.name,
        url=source.url,
        icon_url=source.icon_url,
    )
    await inter.followup.send(embed=embed)
async def setup(bot):

View File

@ -1,14 +1,21 @@
import json
import logging
from enum import Enum
from dataclasses import dataclass
from datetime import datetime
import aiohttp
from bs4 import BeautifulSoup as bs4
from feedparser import FeedParserDict, parse
log = logging.getLogger(__name__)
class Feeds(Enum):
    """RSS feed URLs for the news sources the bot can pull from."""

    # The diff rendering left two THE_BABYLON_BEE assignments (old empty URL
    # plus the updated one); duplicate member names raise TypeError in an
    # Enum, so only the updated URL is kept.
    THE_UPPER_LIP = "https://theupperlip.co.uk/rss"
    THE_BABYLON_BEE = "https://babylonbee.com/feed"
    BBC_NEWS = "https://feeds.bbci.co.uk/news/rss.xml"
@dataclass
@ -40,18 +47,36 @@ class Article:
# Field annotations for the Article dataclass (the class header sits outside
# this diff hunk — see the `@ ... class Article:` hunk marker above).
title: str  # headline text from the feed entry
description: str  # summary/body HTML from the feed entry
url: str  # link to the full story
published: datetime  # publication timestamp parsed from the feed
author: str | None  # byline; None when the feed omits it
@classmethod
def from_parsed(cls, feed: FeedParserDict):
    """Build an Article from the first entry of a parsed feed.

    Assumes the feed lists entries newest-first — TODO confirm per source.

    Args:
        feed: the result of feedparser.parse().

    Returns:
        A new Article populated from the entry's title/description/link,
        published date, and (optional) author.
    """
    entry = feed.entries[0]
    # log.debug(json.dumps(entry, indent=8))
    return cls(
        title=entry.title,
        description=entry.description,
        url=entry.link,
        # published_parsed is a 9-field time.struct_time.
        # BUG FIX: the [0:-2] slice took 7 fields, feeding tm_wday into
        # datetime's microsecond argument; [:6] is year..second only.
        published=datetime(*entry.published_parsed[:6]),
        author=entry.get("author", None),
    )
async def get_thumbnail_url(self):
    """Fetch the article's page and return its Open Graph image URL.

    Returns:
        The og:image meta tag's content URL, or None when the page has none.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(self.url) as response:
            page = await response.text()
    # Parse the thumbnail for the news story out of the page's metadata.
    soup = bs4(page, "html.parser")
    meta_tag = soup.select_one("meta[property='og:image']")
    if meta_tag is None:
        return None
    return meta_tag.get("content")
def get_source(feed: Feeds) -> Source:
"""

View File

@ -1,2 +0,0 @@
from .parser import Article, Source, Parser
from .feed import Feeds, get_source

View File

@ -1,49 +0,0 @@
import textwrap
from datetime import datetime
from dataclasses import dataclass
import feedparser
from .feed import Feeds
class Parser:
    """Pulls the newest entry from a single RSS feed URL."""

    def __init__(self, feed: Feeds):
        # Feeds members carry the feed URL as their value.
        self.feed_url = feed.value

    def get_latest(self):
        """Parse the feed and return its first entry as an Article."""
        parsed = feedparser.parse(self.feed_url)
        latest = parsed.entries[0]
        return Article(
            title=latest.title,
            description=latest.description,
            content="",  # textwrap.shorten(100, latest.content)
            url=latest.link,
            thumbnail_url="",
        )

    def get_source(self):
        # NOTE(review): Source is a dataclass with required fields, so a
        # bare Source() raises TypeError — confirm this was ever called.
        return Source()
@dataclass
class Article:
    """A single news story pulled from a feed."""

    title: str  # headline text
    description: str  # summary/body HTML from the feed
    content: str  # full text; Parser.get_latest currently leaves this empty
    url: str  # link to the full story
    thumbnail_url: str  # image URL for embeds; may be empty
@dataclass
class Source:
    """Metadata describing a news-feed provider."""

    name: str  # human-readable provider name
    description: str  # short blurb about the provider
    url: str  # provider homepage
    icon_url: str  # small logo for embed author/thumbnail
    last_updated: datetime  # when the feed was last refreshed

View File

@ -39,7 +39,7 @@ async def main():
async with DiscordBot(BASE_DIR) as bot:
await bot.load_extensions()
await bot.start(token)
await bot.start(token, reconnect=True)
if __name__ == "__main__":