From 0297fb12b605f6b29119484f3ffb0482d72d5d42 Mon Sep 17 00:00:00 2001
From: Corban-Lee Jones
Date: Mon, 12 May 2025 17:25:41 +0100
Subject: [PATCH] working on rss processing

---
 package.json      |   2 +
 src/bot/bot.ts    |   3 ++
 src/bot/filter.ts |  56 ++++++++++++++++
 src/bot/task.ts   | 158 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 219 insertions(+)
 create mode 100644 src/bot/filter.ts
 create mode 100644 src/bot/task.ts

diff --git a/package.json b/package.json
index 87fe8c6..e254ae9 100644
--- a/package.json
+++ b/package.json
@@ -63,8 +63,10 @@
     "express": "^5.1.0",
     "jquery": "^3.7.1",
     "lodash": "^4.17.21",
+    "node-html-parser": "^7.0.1",
     "nouislider": "^15.8.1",
     "preline": "^3.0.1",
+    "rss-parser": "^3.13.0",
     "tsconfig-paths": "^4.2.0",
     "vanilla-calendar-pro": "^3.0.4",
     "winston": "^3.17.0"
diff --git a/src/bot/bot.ts b/src/bot/bot.ts
index 4b96e39..ca9e306 100644
--- a/src/bot/bot.ts
+++ b/src/bot/bot.ts
@@ -1,4 +1,5 @@
 import { Client, GatewayIntentBits, ActivityType } from "discord.js";
+import { triggerTask } from "@bot/task";
 
 export const client = new Client({
     intents: [
@@ -13,6 +14,8 @@ client.on("ready", () => {
         throw Error("Client is null");
     }
 
+    setInterval(() => triggerTask(client), 5000);
+
     client.user.setActivity("new sources", {type: ActivityType.Watching});
     console.log(`Discord Bot ${client.user.displayName} is online!`)
 });
diff --git a/src/bot/filter.ts b/src/bot/filter.ts
new file mode 100644
index 0000000..fa02904
--- /dev/null
+++ b/src/bot/filter.ts
@@ -0,0 +1,56 @@
+import { Filter, MatchingAlgorithms } from "../../generated/prisma";
+
+/**
+ * Returns true when at least one space-separated word of `input` contains
+ * `filter.value`; both sides are lower-cased when `filter.is_insensitive` is set.
+ */
+export const any = (filter: Filter, input: string) => {
+    // The needle does not depend on the current word, so normalise it once.
+    const against = filter.is_insensitive ? filter.value.toLowerCase() : filter.value;
+
+    // `for...of` yields the words themselves; `for...in` would yield the array
+    // indices ("0", "1", ...) and the filter value would never be compared to text.
+    for (const rawWord of input.split(" ")) {
+        const word = filter.is_insensitive ? rawWord.toLowerCase() : rawWord;
+        if (word.includes(against)) return true;
+    }
+
+    return false;
+};
+
+// TODO: placeholder - not implemented yet, currently reports a match for every input.
+export const all = (filter: Filter, input: string) => {
+    return true;
+};
+
+// TODO: placeholder - not implemented yet, currently reports a match for every input.
+export const literal = (filter: Filter, input: string) => {
+    return true;
+};
+
+// TODO: placeholder - not implemented yet, currently reports a match for every input.
+export const regex = (filter: Filter, input: string) => {
+    return true;
+};
+
+// TODO: placeholder - not implemented yet, currently reports a match for every input.
+export const fuzzy = (filter: Filter, input: string) => {
+    return true;
+};
+
+/**
+ * Runs the matcher selected by `filter.matching_algorithm` against `input`.
+ *
+ * @throws Error when the algorithm is not one of the cases handled below.
+ */
+export const mapAlgorithmToFunction = (filter: Filter, input: string) => {
+    switch (filter.matching_algorithm) {
+        case MatchingAlgorithms.ALL: return all(filter, input);
+        case MatchingAlgorithms.ANY: return any(filter, input);
+        case MatchingAlgorithms.EXACT: return literal(filter, input);
+        case MatchingAlgorithms.REGEX: return regex(filter, input);
+        case MatchingAlgorithms.FUZZY: return fuzzy(filter, input);
+        default:
+            throw new Error(`Unknown algorithm: ${filter.matching_algorithm}`);
+    }
+};
diff --git a/src/bot/task.ts b/src/bot/task.ts
new file mode 100644
index 0000000..aacca7f
--- /dev/null
+++ b/src/bot/task.ts
@@ -0,0 +1,158 @@
+import { Client, EmbedBuilder, Guild, HexColorString, Channel as DiscordChannel, TextChannel } from "discord.js";
+import RssParser from "rss-parser";
+import { parse as HtmlParser } from "node-html-parser";
+import { Feed, Filter, MessageStyle, Channel, MatchingAlgorithms } from "../../generated/prisma";
+import * as filters from "@bot/filter";
+import prisma from "@server/prisma";
+
+// Discord rejects embeds whose title or description exceed these lengths.
+const MAX_EMBED_TITLE_LENGTH = 256;
+const MAX_EMBED_DESCRIPTION_LENGTH = 4096;
+
+// Set while a run is in progress so a slow run is not overlapped by the next timer tick.
+let isTaskRunning = false;
+
+/**
+ * Processes the active feeds of every guild in the client's cache.
+ *
+ * Never rejects: the caller schedules it with `setInterval` and does not handle
+ * the returned promise, so failures are logged here instead.
+ */
+export const triggerTask = async (client: Client) => {
+    if (isTaskRunning) return;
+    isTaskRunning = true;
+
+    try {
+        for (const guild of client.guilds.cache.values()) {
+            await processGuild(guild, client);
+        }
+    } catch (error) {
+        console.error("RSS task run failed:", error);
+    } finally {
+        isTaskRunning = false;
+    }
+};
+
+interface ExpandedFeed extends Feed {
+    channels: Channel[],
+    filters: Filter[],
+    message_style: MessageStyle
+}
+
+/** Loads the guild's active feeds (with channels, filters and style) and processes each one. */
+const processGuild = async (guild: Guild, client: Client) => {
+    const feeds = await prisma.feed.findMany({
+        where: { guild_id: guild.id, active: true },
+        include: { channels: true, filters: true, message_style: true }
+    }) as ExpandedFeed[];
+
+    for (const feed of feeds) {
+        // Isolate failures so one broken feed does not stop the remaining ones.
+        try {
+            await processFeed(feed, client);
+        } catch (error) {
+            console.error(`Failed to process feed ${feed.url}:`, error);
+        }
+    }
+};
+
+/** Fetches and parses the feed at `url`; resolves to `undefined` when that fails. */
+const getParsedUrl = async (url: string) => {
+    const parser = new RssParser();
+
+    try {
+        // `await` is required: returning the bare promise would leave the `try`
+        // block before a rejection happens, so the `catch` would never run.
+        return await parser.parseURL(url);
+    } catch (error) {
+        console.warn(`Could not fetch or parse feed ${url}:`, error);
+        return undefined;
+    }
+};
+
+/** Parses the feed once and sends its items to every cached channel linked to it. */
+const processFeed = async (feed: ExpandedFeed, client: Client) => {
+    const parsed = await getParsedUrl(feed.url);
+    if (!parsed) return;
+
+    for (const channelId of feed.channels.map(channel => channel.channel_id)) {
+        const channel = client.channels.cache.get(channelId);
+        if (channel) await processItems(parsed.items, feed, channel, client);
+    }
+};
+
+// NOTE(review): nothing here records which items were already sent, so each run
+// re-sends every item still present in the feed - needs de-duplication.
+const processItems = async (items: RssParser.Item[], feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
+    for (const item of items) {
+        await processItem(item, feed, channel, client);
+    }
+};
+
+/** Returns `text` cut to at most `maxLength` characters, or `null` when it is missing or empty. */
+const truncate = (text: string | undefined, maxLength: number) => {
+    if (!text) return null;
+    return text.length > maxLength ? `${text.slice(0, maxLength - 3)}...` : text;
+};
+
+/** Sends `item` to `channel` as an embed if it passes every filter of the feed. */
+const processItem = async (item: RssParser.Item, feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
+    const filterResults = await Promise.all(feed.filters.map(filter => passesFilter(filter, item)));
+    if (filterResults.includes(false)) return;
+
+    // Skip channels that are not text-based before the `TextChannel` cast below.
+    if (!channel.isTextBased()) return;
+
+    const embed = new EmbedBuilder();
+    embed.setTitle(truncate(item.title, MAX_EMBED_TITLE_LENGTH));
+    embed.setDescription(truncate(item.contentSnippet, MAX_EMBED_DESCRIPTION_LENGTH));
+    // `||` rather than `??`: an empty string is not a usable image URL either.
+    embed.setImage((await getItemImageUrl(item.link ?? "")) || null);
+    embed.setColor(feed.message_style.colour as HexColorString);
+
+    await (channel as TextChannel).send({ embeds: [embed] });
+};
+
+/**
+ * Looks up the `og:image` meta tag of the page at `url`.
+ *
+ * Resolves to `undefined` when `url` is empty, the request fails, or the page has no such tag.
+ */
+const getItemImageUrl = async (url: string) => {
+    if (!url) return undefined;
+
+    try {
+        const response = await fetch(url);
+        if (!response.ok) return undefined;
+
+        const html = HtmlParser(await response.text());
+        const imageElement = html.querySelector("meta[property='og:image']");
+        return imageElement?.getAttribute("content") || undefined;
+    } catch (error) {
+        // The image is optional, so a failed lookup must not prevent the item from being sent.
+        console.warn(`Could not load preview image from ${url}:`, error);
+        return undefined;
+    }
+};
+
+/** Evaluates one filter against the item's title and content, honouring `is_whitelist`. */
+const passesFilter = async (filter: Filter, item: RssParser.Item) => {
+    if (!filter.matching_algorithm.trim()) return !filter.is_whitelist;
+
+    // Default missing fields to "" so the literal text "undefined" is never matched.
+    const title = item.title ?? "";
+    const content = item.content ?? "";
+
+    let matchFound = false;
+
+    if (filter.matching_algorithm === MatchingAlgorithms.ALL) {
+        matchFound = filters.all(filter, `${title} ${content}`);
+    } else {
+        matchFound = (
+            filters.mapAlgorithmToFunction(filter, title)
+            || filters.mapAlgorithmToFunction(filter, content)
+        );
+    }
+
+    return filter.is_whitelist ? matchFound : !matchFound;
+};