working on rss processing

This commit is contained in:
Corban-Lee Jones 2025-05-12 17:25:41 +01:00
parent fb011e80c2
commit 0297fb12b6
4 changed files with 141 additions and 0 deletions

View File

@ -63,8 +63,10 @@
"express": "^5.1.0",
"jquery": "^3.7.1",
"lodash": "^4.17.21",
"node-html-parser": "^7.0.1",
"nouislider": "^15.8.1",
"preline": "^3.0.1",
"rss-parser": "^3.13.0",
"tsconfig-paths": "^4.2.0",
"vanilla-calendar-pro": "^3.0.4",
"winston": "^3.17.0"

View File

@ -1,4 +1,5 @@
import { Client, GatewayIntentBits, ActivityType } from "discord.js";
import { triggerTask } from "@bot/task";
export const client = new Client({
intents: [
@ -13,6 +14,8 @@ client.on("ready", () => {
throw Error("Client is null");
}
setInterval(() => triggerTask(client), 5000);
client.user.setActivity("new sources", {type: ActivityType.Watching});
console.log(`Discord Bot ${client.user.displayName} is online!`)
});

40
src/bot/filter.ts Normal file
View File

@ -0,0 +1,40 @@
import { Filter, MatchingAlgorithms } from "../../generated/prisma";
/**
 * Reports whether any space-separated word of `input` contains the filter's
 * value as a substring.
 *
 * When `filter.is_insensitive` is set, both the word and the filter value are
 * lower-cased before they are compared.
 *
 * @param filter - Filter whose `value` is searched for.
 * @param input - Text to scan; it is split on single spaces.
 * @returns `true` as soon as one word contains the value, otherwise `false`.
 */
export const any = (filter: Filter, input: string) => {
  // The needle does not depend on the current word, so normalise it once.
  const against = filter.is_insensitive ? filter.value.toLowerCase() : filter.value;

  // `for...of` yields the words themselves; `for...in` would yield the array
  // indices ("0", "1", ...) and the comparison would never see real text.
  for (const rawWord of input.split(" ")) {
    const word = filter.is_insensitive ? rawWord.toLowerCase() : rawWord;
    if (word.includes(against)) return true;
  }
  return false;
};
/**
 * Placeholder for the "all" matching algorithm.
 *
 * Not implemented yet: both arguments are ignored and every input is
 * reported as a match.
 */
export const all = (filter: Filter, input: string): boolean => true;
/**
 * Placeholder for the exact ("literal") matching algorithm.
 *
 * Not implemented yet: both arguments are ignored and every input is
 * reported as a match.
 */
export const literal = (filter: Filter, input: string): boolean => true;
/**
 * Placeholder for the regular-expression matching algorithm.
 *
 * Not implemented yet: both arguments are ignored and every input is
 * reported as a match.
 */
export const regex = (filter: Filter, input: string): boolean => true;
/**
 * Placeholder for the fuzzy matching algorithm.
 *
 * Not implemented yet: both arguments are ignored and every input is
 * reported as a match.
 */
export const fuzzy = (filter: Filter, input: string): boolean => true;
/**
 * Runs the matcher selected by `filter.matching_algorithm` against `input`.
 *
 * Despite the name, this does not return a function: it calls the matching
 * implementation and returns that call's result.
 *
 * @param filter - Filter whose `matching_algorithm` selects the matcher.
 * @param input - Text handed to the selected matcher.
 * @returns The selected matcher's result.
 * @throws Error when the algorithm is none of the known `MatchingAlgorithms`.
 */
export const mapAlgorithmToFunction = (filter: Filter, input: string) => {
  const algorithm = filter.matching_algorithm;

  if (algorithm === MatchingAlgorithms.ALL) return all(filter, input);
  if (algorithm === MatchingAlgorithms.ANY) return any(filter, input);
  if (algorithm === MatchingAlgorithms.EXACT) return literal(filter, input);
  if (algorithm === MatchingAlgorithms.REGEX) return regex(filter, input);
  if (algorithm === MatchingAlgorithms.FUZZY) return fuzzy(filter, input);

  throw new Error(`Unknown algorithm: ${filter.matching_algorithm}`);
};

96
src/bot/task.ts Normal file
View File

@ -0,0 +1,96 @@
import { Client, EmbedBuilder, Guild, HexColorString, Channel as DiscordChannel, TextChannel } from "discord.js";
import RssParser from "rss-parser";
import { parse as HtmlParser } from "node-html-parser";
import { Feed, Filter, MessageStyle, Channel, MatchingAlgorithms } from "../../generated/prisma";
import * as filters from "@bot/filter";
import prisma from "@server/prisma";
/**
 * Runs one processing pass over every guild in the client's guild cache.
 *
 * Guilds are processed one after another. A failure while processing one
 * guild is logged and does not stop the remaining guilds, so the returned
 * promise does not reject because of a single bad guild. This matters for
 * callers that fire-and-forget the promise (for example from a timer), where
 * a rejection would otherwise go unhandled.
 *
 * NOTE(review): nothing here prevents a new pass from starting while a
 * previous one is still running — confirm whether the caller's interval can
 * be shorter than a full pass.
 *
 * @param client - Discord client whose cached guilds are processed.
 */
export const triggerTask = async (client: Client) => {
  for (const guild of client.guilds.cache.values()) {
    try {
      await processGuild(guild, client);
    } catch (error) {
      // Isolate failures per guild so one broken feed/guild cannot starve the rest.
      console.error(`Failed to process guild ${guild.id}:`, error);
    }
  }
};
/**
 * A `Feed` record together with its `channels`, `filters` and
 * `message_style` relations.
 */
interface ExpandedFeed extends Feed {
  channels: Channel[];
  filters: Filter[];
  message_style: MessageStyle;
}
/**
 * Loads the guild's active feeds, including their channels, filters and
 * message style, and processes them sequentially.
 *
 * @param guild - Guild whose `id` selects the feeds to load.
 * @param client - Discord client passed through to feed processing.
 */
const processGuild = async (guild: Guild, client: Client) => {
  const activeFeeds = (await prisma.feed.findMany({
    where: { guild_id: guild.id, active: true },
    include: { channels: true, filters: true, message_style: true }
  })) as ExpandedFeed[];

  // One feed at a time: the next feed starts only after the previous one finished.
  for (const activeFeed of activeFeeds) {
    await processFeed(activeFeed, client);
  }
};
/**
 * Fetches and parses the RSS feed at `url`.
 *
 * @param url - Address of the feed to download.
 * @returns The parsed feed, or `undefined` when fetching or parsing fails.
 */
const getParsedUrl = async (url: string) => {
  try {
    const parser = new RssParser();
    // `await` is required here: returning the bare promise would let a
    // rejection escape the `try` block and bypass the `catch` below.
    return await parser.parseURL(url);
  } catch (error) {
    console.error(`Failed to fetch or parse feed ${url}:`, error);
    return undefined;
  }
};
/**
 * Downloads the feed and hands its items to every configured channel that is
 * present in the client's channel cache.
 *
 * Does nothing when the feed could not be parsed; channels missing from the
 * cache are skipped.
 *
 * @param feed - Feed with its channels, filters and message style.
 * @param client - Discord client used to resolve channel ids.
 */
const processFeed = async (feed: ExpandedFeed, client: Client) => {
  const parsedFeed = await getParsedUrl(feed.url);
  if (parsedFeed) {
    for (const { channel_id: targetId } of feed.channels) {
      const target = client.channels.cache.get(targetId);
      if (!target) continue;
      await processItems(parsedFeed.items, feed, target, client);
    }
  }
};
/**
 * Processes the feed items one by one, in order, for a single channel.
 *
 * @param items - Parsed feed items.
 * @param feed - Feed the items belong to.
 * @param channel - Channel the items are processed for.
 * @param client - Discord client passed through to item processing.
 */
const processItems = async (
  items: RssParser.Item[],
  feed: ExpandedFeed,
  channel: DiscordChannel,
  client: Client
) => {
  for (const entry of items) await processItem(entry, feed, channel, client);
};
/**
 * Sends one feed item to `channel` as an embed, provided it passes every
 * filter of the feed.
 *
 * NOTE(review): nothing visible here records which items were already sent,
 * so the same item is posted again on every pass — confirm whether
 * de-duplication happens elsewhere.
 *
 * @param item - Parsed feed item.
 * @param feed - Feed the item belongs to; supplies filters and embed colour.
 * @param channel - Target channel; assumed to be a text channel (it is cast,
 *   not checked).
 * @param client - Discord client (currently unused by this function).
 */
const processItem = async (item: RssParser.Item, feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
  // Discord's documented embed limits; longer values are rejected.
  const maxTitleLength = 256;
  const maxDescriptionLength = 4096;

  const filterResults = await Promise.all(
    feed.filters.map(filter => passesFilter(filter, item))
  );
  if (filterResults.some(passed => !passed)) return;

  // Only look an image up when the item has a link: fetching "" is an invalid URL.
  const imageUrl = item.link ? await getItemImageUrl(item.link) : undefined;

  // `||` rather than `??` throughout: empty strings are not valid embed
  // values, so they must collapse to null just like undefined does.
  const embed = new EmbedBuilder();
  embed.setTitle(item.title?.slice(0, maxTitleLength) || null);
  embed.setDescription(item.contentSnippet?.slice(0, maxDescriptionLength) || null);
  embed.setImage(imageUrl || null);
  embed.setColor(feed.message_style.colour as HexColorString);

  await (channel as TextChannel).send({ embeds: [embed] });
};
/**
 * Looks up the Open Graph image (`og:image` meta tag) of the page at `url`.
 *
 * The lookup is best-effort: an empty URL, a network error, a non-2xx
 * response or a page without the tag all yield `undefined` instead of
 * throwing.
 *
 * @param url - Address of the page to inspect.
 * @returns The tag's `content` attribute, or `undefined` when no image could
 *   be determined.
 */
const getItemImageUrl = async (url: string) => {
  // fetch("") rejects with an invalid-URL error, so bail out early.
  if (!url) return undefined;

  try {
    const response = await fetch(url);
    if (!response.ok) return undefined;

    const html = HtmlParser(await response.text());
    const imageElement = html.querySelector("meta[property='og:image']");
    // An empty `content` attribute is treated the same as a missing one.
    return imageElement?.getAttribute("content") || undefined;
  } catch (error) {
    console.error(`Failed to look up image for ${url}:`, error);
    return undefined;
  }
};
/**
 * Decides whether `item` is allowed through `filter`.
 *
 * For the ALL algorithm the title and content are matched as one combined
 * text; for every other algorithm a match in either the title or the content
 * counts. A whitelist filter passes items that match, any other filter passes
 * items that do not match.
 *
 * @param filter - Filter to evaluate.
 * @param item - Parsed feed item; a missing title or content is treated as
 *   empty text.
 * @returns `true` when the item may be sent.
 */
const passesFilter = async (filter: Filter, item: RssParser.Item) => {
  // NOTE(review): this guards against a blank *algorithm* name; it looks like
  // it may have been meant to guard against a blank `filter.value` — confirm.
  if (!filter.matching_algorithm.trim()) return !filter.is_whitelist;

  const title = item.title ?? "";
  const content = item.content ?? "";

  let matchFound = false;
  if (filter.matching_algorithm === MatchingAlgorithms.ALL) {
    // Join only the parts that exist, so a missing field contributes neither
    // the literal text "undefined" nor a stray separator.
    const combined = [title, content].filter(part => part !== "").join(" ");
    matchFound = filters.all(filter, combined);
  } else {
    matchFound = (
      filters.mapAlgorithmToFunction(filter, title)
      || filters.mapAlgorithmToFunction(filter, content)
    );
  }

  return filter.is_whitelist ? matchFound : !matchFound;
};