From 81dcf325c31c511b381d834f92d0f7f55eb914ec Mon Sep 17 00:00:00 2001 From: Corban-Lee Date: Tue, 13 May 2025 16:57:39 +0100 Subject: [PATCH] working on filters --- package.json | 1 + src/bot/filter.ts | 40 +++++++++++++++++-------- src/bot/task.ts | 75 ++++++++++++++++++++++++++++++++++++----------- 3 files changed, 87 insertions(+), 29 deletions(-) diff --git a/package.json b/package.json index e254ae9..4fd4c32 100644 --- a/package.json +++ b/package.json @@ -61,6 +61,7 @@ "ejs": "^3.1.10", "ejs-mate": "^4.0.0", "express": "^5.1.0", + "fuzzball": "^2.2.2", "jquery": "^3.7.1", "lodash": "^4.17.21", "node-html-parser": "^7.0.1", diff --git a/src/bot/filter.ts b/src/bot/filter.ts index fa02904..bfafaf6 100644 --- a/src/bot/filter.ts +++ b/src/bot/filter.ts @@ -1,30 +1,46 @@ +import fuzz from "fuzzball"; // todo: implement for fuzzy match import { Filter, MatchingAlgorithms } from "../../generated/prisma"; -export const any = (filter: Filter, input: string) => { - for (let word in input.split(" ")) { - word = filter.is_insensitive ? word.toLowerCase() : word; - const against = filter.is_insensitive ? filter.value.toLowerCase() : filter.value; +function splitMatch(filterValue: string): string[] { + const findTerms = [...filterValue.matchAll(/"([^"]+)"|(\S+)/g)]; + return findTerms.map(value => { + const term = value[1] || value[2]; + return term.trim().replace(/\s+/g, "\\s+").replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + }); +} - if (word.includes(against)) return true; +export const all = (filter: Filter, input: string) => { + throw new Error("'all' filter not implemented"); +}; + +export const any = (filter: Filter, input: string) => { + // const words = splitMatch(filter.value); + // console.log(JSON.stringify(words)); + // return words.some(word => new RegExp(`\\b${word}\\b`).test(input)); + + const inputWords = input.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word); + const filterWords = filter.value.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word); + + for (const filterWord of filterWords) { + if (inputWords.includes(filterWord)) { + console.log(input); + return true; + } } return false; }; -export const all = (filter: Filter, input: string) => { - return true; -}; - export const literal = (filter: Filter, input: string) => { - return true; + throw new Error("'literal' filter not implemented"); }; export const regex = (filter: Filter, input: string) => { - return true; + throw new Error("'regex' filter not implemented"); }; export const fuzzy = (filter: Filter, input: string) => { - return true; + throw new Error("'fuzzy' filter not implemented"); }; export const mapAlgorithmToFunction = (filter: Filter, input: string) => { diff --git a/src/bot/task.ts b/src/bot/task.ts index 75d341d..037c97f 100644 --- a/src/bot/task.ts +++ b/src/bot/task.ts @@ -39,6 +39,8 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => { const parsed = await getParsedUrl(feed.url); if (!parsed) return; + console.log(`Processing feed: ${feed.name}`); + for (const channelId of feed.channels.map(channel => channel.channel_id)) { const channel = client.channels.cache.get(channelId); if (channel) await processItems(parsed.items, feed, channel, client); @@ -46,26 +48,65 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => { }; const processItems = async (items: RssParser.Item[], feed: ExpandedFeed, channel: DiscordChannel, client: Client) => { - for (const item of items) { - await processItem(item, feed, channel, client); - } -}; + console.log(`Processing ${items.length} items`); -const processItem = async (item: RssParser.Item, feed: ExpandedFeed, channel: DiscordChannel, client: Client) => { - for (const filter of feed.filters) { - if (!await passesFilter(filter, item)) { - console.log("fails filter") - return; + for (let i = items.length; i--;) { + if (!(await Promise.all(feed.filters.map(f => passesFilter(f, items[i])))).every(Boolean)) { + items.splice(i, 1); } } - const embed = new EmbedBuilder(); - embed.setTitle(item.title ?? null); - embed.setDescription(item.contentSnippet ?? null); - embed.setImage(await getItemImageUrl(item.link ?? "") ?? null); - embed.setColor(feed.message_style.colour as HexColorString); + console.log(`Processing ${items.length} items (post-filter)`) - await (channel as TextChannel).send({ embeds: [embed] }); + const batchSize = 4; + const totalBatches = Math.floor((items.length + batchSize - 1) / batchSize); + + console.log(`batchSize: ${batchSize}, totalBatches: ${totalBatches}`) + + for (let batchNumber = 0; batchNumber * batchSize < items.length; batchNumber++) { + console.log(`Processing items batch [${batchNumber+1}/${totalBatches}]`); + + const i = batchNumber * batchSize; + const batch = items.slice(i, i + batchSize); + + const embeds = await createEmbedFromItems(batch, feed, batchNumber, totalBatches); + + await (channel as TextChannel).send({ embeds: embeds }); + } +}; + +const createEmbedFromItems = async (items: RssParser.Item[], feed: ExpandedFeed, batchNumber: number, totalBatches: number) => { + if (!items.length) { + throw new Error("Items empty, expected at least 1 item."); + } + + const mainEmbed = new EmbedBuilder(); + const embeds = [mainEmbed] + + mainEmbed.setTitle(totalBatches > 1 ? `${feed.name} [${batchNumber+1}/${totalBatches}]` : feed.name); + mainEmbed.setColor(feed.message_style.colour as HexColorString); + mainEmbed.setURL(process.env.PUBLIC_URL ?? null); + + if (items.length == 1) { + mainEmbed.setImage(await getItemImageUrl(items[0].link ?? "") ?? null); + } + + for (const item of items) { + const contentSnippet = item.contentSnippet + `\n[View Article](${item.link})`; + mainEmbed.addFields({ + name: item.title ?? "- no title found -", + value: contentSnippet ?? "- no desc found -", + inline: false + }) + + if (embeds.length <= 5) { + const imageEmbed = new EmbedBuilder({ title: "dummy", url: process.env.PUBLIC_URL }); + imageEmbed.setImage(await getItemImageUrl(item.link ?? "") ?? null); + embeds.push(imageEmbed); + } + } + + return embeds }; const getItemImageUrl = async (url: string) => { @@ -92,7 +133,7 @@ const passesFilter = async (filter: Filter, item: RssParser.Item) => { ); } - console.log(`${matchFound} - ${filter.is_whitelist}`); + console.log(`Filter result: matchFound=${matchFound}, is_whitelist=${filter.is_whitelist}, willSend=${filter.is_whitelist ? matchFound : !matchFound}`); - return filter.is_whitelist ? !matchFound : matchFound; + return filter.is_whitelist ? matchFound : !matchFound; };