working on filters

This commit is contained in:
Corban-Lee Jones 2025-05-13 16:57:39 +01:00
parent f871a1d847
commit 81dcf325c3
3 changed files with 87 additions and 29 deletions

View File

@ -61,6 +61,7 @@
"ejs": "^3.1.10",
"ejs-mate": "^4.0.0",
"express": "^5.1.0",
"fuzzball": "^2.2.2",
"jquery": "^3.7.1",
"lodash": "^4.17.21",
"node-html-parser": "^7.0.1",

View File

@ -1,30 +1,46 @@
import fuzz from "fuzzball"; // todo: implement for fuzzy match
import { Filter, MatchingAlgorithms } from "../../generated/prisma";
export const any = (filter: Filter, input: string) => {
for (let word in input.split(" ")) {
word = filter.is_insensitive ? word.toLowerCase() : word;
const against = filter.is_insensitive ? filter.value.toLowerCase() : filter.value;
/**
 * Splits a filter value into search terms that are safe to embed in a `RegExp`.
 *
 * Double-quoted runs (`"like this"`) are kept together as a single term; any
 * other run of non-whitespace characters is a term of its own.
 *
 * @param filterValue Raw filter text as entered by the user.
 * @returns One regex source fragment per term. Regex metacharacters are
 *   escaped, and any whitespace inside a quoted term becomes `\s+` so it
 *   matches regardless of how the words are spaced in the target text.
 *   Terms that are empty after trimming are dropped.
 */
function splitMatch(filterValue: string): string[] {
  const terms: string[] = [];

  for (const match of filterValue.matchAll(/"([^"]+)"|(\S+)/g)) {
    // Group 1 is the quoted form, group 2 the bare word; exactly one is set.
    const term = (match[1] ?? match[2] ?? "").trim();

    // A quoted run of only whitespace would otherwise yield an empty pattern,
    // which matches every input.
    if (!term) continue;

    // Escape first, then insert `\s+`. Doing it the other way round escapes
    // the backslash and plus of the inserted `\s+`, turning it into a literal.
    const escaped = term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    terms.push(escaped.replace(/\s+/g, "\\s+"));
  }

  return terms;
}
if (word.includes(against)) return true;
/**
 * Placeholder for the "all" matching algorithm.
 *
 * @param filter Filter that would be evaluated (currently unused).
 * @param input Text that would be matched against (currently unused).
 * @throws Error Always — this algorithm has no implementation yet.
 */
export const all = (filter: Filter, input: string) => {
  const reason = "'all' filter not implemented";
  throw new Error(reason);
};
/**
 * Checks whether at least one word of the filter value occurs as a whole word
 * in `input`.
 *
 * Both texts are split on runs of whitespace, and empty tokens are discarded so
 * that repeated spaces or an empty filter value can never produce a match.
 * When `filter.is_insensitive` is set, both sides are lower-cased first.
 *
 * @param filter Filter whose `value` holds the whitespace-separated words.
 * @param input Text to search.
 * @returns `true` if any filter word equals any input word, otherwise `false`.
 */
export const any = (filter: Filter, input: string) => {
  // A regex-based variant built on splitMatch (quoted phrases, `\b` word
  // boundaries) was sketched here previously; this is the plain word-set match.
  const toWords = (text: string): string[] => {
    const normalised = filter.is_insensitive ? text.toLowerCase() : text;
    return normalised.split(/\s+/).filter(Boolean);
  };

  const inputWords = new Set(toWords(input));
  return toWords(filter.value).some(word => inputWords.has(word));
};
export const all = (filter: Filter, input: string) => {
return true;
};
export const literal = (filter: Filter, input: string) => {
return true;
throw new Error("'literal' filter not implemented");
};
export const regex = (filter: Filter, input: string) => {
return true;
throw new Error("'regex' filter not implemented");
};
export const fuzzy = (filter: Filter, input: string) => {
return true;
throw new Error("'fuzzy' filter not implemented");
};
export const mapAlgorithmToFunction = (filter: Filter, input: string) => {

View File

@ -39,6 +39,8 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => {
const parsed = await getParsedUrl(feed.url);
if (!parsed) return;
console.log(`Processing feed: ${feed.name}`);
for (const channelId of feed.channels.map(channel => channel.channel_id)) {
const channel = client.channels.cache.get(channelId);
if (channel) await processItems(parsed.items, feed, channel, client);
@ -46,26 +48,65 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => {
};
const processItems = async (items: RssParser.Item[], feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
for (const item of items) {
await processItem(item, feed, channel, client);
}
};
console.log(`Processing ${items.length} items`);
const processItem = async (item: RssParser.Item, feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
for (const filter of feed.filters) {
if (!await passesFilter(filter, item)) {
console.log("fails filter")
return;
for (let i = items.length; i--;) {
if (!(await Promise.all(feed.filters.map(f => passesFilter(f, items[i])))).every(Boolean)) {
items.splice(i, 1);
}
}
const embed = new EmbedBuilder();
embed.setTitle(item.title ?? null);
embed.setDescription(item.contentSnippet ?? null);
embed.setImage(await getItemImageUrl(item.link ?? "") ?? null);
embed.setColor(feed.message_style.colour as HexColorString);
console.log(`Processing ${items.length} items (post-filter)`)
await (channel as TextChannel).send({ embeds: [embed] });
const batchSize = 4;
const totalBatches = Math.floor((items.length + batchSize - 1) / batchSize);
console.log(`batchSize: ${batchSize}, totalBatches: ${totalBatches}`)
for (let batchNumber = 0; batchNumber * batchSize < items.length; batchNumber++) {
console.log(`Processing items batch [${batchNumber+1}/${totalBatches}]`);
const i = batchNumber * batchSize;
const batch = items.slice(i, i + batchSize);
const embeds = await createEmbedFromItems(batch, feed, batchNumber, totalBatches);
await (channel as TextChannel).send({ embeds: embeds });
}
};
/** Maximum length Discord accepts for an embed field name. */
const EMBED_FIELD_NAME_LIMIT = 256;
/** Maximum length Discord accepts for an embed field value. */
const EMBED_FIELD_VALUE_LIMIT = 1024;
/** Upper bound on the number of image-only embeds appended after the main embed. */
const MAX_IMAGE_EMBEDS = 5;

/** Shortens `text` to at most `limit` characters, marking any cut with an ellipsis. */
const clampEmbedText = (text: string, limit: number): string => {
  if (text.length <= limit) return text;
  return limit > 0 ? `${text.slice(0, limit - 1)}…` : "";
};

/**
 * Builds the embeds for one batch of feed items.
 *
 * The first embed carries the feed name (with a `[batch/total]` suffix when
 * there is more than one batch) and one field per item. It is followed by
 * image-only embeds, one per item, that share the main embed's URL —
 * presumably so Discord groups them into a gallery; confirm against the client.
 *
 * @param items Items in this batch; must not be empty.
 * @param feed Feed the items belong to; supplies the title and colour.
 * @param batchNumber Zero-based index of this batch.
 * @param totalBatches Total number of batches being sent.
 * @returns The main embed followed by the image embeds.
 * @throws Error If `items` is empty.
 */
const createEmbedFromItems = async (items: RssParser.Item[], feed: ExpandedFeed, batchNumber: number, totalBatches: number) => {
  if (!items.length) {
    throw new Error("Items empty, expected at least 1 item.");
  }

  const mainEmbed = new EmbedBuilder();
  const embeds = [mainEmbed];

  mainEmbed.setTitle(totalBatches > 1 ? `${feed.name} [${batchNumber + 1}/${totalBatches}]` : feed.name);
  mainEmbed.setColor(feed.message_style.colour as HexColorString);
  mainEmbed.setURL(process.env.PUBLIC_URL ?? null);

  if (items.length === 1) {
    mainEmbed.setImage(await getItemImageUrl(items[0].link ?? "") ?? null);
  }

  for (const item of items) {
    // `||` rather than `??`: an empty or whitespace-only title/snippet must
    // also fall back, since an empty field name or value is not usable.
    const title = item.title?.trim() || "- no title found -";
    const snippet = item.contentSnippet?.trim() || "- no desc found -";

    // Only append the link when there is one, and only if it can fit at all.
    const link = item.link ? `\n[View Article](${item.link})` : "";
    const linkSuffix = link.length < EMBED_FIELD_VALUE_LIMIT ? link : "";

    mainEmbed.addFields({
      name: clampEmbedText(title, EMBED_FIELD_NAME_LIMIT),
      // Reserve room for the link so truncation never cuts the markdown in half.
      value: clampEmbedText(snippet, EMBED_FIELD_VALUE_LIMIT - linkSuffix.length) + linkSuffix,
      inline: false
    });

    if (embeds.length <= MAX_IMAGE_EMBEDS) {
      const imageEmbed = new EmbedBuilder({ title: "dummy", url: process.env.PUBLIC_URL });
      imageEmbed.setImage(await getItemImageUrl(item.link ?? "") ?? null);
      embeds.push(imageEmbed);
    }
  }

  return embeds;
};
const getItemImageUrl = async (url: string) => {
@ -92,7 +133,7 @@ const passesFilter = async (filter: Filter, item: RssParser.Item) => {
);
}
console.log(`${matchFound} - ${filter.is_whitelist}`);
console.log(`Filter result: matchFound=${matchFound}, is_whitelist=${filter.is_whitelist}, willSend=${filter.is_whitelist ? matchFound : !matchFound}`);
return filter.is_whitelist ? !matchFound : matchFound;
return filter.is_whitelist ? matchFound : !matchFound;
};