working on filters
This commit is contained in:
parent
f871a1d847
commit
81dcf325c3
@ -61,6 +61,7 @@
|
|||||||
"ejs": "^3.1.10",
|
"ejs": "^3.1.10",
|
||||||
"ejs-mate": "^4.0.0",
|
"ejs-mate": "^4.0.0",
|
||||||
"express": "^5.1.0",
|
"express": "^5.1.0",
|
||||||
|
"fuzzball": "^2.2.2",
|
||||||
"jquery": "^3.7.1",
|
"jquery": "^3.7.1",
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"node-html-parser": "^7.0.1",
|
"node-html-parser": "^7.0.1",
|
||||||
|
@ -1,30 +1,46 @@
|
|||||||
|
import fuzz from "fuzzball"; // todo: implement for fuzzy match
|
||||||
import { Filter, MatchingAlgorithms } from "../../generated/prisma";
|
import { Filter, MatchingAlgorithms } from "../../generated/prisma";
|
||||||
|
|
||||||
export const any = (filter: Filter, input: string) => {
|
function splitMatch(filterValue: string): string[] {
|
||||||
for (let word in input.split(" ")) {
|
const findTerms = [...filterValue.matchAll(/"([^"]+)"|(\S+)/g)];
|
||||||
word = filter.is_insensitive ? word.toLowerCase() : word;
|
return findTerms.map(value => {
|
||||||
const against = filter.is_insensitive ? filter.value.toLowerCase() : filter.value;
|
const term = value[1] || value[2];
|
||||||
|
return term.trim().replace(/\s+/g, "\\s+").replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
if (word.includes(against)) return true;
|
export const all = (filter: Filter, input: string) => {
|
||||||
|
throw new Error("'all' filter not implemented");
|
||||||
|
};
|
||||||
|
|
||||||
|
export const any = (filter: Filter, input: string) => {
|
||||||
|
// const words = splitMatch(filter.value);
|
||||||
|
// console.log(JSON.stringify(words));
|
||||||
|
// return words.some(word => new RegExp(`\\b${word}\\b`).test(input));
|
||||||
|
|
||||||
|
const inputWords = input.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word);
|
||||||
|
const filterWords = filter.value.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word);
|
||||||
|
|
||||||
|
for (const filterWord of filterWords) {
|
||||||
|
if (inputWords.includes(filterWord)) {
|
||||||
|
console.log(input);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
export const all = (filter: Filter, input: string) => {
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
export const literal = (filter: Filter, input: string) => {
|
export const literal = (filter: Filter, input: string) => {
|
||||||
return true;
|
throw new Error("'literal' filter not implemented");
|
||||||
};
|
};
|
||||||
|
|
||||||
export const regex = (filter: Filter, input: string) => {
|
export const regex = (filter: Filter, input: string) => {
|
||||||
return true;
|
throw new Error("'regex' filter not implemented");
|
||||||
};
|
};
|
||||||
|
|
||||||
export const fuzzy = (filter: Filter, input: string) => {
|
export const fuzzy = (filter: Filter, input: string) => {
|
||||||
return true;
|
throw new Error("'fuzzy' filter not implemented");
|
||||||
};
|
};
|
||||||
|
|
||||||
export const mapAlgorithmToFunction = (filter: Filter, input: string) => {
|
export const mapAlgorithmToFunction = (filter: Filter, input: string) => {
|
||||||
|
@ -39,6 +39,8 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => {
|
|||||||
const parsed = await getParsedUrl(feed.url);
|
const parsed = await getParsedUrl(feed.url);
|
||||||
if (!parsed) return;
|
if (!parsed) return;
|
||||||
|
|
||||||
|
console.log(`Processing feed: ${feed.name}`);
|
||||||
|
|
||||||
for (const channelId of feed.channels.map(channel => channel.channel_id)) {
|
for (const channelId of feed.channels.map(channel => channel.channel_id)) {
|
||||||
const channel = client.channels.cache.get(channelId);
|
const channel = client.channels.cache.get(channelId);
|
||||||
if (channel) await processItems(parsed.items, feed, channel, client);
|
if (channel) await processItems(parsed.items, feed, channel, client);
|
||||||
@ -46,26 +48,65 @@ const processFeed = async (feed: ExpandedFeed, client: Client) => {
|
|||||||
};
|
};
|
||||||
|
|
||||||
const processItems = async (items: RssParser.Item[], feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
|
const processItems = async (items: RssParser.Item[], feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
|
||||||
for (const item of items) {
|
console.log(`Processing ${items.length} items`);
|
||||||
await processItem(item, feed, channel, client);
|
|
||||||
|
for (let i = items.length; i--;) {
|
||||||
|
if (!(await Promise.all(feed.filters.map(f => passesFilter(f, items[i])))).every(Boolean)) {
|
||||||
|
items.splice(i, 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`Processing ${items.length} items (post-filter)`)
|
||||||
|
|
||||||
|
const batchSize = 4;
|
||||||
|
const totalBatches = Math.floor((items.length + batchSize - 1) / batchSize);
|
||||||
|
|
||||||
|
console.log(`batchSize: ${batchSize}, totalBatches: ${totalBatches}`)
|
||||||
|
|
||||||
|
for (let batchNumber = 0; batchNumber * batchSize < items.length; batchNumber++) {
|
||||||
|
console.log(`Processing items batch [${batchNumber+1}/${totalBatches}]`);
|
||||||
|
|
||||||
|
const i = batchNumber * batchSize;
|
||||||
|
const batch = items.slice(i, i + batchSize);
|
||||||
|
|
||||||
|
const embeds = await createEmbedFromItems(batch, feed, batchNumber, totalBatches);
|
||||||
|
|
||||||
|
await (channel as TextChannel).send({ embeds: embeds });
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const processItem = async (item: RssParser.Item, feed: ExpandedFeed, channel: DiscordChannel, client: Client) => {
|
const createEmbedFromItems = async (items: RssParser.Item[], feed: ExpandedFeed, batchNumber: number, totalBatches: number) => {
|
||||||
for (const filter of feed.filters) {
|
if (!items.length) {
|
||||||
if (!await passesFilter(filter, item)) {
|
throw new Error("Items empty, expected at least 1 item.");
|
||||||
console.log("fails filter")
|
}
|
||||||
return;
|
|
||||||
|
const mainEmbed = new EmbedBuilder();
|
||||||
|
const embeds = [mainEmbed]
|
||||||
|
|
||||||
|
mainEmbed.setTitle(totalBatches > 1 ? `${feed.name} [${batchNumber+1}/${totalBatches}]` : feed.name);
|
||||||
|
mainEmbed.setColor(feed.message_style.colour as HexColorString);
|
||||||
|
mainEmbed.setURL(process.env.PUBLIC_URL ?? null);
|
||||||
|
|
||||||
|
if (items.length == 1) {
|
||||||
|
mainEmbed.setImage(await getItemImageUrl(items[0].link ?? "") ?? null);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const item of items) {
|
||||||
|
const contentSnippet = item.contentSnippet + `\n[View Article](${item.link})`;
|
||||||
|
mainEmbed.addFields({
|
||||||
|
name: item.title ?? "- no title found -",
|
||||||
|
value: contentSnippet ?? "- no desc found -",
|
||||||
|
inline: false
|
||||||
|
})
|
||||||
|
|
||||||
|
if (embeds.length <= 5) {
|
||||||
|
const imageEmbed = new EmbedBuilder({ title: "dummy", url: process.env.PUBLIC_URL });
|
||||||
|
imageEmbed.setImage(await getItemImageUrl(item.link ?? "") ?? null);
|
||||||
|
embeds.push(imageEmbed);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const embed = new EmbedBuilder();
|
return embeds
|
||||||
embed.setTitle(item.title ?? null);
|
|
||||||
embed.setDescription(item.contentSnippet ?? null);
|
|
||||||
embed.setImage(await getItemImageUrl(item.link ?? "") ?? null);
|
|
||||||
embed.setColor(feed.message_style.colour as HexColorString);
|
|
||||||
|
|
||||||
await (channel as TextChannel).send({ embeds: [embed] });
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const getItemImageUrl = async (url: string) => {
|
const getItemImageUrl = async (url: string) => {
|
||||||
@ -92,7 +133,7 @@ const passesFilter = async (filter: Filter, item: RssParser.Item) => {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`${matchFound} - ${filter.is_whitelist}`);
|
console.log(`Filter result: matchFound=${matchFound}, is_whitelist=${filter.is_whitelist}, willSend=${filter.is_whitelist ? matchFound : !matchFound}`);
|
||||||
|
|
||||||
return filter.is_whitelist ? !matchFound : matchFound;
|
return filter.is_whitelist ? matchFound : !matchFound;
|
||||||
};
|
};
|
||||||
|
Loading…
x
Reference in New Issue
Block a user