From 3feb4640970094682203f5a39e0c99c752d1db2a Mon Sep 17 00:00:00 2001 From: Corban-Lee Date: Thu, 15 May 2025 10:32:40 +0100 Subject: [PATCH] test(bot): write tests for ANY filter --- src/bot/__tests__/filters.test.ts | 67 +++++++++++++++++++++++++++++-- src/bot/filter.ts | 35 ++++++---------- 2 files changed, 77 insertions(+), 25 deletions(-) diff --git a/src/bot/__tests__/filters.test.ts b/src/bot/__tests__/filters.test.ts index e1664a8..7beb54a 100644 --- a/src/bot/__tests__/filters.test.ts +++ b/src/bot/__tests__/filters.test.ts @@ -14,13 +14,72 @@ const templateFilter: prisma.Filter = { }; -describe("Regex Matching Test", () => { +describe("Match: ALL", () => { }); + +describe("Match: ANY", () => { + const filter: prisma.Filter = { + ...templateFilter, + name: "Block American Politics", + value: String.raw`trump biden democrat republican gop dnc kamala harris`, + is_insensitive: true + }; + + const testCases: { input: string, expected: boolean }[] = [ +{ + input: "Republicans float new tax breaks for tips, local taxes in Trump budget package", + expected: true // Contains 'republican' and 'trump'. + }, + { + input: "Biden’s Approval Rating Dips Amid Economic Concerns", + expected: true // Contains 'biden'. + }, + { + input: "GOP Governors Call for Tougher Border Security Measures", + expected: true // Contains 'gop'. + }, + { + input: "Biden Announces New Initiative to Tackle Climate Change", + expected: true // Contains 'biden'. + }, + { + input: "Biden Announces New Initiative to Tackle Climate Change", + expected: true // Contains 'biden'. + }, + { + input: "Joe Biden gives thoughts on Donald Trump, Vladimir Putin, Ukraine, and Kamala Harris in first interview since leaving office", + expected: true // Contains 'biden', 'trump', 'kamala' and 'harris'. + }, + { + input: "UK Prime Minister Keir Starmer hails limited US-UK trade deal, but 10% duties remain", + expected: false + }, + { + input: "Scientists discover new species of fish in Pacific Ocean", + expected: false + }, + { + input: "Federal Reserve signals cautious approach to interest rate cuts in 2025", + expected: false + } + ]; + + for (const { input, expected } of testCases) { + test(`Test input: ${input}`, () => { + const result = filters.any(filter, input); + expect(result).toBe(expected); + }); + } +}); + +describe("Match: LITERAL", () => { }); + +describe("Match: REGEX", () => { const filter: prisma.Filter = { ...templateFilter, name: "Block American Politics", value: String.raw`\b(trump|biden|democrat|republican|gop|dnc|kamala|harris)\b`, is_insensitive: true - } + }; const testCases: { input: string, expected: boolean }[] = [ { @@ -67,4 +126,6 @@ describe("Regex Matching Test", () => { expect(result).toBe(expected); }); } -}); \ No newline at end of file +}); + +describe("Match: FUZZY", () => { }); diff --git a/src/bot/filter.ts b/src/bot/filter.ts index 5b78461..f425690 100644 --- a/src/bot/filter.ts +++ b/src/bot/filter.ts @@ -1,11 +1,13 @@ import fuzz from "fuzzball"; // todo: implement for fuzzy match import { Filter, MatchingAlgorithms } from "../../generated/prisma"; -function splitMatch(filterValue: string): string[] { +function splitWords(filterValue: string): string[] { const findTerms = [...filterValue.matchAll(/"([^"]+)"|(\S+)/g)]; return findTerms.map(value => { const term = value[1] || value[2]; - return term.trim().replace(/\s+/g, "\\s+").replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); + return term.trim() + .replace(/\s+/g, "\\s+") // Replace whitespace with equivelant regex characters + .replace(/[.*+?^${}()|[\]\\]/g, '\\$&') // Escape common regex characters }); } @@ -14,26 +16,15 @@ export const all = (filter: Filter, input: string) => { }; export const any = (filter: Filter, input: string) => { - // const words = splitMatch(filter.value); - // console.log(JSON.stringify(words)); - // return words.some(word => new RegExp(`\\b${word}\\b`).test(input)); - - // TODO: - // The below code works, but I'd like to use regex as was done in pyrss. - // The above commented code does not work, figure it out. - - - const inputWords = input.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word); - const filterWords = filter.value.split(" ").map(word => filter.is_insensitive ? word.toLowerCase() : word); - - for (const filterWord of filterWords) { - if (inputWords.includes(filterWord)) { - console.log(input); - return true; - } + try { + const flags = filter.is_insensitive ? "i" : ""; + const filterWords = splitWords(filter.value).toString().replace(/,/g, "|"); + const filterValue = String.raw`\b(${filterWords})\b` + return new RegExp(filterValue, flags).test(input); + } catch (error) { + console.error(`ANY: Invalid regex pattern: ${filter.value}`, error); + return false; } - - return false; }; export const literal = (filter: Filter, input: string) => { @@ -45,7 +36,7 @@ export const regex = (filter: Filter, input: string) => { const flags = filter.is_insensitive ? "i" : ""; return new RegExp(filter.value, flags).test(input); } catch (error) { - console.error(`Invalid regex pattern: ${filter.value}`, error); + console.error(`REGEX: Invalid regex pattern: ${filter.value}`, error); return false; } };