From 980712ad9e19d61290363bbc6bfcb57780c749cc Mon Sep 17 00:00:00 2001 From: Corban-Lee Jones Date: Thu, 11 Jul 2024 23:33:23 +0100 Subject: [PATCH] fix fuzzy match --- requirements.txt | 1 + src/filters.py | 14 ++++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8a95ffe..5af2777 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,6 +16,7 @@ multidict==6.0.5 pip-chill==1.0.3 psycopg2-binary==2.9.9 python-dotenv==1.0.0 +rapidfuzz==3.9.4 sgmllib3k==1.0.0 six==1.16.0 soupsieve==2.5 diff --git a/src/filters.py b/src/filters.py index d42cc07..ec2f0c2 100644 --- a/src/filters.py +++ b/src/filters.py @@ -1,4 +1,9 @@ import re +import logging + +from rapidfuzz import fuzz + +log = logging.getLogger(__name__) def _split_match(_match): """ @@ -51,9 +56,7 @@ def _match_regex(_match: str, matching_to: str, **search_kwargs) -> bool: log.error(err) return False -def _match_fuzzy(_match: str, matching_to: str, **search_kwargs) -> bool: - from rapidfuzz import fuzz - +def _match_fuzzy(_match: str, matching_to: str, is_insensitive: bool) -> bool: _match = re.sub(r"[^\w\s]", "", _match) text = re.sub(r"[^\w\s]", "", matching_to) if is_insensitive: @@ -99,4 +102,7 @@ def match_text(_filter: dict, matching_to: str) -> bool: return _match_regex(_match, matching_to, **search_kwargs) case 5: # Fuzzy Match - return _match_fuzzy(_match, matching_to, **search_kwargs) + return _match_fuzzy(_match, matching_to, is_insensitive) + + case _: + return False