1515import argparse
1616import re
1717import sys
18- from typing import Iterable
18+
1919
2020import pandas as pd
2121
2222
# Compiled, case-insensitive regex patterns for merchants treated as grocery
# stores. Every pattern is anchored on word boundaries, so a chain name that is
# only part of a longer word does not match. Multi-word names accept a single
# whitespace character, a hyphen, or no separator between the words
# (e.g. "SAVE ON", "SAVE-ON", "SAVEON").
GROCERY_PATTERNS = [
    re.compile(r"\bSAVE[-\s]?ON\b", re.IGNORECASE),
    re.compile(r"\bWHOLE[-\s]?FOODS\b", re.IGNORECASE),
    re.compile(r"\bSAFEWAY\b", re.IGNORECASE),
    re.compile(r"\bNO[-\s]?FRILLS\b", re.IGNORECASE),
    re.compile(r"\bSUPERSTORE\b", re.IGNORECASE),
    re.compile(r"\bTHRIFTY\b", re.IGNORECASE),
    re.compile(r"\bWALMART\b", re.IGNORECASE),
    # Uses the same optional-separator form as the other multi-word names so
    # that "7-ELEVEN" and "7ELEVEN" match as well as "7 ELEVEN".
    re.compile(r"\b7[-\s]?ELEVEN\b", re.IGNORECASE),
]
4434
# Upper-case name constants.
# NOTE(review): their use sites are outside this view — confirm how they are
# matched before changing the casing.
TAYLOR_NAME = "TAYLOR"
ANVITA_NAME = "ANVITA"
@@ -61,11 +51,10 @@ def clean_description(desc: str) -> str:
6151
6252
def is_grocery(merchant: str) -> bool:
    """Return True if *merchant* matches any grocery store pattern.

    Args:
        merchant: Transaction description to classify.

    Returns:
        True when at least one pattern in ``GROCERY_PATTERNS`` is found
        anywhere in ``merchant``; False otherwise, including when
        ``merchant`` is not a string.
    """
    # Non-string input is never a match; checking first also keeps
    # ``pattern.search`` from raising TypeError on such values.
    if not isinstance(merchant, str):
        return False
    return any(pattern.search(merchant) for pattern in GROCERY_PATTERNS)
6958
7059
7160def main ():
0 commit comments