|
| 1 | +#!/usr/bin/env python3 |
| 2 | +""" |
| 3 | +Process American Express activity CSV into a cleaned Excel file with columns: |
| 4 | +- Date (transaction date) |
| 5 | +- Source ("American Express Cobalt") |
| 6 | +- Expense (cleaned description) |
| 7 | +- Taylor Paid (amount if Card Member is Taylor Curran; else 0) |
| 8 | +- Anvita Paid (amount if Card Member is Anvita Akkur; else 0) |
| 9 | +- Taylor Portion (0.6 if groceries; else blank) |
| 10 | +
|
| 11 | +Usage: |
| 12 | + python process_amex_expenses.py --input activity.csv --output amex_expenses_full.xlsx |
| 13 | +""" |
| 14 | + |
| 15 | +import argparse |
| 16 | +import re |
| 17 | +import sys |
| 18 | +from typing import Iterable |
| 19 | + |
| 20 | +import pandas as pd |
| 21 | + |
| 22 | + |
# Upper-case keywords that is_grocery() looks for in a merchant description.
# Extend this tuple as new grocery merchants show up in the export.
GROCERY_KEYWORDS: Iterable[str] = (
    # Save-On-Foods spellings
    "SAVE ON",
    "SAVE-ON",
    "SAVEON",
    # Whole Foods spellings
    "WHOLE FOODS",
    "WHOLEFOODS",
    "SAFEWAY",
    # No Frills spellings
    "NO FRILLS",
    "NOFRILLS",
    # Superstore (long and short form)
    "REAL CANADIAN SUPERSTORE",
    "SUPERSTORE",
    # Thrifty Foods (long and short form)
    "THRIFTY FOODS",
    "THRIFTY",
    # Walmart, both spellings of "supercentre"
    "WALMART SUPERCENTER",
    "WALMART SUPERCENTRE",
    "COSTCO WHOLESALE",
    "CHOICES MARKETS",
    "URBAN FARE",
    "IGA",
)

# Upper-case name fragments searched for in the upper-cased "Card Member"
# column to decide which person paid a given transaction.
TAYLOR_NAME = "TAYLOR"
ANVITA_NAME = "ANVITA"
| 47 | + |
| 48 | + |
def clean_description(desc: str) -> str:
    """Return a simplified merchant description.

    Strips store numbers ("#12345"), URLs starting with "http", phone-like
    digit groups and long digit runs (7+ digits), then collapses whitespace.

    Each removed fragment is replaced by a single space rather than an empty
    string, so the words on either side of it are never glued together
    (e.g. "UBER#123TRIP" becomes "UBER TRIP", not "UBERTRIP"). The extra
    spaces are collapsed by the final whitespace normalisation.

    Args:
        desc: Raw description from the CSV. Non-string values (for example
            NaN from an empty cell) are accepted.

    Returns:
        The cleaned description, or "" when ``desc`` is not a string.
    """
    if not isinstance(desc, str):
        return ""

    s = desc
    s = re.sub(r"#\d+", " ", s)  # store numbers like "#12345"
    s = re.sub(r"http\S+", " ", s, flags=re.IGNORECASE)  # URLs
    # Phone-like numbers. The pattern also consumes trailing whitespace,
    # which is why the replacement must be a space and not "".
    s = re.sub(r"\+?\d[\d\-\s\(\)]{6,}", " ", s)
    s = re.sub(r"\b\d{7,}\b", " ", s)  # long digit runs
    # split()/join collapses runs of whitespace and trims both ends.
    return " ".join(s.split())
| 61 | + |
| 62 | + |
def is_grocery(merchant: str, keywords: "Iterable[str] | None" = None) -> bool:
    """Heuristically decide whether a merchant description is a grocery store.

    The description is upper-cased and searched for each keyword. A keyword
    only counts when it starts at a token boundary, i.e. it is not directly
    preceded by a letter or digit. This stops short keywords such as "IGA"
    from matching inside unrelated words ("ORIGAMI", "GIGABYTE"), while still
    allowing glued suffixes such as "SAVEONFOODS".

    Args:
        merchant: Cleaned merchant description. Non-string values are
            treated as "not a grocery".
        keywords: Keywords to look for. Defaults to the module-level
            ``GROCERY_KEYWORDS`` when omitted.

    Returns:
        True if any keyword is found at a token boundary, else False.
    """
    if not isinstance(merchant, str):
        return False
    if keywords is None:
        keywords = GROCERY_KEYWORDS
    text = merchant.upper()
    return any(
        # Negative lookbehind: the keyword must not continue a preceding word.
        re.search(r"(?<![A-Z0-9])" + re.escape(k.upper()), text) is not None
        for k in keywords
    )
| 69 | + |
| 70 | + |
def main():
    """Convert an Amex activity CSV into the cleaned expense spreadsheet.

    Command-line arguments:
        --input / -i:  path to the Amex activity CSV (required).
        --output / -o: path of the Excel file to write (required).

    The CSV must contain the columns "Amount", "Date", "Description" and
    "Card Member". Only rows with a positive amount are kept. The output has
    the columns Date, Source, Expense, Taylor Paid, Anvita Paid and
    Taylor Portion.

    Exits with status 1 (message on stderr) when the CSV cannot be read, a
    required column is missing, or the Excel file cannot be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", "-i", required=True, help="Path to Amex activity CSV")
    parser.add_argument("--output", "-o", required=True, help="Path to output Excel file")
    args = parser.parse_args()

    # Read CSV (expects columns like: Date, Date Processed, Description,
    # Card Member, Account #, Amount).
    try:
        df = pd.read_csv(args.input)
    except Exception as e:
        print(f"Error reading input CSV: {e}", file=sys.stderr)
        sys.exit(1)

    # Validate every required column up front, before any processing.
    for col in ("Amount", "Date", "Description", "Card Member"):
        if col not in df.columns:
            print(f"Input CSV missing '{col}' column.", file=sys.stderr)
            sys.exit(1)

    # Amounts exported as text (e.g. "$1,234.56") would make the "> 0"
    # comparison raise; strip currency formatting and coerce to numbers.
    # Values that still cannot be parsed become NaN and are filtered out.
    amount = df["Amount"]
    if not pd.api.types.is_numeric_dtype(amount):
        amount = pd.to_numeric(
            amount.astype(str).str.replace(r"[$,\s]", "", regex=True),
            errors="coerce",
        )

    # Keep only rows with a positive amount; the original script treats
    # negative amounts as payments/credits.
    expenses = df.assign(Amount=amount)
    expenses = expenses[expenses["Amount"] > 0].copy()

    # Transform
    expenses["Expense"] = expenses["Description"].map(clean_description)
    expenses["Source"] = "American Express Cobalt"

    # Vectorised attribution: also works when no rows survive the filter,
    # where a row-wise DataFrame.apply would not return a Series.
    card_member = expenses["Card Member"].astype(str).str.upper()
    for column, who in (("Taylor Paid", TAYLOR_NAME), ("Anvita Paid", ANVITA_NAME)):
        paid_by_who = card_member.str.contains(who, regex=False, na=False)
        expenses[column] = expenses["Amount"].where(paid_by_who, 0.0).astype(float)

    # 0.6 for groceries, blank otherwise (kept as "" so the cell is empty).
    expenses["Taylor Portion"] = [
        0.6 if is_grocery(expense) else "" for expense in expenses["Expense"]
    ]

    out = expenses[
        ["Date", "Source", "Expense", "Taylor Paid", "Anvita Paid", "Taylor Portion"]
    ].copy()

    try:
        out.to_excel(args.output, index=False)
    except Exception as e:
        print(f"Error writing Excel: {e}", file=sys.stderr)
        sys.exit(1)
| 117 | + |
| 118 | + |
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
0 commit comments