Skip to content

Commit eb2836f

Browse files
committed
Add initial implementation of process-amex-expenses
1 parent 4de2eba commit eb2836f

File tree

3 files changed

+122
-0
lines changed

3 files changed

+122
-0
lines changed

requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
# Add your project dependencies here
2+
pandas
3+
openpyxl

scripts/process-amex-expenses.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Process American Express activity CSV into a cleaned Excel file with columns:
4+
- Date (transaction date)
5+
- Source ("American Express Cobalt")
6+
- Expense (cleaned description)
7+
- Taylor Paid (amount if Card Member is Taylor Curran; else 0)
8+
- Anvita Paid (amount if Card Member is Anvita Akkur; else 0)
9+
- Taylor Portion (0.6 if groceries; else blank)
10+
11+
Usage:
12+
python scripts/process-amex-expenses.py --input activity.csv --output amex_expenses_full.xlsx
13+
"""
14+
15+
import argparse
16+
import re
17+
import sys
18+
from typing import Iterable
19+
20+
import pandas as pd
21+
22+
23+
# Upper-case fragments looked for inside a cleaned merchant description to
# decide whether a transaction counts as groceries. Several merchants appear
# in more than one spelling; extend the tuple as new patterns show up.
GROCERY_KEYWORDS: Iterable[str] = (
    "SAVE ON", "SAVE-ON", "SAVEON",
    "WHOLE FOODS", "WHOLEFOODS",
    "SAFEWAY",
    "NO FRILLS", "NOFRILLS",
    "REAL CANADIAN SUPERSTORE", "SUPERSTORE",
    "THRIFTY FOODS", "THRIFTY",
    "WALMART SUPERCENTER", "WALMART SUPERCENTRE",
    "COSTCO WHOLESALE",
    "CHOICES MARKETS",
    "URBAN FARE",
    "IGA",
)

# Upper-case name fragments matched against the "Card Member" column to
# attribute each charge to one of the two card holders.
TAYLOR_NAME = "TAYLOR"
ANVITA_NAME = "ANVITA"
47+
48+
49+
def clean_description(desc: str) -> str:
    """Return a simplified merchant description.

    Removes store numbers ("#12345"), URLs, phone-like digit groups and long
    digit runs, then collapses all whitespace to single spaces.

    Args:
        desc: Raw description text. Any non-string value (for example the
            float NaN pandas uses for an empty cell) yields "".

    Returns:
        The cleaned description with no leading or trailing whitespace.
    """
    if not isinstance(desc, str):
        return ""

    # Collapse column padding first. The phone pattern below accepts
    # whitespace between digits, so a short number followed by a run of
    # padding spaces would otherwise be mistaken for a phone number.
    s = " ".join(desc.split())
    s = re.sub(r"#\d+", "", s)  # store numbers like "#12345"
    s = re.sub(r"http\S+", "", s, flags=re.IGNORECASE)  # URLs
    # Phone-like groups. The optional "(" keeps "(604) 555-1234" from leaving
    # a stray parenthesis behind; replacing with a space keeps the
    # neighbouring tokens from being glued together.
    s = re.sub(r"\+?\(?\d[\d\-\s\(\)]{6,}", " ", s)
    s = re.sub(r"\b\d{7,}\b", "", s)  # long digit runs
    return " ".join(s.split())
61+
62+
63+
def is_grocery(merchant: str) -> bool:
    """Return True if *merchant* contains a known grocery keyword.

    A keyword only counts when it starts at a token boundary, i.e. it is not
    immediately preceded by a letter or digit. This stops short keywords such
    as "IGA" from matching inside unrelated words ("CIGAR", "BRIGADE"). The
    end of the keyword is left open so run-together names such as
    "SAVEONFOODS" are still recognised.

    Args:
        merchant: Cleaned merchant description. Non-string values yield False.

    Returns:
        True when any entry of GROCERY_KEYWORDS is found, case-insensitively.
    """
    if not isinstance(merchant, str):
        return False
    text = merchant.upper()
    return any(
        re.search(r"(?<![A-Z0-9])" + re.escape(keyword.upper()), text) is not None
        for keyword in GROCERY_KEYWORDS
    )
69+
70+
71+
def _parse_amounts(raw: pd.Series) -> pd.Series:
    """Return *raw* as numbers, tolerating "$" signs and thousands separators."""
    if pd.api.types.is_numeric_dtype(raw):
        return raw
    # read_csv leaves the column as strings when any value looks like
    # "$1,234.56"; strip the decoration before converting. Values that still
    # cannot be parsed become NaN so the caller can report them.
    text = raw.astype(str).str.replace(r"[$,\s]", "", regex=True)
    return pd.to_numeric(text, errors="coerce")


def main():
    """Convert an Amex activity CSV into the cleaned expenses workbook.

    Reads the CSV given by --input, keeps rows with a positive Amount, derives
    the Source / Expense / Taylor Paid / Anvita Paid / Taylor Portion columns
    and writes the result to the Excel file given by --output.

    Exits with status 1 (message on stderr) when the CSV cannot be read, a
    required column is missing, or the Excel file cannot be written.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", "-i", required=True, help="Path to Amex activity CSV")
    parser.add_argument("--output", "-o", required=True, help="Path to output Excel file")
    args = parser.parse_args()

    # Read CSV (expects columns like: Date, Date Processed, Description,
    # Card Member, Account #, Amount).
    try:
        df = pd.read_csv(args.input)
    except Exception as e:
        print(f"Error reading input CSV: {e}", file=sys.stderr)
        sys.exit(1)

    # Validate every required column before touching the data.
    for col in ("Amount", "Date", "Description", "Card Member"):
        if col not in df.columns:
            print(f"Input CSV missing '{col}' column.", file=sys.stderr)
            sys.exit(1)

    # Comparing a string column with 0 raises TypeError, so coerce first and
    # tell the user about rows whose amount could not be understood.
    amounts = _parse_amounts(df["Amount"])
    unparseable = int((amounts.isna() & df["Amount"].notna()).sum())
    if unparseable:
        print(
            f"Warning: skipping {unparseable} row(s) with an unparseable Amount.",
            file=sys.stderr,
        )
    df = df.assign(Amount=amounts)

    # Keep only expenses (positive amounts). Payments/credits are negative in
    # the Amex export.
    expenses = df[df["Amount"] > 0].copy()

    # Transform
    expenses["Expense"] = expenses["Description"].apply(clean_description)
    expenses["Source"] = "American Express Cobalt"

    # Attribute each charge to whoever's name appears in "Card Member";
    # the other person's column gets 0.0.
    card_member = expenses["Card Member"].astype(str).str.upper()
    amount = expenses["Amount"].astype(float)
    expenses["Taylor Paid"] = amount.where(
        card_member.str.contains(TAYLOR_NAME, regex=False), 0.0
    )
    expenses["Anvita Paid"] = amount.where(
        card_member.str.contains(ANVITA_NAME, regex=False), 0.0
    )
    expenses["Taylor Portion"] = expenses["Expense"].apply(
        lambda x: 0.6 if is_grocery(x) else ""
    )

    out = expenses[
        ["Date", "Source", "Expense", "Taylor Paid", "Anvita Paid", "Taylor Portion"]
    ].copy()

    try:
        out.to_excel(args.output, index=False)
    except Exception as e:
        print(f"Error writing Excel: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()

scripts/process-amex.py

Whitespace-only changes.

0 commit comments

Comments
 (0)