Initial commit
commit 983cee0320

322 changed files with 57174 additions and 0 deletions

app/routes/csv_import.py (new file, 609 lines)
@@ -0,0 +1,609 @@
"""
|
||||
CSV/Bank Statement Import Routes for FINA
|
||||
Handles file upload, parsing, duplicate detection, and category mapping
|
||||
"""
|
||||
from flask import Blueprint, request, jsonify
|
||||
from flask_login import login_required, current_user
|
||||
from werkzeug.utils import secure_filename
|
||||
from app import db
|
||||
from app.models import Expense, Category
|
||||
from datetime import datetime, timedelta
|
||||
from sqlalchemy import and_, or_
|
||||
import csv
|
||||
import io
|
||||
import re
|
||||
import json
|
||||
from decimal import Decimal
|
||||
|
||||
bp = Blueprint('csv_import', __name__, url_prefix='/api/import')
|
||||
|
||||
|
||||


class CSVParser:
    """Parse CSV files with auto-detection of format"""

    def __init__(self):
        self.errors = []

    def detect_delimiter(self, sample):
        """Auto-detect CSV delimiter"""
        delimiters = [',', ';', '\t', '|']
        counts = {d: sample.count(d) for d in delimiters}
        return max(counts, key=counts.get)

    def detect_encoding(self, file_bytes):
        """Detect file encoding"""
        encodings = ['utf-8', 'utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1']
        for encoding in encodings:
            try:
                file_bytes.decode(encoding)
                return encoding
            except UnicodeDecodeError:
                continue
        return 'utf-8'
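
    # Illustrative behaviour of the two detectors above (sample values are
    # assumptions, not taken from a real bank export):
    #
    #   CSVParser().detect_delimiter('Date;Name;Amount')        # -> ';'
    #   CSVParser().detect_encoding('café'.encode('latin-1'))   # -> 'latin-1'
    #
    # Note that latin-1 can decode any byte sequence, so cp1252 and
    # iso-8859-1 after it in the list are effectively unreachable, and the
    # final 'utf-8' return is only a formal fallback.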

    def detect_columns(self, headers):
        """Auto-detect which columns contain date, description, amount"""
        headers_lower = [h.lower().strip() if h else '' for h in headers]

        mapping = {
            'date': None,
            'description': None,
            'amount': None,
            'debit': None,
            'credit': None,
            'category': None
        }

        # Date column keywords
        date_keywords = ['date', 'data', 'fecha', 'datum', 'transaction date', 'trans date', 'posting date']
        for idx, name in enumerate(headers_lower):
            if any(keyword in name for keyword in date_keywords):
                mapping['date'] = idx
                break

        # Description column: prefer a "name" column, commonly used for the
        # merchant/payee
        for idx, name in enumerate(headers_lower):
            if name == 'name' or 'payee' in name or 'merchant name' in name:
                mapping['description'] = idx
                break

        # If there is no "name" column, look for other description columns
        if mapping['description'] is None:
            desc_keywords = ['description', 'descriere', 'descripción', 'details', 'detalii', 'merchant',
                             'comerciant', 'narrative', 'memo', 'particulars', 'transaction details']
            for idx, name in enumerate(headers_lower):
                if any(keyword in name for keyword in desc_keywords):
                    mapping['description'] = idx
                    break

        # Category column (optional). Match "category" explicitly and avoid
        # the generic "type" column, which usually holds payment methods
        for idx, name in enumerate(headers_lower):
            if name == 'category' or 'categorie' in name or 'categoría' in name:
                mapping['category'] = idx
                break

        # Amount columns. 'in' and 'out' are matched as whole words so that
        # headers like "posting date" are not mistaken for a credit column
        amount_keywords = ['amount', 'suma', 'monto', 'valoare', 'value']
        debit_keywords = ['debit', 'withdrawal', 'retragere', 'spent', 'expense', 'cheltuială']
        credit_keywords = ['credit', 'deposit', 'depunere', 'income', 'venit']

        for idx, name in enumerate(headers_lower):
            if any(keyword in name for keyword in debit_keywords) or re.search(r'\bout\b', name):
                mapping['debit'] = idx
            elif any(keyword in name for keyword in credit_keywords) or re.search(r'\bin\b', name):
                mapping['credit'] = idx
            elif any(keyword in name for keyword in amount_keywords) and mapping['amount'] is None:
                mapping['amount'] = idx

        return mapping
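
    # Illustrative column detection (hypothetical headers):
    #
    #   CSVParser().detect_columns(['Date', 'Name', 'Category', 'Amount'])
    #   # -> {'date': 0, 'description': 1, 'amount': 3,
    #   #     'debit': None, 'credit': None, 'category': 2}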

    def parse_date(self, date_str):
        """Parse date string in various formats"""
        if not date_str or not isinstance(date_str, str):
            return None

        date_str = date_str.strip()
        if not date_str:
            return None

        # Common date formats
        formats = [
            '%d/%m/%Y', '%d-%m-%Y', '%Y-%m-%d', '%Y/%m/%d',
            '%d.%m.%Y', '%m/%d/%Y', '%d %b %Y', '%d %B %Y',
            '%Y%m%d', '%d-%b-%Y', '%d-%B-%Y', '%b %d, %Y',
            '%B %d, %Y', '%Y-%m-%d %H:%M:%S', '%d/%m/%Y %H:%M:%S'
        ]

        for fmt in formats:
            try:
                return datetime.strptime(date_str, fmt).date()
            except ValueError:
                continue

        return None
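
    # Format order matters for ambiguous dates: '%d/%m/%Y' is tried before
    # '%m/%d/%Y', so '03/04/2025' parses as 3 April, not 4 March. Examples:
    #
    #   CSVParser().parse_date('31.12.2024')  # -> datetime.date(2024, 12, 31)
    #   CSVParser().parse_date('not a date')  # -> None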

    def parse_amount(self, amount_str):
        """Parse an amount string to a float, preserving the sign"""
        if not amount_str:
            return 0.0

        if isinstance(amount_str, (int, float)):
            return float(amount_str)

        # Remove currency symbols and spaces
        amount_str = str(amount_str).strip()
        amount_str = re.sub(r'[^\d.,\-+]', '', amount_str)

        if not amount_str or amount_str == '-':
            return 0.0

        try:
            # Both separators present: the rightmost one is the decimal point
            if ',' in amount_str and '.' in amount_str:
                if amount_str.rfind(',') > amount_str.rfind('.'):
                    # European format: 1.234,56
                    amount_str = amount_str.replace('.', '').replace(',', '.')
                else:
                    # US format: 1,234.56
                    amount_str = amount_str.replace(',', '')
            elif ',' in amount_str:
                # Could be a European decimal (1,56) or US thousands (1,234)
                parts = amount_str.split(',')
                if len(parts[-1]) == 2:  # Likely a European decimal
                    amount_str = amount_str.replace(',', '.')
                else:  # Likely US thousands
                    amount_str = amount_str.replace(',', '')

            # Keep the sign (no abs() here) so callers can classify income
            # vs. expense; taking abs() at this point would make every
            # single-amount-column transaction look like income
            return float(amount_str)
        except (ValueError, AttributeError):
            return 0.0
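
    # Separator disambiguation, illustrated (values are made up):
    #
    #   CSVParser().parse_amount('1.234,56')  # -> 1234.56 (European)
    #   CSVParser().parse_amount('1,234.56')  # -> 1234.56 (US)
    #   CSVParser().parse_amount('-€42,00')   # -> -42.0 (symbols stripped)
    #
    # With a lone comma, a two-digit tail is read as a European decimal
    # ('1,23' -> 1.23) and anything else as US thousands ('1,234' -> 1234.0).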

    def parse_csv(self, file_bytes):
        """Parse CSV file bytes and extract transactions"""
        try:
            # Detect encoding
            encoding = self.detect_encoding(file_bytes)
            content = file_bytes.decode(encoding)

            # Detect delimiter from the header line
            first_line = content.split('\n')[0]
            delimiter = self.detect_delimiter(first_line)

            # Parse CSV
            stream = io.StringIO(content)
            reader = csv.reader(stream, delimiter=delimiter)

            # Read headers
            headers = next(reader, None)
            if not headers:
                return {'success': False, 'error': 'CSV file is empty'}

            # Detect column mapping
            column_map = self.detect_columns(headers)

            if column_map['date'] is None:
                return {'success': False, 'error': 'Could not detect date column. Please ensure your CSV has a date column.'}

            if column_map['description'] is None:
                column_map['description'] = 1 if len(headers) > 1 else 0

            # Parse transactions
            transactions = []
            row_num = 0

            for row in reader:
                row_num += 1

                if not row:
                    continue

                try:
                    transaction = self.extract_transaction(row, column_map)
                    if transaction:
                        transactions.append(transaction)
                except Exception as e:
                    self.errors.append(f"Row {row_num}: {str(e)}")

            return {
                'success': True,
                'transactions': transactions,
                'total_found': len(transactions),
                'column_mapping': {k: headers[v] if v is not None else None for k, v in column_map.items()},
                'errors': self.errors
            }

        except Exception as e:
            return {'success': False, 'error': f'Failed to parse CSV: {str(e)}'}

    def extract_transaction(self, row, column_map):
        """Extract transaction data from a CSV row"""
        if len(row) <= max(v for v in column_map.values() if v is not None):
            return None

        # Parse date
        date_idx = column_map['date']
        trans_date = self.parse_date(row[date_idx])
        if not trans_date:
            return None

        # Parse description
        desc_idx = column_map['description']
        description = row[desc_idx].strip() if desc_idx is not None and desc_idx < len(row) else 'Transaction'
        if not description:
            description = 'Transaction'

        # Parse amount (handle debit/credit columns or a single amount column)
        amount = 0.0
        trans_type = 'expense'

        if column_map['debit'] is not None and column_map['credit'] is not None:
            # Separate columns already imply the direction, so ignore the sign
            debit_val = abs(self.parse_amount(row[column_map['debit']] if column_map['debit'] < len(row) else '0'))
            credit_val = abs(self.parse_amount(row[column_map['credit']] if column_map['credit'] < len(row) else '0'))

            if debit_val > 0:
                amount = debit_val
                trans_type = 'expense'
            elif credit_val > 0:
                amount = credit_val
                trans_type = 'income'
        elif column_map['amount'] is not None:
            amount_val = self.parse_amount(row[column_map['amount']] if column_map['amount'] < len(row) else '0')
            amount = abs(amount_val)
            # Negative or zero amounts are expenses, positive are income
            trans_type = 'expense' if amount_val <= 0 else 'income'

        if amount == 0:
            return None

        # Get bank category if available
        bank_category = None
        if column_map['category'] is not None and column_map['category'] < len(row):
            bank_category = row[column_map['category']].strip()

        return {
            'date': trans_date.isoformat(),
            'description': description[:200],  # Limit description length
            'amount': round(amount, 2),
            'type': trans_type,
            'bank_category': bank_category
        }


@bp.route('/parse-csv', methods=['POST'])
@login_required
def parse_csv():
    """
    Parse an uploaded CSV file and return transactions for review.

    Security: user must be authenticated; file size is limited.
    """
    if 'file' not in request.files:
        return jsonify({'success': False, 'error': 'No file uploaded'}), 400

    file = request.files['file']

    if not file or not file.filename:
        return jsonify({'success': False, 'error': 'No file selected'}), 400

    # Security: Validate filename
    filename = secure_filename(file.filename)
    if not filename.lower().endswith('.csv'):
        return jsonify({'success': False, 'error': 'Only CSV files are supported'}), 400

    # Security: Check file size (max 10MB)
    file_bytes = file.read()
    if len(file_bytes) > 10 * 1024 * 1024:
        return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB'}), 400

    # Parse CSV
    parser = CSVParser()
    result = parser.parse_csv(file_bytes)

    if not result['success']:
        return jsonify(result), 400

    return jsonify(result)
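

# Illustrative request (the endpoint path comes from the blueprint above;
# host, port, and session cookie are assumptions):
#
#   curl -X POST http://localhost:5000/api/import/parse-csv \
#        -b cookies.txt -F "file=@statement.csv"
#
# A successful response carries 'transactions', 'total_found',
# 'column_mapping', and any per-row 'errors'.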


@bp.route('/detect-duplicates', methods=['POST'])
@login_required
def detect_duplicates():
    """
    Check for duplicate transactions in the database.

    Security: only checks the current user's expenses.
    """
    data = request.get_json()
    transactions = data.get('transactions', [])

    if not transactions:
        return jsonify({'success': False, 'error': 'No transactions provided'}), 400

    duplicates = []

    for trans in transactions:
        try:
            trans_date = datetime.fromisoformat(trans['date']).date()
            amount = float(trans['amount'])
            description = trans['description']

            # Look for potential duplicates within ±2 days with the exact amount
            date_start = trans_date - timedelta(days=2)
            date_end = trans_date + timedelta(days=2)

            # Security: Filter by current user only
            existing = Expense.query.filter(
                Expense.user_id == current_user.id,
                Expense.date >= date_start,
                Expense.date <= date_end,
                Expense.amount == amount
            ).all()

            # Check for similar descriptions
            for exp in existing:
                desc_lower = description.lower()
                exp_desc_lower = exp.description.lower()

                # Flag a duplicate when at least 50% of the incoming
                # transaction's words appear in the existing description
                desc_words = set(desc_lower.split())
                exp_words = set(exp_desc_lower.split())

                if len(desc_words) > 0:
                    overlap = len(desc_words.intersection(exp_words)) / len(desc_words)
                    if overlap >= 0.5:
                        duplicates.append({
                            'transaction': trans,
                            'existing': {
                                'id': exp.id,
                                'date': exp.date.isoformat(),
                                'description': exp.description,
                                'amount': float(exp.amount),
                                'category': exp.category.name if exp.category else None
                            },
                            'similarity': round(overlap * 100, 0)
                        })
                        break
        except Exception:
            # Skip malformed transactions rather than failing the whole batch
            continue

    return jsonify({
        'success': True,
        'duplicates': duplicates,
        'duplicate_count': len(duplicates)
    })
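

# Illustrative payload (values hypothetical):
#
#   POST /api/import/detect-duplicates
#   {"transactions": [{"date": "2025-01-15", "amount": 42.5,
#                      "description": "LIDL BUCURESTI"}]}
#
# Each reported duplicate pairs the incoming transaction with the matching
# existing expense plus a word-overlap 'similarity' percentage.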


@bp.route('/import', methods=['POST'])
@login_required
def import_transactions():
    """
    Import selected transactions into the database.

    Security: only imports into the current user's account and validates all data.
    """
    data = request.get_json()
    transactions = data.get('transactions', [])
    category_mapping = data.get('category_mapping', {})
    skip_duplicates = data.get('skip_duplicates', False)

    if not transactions:
        return jsonify({'success': False, 'error': 'No transactions to import'}), 400

    imported = []
    skipped = []
    errors = []

    # Security: Get the user's categories
    user_categories = {cat.id: cat for cat in Category.query.filter_by(user_id=current_user.id).all()}

    if not user_categories:
        return jsonify({'success': False, 'error': 'No categories found. Please create categories first.'}), 400

    # Fall back to the user's first category when no mapping applies
    default_category_id = list(user_categories.keys())[0]

    for idx, trans in enumerate(transactions):
        try:
            # Skip if marked as duplicate
            if skip_duplicates and trans.get('is_duplicate'):
                skipped.append({'transaction': trans, 'reason': 'Duplicate'})
                continue

            # Parse and validate data
            try:
                trans_date = datetime.fromisoformat(trans['date']).date()
            except (ValueError, KeyError):
                errors.append({'transaction': trans, 'error': f'Invalid date: {trans.get("date", "missing")}'})
                continue

            try:
                amount = float(trans['amount'])
            except (ValueError, KeyError, TypeError):
                errors.append({'transaction': trans, 'error': f'Invalid amount: {trans.get("amount", "missing")}'})
                continue

            description = trans.get('description', 'Transaction')

            # Validate amount
            if amount <= 0:
                errors.append({'transaction': trans, 'error': f'Invalid amount: {amount}'})
                continue

            # Resolve the category: explicit mapping by bank category name
            # first, then by row index, then the default
            category_id = None
            bank_category = trans.get('bank_category')

            if bank_category and bank_category in category_mapping:
                category_id = int(category_mapping[bank_category])
            elif str(idx) in category_mapping:
                category_id = int(category_mapping[str(idx)])
            else:
                category_id = default_category_id

            # Security: Verify the category belongs to the user
            if category_id not in user_categories:
                errors.append({'transaction': trans, 'error': f'Invalid category ID: {category_id}'})
                continue

            # Tag the expense with the bank category if available
            tags = []
            if bank_category:
                tags.append(f'Import: {bank_category}')

            # Create expense
            expense = Expense(
                user_id=current_user.id,
                category_id=category_id,
                amount=amount,
                description=description,
                date=trans_date,
                currency=current_user.currency,
                tags=json.dumps(tags)
            )

            db.session.add(expense)
            imported.append({
                'date': trans_date.isoformat(),
                'description': description,
                'amount': amount,
                'category': user_categories[category_id].name
            })

        except Exception as e:
            errors.append({'transaction': trans, 'error': str(e)})

    # Commit all imports in one transaction
    try:
        db.session.commit()
        return jsonify({
            'success': True,
            'imported_count': len(imported),
            'skipped_count': len(skipped),
            'error_count': len(errors),
            'imported': imported,
            'skipped': skipped,
            'errors': errors
        })
    except Exception as e:
        db.session.rollback()
        return jsonify({'success': False, 'error': f'Database error: {str(e)}'}), 500
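

# Illustrative payload (category IDs hypothetical). 'category_mapping' may be
# keyed by bank category name or by row index as a string:
#
#   POST /api/import/import
#   {"transactions": [ ...rows from /parse-csv... ],
#    "category_mapping": {"Groceries": 3, "0": 5},
#    "skip_duplicates": true}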


@bp.route('/create-categories', methods=['POST'])
@login_required
def create_categories():
    """
    Create missing categories from the CSV's bank categories.

    Security: only creates categories for the current user.
    """
    data = request.get_json()
    bank_categories = data.get('bank_categories', [])

    if not bank_categories:
        return jsonify({'success': False, 'error': 'No categories provided'}), 400

    # Get the user's existing categories, keyed by lowercased name
    existing_cats = {cat.name.lower(): cat for cat in Category.query.filter_by(user_id=current_user.id).all()}

    created = []
    mapping = {}

    for bank_cat in bank_categories:
        if not bank_cat or not bank_cat.strip():
            continue

        bank_cat_clean = bank_cat.strip()
        bank_cat_lower = bank_cat_clean.lower()

        # Reuse the category if it already exists (case-insensitive)
        if bank_cat_lower in existing_cats:
            mapping[bank_cat] = existing_cats[bank_cat_lower].id
        else:
            # Create a new category
            max_order = db.session.query(db.func.max(Category.display_order)).filter_by(user_id=current_user.id).scalar() or 0
            new_cat = Category(
                user_id=current_user.id,
                name=bank_cat_clean,
                icon='category',
                # Derive a color from the name; note Python's hash() is salted
                # per process, so the color is not stable across restarts
                color='#' + format(hash(bank_cat_clean) % 0xFFFFFF, '06x'),
                display_order=max_order + 1
            )
            db.session.add(new_cat)
            db.session.flush()  # Assigns an ID without committing

            created.append({
                'name': bank_cat_clean,
                'id': new_cat.id
            })
            mapping[bank_cat] = new_cat.id
            existing_cats[bank_cat_lower] = new_cat

    try:
        db.session.commit()
        return jsonify({
            'success': True,
            'created': created,
            'mapping': mapping,
            'message': f'Created {len(created)} new categories'
        })
    except Exception as e:
        db.session.rollback()
        return jsonify({'success': False, 'error': f'Failed to create categories: {str(e)}'}), 500
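

# Illustrative request/response (IDs hypothetical):
#
#   POST /api/import/create-categories
#   {"bank_categories": ["Groceries", "Transport"]}
#   -> {"success": true, "created": [...],
#       "mapping": {"Groceries": 7, "Transport": 8}}
#
# The returned 'mapping' can be passed to /api/import/import as
# 'category_mapping'.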


@bp.route('/suggest-category', methods=['POST'])
@login_required
def suggest_category():
    """
    Suggest a category mapping based on the description and existing expenses.

    Uses simple keyword matching against historical patterns.
    """
    data = request.get_json()
    description = data.get('description', '').lower()
    bank_category = data.get('bank_category', '').lower()  # currently unused in scoring

    if not description:
        return jsonify({'success': False, 'error': 'No description provided'}), 400

    # Security: Get only the user's categories
    user_categories = Category.query.filter_by(user_id=current_user.id).all()
    if not user_categories:
        return jsonify({'success': False, 'error': 'No categories found. Please create categories first.'}), 400

    # Look at the user's recent expense history
    similar_expenses = Expense.query.filter(
        Expense.user_id == current_user.id
    ).order_by(Expense.date.desc()).limit(100).all()

    # Score categories by word overlap between descriptions
    category_scores = {cat.id: 0 for cat in user_categories}

    for expense in similar_expenses:
        exp_desc = expense.description.lower()

        # Simple word matching
        desc_words = set(description.split())
        exp_words = set(exp_desc.split())
        overlap = len(desc_words.intersection(exp_words))

        if overlap > 0 and expense.category_id in category_scores:
            category_scores[expense.category_id] += overlap

    # Return the best match, if any expense overlapped at all
    if max(category_scores.values()) > 0:
        best_category_id = max(category_scores, key=category_scores.get)
        best_category = next(cat for cat in user_categories if cat.id == best_category_id)

        return jsonify({
            'success': True,
            'suggested_category_id': best_category.id,
            'suggested_category_name': best_category.name,
            'confidence': min(100, category_scores[best_category_id] * 20)
        })

    # No match found: fall back to the user's first category, zero confidence
    return jsonify({
        'success': True,
        'suggested_category_id': user_categories[0].id,
        'suggested_category_name': user_categories[0].name,
        'confidence': 0
    })