"""
CSV/Bank Statement Import Routes for FINA
Handles file upload, parsing, duplicate detection, and category mapping
"""
from flask import Blueprint, request, jsonify
from flask_login import login_required, current_user
from werkzeug.utils import secure_filename
from app import db
from app.models import Expense, Category
from datetime import datetime, timedelta
from sqlalchemy import and_, or_
import csv
import io
import math
import re
import json
import zlib
from decimal import Decimal

bp = Blueprint('csv_import', __name__, url_prefix='/api/import')

# Upload size cap enforced by the /parse-csv route (10MB).
_MAX_UPLOAD_BYTES = 10 * 1024 * 1024

# UTF-8 byte-order mark; when present the file must be decoded with
# 'utf-8-sig' so the mark does not end up glued to the first header.
_UTF8_BOM = b'\xef\xbb\xbf'


class CSVParser:
    """Parse CSV files with auto-detection of format.

    Row-level problems met while parsing are collected in ``self.errors``
    as human-readable strings.
    """

    def __init__(self):
        self.errors = []

    def detect_delimiter(self, sample):
        """Return the candidate delimiter that occurs most often in *sample*.

        Candidates are ``,``, ``;``, tab and ``|``. When none occurs, the
        first candidate (``,``) is returned.
        """
        delimiters = [',', ';', '\t', '|']
        counts = {d: sample.count(d) for d in delimiters}
        return max(counts, key=counts.get)

    def detect_encoding(self, file_bytes):
        """Return the name of the first encoding that decodes *file_bytes*.

        A UTF-8 BOM selects ``utf-8-sig`` so the mark is stripped on decode.
        ``cp1252`` is tried before ``latin-1`` because ``latin-1`` accepts
        every byte sequence and would otherwise shadow it.
        """
        candidates = ['utf-8', 'cp1252', 'latin-1']
        if file_bytes.startswith(_UTF8_BOM):
            candidates.insert(0, 'utf-8-sig')

        for encoding in candidates:
            try:
                file_bytes.decode(encoding)
            except UnicodeDecodeError:
                continue
            return encoding
        # Not reachable while 'latin-1' is a candidate; kept as a safe default.
        return 'utf-8'

    @staticmethod
    def _matches_keyword(name, keywords):
        """Return True if the lower-cased header *name* matches any keyword.

        Keywords of three characters or fewer ('in', 'out') must match a
        whole word, otherwise they hit unrelated headers such as
        "posting date" or "running balance". Longer keywords match as
        substrings.
        """
        words = set(re.split(r'[\W_]+', name))
        for keyword in keywords:
            if len(keyword) <= 3:
                if keyword in words:
                    return True
            elif keyword in name:
                return True
        return False

    def detect_columns(self, headers):
        """Auto-detect which columns contain date, description, amount.

        Returns a dict with keys ``date``, ``description``, ``amount``,
        ``debit``, ``credit`` and ``category``; each value is a column
        index, or ``None`` when no header matched.
        """
        headers_lower = [h.lower().strip() if h else '' for h in headers]

        mapping = {
            'date': None,
            'description': None,
            'amount': None,
            'debit': None,
            'credit': None,
            'category': None
        }

        # Date column keywords
        date_keywords = ['date', 'data', 'fecha', 'datum', 'transaction date',
                         'trans date', 'posting date']
        for idx, name in enumerate(headers_lower):
            if any(keyword in name for keyword in date_keywords):
                mapping['date'] = idx
                break

        # Description column keywords - prioritize "name" for merchant/payee names
        # First try to find "name" column (commonly used for merchant/payee)
        for idx, name in enumerate(headers_lower):
            if name == 'name' or 'payee' in name or 'merchant name' in name:
                mapping['description'] = idx
                break

        # If no "name" column, look for other description columns
        if mapping['description'] is None:
            desc_keywords = ['description', 'descriere', 'descripción', 'details',
                             'detalii', 'merchant', 'comerciant', 'narrative',
                             'memo', 'particulars', 'transaction details']
            for idx, name in enumerate(headers_lower):
                if any(keyword in name for keyword in desc_keywords):
                    mapping['description'] = idx
                    break

        # Category column keywords (optional) - avoid generic "type" column that
        # contains payment types. Only use "category" explicitly, not "type"
        # which often contains payment methods.
        for idx, name in enumerate(headers_lower):
            if name == 'category' or 'categorie' in name or 'categoría' in name:
                mapping['category'] = idx
                break

        # Amount columns
        amount_keywords = ['amount', 'suma', 'monto', 'valoare', 'value']
        debit_keywords = ['debit', 'withdrawal', 'retragere', 'spent', 'expense',
                          'cheltuială', 'out', 'outflow', 'outgoing']
        credit_keywords = ['credit', 'deposit', 'depunere', 'income', 'venit',
                           'in', 'inflow', 'incoming']

        # Columns already claimed above must not be re-used as money columns.
        claimed = {mapping['date'], mapping['description'], mapping['category']}

        for idx, name in enumerate(headers_lower):
            if idx in claimed:
                continue
            # Secondary date columns ("value date") are not money columns
            # even though they contain an amount keyword.
            if any(keyword in name for keyword in date_keywords):
                continue
            if self._matches_keyword(name, debit_keywords):
                mapping['debit'] = idx
            elif self._matches_keyword(name, credit_keywords):
                mapping['credit'] = idx
            elif self._matches_keyword(name, amount_keywords) and mapping['amount'] is None:
                mapping['amount'] = idx

        return mapping

    def parse_date(self, date_str):
        """Parse date string in various formats.

        Returns a ``datetime.date``, or ``None`` when *date_str* is not a
        non-empty string or matches none of the known formats. Formats are
        tried in order, so an ambiguous ``01/02/2024`` is read day-first.
        """
        if not date_str or not isinstance(date_str, str):
            return None

        date_str = date_str.strip()
        if not date_str:
            return None

        # Common date formats
        formats = [
            '%d/%m/%Y', '%d-%m-%Y', '%Y-%m-%d', '%Y/%m/%d',
            '%d.%m.%Y', '%m/%d/%Y', '%d %b %Y', '%d %B %Y',
            '%Y%m%d', '%d-%b-%Y', '%d-%B-%Y', '%b %d, %Y',
            '%B %d, %Y', '%Y-%m-%d %H:%M:%S', '%d/%m/%Y %H:%M:%S'
        ]

        for fmt in formats:
            try:
                return datetime.strptime(date_str, fmt).date()
            except ValueError:
                continue

        return None

    def _parse_signed_amount(self, amount_str):
        """Parse an amount and keep its sign.

        Handles currency symbols, European (``1.234,56``) and US
        (``1,234.56``) separators, a trailing sign (``12.34-``), the Unicode
        minus sign and accounting parentheses (``(12.34)``). Returns ``0.0``
        for empty or unparseable input.
        """
        if not amount_str:
            return 0.0

        if isinstance(amount_str, (int, float)):
            return float(amount_str)

        text = str(amount_str).strip().replace('\u2212', '-')
        # Accounting notation wraps negative amounts in parentheses.
        in_parentheses = text.startswith('(') and text.endswith(')')

        # Remove currency symbols and spaces
        text = re.sub(r'[^\d.,\-+]', '', text)

        # Some exports put the sign after the number: "12.34-".
        if len(text) > 1 and text[-1] in '+-':
            text = text[-1] + text[:-1]

        if not any(ch.isdigit() for ch in text):
            return 0.0

        if ',' in text and '.' in text:
            if text.rfind(',') > text.rfind('.'):
                # European format: 1.234,56
                text = text.replace('.', '').replace(',', '.')
            else:
                # US format: 1,234.56
                text = text.replace(',', '')
        elif ',' in text:
            groups = text.split(',')
            if len(groups) == 2 and len(groups[1]) != 3:
                # A single comma not followed by exactly three digits is a
                # decimal comma: "1,5" or "12,50".
                text = text.replace(',', '.')
            else:
                # "1,234" or "1,234,567": thousands separators
                text = text.replace(',', '')
        elif text.count('.') > 1:
            # "1.234.567": dots can only be thousands separators here
            text = text.replace('.', '')

        try:
            value = float(text)
        except ValueError:
            return 0.0

        if not math.isfinite(value):
            return 0.0
        return -abs(value) if in_parentheses else value

    def parse_amount(self, amount_str):
        """Parse amount string to float.

        String input yields the absolute value; ``int``/``float`` input is
        returned as ``float`` with its sign untouched. Empty or unparseable
        input yields ``0.0``.
        """
        if isinstance(amount_str, (int, float)):
            return float(amount_str)
        return abs(self._parse_signed_amount(amount_str))

    def parse_csv(self, file_bytes):
        """Parse CSV file and extract transactions.

        Returns ``{'success': False, 'error': ...}`` on failure, otherwise a
        dict with ``success``, ``transactions``, ``total_found``,
        ``column_mapping`` (role -> header text) and ``errors``.
        """
        # Start clean so errors from an earlier call are not reported again.
        self.errors = []

        try:
            # Detect encoding
            encoding = self.detect_encoding(file_bytes)
            content = file_bytes.decode(encoding)

            # Detect delimiter
            first_line = content.split('\n')[0]
            delimiter = self.detect_delimiter(first_line)

            # Parse CSV; newline='' lets the csv module handle line endings.
            stream = io.StringIO(content, newline='')
            reader = csv.reader(stream, delimiter=delimiter)

            # Read headers
            headers = next(reader, None)
            if not headers:
                return {'success': False, 'error': 'CSV file is empty'}

            # Detect column mapping
            column_map = self.detect_columns(headers)

            if column_map['date'] is None:
                return {'success': False, 'error': 'Could not detect date column. Please ensure your CSV has a date column.'}

            if column_map['description'] is None:
                column_map['description'] = 1 if len(headers) > 1 else 0

            # Parse transactions; row numbers count data rows from 1.
            transactions = []
            for row_num, row in enumerate(reader, start=1):
                if not row:
                    continue

                try:
                    transaction = self.extract_transaction(row, column_map)
                    if transaction:
                        transactions.append(transaction)
                except Exception as e:
                    # Record the bad row and keep going with the rest.
                    self.errors.append(f"Row {row_num}: {str(e)}")

            return {
                'success': True,
                'transactions': transactions,
                'total_found': len(transactions),
                'column_mapping': {k: headers[v] if v is not None else None
                                   for k, v in column_map.items()},
                'errors': self.errors
            }

        except Exception as e:
            return {'success': False, 'error': f'Failed to parse CSV: {str(e)}'}

    def extract_transaction(self, row, column_map):
        """Extract transaction data from CSV row.

        Returns ``None`` when the row is too short for the mapped columns,
        has no parseable date, or has a zero amount. Otherwise returns a
        dict with ``date`` (ISO string), ``description``, ``amount``
        (positive, 2 decimals), ``type`` (``'expense'``/``'income'``) and
        ``bank_category``.
        """
        used_indexes = [v for v in column_map.values() if v is not None]
        # Every mapped column must exist in this row.
        if len(row) <= max(used_indexes, default=-1):
            return None

        # Parse date
        trans_date = self.parse_date(row[column_map['date']])
        if not trans_date:
            return None

        # Parse description
        desc_idx = column_map['description']
        if desc_idx is not None and desc_idx < len(row):
            description = row[desc_idx].strip()
        else:
            description = 'Transaction'
        if not description:
            description = 'Transaction'

        # Parse amount (handle debit/credit or single amount column)
        amount = 0.0
        trans_type = 'expense'

        if column_map['debit'] is not None and column_map['credit'] is not None:
            debit_val = self.parse_amount(row[column_map['debit']])
            credit_val = self.parse_amount(row[column_map['credit']])

            if debit_val > 0:
                amount = debit_val
                trans_type = 'expense'
            elif credit_val > 0:
                amount = credit_val
                trans_type = 'income'
        elif column_map['amount'] is not None:
            # The sign decides the type, so it must be read before abs():
            # negative amounts are expenses, positive are income.
            signed_val = self._parse_signed_amount(row[column_map['amount']])
            amount = abs(signed_val)
            trans_type = 'expense' if signed_val < 0 else 'income'

        amount = round(amount, 2)
        if amount == 0:
            return None

        # Get bank category if available
        bank_category = None
        if column_map['category'] is not None and column_map['category'] < len(row):
            bank_category = row[column_map['category']].strip()

        return {
            'date': trans_date.isoformat(),
            'description': description[:200],  # Limit description length
            'amount': amount,
            'type': trans_type,
            'bank_category': bank_category
        }


def _json_payload():
    """Return the request's JSON body as a dict, or ``{}`` if absent/invalid."""
    data = request.get_json(silent=True)
    return data if isinstance(data, dict) else {}


def _color_for_name(name):
    """Return a ``#rrggbb`` colour derived deterministically from *name*.

    Uses CRC32 rather than ``hash()``, whose value for strings changes
    between interpreter runs.
    """
    return '#' + format(zlib.crc32(name.encode('utf-8')) & 0xFFFFFF, '06x')


@bp.route('/parse-csv', methods=['POST'])
@login_required
def parse_csv():
    """
    Parse uploaded CSV file and return transactions for review
    Security: User must be authenticated, file size limited
    """
    if 'file' not in request.files:
        return jsonify({'success': False, 'error': 'No file uploaded'}), 400

    file = request.files['file']

    if not file or not file.filename:
        return jsonify({'success': False, 'error': 'No file selected'}), 400

    # Security: Validate filename
    filename = secure_filename(file.filename)
    if not filename.lower().endswith('.csv'):
        return jsonify({'success': False, 'error': 'Only CSV files are supported'}), 400

    # Security: Check file size (max 10MB). Read one byte past the cap so an
    # oversized upload is detected without buffering all of it.
    file_bytes = file.read(_MAX_UPLOAD_BYTES + 1)
    if len(file_bytes) > _MAX_UPLOAD_BYTES:
        return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB'}), 400

    # Parse CSV
    parser = CSVParser()
    result = parser.parse_csv(file_bytes)

    if not result['success']:
        return jsonify(result), 400

    return jsonify(result)


@bp.route('/detect-duplicates', methods=['POST'])
@login_required
def detect_duplicates():
    """
    Check for duplicate transactions in the database
    Security: Only checks current user's expenses

    A transaction is reported as a duplicate when an existing expense has
    the same amount, a date within 2 days, and shares at least 50% of the
    transaction's description words.
    """
    data = _json_payload()
    transactions = data.get('transactions', [])

    if not transactions or not isinstance(transactions, list):
        return jsonify({'success': False, 'error': 'No transactions provided'}), 400

    duplicates = []

    for trans in transactions:
        try:
            trans_date = datetime.fromisoformat(trans['date']).date()
            amount = float(trans['amount'])
            description = trans['description']
            desc_words = set(description.lower().split())
        except (KeyError, TypeError, ValueError, AttributeError):
            # Malformed entry: nothing to compare, skip it.
            continue

        if not desc_words:
            continue

        # Look for potential duplicates within ±2 days and exact amount
        date_start = trans_date - timedelta(days=2)
        date_end = trans_date + timedelta(days=2)

        # Security: Filter by current user only
        existing = Expense.query.filter(
            Expense.user_id == current_user.id,
            Expense.date >= date_start,
            Expense.date <= date_end,
            Expense.amount == amount
        ).all()

        # Check for similar descriptions
        for exp in existing:
            exp_words = set((exp.description or '').lower().split())

            # Check if at least 50% of words match
            overlap = len(desc_words.intersection(exp_words)) / len(desc_words)
            if overlap >= 0.5:
                duplicates.append({
                    'transaction': trans,
                    'existing': {
                        'id': exp.id,
                        'date': exp.date.isoformat(),
                        'description': exp.description,
                        'amount': float(exp.amount),
                        'category': exp.category.name if exp.category else None
                    },
                    'similarity': round(overlap * 100, 0)
                })
                break

    return jsonify({
        'success': True,
        'duplicates': duplicates,
        'duplicate_count': len(duplicates)
    })


@bp.route('/import', methods=['POST'])
@login_required
def import_transactions():
    """
    Import selected transactions into the database
    Security: Only imports to current user's account, validates all data

    The category for each transaction is looked up in ``category_mapping``
    by bank category, then by the transaction's index (as a string), and
    otherwise falls back to the user's first category.
    """
    data = _json_payload()
    transactions = data.get('transactions', [])
    category_mapping = data.get('category_mapping', {})
    skip_duplicates = data.get('skip_duplicates', False)

    if not transactions or not isinstance(transactions, list):
        return jsonify({'success': False, 'error': 'No transactions to import'}), 400

    if not isinstance(category_mapping, dict):
        category_mapping = {}

    imported = []
    skipped = []
    errors = []

    # Security: Get user's categories
    user_categories = {cat.id: cat for cat in Category.query.filter_by(user_id=current_user.id).all()}

    if not user_categories:
        return jsonify({'success': False, 'error': 'No categories found. Please create categories first.'}), 400

    # Get default category
    default_category_id = next(iter(user_categories))

    for idx, trans in enumerate(transactions):
        try:
            # Skip if marked as duplicate
            if skip_duplicates and trans.get('is_duplicate'):
                skipped.append({'transaction': trans, 'reason': 'Duplicate'})
                continue

            # Parse and validate data
            try:
                trans_date = datetime.fromisoformat(trans['date']).date()
            except (ValueError, KeyError, TypeError):
                errors.append({'transaction': trans, 'error': f'Invalid date: {trans.get("date", "missing")}'})
                continue

            try:
                amount = float(trans['amount'])
            except (ValueError, KeyError, TypeError):
                errors.append({'transaction': trans, 'error': f'Invalid amount: {trans.get("amount", "missing")}'})
                continue

            description = trans.get('description') or 'Transaction'

            # Validate amount; NaN and infinity pass a plain "<= 0" check,
            # so reject them explicitly.
            if not math.isfinite(amount) or amount <= 0:
                errors.append({'transaction': trans, 'error': f'Invalid amount: {amount}'})
                continue

            # Get category ID from mapping or bank category
            bank_category = trans.get('bank_category')

            # Try to get from explicit mapping
            if bank_category and bank_category in category_mapping:
                category_id = int(category_mapping[bank_category])
            elif str(idx) in category_mapping:
                category_id = int(category_mapping[str(idx)])
            else:
                category_id = default_category_id

            # Security: Verify category belongs to user
            if category_id not in user_categories:
                errors.append({'transaction': trans, 'error': f'Invalid category ID: {category_id}'})
                continue

            # Prepare tags with bank category if available
            tags = []
            if bank_category:
                tags.append(f'Import: {bank_category}')

            # Create expense
            expense = Expense(
                user_id=current_user.id,
                category_id=category_id,
                amount=amount,
                description=description,
                date=trans_date,
                currency=current_user.currency,
                tags=json.dumps(tags)
            )

            db.session.add(expense)
            imported.append({
                'date': trans_date.isoformat(),
                'description': description,
                'amount': amount,
                'category': user_categories[category_id].name
            })

        except Exception as e:
            # Report the failure for this entry and continue with the rest.
            errors.append({'transaction': trans, 'error': str(e)})

    # Commit all imports
    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        return jsonify({'success': False, 'error': f'Database error: {str(e)}'}), 500

    return jsonify({
        'success': True,
        'imported_count': len(imported),
        'skipped_count': len(skipped),
        'error_count': len(errors),
        'imported': imported,
        'skipped': skipped,
        'errors': errors
    })


@bp.route('/create-categories', methods=['POST'])
@login_required
def create_categories():
    """
    Create missing categories from CSV bank categories
    Security: Only creates for current user

    Responds with the created categories and a ``mapping`` from each
    submitted bank category name to the matching category ID. Names are
    matched against existing categories case-insensitively.
    """
    data = _json_payload()
    bank_categories = data.get('bank_categories', [])

    if not bank_categories or not isinstance(bank_categories, list):
        return jsonify({'success': False, 'error': 'No categories provided'}), 400

    # Get existing categories for user
    existing_cats = {cat.name.lower(): cat
                     for cat in Category.query.filter_by(user_id=current_user.id).all()
                     if cat.name}

    created = []
    mapping = {}

    try:
        # Highest display order in use; new categories are appended after it.
        max_order = db.session.query(db.func.max(Category.display_order)).filter(
            Category.user_id == current_user.id
        ).scalar() or 0

        for bank_cat in bank_categories:
            if not isinstance(bank_cat, str) or not bank_cat.strip():
                continue

            bank_cat_clean = bank_cat.strip()
            bank_cat_lower = bank_cat_clean.lower()

            # Check if category already exists
            if bank_cat_lower in existing_cats:
                mapping[bank_cat] = existing_cats[bank_cat_lower].id
                continue

            # Create new category
            max_order += 1
            new_cat = Category(
                user_id=current_user.id,
                name=bank_cat_clean,
                icon='category',
                color=_color_for_name(bank_cat_clean),  # Generate color from name
                display_order=max_order
            )
            db.session.add(new_cat)
            db.session.flush()  # Get ID without committing

            created.append({
                'name': bank_cat_clean,
                'id': new_cat.id
            })
            mapping[bank_cat] = new_cat.id
            existing_cats[bank_cat_lower] = new_cat

        db.session.commit()
    except Exception as e:
        db.session.rollback()
        return jsonify({'success': False, 'error': f'Failed to create categories: {str(e)}'}), 500

    return jsonify({
        'success': True,
        'created': created,
        'mapping': mapping,
        'message': f'Created {len(created)} new categories'
    })


@bp.route('/suggest-category', methods=['POST'])
@login_required
def suggest_category():
    """
    Suggest category mapping based on description and existing expenses
    Uses simple keyword matching and historical patterns

    Each of the user's 100 most recent expenses adds the number of words it
    shares with the description to its category's score; the highest
    scoring category is suggested. With no match the first category is
    returned with confidence 0.
    """
    data = _json_payload()
    description = str(data.get('description') or '').lower()

    if not description:
        return jsonify({'success': False, 'error': 'No description provided'}), 400

    # Security: Get only user's categories
    user_categories = Category.query.filter_by(user_id=current_user.id).all()

    if not user_categories:
        return jsonify({'success': False, 'error': 'No categories found. Please create categories first.'}), 400

    # Look for similar expenses in user's history
    similar_expenses = Expense.query.filter(
        Expense.user_id == current_user.id
    ).order_by(Expense.date.desc()).limit(100).all()

    # Score categories based on keyword matching
    categories_by_id = {cat.id: cat for cat in user_categories}
    category_scores = {cat_id: 0 for cat_id in categories_by_id}
    desc_words = set(description.split())

    for expense in similar_expenses:
        # Ignore expenses whose category is not one of the user's categories.
        if expense.category_id not in category_scores:
            continue

        # Simple word matching
        exp_words = set((expense.description or '').lower().split())
        category_scores[expense.category_id] += len(desc_words.intersection(exp_words))

    # Get best match
    best_category_id = max(category_scores, key=category_scores.get)
    best_score = category_scores[best_category_id]

    if best_score > 0:
        best_category = categories_by_id[best_category_id]
        return jsonify({
            'success': True,
            'suggested_category_id': best_category.id,
            'suggested_category_name': best_category.name,
            'confidence': min(100, best_score * 20)
        })

    # No match found, return first category
    return jsonify({
        'success': True,
        'suggested_category_id': user_categories[0].id,
        'suggested_category_name': user_categories[0].name,
        'confidence': 0
    })