Initial commit

2025-12-26 00:52:56 +00:00 · 2025-12-26 00:52:56 +00:00 · 983cee0320
commit 983cee0320
322 changed files with 57174 additions and 0 deletions
--- a/app/app/predictions.py
+++ b/app/app/predictions.py
@ -0,0 +1,373 @@
+"""
+Spending Predictions Module
+Analyzes historical spending patterns and predicts future expenses
+"""
+
+from app import db
+from app.models.category import Category, Expense
+from sqlalchemy import extract, func
+from datetime import datetime, timedelta
+from collections import defaultdict
+import statistics
+
+
+def get_spending_predictions(user_id, months_ahead=3):
+    """
+    Predict spending for the next X months based on historical data
+    
+    Args:
+        user_id: User ID to generate predictions for
+        months_ahead: Number of months to predict (default: 3)
+    
+    Returns:
+        dict with predictions per category and total
+    """
+    categories = Category.query.filter_by(user_id=user_id).all()
+    
+    predictions = {
+        'by_category': {},
+        'total_months': 0,
+        'insights': []
+    }
+    
+    current_date = datetime.now()
+    total_predicted = 0
+    total_months_data = []
+    
+    for category in categories:
+        category_prediction = predict_category_spending(
+            category, 
+            current_date, 
+            months_ahead
+        )
+        
+        if category_prediction['predicted_amount'] > 0:
+            # Add category_id for API calls
+            category_prediction['category_id'] = category.id
+            predictions['by_category'][category.name] = category_prediction
+            total_predicted += category_prediction['predicted_amount']
+            total_months_data.append(category_prediction['historical_months'])
+    
+    # Calculate overall statistics
+    if predictions['by_category']:
+        avg_months = sum(total_months_data) / len(total_months_data)
+        predictions['total_months'] = int(avg_months)
+        
+        # Determine overall confidence
+        if avg_months >= 6:
+            overall_confidence = 'high'
+        elif avg_months >= 3:
+            overall_confidence = 'medium'
+        else:
+            overall_confidence = 'low'
+        
+        # Determine overall trend
+        increasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'increasing')
+        decreasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'decreasing')
+        
+        if increasing > decreasing:
+            overall_trend = 'increasing'
+        elif decreasing > increasing:
+            overall_trend = 'decreasing'
+        else:
+            overall_trend = 'stable'
+        
+        predictions['total'] = {
+            'amount': round(total_predicted, 2),
+            'confidence': overall_confidence,
+            'trend': overall_trend,
+            'months_of_data': int(avg_months)
+        }
+    else:
+        predictions['total_months'] = 0
+        predictions['total'] = {
+            'amount': 0,
+            'confidence': 'none',
+            'trend': 'stable',
+            'months_of_data': 0
+        }
+    
+    # Generate insights
+    predictions['insights'] = generate_insights(predictions['by_category'], current_date)
+    
+    return predictions
+
+
+def predict_category_spending(category, current_date, months_ahead=3):
+    """
+    Predict spending for a specific category
+    
+    Uses weighted average with more recent months having higher weight
+    """
+    # Get last 12 months of data
+    twelve_months_ago = current_date - timedelta(days=365)
+    
+    monthly_spending = db.session.query(
+        extract('year', Expense.date).label('year'),
+        extract('month', Expense.date).label('month'),
+        func.sum(Expense.amount).label('total')
+    ).filter(
+        Expense.category_id == category.id,
+        Expense.date >= twelve_months_ago
+    ).group_by('year', 'month').all()
+    
+    if not monthly_spending:
+        return {
+            'predicted_amount': 0,
+            'historical_average': 0,
+            'trend': 'none',
+            'historical_months': 0,
+            'confidence': 'none'
+        }
+    
+    # Extract amounts and calculate statistics
+    amounts = [float(row.total) for row in monthly_spending]
+    historical_months = len(amounts)
+    
+    # Calculate weighted average (recent months have more weight)
+    weights = list(range(1, len(amounts) + 1))
+    weighted_avg = sum(a * w for a, w in zip(amounts, weights)) / sum(weights)
+    
+    # Calculate trend
+    if len(amounts) >= 3:
+        first_half = sum(amounts[:len(amounts)//2]) / (len(amounts)//2)
+        second_half = sum(amounts[len(amounts)//2:]) / (len(amounts) - len(amounts)//2)
+        
+        if second_half > first_half * 1.1:
+            trend = 'increasing'
+        elif second_half < first_half * 0.9:
+            trend = 'decreasing'
+        else:
+            trend = 'stable'
+    else:
+        trend = 'stable'
+    
+    # Adjust prediction based on trend
+    if trend == 'increasing':
+        predicted_amount = weighted_avg * 1.05  # 5% increase
+    elif trend == 'decreasing':
+        predicted_amount = weighted_avg * 0.95  # 5% decrease
+    else:
+        predicted_amount = weighted_avg
+    
+    # Multiply by months ahead
+    predicted_total = predicted_amount * months_ahead
+    
+    # Calculate confidence based on data consistency
+    if len(amounts) >= 3:
+        std_dev = statistics.stdev(amounts)
+        avg = statistics.mean(amounts)
+        coefficient_of_variation = std_dev / avg if avg > 0 else 1
+        
+        if coefficient_of_variation < 0.3:
+            confidence = 'high'
+        elif coefficient_of_variation < 0.6:
+            confidence = 'medium'
+        else:
+            confidence = 'low'
+    else:
+        confidence = 'low'
+    
+    return {
+        'predicted_amount': round(predicted_total, 2),
+        'monthly_average': round(predicted_amount, 2),
+        'historical_average': round(statistics.mean(amounts), 2),
+        'trend': trend,
+        'historical_months': historical_months,
+        'confidence': confidence,
+        'min': round(min(amounts), 2),
+        'max': round(max(amounts), 2)
+    }
+
+
+def generate_insights(category_predictions, current_date):
+    """Generate human-readable insights from predictions"""
+    insights = []
+    
+    # Find categories with increasing trends
+    increasing = [
+        name for name, pred in category_predictions.items() 
+        if pred['trend'] == 'increasing'
+    ]
+    if increasing:
+        insights.append({
+            'type': 'warning',
+            'message': f"Spending is increasing in: {', '.join(increasing)}"
+        })
+    
+    # Find categories with high spending
+    sorted_by_amount = sorted(
+        category_predictions.items(), 
+        key=lambda x: x[1]['predicted_amount'], 
+        reverse=True
+    )
+    
+    if sorted_by_amount:
+        top_category = sorted_by_amount[0]
+        insights.append({
+            'type': 'info',
+            'message': f"Highest predicted spending: {top_category[0]}"
+        })
+    
+    # Find categories with high confidence
+    high_confidence = [
+        name for name, pred in category_predictions.items() 
+        if pred['confidence'] == 'high'
+    ]
+    if len(high_confidence) >= 3:
+        insights.append({
+            'type': 'success',
+            'message': f"High prediction accuracy for {len(high_confidence)} categories"
+        })
+    
+    # Seasonal insight (simple check)
+    current_month = current_date.month
+    if current_month in [11, 12]:  # November, December
+        insights.append({
+            'type': 'info',
+            'message': "Holiday season - spending typically increases"
+        })
+    elif current_month in [1, 2]:  # January, February
+        insights.append({
+            'type': 'info',
+            'message': "Post-holiday period - spending may decrease"
+        })
+    
+    return insights
+
+
+def get_category_forecast(category_id, user_id, months=6):
+    """
+    Get detailed forecast for a specific category
+    
+    Returns monthly predictions for next N months
+    """
+    category = Category.query.filter_by(
+        id=category_id, 
+        user_id=user_id
+    ).first()
+    
+    if not category:
+        return None
+    
+    current_date = datetime.now()
+    
+    # Get historical monthly data
+    twelve_months_ago = current_date - timedelta(days=365)
+    
+    monthly_data = db.session.query(
+        extract('year', Expense.date).label('year'),
+        extract('month', Expense.date).label('month'),
+        func.sum(Expense.amount).label('total')
+    ).filter(
+        Expense.category_id == category_id,
+        Expense.date >= twelve_months_ago
+    ).group_by('year', 'month').order_by('year', 'month').all()
+    
+    if not monthly_data:
+        return {
+            'category_name': category.name,
+            'forecast': [],
+            'message': 'Not enough data for predictions'
+        }
+    
+    # Calculate base prediction
+    amounts = [float(row.total) for row in monthly_data]
+    avg_spending = statistics.mean(amounts)
+    
+    # Generate forecast for next months
+    forecast = []
+    for i in range(1, months + 1):
+        future_date = current_date + timedelta(days=30 * i)
+        
+        # Simple seasonal adjustment based on month
+        seasonal_factor = get_seasonal_factor(future_date.month)
+        predicted = avg_spending * seasonal_factor
+        
+        forecast.append({
+            'month': future_date.strftime('%B %Y'),
+            'month_num': future_date.month,
+            'year': future_date.year,
+            'predicted_amount': round(predicted, 2)
+        })
+    
+    return {
+        'category_name': category.name,
+        'category_color': category.color,
+        'historical_average': round(avg_spending, 2),
+        'forecast': forecast
+    }
+
+
+def get_seasonal_factor(month):
+    """
+    Get seasonal adjustment factor based on month
+    
+    This is a simplified version - could be made more sophisticated
+    with actual historical data analysis
+    """
+    # Holiday months (Nov, Dec) typically have higher spending
+    # Summer months might vary by category
+    factors = {
+        1: 0.9,   # January - post-holiday slowdown
+        2: 0.95,  # February
+        3: 1.0,   # March
+        4: 1.0,   # April
+        5: 1.05,  # May
+        6: 1.05,  # June - summer
+        7: 1.05,  # July - summer
+        8: 1.0,   # August
+        9: 1.0,   # September - back to school
+        10: 1.05, # October
+        11: 1.1,  # November - holidays starting
+        12: 1.15  # December - peak holiday
+    }
+    return factors.get(month, 1.0)
+
+
+def compare_with_predictions(user_id, month=None, year=None):
+    """
+    Compare actual spending with predictions
+    
+    Useful for showing accuracy of predictions
+    """
+    if month is None:
+        month = datetime.now().month
+    if year is None:
+        year = datetime.now().year
+    
+    categories = Category.query.filter_by(user_id=user_id).all()
+    
+    comparison = {
+        'month': month,
+        'year': year,
+        'categories': {}
+    }
+    
+    for category in categories:
+        # Get actual spending for the month
+        actual = db.session.query(func.sum(Expense.amount)).filter(
+            Expense.category_id == category.id,
+            extract('year', Expense.date) == year,
+            extract('month', Expense.date) == month
+        ).scalar()
+        
+        actual = float(actual) if actual else 0
+        
+        # Get predicted value (simplified - using average)
+        prediction = predict_category_spending(category, datetime.now(), 1)
+        predicted = prediction['monthly_average']
+        
+        if predicted > 0:
+            accuracy = (1 - abs(actual - predicted) / predicted) * 100
+        else:
+            accuracy = 0 if actual == 0 else 0
+        
+        comparison['categories'][category.name] = {
+            'actual': round(actual, 2),
+            'predicted': round(predicted, 2),
+            'difference': round(actual - predicted, 2),
+            'accuracy': round(accuracy, 1)
+        }
+    
+    return comparison