374 lines
12 KiB
Python
374 lines
12 KiB
Python
|
|
"""
|
||
|
|
Spending Predictions Module
|
||
|
|
Analyzes historical spending patterns and predicts future expenses
|
||
|
|
"""
|
||
|
|
|
||
|
|
from app import db
|
||
|
|
from app.models.category import Category, Expense
|
||
|
|
from sqlalchemy import extract, func
|
||
|
|
from datetime import datetime, timedelta
|
||
|
|
from collections import defaultdict
|
||
|
|
import statistics
|
||
|
|
|
||
|
|
|
||
|
|
def get_spending_predictions(user_id, months_ahead=3):
|
||
|
|
"""
|
||
|
|
Predict spending for the next X months based on historical data
|
||
|
|
|
||
|
|
Args:
|
||
|
|
user_id: User ID to generate predictions for
|
||
|
|
months_ahead: Number of months to predict (default: 3)
|
||
|
|
|
||
|
|
Returns:
|
||
|
|
dict with predictions per category and total
|
||
|
|
"""
|
||
|
|
categories = Category.query.filter_by(user_id=user_id).all()
|
||
|
|
|
||
|
|
predictions = {
|
||
|
|
'by_category': {},
|
||
|
|
'total_months': 0,
|
||
|
|
'insights': []
|
||
|
|
}
|
||
|
|
|
||
|
|
current_date = datetime.now()
|
||
|
|
total_predicted = 0
|
||
|
|
total_months_data = []
|
||
|
|
|
||
|
|
for category in categories:
|
||
|
|
category_prediction = predict_category_spending(
|
||
|
|
category,
|
||
|
|
current_date,
|
||
|
|
months_ahead
|
||
|
|
)
|
||
|
|
|
||
|
|
if category_prediction['predicted_amount'] > 0:
|
||
|
|
# Add category_id for API calls
|
||
|
|
category_prediction['category_id'] = category.id
|
||
|
|
predictions['by_category'][category.name] = category_prediction
|
||
|
|
total_predicted += category_prediction['predicted_amount']
|
||
|
|
total_months_data.append(category_prediction['historical_months'])
|
||
|
|
|
||
|
|
# Calculate overall statistics
|
||
|
|
if predictions['by_category']:
|
||
|
|
avg_months = sum(total_months_data) / len(total_months_data)
|
||
|
|
predictions['total_months'] = int(avg_months)
|
||
|
|
|
||
|
|
# Determine overall confidence
|
||
|
|
if avg_months >= 6:
|
||
|
|
overall_confidence = 'high'
|
||
|
|
elif avg_months >= 3:
|
||
|
|
overall_confidence = 'medium'
|
||
|
|
else:
|
||
|
|
overall_confidence = 'low'
|
||
|
|
|
||
|
|
# Determine overall trend
|
||
|
|
increasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'increasing')
|
||
|
|
decreasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'decreasing')
|
||
|
|
|
||
|
|
if increasing > decreasing:
|
||
|
|
overall_trend = 'increasing'
|
||
|
|
elif decreasing > increasing:
|
||
|
|
overall_trend = 'decreasing'
|
||
|
|
else:
|
||
|
|
overall_trend = 'stable'
|
||
|
|
|
||
|
|
predictions['total'] = {
|
||
|
|
'amount': round(total_predicted, 2),
|
||
|
|
'confidence': overall_confidence,
|
||
|
|
'trend': overall_trend,
|
||
|
|
'months_of_data': int(avg_months)
|
||
|
|
}
|
||
|
|
else:
|
||
|
|
predictions['total_months'] = 0
|
||
|
|
predictions['total'] = {
|
||
|
|
'amount': 0,
|
||
|
|
'confidence': 'none',
|
||
|
|
'trend': 'stable',
|
||
|
|
'months_of_data': 0
|
||
|
|
}
|
||
|
|
|
||
|
|
# Generate insights
|
||
|
|
predictions['insights'] = generate_insights(predictions['by_category'], current_date)
|
||
|
|
|
||
|
|
return predictions
|
||
|
|
|
||
|
|
|
||
|
|
def predict_category_spending(category, current_date, months_ahead=3):
|
||
|
|
"""
|
||
|
|
Predict spending for a specific category
|
||
|
|
|
||
|
|
Uses weighted average with more recent months having higher weight
|
||
|
|
"""
|
||
|
|
# Get last 12 months of data
|
||
|
|
twelve_months_ago = current_date - timedelta(days=365)
|
||
|
|
|
||
|
|
monthly_spending = db.session.query(
|
||
|
|
extract('year', Expense.date).label('year'),
|
||
|
|
extract('month', Expense.date).label('month'),
|
||
|
|
func.sum(Expense.amount).label('total')
|
||
|
|
).filter(
|
||
|
|
Expense.category_id == category.id,
|
||
|
|
Expense.date >= twelve_months_ago
|
||
|
|
).group_by('year', 'month').all()
|
||
|
|
|
||
|
|
if not monthly_spending:
|
||
|
|
return {
|
||
|
|
'predicted_amount': 0,
|
||
|
|
'historical_average': 0,
|
||
|
|
'trend': 'none',
|
||
|
|
'historical_months': 0,
|
||
|
|
'confidence': 'none'
|
||
|
|
}
|
||
|
|
|
||
|
|
# Extract amounts and calculate statistics
|
||
|
|
amounts = [float(row.total) for row in monthly_spending]
|
||
|
|
historical_months = len(amounts)
|
||
|
|
|
||
|
|
# Calculate weighted average (recent months have more weight)
|
||
|
|
weights = list(range(1, len(amounts) + 1))
|
||
|
|
weighted_avg = sum(a * w for a, w in zip(amounts, weights)) / sum(weights)
|
||
|
|
|
||
|
|
# Calculate trend
|
||
|
|
if len(amounts) >= 3:
|
||
|
|
first_half = sum(amounts[:len(amounts)//2]) / (len(amounts)//2)
|
||
|
|
second_half = sum(amounts[len(amounts)//2:]) / (len(amounts) - len(amounts)//2)
|
||
|
|
|
||
|
|
if second_half > first_half * 1.1:
|
||
|
|
trend = 'increasing'
|
||
|
|
elif second_half < first_half * 0.9:
|
||
|
|
trend = 'decreasing'
|
||
|
|
else:
|
||
|
|
trend = 'stable'
|
||
|
|
else:
|
||
|
|
trend = 'stable'
|
||
|
|
|
||
|
|
# Adjust prediction based on trend
|
||
|
|
if trend == 'increasing':
|
||
|
|
predicted_amount = weighted_avg * 1.05 # 5% increase
|
||
|
|
elif trend == 'decreasing':
|
||
|
|
predicted_amount = weighted_avg * 0.95 # 5% decrease
|
||
|
|
else:
|
||
|
|
predicted_amount = weighted_avg
|
||
|
|
|
||
|
|
# Multiply by months ahead
|
||
|
|
predicted_total = predicted_amount * months_ahead
|
||
|
|
|
||
|
|
# Calculate confidence based on data consistency
|
||
|
|
if len(amounts) >= 3:
|
||
|
|
std_dev = statistics.stdev(amounts)
|
||
|
|
avg = statistics.mean(amounts)
|
||
|
|
coefficient_of_variation = std_dev / avg if avg > 0 else 1
|
||
|
|
|
||
|
|
if coefficient_of_variation < 0.3:
|
||
|
|
confidence = 'high'
|
||
|
|
elif coefficient_of_variation < 0.6:
|
||
|
|
confidence = 'medium'
|
||
|
|
else:
|
||
|
|
confidence = 'low'
|
||
|
|
else:
|
||
|
|
confidence = 'low'
|
||
|
|
|
||
|
|
return {
|
||
|
|
'predicted_amount': round(predicted_total, 2),
|
||
|
|
'monthly_average': round(predicted_amount, 2),
|
||
|
|
'historical_average': round(statistics.mean(amounts), 2),
|
||
|
|
'trend': trend,
|
||
|
|
'historical_months': historical_months,
|
||
|
|
'confidence': confidence,
|
||
|
|
'min': round(min(amounts), 2),
|
||
|
|
'max': round(max(amounts), 2)
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
def generate_insights(category_predictions, current_date):
|
||
|
|
"""Generate human-readable insights from predictions"""
|
||
|
|
insights = []
|
||
|
|
|
||
|
|
# Find categories with increasing trends
|
||
|
|
increasing = [
|
||
|
|
name for name, pred in category_predictions.items()
|
||
|
|
if pred['trend'] == 'increasing'
|
||
|
|
]
|
||
|
|
if increasing:
|
||
|
|
insights.append({
|
||
|
|
'type': 'warning',
|
||
|
|
'message': f"Spending is increasing in: {', '.join(increasing)}"
|
||
|
|
})
|
||
|
|
|
||
|
|
# Find categories with high spending
|
||
|
|
sorted_by_amount = sorted(
|
||
|
|
category_predictions.items(),
|
||
|
|
key=lambda x: x[1]['predicted_amount'],
|
||
|
|
reverse=True
|
||
|
|
)
|
||
|
|
|
||
|
|
if sorted_by_amount:
|
||
|
|
top_category = sorted_by_amount[0]
|
||
|
|
insights.append({
|
||
|
|
'type': 'info',
|
||
|
|
'message': f"Highest predicted spending: {top_category[0]}"
|
||
|
|
})
|
||
|
|
|
||
|
|
# Find categories with high confidence
|
||
|
|
high_confidence = [
|
||
|
|
name for name, pred in category_predictions.items()
|
||
|
|
if pred['confidence'] == 'high'
|
||
|
|
]
|
||
|
|
if len(high_confidence) >= 3:
|
||
|
|
insights.append({
|
||
|
|
'type': 'success',
|
||
|
|
'message': f"High prediction accuracy for {len(high_confidence)} categories"
|
||
|
|
})
|
||
|
|
|
||
|
|
# Seasonal insight (simple check)
|
||
|
|
current_month = current_date.month
|
||
|
|
if current_month in [11, 12]: # November, December
|
||
|
|
insights.append({
|
||
|
|
'type': 'info',
|
||
|
|
'message': "Holiday season - spending typically increases"
|
||
|
|
})
|
||
|
|
elif current_month in [1, 2]: # January, February
|
||
|
|
insights.append({
|
||
|
|
'type': 'info',
|
||
|
|
'message': "Post-holiday period - spending may decrease"
|
||
|
|
})
|
||
|
|
|
||
|
|
return insights
|
||
|
|
|
||
|
|
|
||
|
|
def get_category_forecast(category_id, user_id, months=6):
|
||
|
|
"""
|
||
|
|
Get detailed forecast for a specific category
|
||
|
|
|
||
|
|
Returns monthly predictions for next N months
|
||
|
|
"""
|
||
|
|
category = Category.query.filter_by(
|
||
|
|
id=category_id,
|
||
|
|
user_id=user_id
|
||
|
|
).first()
|
||
|
|
|
||
|
|
if not category:
|
||
|
|
return None
|
||
|
|
|
||
|
|
current_date = datetime.now()
|
||
|
|
|
||
|
|
# Get historical monthly data
|
||
|
|
twelve_months_ago = current_date - timedelta(days=365)
|
||
|
|
|
||
|
|
monthly_data = db.session.query(
|
||
|
|
extract('year', Expense.date).label('year'),
|
||
|
|
extract('month', Expense.date).label('month'),
|
||
|
|
func.sum(Expense.amount).label('total')
|
||
|
|
).filter(
|
||
|
|
Expense.category_id == category_id,
|
||
|
|
Expense.date >= twelve_months_ago
|
||
|
|
).group_by('year', 'month').order_by('year', 'month').all()
|
||
|
|
|
||
|
|
if not monthly_data:
|
||
|
|
return {
|
||
|
|
'category_name': category.name,
|
||
|
|
'forecast': [],
|
||
|
|
'message': 'Not enough data for predictions'
|
||
|
|
}
|
||
|
|
|
||
|
|
# Calculate base prediction
|
||
|
|
amounts = [float(row.total) for row in monthly_data]
|
||
|
|
avg_spending = statistics.mean(amounts)
|
||
|
|
|
||
|
|
# Generate forecast for next months
|
||
|
|
forecast = []
|
||
|
|
for i in range(1, months + 1):
|
||
|
|
future_date = current_date + timedelta(days=30 * i)
|
||
|
|
|
||
|
|
# Simple seasonal adjustment based on month
|
||
|
|
seasonal_factor = get_seasonal_factor(future_date.month)
|
||
|
|
predicted = avg_spending * seasonal_factor
|
||
|
|
|
||
|
|
forecast.append({
|
||
|
|
'month': future_date.strftime('%B %Y'),
|
||
|
|
'month_num': future_date.month,
|
||
|
|
'year': future_date.year,
|
||
|
|
'predicted_amount': round(predicted, 2)
|
||
|
|
})
|
||
|
|
|
||
|
|
return {
|
||
|
|
'category_name': category.name,
|
||
|
|
'category_color': category.color,
|
||
|
|
'historical_average': round(avg_spending, 2),
|
||
|
|
'forecast': forecast
|
||
|
|
}
|
||
|
|
|
||
|
|
|
||
|
|
def get_seasonal_factor(month):
|
||
|
|
"""
|
||
|
|
Get seasonal adjustment factor based on month
|
||
|
|
|
||
|
|
This is a simplified version - could be made more sophisticated
|
||
|
|
with actual historical data analysis
|
||
|
|
"""
|
||
|
|
# Holiday months (Nov, Dec) typically have higher spending
|
||
|
|
# Summer months might vary by category
|
||
|
|
factors = {
|
||
|
|
1: 0.9, # January - post-holiday slowdown
|
||
|
|
2: 0.95, # February
|
||
|
|
3: 1.0, # March
|
||
|
|
4: 1.0, # April
|
||
|
|
5: 1.05, # May
|
||
|
|
6: 1.05, # June - summer
|
||
|
|
7: 1.05, # July - summer
|
||
|
|
8: 1.0, # August
|
||
|
|
9: 1.0, # September - back to school
|
||
|
|
10: 1.05, # October
|
||
|
|
11: 1.1, # November - holidays starting
|
||
|
|
12: 1.15 # December - peak holiday
|
||
|
|
}
|
||
|
|
return factors.get(month, 1.0)
|
||
|
|
|
||
|
|
|
||
|
|
def compare_with_predictions(user_id, month=None, year=None):
|
||
|
|
"""
|
||
|
|
Compare actual spending with predictions
|
||
|
|
|
||
|
|
Useful for showing accuracy of predictions
|
||
|
|
"""
|
||
|
|
if month is None:
|
||
|
|
month = datetime.now().month
|
||
|
|
if year is None:
|
||
|
|
year = datetime.now().year
|
||
|
|
|
||
|
|
categories = Category.query.filter_by(user_id=user_id).all()
|
||
|
|
|
||
|
|
comparison = {
|
||
|
|
'month': month,
|
||
|
|
'year': year,
|
||
|
|
'categories': {}
|
||
|
|
}
|
||
|
|
|
||
|
|
for category in categories:
|
||
|
|
# Get actual spending for the month
|
||
|
|
actual = db.session.query(func.sum(Expense.amount)).filter(
|
||
|
|
Expense.category_id == category.id,
|
||
|
|
extract('year', Expense.date) == year,
|
||
|
|
extract('month', Expense.date) == month
|
||
|
|
).scalar()
|
||
|
|
|
||
|
|
actual = float(actual) if actual else 0
|
||
|
|
|
||
|
|
# Get predicted value (simplified - using average)
|
||
|
|
prediction = predict_category_spending(category, datetime.now(), 1)
|
||
|
|
predicted = prediction['monthly_average']
|
||
|
|
|
||
|
|
if predicted > 0:
|
||
|
|
accuracy = (1 - abs(actual - predicted) / predicted) * 100
|
||
|
|
else:
|
||
|
|
accuracy = 0 if actual == 0 else 0
|
||
|
|
|
||
|
|
comparison['categories'][category.name] = {
|
||
|
|
'actual': round(actual, 2),
|
||
|
|
'predicted': round(predicted, 2),
|
||
|
|
'difference': round(actual - predicted, 2),
|
||
|
|
'accuracy': round(accuracy, 1)
|
||
|
|
}
|
||
|
|
|
||
|
|
return comparison
|