Initial commit

This commit is contained in:
iulian 2025-12-26 00:52:56 +00:00
commit 983cee0320
322 changed files with 57174 additions and 0 deletions

View file

@ -0,0 +1,373 @@
"""
Spending Predictions Module
Analyzes historical spending patterns and predicts future expenses
"""
from app import db
from app.models.category import Category, Expense
from sqlalchemy import extract, func
from datetime import datetime, timedelta
from collections import defaultdict
import statistics
def get_spending_predictions(user_id, months_ahead=3):
"""
Predict spending for the next X months based on historical data
Args:
user_id: User ID to generate predictions for
months_ahead: Number of months to predict (default: 3)
Returns:
dict with predictions per category and total
"""
categories = Category.query.filter_by(user_id=user_id).all()
predictions = {
'by_category': {},
'total_months': 0,
'insights': []
}
current_date = datetime.now()
total_predicted = 0
total_months_data = []
for category in categories:
category_prediction = predict_category_spending(
category,
current_date,
months_ahead
)
if category_prediction['predicted_amount'] > 0:
# Add category_id for API calls
category_prediction['category_id'] = category.id
predictions['by_category'][category.name] = category_prediction
total_predicted += category_prediction['predicted_amount']
total_months_data.append(category_prediction['historical_months'])
# Calculate overall statistics
if predictions['by_category']:
avg_months = sum(total_months_data) / len(total_months_data)
predictions['total_months'] = int(avg_months)
# Determine overall confidence
if avg_months >= 6:
overall_confidence = 'high'
elif avg_months >= 3:
overall_confidence = 'medium'
else:
overall_confidence = 'low'
# Determine overall trend
increasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'increasing')
decreasing = sum(1 for p in predictions['by_category'].values() if p['trend'] == 'decreasing')
if increasing > decreasing:
overall_trend = 'increasing'
elif decreasing > increasing:
overall_trend = 'decreasing'
else:
overall_trend = 'stable'
predictions['total'] = {
'amount': round(total_predicted, 2),
'confidence': overall_confidence,
'trend': overall_trend,
'months_of_data': int(avg_months)
}
else:
predictions['total_months'] = 0
predictions['total'] = {
'amount': 0,
'confidence': 'none',
'trend': 'stable',
'months_of_data': 0
}
# Generate insights
predictions['insights'] = generate_insights(predictions['by_category'], current_date)
return predictions
def predict_category_spending(category, current_date, months_ahead=3):
"""
Predict spending for a specific category
Uses weighted average with more recent months having higher weight
"""
# Get last 12 months of data
twelve_months_ago = current_date - timedelta(days=365)
monthly_spending = db.session.query(
extract('year', Expense.date).label('year'),
extract('month', Expense.date).label('month'),
func.sum(Expense.amount).label('total')
).filter(
Expense.category_id == category.id,
Expense.date >= twelve_months_ago
).group_by('year', 'month').all()
if not monthly_spending:
return {
'predicted_amount': 0,
'historical_average': 0,
'trend': 'none',
'historical_months': 0,
'confidence': 'none'
}
# Extract amounts and calculate statistics
amounts = [float(row.total) for row in monthly_spending]
historical_months = len(amounts)
# Calculate weighted average (recent months have more weight)
weights = list(range(1, len(amounts) + 1))
weighted_avg = sum(a * w for a, w in zip(amounts, weights)) / sum(weights)
# Calculate trend
if len(amounts) >= 3:
first_half = sum(amounts[:len(amounts)//2]) / (len(amounts)//2)
second_half = sum(amounts[len(amounts)//2:]) / (len(amounts) - len(amounts)//2)
if second_half > first_half * 1.1:
trend = 'increasing'
elif second_half < first_half * 0.9:
trend = 'decreasing'
else:
trend = 'stable'
else:
trend = 'stable'
# Adjust prediction based on trend
if trend == 'increasing':
predicted_amount = weighted_avg * 1.05 # 5% increase
elif trend == 'decreasing':
predicted_amount = weighted_avg * 0.95 # 5% decrease
else:
predicted_amount = weighted_avg
# Multiply by months ahead
predicted_total = predicted_amount * months_ahead
# Calculate confidence based on data consistency
if len(amounts) >= 3:
std_dev = statistics.stdev(amounts)
avg = statistics.mean(amounts)
coefficient_of_variation = std_dev / avg if avg > 0 else 1
if coefficient_of_variation < 0.3:
confidence = 'high'
elif coefficient_of_variation < 0.6:
confidence = 'medium'
else:
confidence = 'low'
else:
confidence = 'low'
return {
'predicted_amount': round(predicted_total, 2),
'monthly_average': round(predicted_amount, 2),
'historical_average': round(statistics.mean(amounts), 2),
'trend': trend,
'historical_months': historical_months,
'confidence': confidence,
'min': round(min(amounts), 2),
'max': round(max(amounts), 2)
}
def generate_insights(category_predictions, current_date):
"""Generate human-readable insights from predictions"""
insights = []
# Find categories with increasing trends
increasing = [
name for name, pred in category_predictions.items()
if pred['trend'] == 'increasing'
]
if increasing:
insights.append({
'type': 'warning',
'message': f"Spending is increasing in: {', '.join(increasing)}"
})
# Find categories with high spending
sorted_by_amount = sorted(
category_predictions.items(),
key=lambda x: x[1]['predicted_amount'],
reverse=True
)
if sorted_by_amount:
top_category = sorted_by_amount[0]
insights.append({
'type': 'info',
'message': f"Highest predicted spending: {top_category[0]}"
})
# Find categories with high confidence
high_confidence = [
name for name, pred in category_predictions.items()
if pred['confidence'] == 'high'
]
if len(high_confidence) >= 3:
insights.append({
'type': 'success',
'message': f"High prediction accuracy for {len(high_confidence)} categories"
})
# Seasonal insight (simple check)
current_month = current_date.month
if current_month in [11, 12]: # November, December
insights.append({
'type': 'info',
'message': "Holiday season - spending typically increases"
})
elif current_month in [1, 2]: # January, February
insights.append({
'type': 'info',
'message': "Post-holiday period - spending may decrease"
})
return insights
def get_category_forecast(category_id, user_id, months=6):
"""
Get detailed forecast for a specific category
Returns monthly predictions for next N months
"""
category = Category.query.filter_by(
id=category_id,
user_id=user_id
).first()
if not category:
return None
current_date = datetime.now()
# Get historical monthly data
twelve_months_ago = current_date - timedelta(days=365)
monthly_data = db.session.query(
extract('year', Expense.date).label('year'),
extract('month', Expense.date).label('month'),
func.sum(Expense.amount).label('total')
).filter(
Expense.category_id == category_id,
Expense.date >= twelve_months_ago
).group_by('year', 'month').order_by('year', 'month').all()
if not monthly_data:
return {
'category_name': category.name,
'forecast': [],
'message': 'Not enough data for predictions'
}
# Calculate base prediction
amounts = [float(row.total) for row in monthly_data]
avg_spending = statistics.mean(amounts)
# Generate forecast for next months
forecast = []
for i in range(1, months + 1):
future_date = current_date + timedelta(days=30 * i)
# Simple seasonal adjustment based on month
seasonal_factor = get_seasonal_factor(future_date.month)
predicted = avg_spending * seasonal_factor
forecast.append({
'month': future_date.strftime('%B %Y'),
'month_num': future_date.month,
'year': future_date.year,
'predicted_amount': round(predicted, 2)
})
return {
'category_name': category.name,
'category_color': category.color,
'historical_average': round(avg_spending, 2),
'forecast': forecast
}
def get_seasonal_factor(month):
"""
Get seasonal adjustment factor based on month
This is a simplified version - could be made more sophisticated
with actual historical data analysis
"""
# Holiday months (Nov, Dec) typically have higher spending
# Summer months might vary by category
factors = {
1: 0.9, # January - post-holiday slowdown
2: 0.95, # February
3: 1.0, # March
4: 1.0, # April
5: 1.05, # May
6: 1.05, # June - summer
7: 1.05, # July - summer
8: 1.0, # August
9: 1.0, # September - back to school
10: 1.05, # October
11: 1.1, # November - holidays starting
12: 1.15 # December - peak holiday
}
return factors.get(month, 1.0)
def compare_with_predictions(user_id, month=None, year=None):
"""
Compare actual spending with predictions
Useful for showing accuracy of predictions
"""
if month is None:
month = datetime.now().month
if year is None:
year = datetime.now().year
categories = Category.query.filter_by(user_id=user_id).all()
comparison = {
'month': month,
'year': year,
'categories': {}
}
for category in categories:
# Get actual spending for the month
actual = db.session.query(func.sum(Expense.amount)).filter(
Expense.category_id == category.id,
extract('year', Expense.date) == year,
extract('month', Expense.date) == month
).scalar()
actual = float(actual) if actual else 0
# Get predicted value (simplified - using average)
prediction = predict_category_spending(category, datetime.now(), 1)
predicted = prediction['monthly_average']
if predicted > 0:
accuracy = (1 - abs(actual - predicted) / predicted) * 100
else:
accuracy = 0 if actual == 0 else 0
comparison['categories'][category.name] = {
'actual': round(actual, 2),
'predicted': round(predicted, 2),
'difference': round(actual - predicted, 2),
'accuracy': round(accuracy, 1)
}
return comparison