Initial commit
This commit is contained in:
commit
983cee0320
322 changed files with 57174 additions and 0 deletions
373
backup/first -fina app/app/predictions.py
Normal file
373
backup/first -fina app/app/predictions.py
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
"""
|
||||
Spending Predictions Module
|
||||
Analyzes historical spending patterns and predicts future expenses
|
||||
"""
|
||||
|
||||
from app import db
|
||||
from app.models.category import Category, Expense
|
||||
from sqlalchemy import extract, func
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
import statistics
|
||||
|
||||
|
||||
def get_spending_predictions(user_id, months_ahead=3):
    """
    Forecast spending for every category that belongs to a user

    Each category is forecast with predict_category_spending; categories
    whose predicted amount is not positive are left out of the result.

    Args:
        user_id: User ID whose categories are forecast
        months_ahead: Number of months covered by the forecast (default: 3)

    Returns:
        dict with the keys 'by_category' (per-category forecasts keyed by
        category name), 'total_months', 'insights' and 'total'
    """
    now = datetime.now()

    by_category = {}
    running_total = 0
    history_lengths = []

    for category in Category.query.filter_by(user_id=user_id).all():
        forecast = predict_category_spending(category, now, months_ahead)

        # Skip categories that have nothing to forecast
        if not forecast['predicted_amount'] > 0:
            continue

        # Expose the category id so API consumers can reference it
        forecast['category_id'] = category.id
        by_category[category.name] = forecast
        running_total += forecast['predicted_amount']
        history_lengths.append(forecast['historical_months'])

    if by_category:
        mean_history = sum(history_lengths) / len(history_lengths)
        whole_months = int(mean_history)

        # More months of history -> more trust in the overall figure
        if mean_history >= 6:
            confidence_label = 'high'
        elif mean_history >= 3:
            confidence_label = 'medium'
        else:
            confidence_label = 'low'

        # The overall trend is a simple majority vote across categories
        trend_labels = [entry['trend'] for entry in by_category.values()]
        rising = trend_labels.count('increasing')
        falling = trend_labels.count('decreasing')

        if rising > falling:
            trend_label = 'increasing'
        elif falling > rising:
            trend_label = 'decreasing'
        else:
            trend_label = 'stable'

        summary = {
            'amount': round(running_total, 2),
            'confidence': confidence_label,
            'trend': trend_label,
            'months_of_data': whole_months
        }
    else:
        whole_months = 0
        summary = {
            'amount': 0,
            'confidence': 'none',
            'trend': 'stable',
            'months_of_data': 0
        }

    return {
        'by_category': by_category,
        'total_months': whole_months,
        'insights': generate_insights(by_category, now),
        'total': summary
    }
|
||||
|
||||
|
||||
def predict_category_spending(category, current_date, months_ahead=3):
    """
    Predict spending for a specific category

    Monthly expense totals from the 365 days before current_date are
    combined into a weighted average in which later months weigh more.
    The average is nudged by 5% up or down when the second half of the
    history differs from the first half by more than 10%.

    Args:
        category: Category object; only its id is used for the query
        current_date: datetime that marks the end of the history window
        months_ahead: Number of months the prediction covers (default: 3)

    Returns:
        dict with 'predicted_amount' (total for months_ahead months),
        'monthly_average', 'historical_average', 'trend',
        'historical_months', 'confidence', 'min' and 'max'. When there is
        no history every numeric value is 0 and 'trend' and 'confidence'
        are 'none'.
    """
    # Get the last 365 days of data (the oldest month may be partial)
    twelve_months_ago = current_date - timedelta(days=365)

    # The explicit ORDER BY matters: both the weighting and the trend
    # calculation below assume the rows run from oldest to newest month,
    # and the database gives no ordering guarantee without it.
    monthly_spending = db.session.query(
        extract('year', Expense.date).label('year'),
        extract('month', Expense.date).label('month'),
        func.sum(Expense.amount).label('total')
    ).filter(
        Expense.category_id == category.id,
        Expense.date >= twelve_months_ago
    ).group_by('year', 'month').order_by('year', 'month').all()

    if not monthly_spending:
        # Same keys as the regular result so callers can read any of them
        return {
            'predicted_amount': 0,
            'monthly_average': 0,
            'historical_average': 0,
            'trend': 'none',
            'historical_months': 0,
            'confidence': 'none',
            'min': 0,
            'max': 0
        }

    # Extract amounts; a NULL sum is treated as no spending
    amounts = [float(row.total or 0) for row in monthly_spending]
    historical_months = len(amounts)
    historical_average = statistics.mean(amounts)

    # Calculate weighted average (weights 1..n, the newest month gets n)
    weights = range(1, historical_months + 1)
    weighted_avg = sum(a * w for a, w in zip(amounts, weights)) / sum(weights)

    # Calculate trend by comparing the older half with the newer half
    trend = 'stable'
    if historical_months >= 3:
        half = historical_months // 2
        first_half = sum(amounts[:half]) / half
        second_half = sum(amounts[half:]) / (historical_months - half)

        if second_half > first_half * 1.1:
            trend = 'increasing'
        elif second_half < first_half * 0.9:
            trend = 'decreasing'

    # Adjust prediction based on trend
    if trend == 'increasing':
        predicted_amount = weighted_avg * 1.05  # 5% increase
    elif trend == 'decreasing':
        predicted_amount = weighted_avg * 0.95  # 5% decrease
    else:
        predicted_amount = weighted_avg

    # Multiply by months ahead
    predicted_total = predicted_amount * months_ahead

    # Calculate confidence based on data consistency
    confidence = 'low'
    if historical_months >= 3:
        std_dev = statistics.stdev(amounts)
        # A non-positive mean cannot be used as a divisor; treat as noisy
        if historical_average > 0:
            coefficient_of_variation = std_dev / historical_average
        else:
            coefficient_of_variation = 1

        if coefficient_of_variation < 0.3:
            confidence = 'high'
        elif coefficient_of_variation < 0.6:
            confidence = 'medium'

    return {
        'predicted_amount': round(predicted_total, 2),
        'monthly_average': round(predicted_amount, 2),
        'historical_average': round(historical_average, 2),
        'trend': trend,
        'historical_months': historical_months,
        'confidence': confidence,
        'min': round(min(amounts), 2),
        'max': round(max(amounts), 2)
    }
|
||||
|
||||
|
||||
def generate_insights(category_predictions, current_date):
    """Turn per-category predictions into a list of insight messages"""

    def _note(kind, text):
        # Every insight is a small dict with a type and a message
        return {'type': kind, 'message': text}

    notes = []

    # Categories whose spending is trending upwards
    rising = []
    for name, pred in category_predictions.items():
        if pred['trend'] == 'increasing':
            rising.append(name)
    if rising:
        notes.append(_note('warning', f"Spending is increasing in: {', '.join(rising)}"))

    # Category with the largest predicted amount
    ranked = list(category_predictions.items())
    ranked.sort(key=lambda item: item[1]['predicted_amount'], reverse=True)
    if ranked:
        top_name = ranked[0][0]
        notes.append(_note('info', f"Highest predicted spending: {top_name}"))

    # Only mention accuracy once at least three categories are reliable
    reliable_count = sum(
        1 for pred in category_predictions.values()
        if pred['confidence'] == 'high'
    )
    if reliable_count >= 3:
        notes.append(_note('success', f"High prediction accuracy for {reliable_count} categories"))

    # Seasonal insight (simple check on the current month)
    holiday_text = "Holiday season - spending typically increases"
    post_holiday_text = "Post-holiday period - spending may decrease"
    seasonal_text = {
        11: holiday_text,       # November
        12: holiday_text,       # December
        1: post_holiday_text,   # January
        2: post_holiday_text,   # February
    }.get(current_date.month)
    if seasonal_text is not None:
        notes.append(_note('info', seasonal_text))

    return notes
|
||||
|
||||
|
||||
def get_category_forecast(category_id, user_id, months=6):
    """
    Get detailed forecast for a specific category

    Returns monthly predictions for the next N calendar months. Each
    prediction is the mean of the monthly totals from the last 365 days
    multiplied by the seasonal factor of the target month.

    Args:
        category_id: ID of the category to forecast
        user_id: ID of the user who must own the category
        months: Number of future months to forecast (default: 6)

    Returns:
        None when the user has no category with that id. Otherwise a dict
        with 'category_name' and 'forecast'; with history it also holds
        'category_color' and 'historical_average', without history it
        holds a 'message' and an empty 'forecast' list.
    """
    category = Category.query.filter_by(
        id=category_id,
        user_id=user_id
    ).first()

    if not category:
        return None

    current_date = datetime.now()

    # Get historical monthly data
    twelve_months_ago = current_date - timedelta(days=365)

    monthly_data = db.session.query(
        extract('year', Expense.date).label('year'),
        extract('month', Expense.date).label('month'),
        func.sum(Expense.amount).label('total')
    ).filter(
        Expense.category_id == category_id,
        Expense.date >= twelve_months_ago
    ).group_by('year', 'month').order_by('year', 'month').all()

    if not monthly_data:
        return {
            'category_name': category.name,
            'forecast': [],
            'message': 'Not enough data for predictions'
        }

    # Calculate base prediction
    amounts = [float(row.total) for row in monthly_data]
    avg_spending = statistics.mean(amounts)

    # Generate forecast for next months.
    # Months are counted on the calendar instead of in 30-day steps:
    # adding 30 days to January 1st stays in January and adding 30 days
    # to January 31st skips February, so day-based steps repeat or drop
    # months.
    base_index = current_date.year * 12 + (current_date.month - 1)
    forecast = []
    for i in range(1, months + 1):
        future_year, future_month_zero = divmod(base_index + i, 12)
        future_date = datetime(future_year, future_month_zero + 1, 1)

        # Simple seasonal adjustment based on month
        seasonal_factor = get_seasonal_factor(future_date.month)
        predicted = avg_spending * seasonal_factor

        forecast.append({
            'month': future_date.strftime('%B %Y'),
            'month_num': future_date.month,
            'year': future_date.year,
            'predicted_amount': round(predicted, 2)
        })

    return {
        'category_name': category.name,
        'category_color': category.color,
        'historical_average': round(avg_spending, 2),
        'forecast': forecast
    }
|
||||
|
||||
|
||||
def get_seasonal_factor(month):
    """
    Return the multiplier applied to average spending for a month

    The table is a fixed, simplified estimate; it is not derived from
    the user's own historical data. Any value that is not a month
    number from 1 to 12 gets the neutral factor 1.0.
    """
    monthly_adjustment = {
        # Start of the year: post-holiday slowdown
        1: 0.9,
        2: 0.95,
        # Spring: neutral
        3: 1.0,
        4: 1.0,
        # Late spring and summer: slightly elevated
        5: 1.05,
        6: 1.05,
        7: 1.05,
        # Late summer / back to school: neutral
        8: 1.0,
        9: 1.0,
        # Run-up to the holidays, peaking in December
        10: 1.05,
        11: 1.1,
        12: 1.15
    }
    try:
        return monthly_adjustment[month]
    except KeyError:
        return 1.0
|
||||
|
||||
|
||||
def compare_with_predictions(user_id, month=None, year=None):
    """
    Compare actual spending with predictions

    Useful for showing accuracy of predictions. The predicted value is
    the monthly average that predict_category_spending produces as of
    now; it is not a prediction that was stored before the compared
    month started.

    Args:
        user_id: User ID whose categories are compared
        month: Month number to compare (default: current month)
        year: Year to compare (default: current year)

    Returns:
        dict with 'month', 'year' and 'categories'; 'categories' maps
        each category name to 'actual', 'predicted', 'difference' and
        'accuracy' (a percentage between 0 and 100)
    """
    # Sample the clock once so the month/year defaults and every
    # per-category prediction refer to the same moment
    now = datetime.now()

    if month is None:
        month = now.month
    if year is None:
        year = now.year

    categories = Category.query.filter_by(user_id=user_id).all()

    comparison = {
        'month': month,
        'year': year,
        'categories': {}
    }

    for category in categories:
        # Get actual spending for the month
        actual = db.session.query(func.sum(Expense.amount)).filter(
            Expense.category_id == category.id,
            extract('year', Expense.date) == year,
            extract('month', Expense.date) == month
        ).scalar()

        # The sum is NULL when the category has no expenses in the month
        actual = float(actual) if actual else 0

        # Get predicted value (simplified - using average).
        # .get guards against a no-data result without 'monthly_average'.
        prediction = predict_category_spending(category, now, 1)
        predicted = prediction.get('monthly_average', 0)

        if predicted > 0:
            # Clamp at 0 so spending more than twice the prediction does
            # not produce a negative percentage
            accuracy = max(0.0, (1 - abs(actual - predicted) / predicted) * 100)
        else:
            # Nothing predicted: correct only if nothing was spent
            accuracy = 100 if actual == 0 else 0

        comparison['categories'][category.name] = {
            'actual': round(actual, 2),
            'predicted': round(predicted, 2),
            'difference': round(actual - predicted, 2),
            'accuracy': round(accuracy, 1)
        }

    return comparison
|
||||
Loading…
Add table
Add a link
Reference in a new issue