Initial commit
This commit is contained in:
commit
983cee0320
322 changed files with 57174 additions and 0 deletions
354
backup/first -fina app/app/smart_detection.py
Normal file
354
backup/first -fina app/app/smart_detection.py
Normal file
|
|
@ -0,0 +1,354 @@
|
|||
"""
|
||||
Smart detection algorithms for recurring expenses and subscriptions
|
||||
"""
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
import re
|
||||
import json
|
||||
from sqlalchemy import and_
|
||||
from app import db
|
||||
from app.models.category import Expense
|
||||
from app.models.subscription import RecurringPattern, Subscription
|
||||
|
||||
|
||||
def detect_recurring_expenses(user_id, min_occurrences=3, min_confidence=70):
    """
    Scan a user's expenses from the last 365 days for recurring groups.

    Args:
        user_id: ID of the user whose expenses are analysed
        min_occurrences: Smallest group size (seed expense included) worth analysing
        min_confidence: Lowest confidence score (0-100) a pattern needs to be reported

    Returns:
        List of pattern dicts as returned by analyze_pattern()
    """
    # Load the trailing year of expenses, oldest first
    cutoff_date = (datetime.now() - timedelta(days=365)).date()
    criteria = and_(Expense.user_id == user_id, Expense.date >= cutoff_date)
    expenses = Expense.query.filter(criteria).order_by(Expense.date).all()

    if len(expenses) < min_occurrences:
        return []

    patterns = []
    processed_ids = set()
    # The seed expense itself counts towards min_occurrences
    required_matches = min_occurrences - 1

    for position, seed in enumerate(expenses):
        if seed.id in processed_ids:
            continue

        # Only later expenses are candidates; earlier ones were already seeds
        later_expenses = expenses[position + 1:]
        matches = find_similar_expenses(seed, later_expenses, processed_ids)
        if len(matches) < required_matches:
            continue

        group = [seed] + matches
        pattern = analyze_pattern(group, user_id)

        # NOTE(review): ids are marked processed only for accepted patterns;
        # confirm this nesting against the upstream file.
        if pattern and pattern['confidence_score'] >= min_confidence:
            patterns.append(pattern)
            processed_ids.update(member.id for member in group)

    return patterns
|
||||
|
||||
|
||||
def find_similar_expenses(target_expense, expenses, exclude_ids):
    """Collect the expenses that resemble target_expense.

    A candidate qualifies when its id is not in exclude_ids, it has the same
    category_id as the target, its amount is within tolerance of the target's,
    and descriptions_similar() accepts the two normalized descriptions.
    Matches are returned in the order they appear in expenses.
    """
    reference_amount = target_expense.amount
    reference_desc = normalize_description(target_expense.description or '')

    # Allowed deviation: 5% of the target amount, but never less than 5.0
    tolerance = max(reference_amount * 0.05, 5.0)

    def matches(candidate):
        """Apply the checks from cheapest to most expensive."""
        if candidate.id in exclude_ids:
            return False
        if candidate.category_id != target_expense.category_id:
            return False
        if abs(candidate.amount - reference_amount) > tolerance:
            return False
        candidate_desc = normalize_description(candidate.description or '')
        return descriptions_similar(reference_desc, candidate_desc)

    return [candidate for candidate in expenses if matches(candidate)]
|
||||
|
||||
|
||||
def normalize_description(desc):
    """Normalize an expense description for comparison.

    Strips dates (e.g. 12/05/2023), reference numbers (e.g. #1234) and a
    trailing bare number, lowercases the text, removes generic billing
    words, and collapses whitespace.

    Args:
        desc: Raw description string (may be empty)

    Returns:
        The normalized, lowercase description; '' if nothing meaningful is left
    """
    # Remove dates and '#123' style reference numbers
    desc = re.sub(r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', '', desc)
    desc = re.sub(r'#\d+', '', desc)
    # Remove a trailing number even when whitespace follows it (for example
    # whitespace left behind by the date removal above)
    desc = re.sub(r'\s+\d+\s*$', '', desc)

    desc = desc.lower()

    # Remove generic words only when they stand alone, so that e.g.
    # 'autozone' is not mangled into 'zone'
    desc = re.sub(r'\b(?:payment|subscription|monthly|recurring|auto)\b', ' ', desc)

    # Collapse the gaps left by the removals and trim both ends
    return ' '.join(desc.split())
|
||||
|
||||
|
||||
def descriptions_similar(desc1, desc2, threshold=0.6):
    """Decide whether two descriptions are close enough to be the same thing.

    Returns True when the strings are equal, when one contains the other, or
    when the share of common words (relative to the larger word set) is at
    least threshold. Returns False if either string is empty or has no words.
    """
    if not (desc1 and desc2):
        return False

    # Identical text, or one description embedded in the other
    if desc1 == desc2 or desc1 in desc2 or desc2 in desc1:
        return True

    # Fall back to comparing the sets of whitespace-separated words
    tokens_a = set(desc1.split())
    tokens_b = set(desc2.split())
    if not (tokens_a and tokens_b):
        return False

    shared = tokens_a.intersection(tokens_b)
    larger_size = max(len(tokens_a), len(tokens_b))
    return len(shared) / larger_size >= threshold
|
||||
|
||||
|
||||
def analyze_pattern(expenses, user_id):
    """Turn a group of similar expenses into a recurring-pattern description.

    Returns a dict of pattern fields, or None when the group has fewer than
    two expenses, no frequency can be determined, or an active (not
    dismissed, not converted) RecurringPattern with the same suggested name
    already exists for the user.
    """
    if len(expenses) < 2:
        return None

    ordered = sorted(expenses, key=lambda item: item.date)

    # Day gaps between consecutive occurrences
    gaps = [
        (later.date - earlier.date).days
        for earlier, later in zip(ordered, ordered[1:])
    ]
    if not gaps:
        return None

    mean_gap = sum(gaps) / len(gaps)
    frequency, confidence = determine_frequency(gaps, mean_gap)
    if not frequency:
        return None

    amounts = [item.amount for item in ordered]
    mean_amount = sum(amounts) / len(amounts)
    amount_spread = calculate_variance(amounts)

    # Consistent amounts raise confidence, erratic amounts lower it
    if amount_spread < 0.05:
        confidence += 10
    elif amount_spread > 0.2:
        confidence -= 10

    # Keep the score inside 0-100
    confidence = min(max(confidence, 0), 100)

    earliest, latest = ordered[0], ordered[-1]
    suggested_name = generate_subscription_name(earliest)

    # Skip the pattern if the user already has an open suggestion by this name
    duplicate = RecurringPattern.query.filter_by(
        user_id=user_id,
        suggested_name=suggested_name,
        is_dismissed=False,
        is_converted=False,
    ).first()
    if duplicate:
        return None

    return {
        'user_id': user_id,
        'category_id': earliest.category_id,
        'suggested_name': suggested_name,
        'average_amount': round(mean_amount, 2),
        'detected_frequency': frequency,
        'confidence_score': round(confidence, 1),
        'expense_ids': json.dumps([item.id for item in ordered]),
        'first_occurrence': earliest.date,
        'last_occurrence': latest.date,
        'occurrence_count': len(ordered),
    }
|
||||
|
||||
|
||||
def determine_frequency(intervals, avg_interval):
    """Classify an average day interval as a named frequency.

    Returns a (frequency, confidence) tuple, or (None, 0) when avg_interval
    falls in none of the known bands. The starting confidence is 70 when
    calculate_variance(intervals) is below 0.15 and 50 otherwise.
    """
    base_confidence = 70 if calculate_variance(intervals) < 0.15 else 50

    # (low, high, frequency, confidence adjustment); checked in order and the
    # first band containing avg_interval wins. The last two are looser
    # fallback bands that carry a confidence penalty.
    bands = (
        (5, 9, 'weekly', 10),
        (12, 16, 'biweekly', 0),
        (27, 33, 'monthly', 15),
        (85, 95, 'quarterly', 0),
        (355, 375, 'yearly', 0),
        (25, 35, 'monthly', -10),
        (7, 10, 'weekly', -10),
    )

    for low, high, label, adjustment in bands:
        if low <= avg_interval <= high:
            return label, base_confidence + adjustment

    return None, 0
|
||||
|
||||
|
||||
def calculate_variance(values):
    """Calculate the coefficient of variation of a sequence of numbers.

    The result is the population standard deviation divided by the absolute
    value of the mean, so it is never negative even when the values
    themselves are negative.

    Args:
        values: Sequence of numbers

    Returns:
        The coefficient of variation, or 0 when there are fewer than two
        values or the mean is zero
    """
    if not values or len(values) < 2:
        return 0

    avg = sum(values) / len(values)
    if avg == 0:
        # Relative spread is undefined around a zero mean
        return 0

    variance = sum((x - avg) ** 2 for x in values) / len(values)
    std_dev = variance ** 0.5

    # Divide by the magnitude of the mean: a signed mean would yield a
    # negative ratio that trivially passes any "< threshold" check
    return std_dev / abs(avg)
|
||||
|
||||
|
||||
def generate_subscription_name(expense):
    """Build a display name for a subscription from an expense description.

    Dates, '#123' reference numbers and a trailing number are removed, each
    word is capitalized, and the result is cut to 50 characters (ending in
    '...'). Falls back to 'Recurring Expense' when nothing is left.
    """
    fallback = 'Recurring Expense'
    text = expense.description or fallback

    # Strip, in order: dates, reference numbers, a trailing bare number
    for noise in (r'\d{1,2}[/-]\d{1,2}[/-]\d{2,4}', r'#\d+', r'\s+\d+$'):
        text = re.sub(noise, '', text)

    title = ' '.join(word.capitalize() for word in text.strip().split())

    # 47 characters plus the ellipsis keeps the name at 50 characters
    if len(title) > 50:
        title = title[:47] + '...'

    return title or fallback
|
||||
|
||||
|
||||
def save_detected_patterns(patterns):
    """Persist detected patterns as RecurringPattern rows.

    Each dict in patterns is passed as keyword arguments to RecurringPattern.
    Returns the number of patterns saved, or 0 if the commit fails (the
    session is rolled back and the error is printed).
    """
    staged = 0
    for staged, fields in enumerate(patterns, start=1):
        db.session.add(RecurringPattern(**fields))

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        print(f"Error saving patterns: {e}")
        return 0

    return staged
|
||||
|
||||
|
||||
def get_user_suggestions(user_id):
    """Return the user's open suggestions, highest confidence first.

    Open means the RecurringPattern is neither dismissed nor converted.
    """
    open_patterns = RecurringPattern.query.filter_by(
        user_id=user_id,
        is_dismissed=False,
        is_converted=False,
    )
    by_confidence = RecurringPattern.confidence_score.desc()
    return open_patterns.order_by(by_confidence).all()
|
||||
|
||||
|
||||
def convert_pattern_to_subscription(pattern_id, user_id):
    """Promote a detected pattern to a confirmed Subscription.

    Returns the new Subscription, or None when the pattern does not exist
    for this user, is already converted, or the commit fails (the session is
    rolled back and the error is printed).
    """
    pattern = RecurringPattern.query.filter_by(id=pattern_id, user_id=user_id).first()
    if not pattern or pattern.is_converted:
        return None

    # Next due date is one frequency period after the last seen occurrence
    period = timedelta(days=get_frequency_days(pattern.detected_frequency))

    subscription = Subscription(
        name=pattern.suggested_name,
        amount=pattern.average_amount,
        frequency=pattern.detected_frequency,
        category_id=pattern.category_id,
        user_id=pattern.user_id,
        next_due_date=pattern.last_occurrence + period,
        is_active=True,
        is_confirmed=True,
        auto_detected=True,
        confidence_score=pattern.confidence_score,
    )
    db.session.add(subscription)

    # Flag the pattern as converted in the same commit as the new subscription
    pattern.is_converted = True

    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        print(f"Error converting pattern: {e}")
        return None

    return subscription
|
||||
|
||||
|
||||
def get_frequency_days(frequency):
    """Map a frequency name to its length in days (30 for unknown names)."""
    days_by_frequency = dict(
        weekly=7,
        biweekly=14,
        monthly=30,
        quarterly=90,
        yearly=365,
    )
    if frequency in days_by_frequency:
        return days_by_frequency[frequency]
    # Unrecognised frequencies get a 30-day period
    return 30
|
||||
|
||||
|
||||
def dismiss_pattern(pattern_id, user_id):
    """Mark a user's detected pattern as dismissed.

    Returns True when the change was committed, and False when the pattern
    was not found for this user or the commit failed (the session is rolled
    back and the error is printed).
    """
    pattern = RecurringPattern.query.filter_by(id=pattern_id, user_id=user_id).first()
    if not pattern:
        return False

    pattern.is_dismissed = True
    try:
        db.session.commit()
    except Exception as e:
        db.session.rollback()
        print(f"Error dismissing pattern: {e}")
        return False

    return True
|
||||
Loading…
Add table
Add a link
Reference in a new issue