first commit

This commit is contained in:
2026-01-16 22:20:18 +03:00
commit 5d437e5e28
56 changed files with 4463 additions and 0 deletions

View File

@@ -0,0 +1,301 @@
import logging
import time
from datetime import datetime, timezone, timedelta
from typing import List, Dict, Optional
import requests
from django.db import transaction
from django.utils import timezone as django_timezone
from decimal import Decimal
from monitor.models import BitcoinPrice
logger = logging.getLogger(__name__)
class HistoricalDataFetcher:
    """Fetches, persists, and analyzes historical Bitcoin price data.

    Data comes from the public CoinGecko v3 API. Helpers are provided to
    save points into the ``BitcoinPrice`` model, generate synthetic data
    for development, and report on data quality.
    """

    def __init__(self):
        # Base endpoint for the public CoinGecko v3 API.
        self.base_url = "https://api.coingecko.com/api/v3"
        # One shared session so headers and connections persist across calls.
        self.session = requests.Session()
        self.session.headers.update({
            'User-Agent': 'BitcoinMonitor/1.0',
            'Accept': 'application/json',
        })

    def fetch_historical_data(self, days: int = 365) -> List[Dict]:
        """
        Fetch historical Bitcoin data for the specified number of days.

        Args:
            days: Number of days of historical data to fetch.

        Returns:
            List of dicts with keys ``timestamp`` (aware UTC datetime),
            ``price_usd``, ``volume`` and ``market_cap``.
            Returns an empty list on any request or parsing error.
        """
        try:
            logger.info("Fetching %s days of historical Bitcoin data...", days)
            url = f"{self.base_url}/coins/bitcoin/market_chart"
            params = {
                'vs_currency': 'usd',
                'days': days,
                'interval': 'daily',
            }
            response = self.session.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            historical_data = []
            # API returns [timestamp_ms, value] pairs; convert ms -> aware UTC.
            for price_point in data.get('prices', []):
                timestamp = datetime.fromtimestamp(price_point[0] / 1000, timezone.utc)
                historical_data.append({
                    'timestamp': timestamp,
                    'price_usd': price_point[1],
                    'volume': None,
                    'market_cap': None,
                })

            # Volume and market-cap arrays are index-aligned with the price
            # array, so merge them positionally (guarding against short arrays).
            for i, (_ts_ms, volume) in enumerate(data.get('total_volumes', [])):
                if i < len(historical_data):
                    historical_data[i]['volume'] = volume
            for i, (_ts_ms, market_cap) in enumerate(data.get('market_caps', [])):
                if i < len(historical_data):
                    historical_data[i]['market_cap'] = market_cap

            logger.info("Fetched %s historical price points", len(historical_data))
            return historical_data
        except requests.exceptions.RequestException as e:
            logger.error("Request error fetching historical data: %s", e)
            return []
        except Exception as e:
            # Best effort: malformed payloads degrade to an empty result
            # rather than crashing the caller.
            logger.error("Error fetching historical data: %s", e)
            return []

    def fetch_historical_data_range(self, start_date: datetime, end_date: datetime) -> List[Dict]:
        """
        Fetch historical data for a specific date range.

        Note: the CoinGecko ``market_chart`` endpoint doesn't support
        arbitrary date ranges directly, so we over-fetch and filter locally.

        Args:
            start_date: Inclusive start of the range (aware datetime).
            end_date: Inclusive end of the range (aware datetime).

        Returns:
            Points from :meth:`fetch_historical_data` whose timestamp falls
            within ``[start_date, end_date]``.
        """
        days_difference = (end_date - start_date).days
        # Fetch a generous buffer beyond the range so the window is covered
        # even when the API's daily sampling doesn't align with the bounds.
        all_data = self.fetch_historical_data(days=days_difference + 100)
        return [
            point for point in all_data
            if start_date <= point['timestamp'] <= end_date
        ]

    def save_historical_data(self, historical_data: List[Dict], clear_existing: bool = False) -> Dict:
        """
        Save historical data to the database.

        Args:
            historical_data: List of price data dictionaries.
            clear_existing: Whether to clear existing data before saving.

        Returns:
            Dict with ``saved``/``skipped``/``errors`` counts (plus ``total``
            when any input was given).
        """
        if not historical_data:
            logger.warning("No historical data to save")
            return {'saved': 0, 'skipped': 0, 'errors': 0}
        try:
            # Single transaction so a failure rolls back the whole batch,
            # including the optional delete.
            with transaction.atomic():
                if clear_existing:
                    deleted_count, _ = BitcoinPrice.objects.all().delete()
                    logger.info("Cleared %s existing price records", deleted_count)
                saved_count = 0
                skipped_count = 0
                error_count = 0
                for data_point in historical_data:
                    try:
                        # Skip timestamps we already have a record for.
                        if BitcoinPrice.objects.filter(
                            timestamp=data_point['timestamp']
                        ).exists():
                            skipped_count += 1
                            continue
                        volume = data_point.get('volume')
                        market_cap = data_point.get('market_cap')
                        # `is not None` (not truthiness) so a legitimate value
                        # of 0 is stored instead of being dropped to NULL.
                        BitcoinPrice.objects.create(
                            timestamp=data_point['timestamp'],
                            price_usd=Decimal(str(data_point['price_usd'])),
                            volume=Decimal(str(volume)) if volume is not None else None,
                            market_cap=Decimal(str(market_cap)) if market_cap is not None else None,
                        )
                        saved_count += 1
                        # Log progress every 50 records.
                        if saved_count % 50 == 0:
                            logger.info("Saved %s historical records...", saved_count)
                    except Exception as e:
                        error_count += 1
                        logger.error(
                            "Error saving data point %s: %s",
                            data_point.get('timestamp'), e,
                        )
                logger.info(
                    "Historical data saved: %s new, %s existing, %s errors",
                    saved_count, skipped_count, error_count,
                )
                return {
                    'saved': saved_count,
                    'skipped': skipped_count,
                    'errors': error_count,
                    'total': len(historical_data),
                }
        except Exception as e:
            logger.error("Transaction error saving historical data: %s", e)
            return {'saved': 0, 'skipped': 0, 'errors': len(historical_data), 'total': len(historical_data)}

    def generate_test_data(self, days: int = 30, base_price: float = 45000) -> List[Dict]:
        """
        Generate synthetic hourly test data for development.

        Args:
            days: Number of days of test data (24 points per day).
            base_price: Base price the synthetic prices fluctuate around.

        Returns:
            Chronologically ordered list of synthetic price data dicts.
        """
        import random
        logger.info("Generating %s days of synthetic test data...", days)
        test_data = []
        now = django_timezone.now()
        for i in range(days * 24):  # one point per hour
            timestamp = now - timedelta(hours=i)
            # Realistic price fluctuations (within +/-5% of base).
            variation = random.uniform(0.95, 1.05)
            price = base_price * variation
            # Volume and market cap with plausible ranges and randomness.
            volume = random.uniform(20000000000, 40000000000)
            market_cap = random.uniform(800000000000, 900000000000)
            test_data.append({
                'timestamp': timestamp,
                'price_usd': round(price, 2),
                'volume': round(volume, 2),
                'market_cap': round(market_cap, 2),
            })
        # Points were generated newest-first; reverse for chronological order.
        test_data.reverse()
        logger.info("Generated %s synthetic data points", len(test_data))
        return test_data

    def analyze_historical_data_quality(self, historical_data: List[Dict]) -> Dict:
        """
        Analyze the quality of historical data.

        Args:
            historical_data: List of price data dictionaries.

        Returns:
            Dict with date-range, price-stat, and data-quality metrics,
            or ``{'error': ...}`` when there is no data.
        """
        if not historical_data:
            return {'error': 'No data to analyze'}

        sorted_data = sorted(historical_data, key=lambda x: x['timestamp'])
        timestamps = [d['timestamp'] for d in sorted_data]
        prices = [d['price_usd'] for d in sorted_data]

        # Count missing values before computing stats so None prices are
        # reported instead of crashing min()/sum().
        missing_prices = sum(1 for p in prices if p is None)
        missing_volumes = sum(1 for d in sorted_data if d.get('volume') is None)
        missing_market_caps = sum(1 for d in sorted_data if d.get('market_cap') is None)

        valid_prices = [p for p in prices if p is not None]
        if valid_prices:
            min_price = min(valid_prices)
            max_price = max(valid_prices)
            avg_price = sum(valid_prices) / len(valid_prices)
            # Guard the division: a zero minimum would otherwise raise.
            range_percent = ((max_price - min_price) / min_price * 100) if min_price else 0.0
        else:
            min_price = max_price = avg_price = range_percent = None

        # Flag gaps of more than one day between consecutive timestamps.
        time_gaps = []
        for prev, curr in zip(timestamps, timestamps[1:]):
            gap_hours = (curr - prev).total_seconds() / 3600
            if gap_hours > 24:
                time_gaps.append({
                    'from': prev,
                    'to': curr,
                    'gap_hours': gap_hours,
                })

        return {
            'total_points': len(historical_data),
            'date_range': {
                'start': timestamps[0],
                'end': timestamps[-1],
                'days': (timestamps[-1] - timestamps[0]).days,
            },
            'price_stats': {
                'min': min_price,
                'max': max_price,
                'average': avg_price,
                'range_percent': range_percent,
            },
            'data_quality': {
                'missing_prices': missing_prices,
                'missing_volumes': missing_volumes,
                'missing_market_caps': missing_market_caps,
                'time_gaps': len(time_gaps),
                'time_gaps_details': time_gaps[:5],  # first 5 gaps only
            },
            'suggestions': self._generate_data_quality_suggestions({
                'missing_prices': missing_prices,
                'time_gaps': len(time_gaps),
                'total_points': len(historical_data),
            })
        }

    def _generate_data_quality_suggestions(self, metrics: Dict) -> List[str]:
        """Generate human-readable suggestions from data quality metrics."""
        suggestions = []
        if metrics['missing_prices'] > 0:
            suggestions.append(f"Found {metrics['missing_prices']} missing prices. Consider filling gaps.")
        if metrics['time_gaps'] > 0:
            suggestions.append(f"Found {metrics['time_gaps']} time gaps. Data may not be continuous.")
        if metrics['total_points'] < 30:
            suggestions.append("Less than 30 data points. Consider fetching more data.")
        if not suggestions:
            suggestions.append("Data quality looks good!")
        return suggestions