import logging

from django.core.management.base import BaseCommand
from django.db.models import Min, Max, Count

from monitor.models import BitcoinPrice
from monitor.services.historical_data import HistoricalDataFetcher

logger = logging.getLogger(__name__)


class Command(BaseCommand):
    """Print a data-quality report for the BitcoinPrice table.

    The report covers the stored date range, record density, missing
    volume / market-cap values, price statistics, gaps between consecutive
    records and, optionally, a comparison with freshly fetched sample data.
    """

    help = 'Check quality and statistics of Bitcoin price data in database'

    # Consecutive records further apart than this are reported as a gap.
    GAP_THRESHOLD_HOURS = 24
    # Number of gaps listed individually before the rest are summarized.
    MAX_GAPS_SHOWN = 3

    def add_arguments(self, parser):
        """Register the optional ``--fetch-sample`` flag."""
        parser.add_argument(
            '--fetch-sample',
            action='store_true',
            help='Fetch sample data for comparison'
        )

    def handle(self, *args, **options):
        """Run every report section in order and write the results to stdout.

        Returns early, with a hint on how to load data, when the table is
        empty.
        """
        self.stdout.write(self.style.HTTP_INFO("Bitcoin Data Quality Check"))
        self.stdout.write("=" * 50)

        total_records = BitcoinPrice.objects.count()
        if total_records == 0:
            self.stdout.write(self.style.ERROR("No Bitcoin price data in database."))
            self.stdout.write("Run: python manage.py load_historical_data")
            return

        self._report_coverage(total_records)
        missing_volume = self._report_missing_values(total_records)
        time_gaps = self._report_prices_and_gaps()

        if options['fetch_sample']:
            self._report_sample()

        self._report_recommendations(total_records, missing_volume, time_gaps)
        self.stdout.write("\n" + self.style.SUCCESS("Quality check complete!"))

    def _report_coverage(self, total_records):
        """Print the stored date range, its span in days and the record density."""
        date_range = BitcoinPrice.objects.aggregate(
            earliest=Min('timestamp'),
            latest=Max('timestamp')
        )
        earliest = date_range['earliest']
        latest = date_range['latest']
        if not (earliest and latest):
            return

        days_span = (latest - earliest).days
        self.stdout.write(f"Data range: {earliest.strftime('%Y-%m-%d')} "
                          f"to {latest.strftime('%Y-%m-%d')}")
        self.stdout.write(f"Time span: {days_span} days")
        self.stdout.write(f"Total records: {total_records}")

        # A span shorter than one full day gives no meaningful per-day rate
        # (and would divide by zero).
        if days_span <= 0:
            return

        records_per_day = total_records / days_span
        self.stdout.write(f"Records per day: {records_per_day:.2f}")
        if records_per_day < 0.9:
            self.stdout.write(
                self.style.WARNING("⚠️ Less than 1 record per day - data may be incomplete")
            )
        elif records_per_day > 24:
            self.stdout.write("📈 More than hourly data - good coverage")
        else:
            self.stdout.write("📊 Daily data coverage")

    def _report_missing_values(self, total_records):
        """Warn about NULL volume / market-cap values.

        Returns the number of records without a volume, which the
        recommendations section needs.
        """
        missing_volume = BitcoinPrice.objects.filter(volume__isnull=True).count()
        missing_market_cap = BitcoinPrice.objects.filter(market_cap__isnull=True).count()

        if missing_volume > 0:
            self.stdout.write(
                self.style.WARNING(
                    f"Missing volume data: {missing_volume} records "
                    f"({missing_volume/total_records*100:.1f}%)"
                )
            )
        if missing_market_cap > 0:
            self.stdout.write(
                self.style.WARNING(
                    f"Missing market cap: {missing_market_cap} records "
                    f"({missing_market_cap/total_records*100:.1f}%)"
                )
            )
        return missing_volume

    def _report_prices_and_gaps(self):
        """Print price statistics and detected time gaps.

        Both are gathered in a single timestamp-ordered pass over
        (timestamp, price_usd) pairs so the table is read once and no model
        instances are built. Returns the list of gap dicts
        (``from``, ``to``, ``gap_days``).
        """
        rows = (
            BitcoinPrice.objects
            .order_by('timestamp')
            .values_list('timestamp', 'price_usd')
            .iterator()
        )

        count = 0
        price_sum = 0.0
        min_price = None
        max_price = None
        time_gaps = []
        prev_timestamp = None

        for timestamp, raw_price in rows:
            price = float(raw_price)
            count += 1
            price_sum += price
            if min_price is None or price < min_price:
                min_price = price
            if max_price is None or price > max_price:
                max_price = price

            if prev_timestamp is not None:
                gap_hours = (timestamp - prev_timestamp).total_seconds() / 3600
                if gap_hours > self.GAP_THRESHOLD_HOURS:
                    time_gaps.append({
                        'from': prev_timestamp,
                        'to': timestamp,
                        'gap_days': gap_hours / 24,
                    })
            prev_timestamp = timestamp

        if count:
            spread = max_price - min_price
            # A stored price of 0 would make the relative range undefined.
            if min_price:
                spread_pct = f"{(spread / min_price * 100):.1f}%"
            else:
                spread_pct = "n/a"

            self.stdout.write("\n" + self.style.SUCCESS("Price Statistics"))
            self.stdout.write("-" * 30)
            self.stdout.write(f"Minimum price: ${min_price:.2f}")
            self.stdout.write(f"Maximum price: ${max_price:.2f}")
            self.stdout.write(f"Average price: ${price_sum / count:.2f}")
            self.stdout.write(f"Price range: ${spread:.2f} ({spread_pct})")

        if time_gaps:
            self.stdout.write("\n" + self.style.WARNING("Time Gaps Detected"))
            self.stdout.write("-" * 30)
            for gap in time_gaps[:self.MAX_GAPS_SHOWN]:
                self.stdout.write(
                    f"Gap of {gap['gap_days']:.1f} days from "
                    f"{gap['from'].strftime('%Y-%m-%d')} to {gap['to'].strftime('%Y-%m-%d')}"
                )
            hidden = len(time_gaps) - self.MAX_GAPS_SHOWN
            if hidden > 0:
                self.stdout.write(f"... and {hidden} more gaps")

        return time_gaps

    def _report_sample(self):
        """Fetch 30 days of sample data and print its min / max / average price.

        Assumes ``fetch_historical_data`` returns a sized collection of dicts
        carrying a numeric ``price_usd`` key; nothing is printed beyond the
        heading when it returns an empty or falsy result.
        """
        # Django defines no plain ``INFO`` style role; ``HTTP_INFO`` is the
        # one the other headings of this command use.
        self.stdout.write("\n" + self.style.HTTP_INFO("Fetching sample data for comparison..."))
        fetcher = HistoricalDataFetcher()
        sample_data = fetcher.fetch_historical_data(days=30)
        if not sample_data:
            return

        self.stdout.write(f"Sample data points: {len(sample_data)}")
        sample_prices = [d['price_usd'] for d in sample_data]
        sample_avg = sum(sample_prices) / len(sample_prices)
        self.stdout.write(f"Sample min: ${min(sample_prices):.2f}")
        self.stdout.write(f"Sample max: ${max(sample_prices):.2f}")
        self.stdout.write(f"Sample avg: ${sample_avg:.2f}")

    def _report_recommendations(self, total_records, missing_volume, time_gaps):
        """Print follow-up suggestions derived from the earlier findings."""
        self.stdout.write("\n" + self.style.HTTP_INFO("Recommendations"))
        self.stdout.write("-" * 30)

        if total_records < 100:
            self.stdout.write("1. Load more data: python manage.py load_historical_data --days 365")
        if missing_volume > total_records * 0.5:
            self.stdout.write("2. Consider fetching data with volume information")
        if time_gaps:
            self.stdout.write("3. Consider filling time gaps with additional data")
        if total_records >= 100 and not time_gaps and missing_volume < total_records * 0.1:
            self.stdout.write("✅ Data quality looks good!")