first commit
This commit is contained in:
156
monitor/management/commands/check_data_quality.py
Normal file
156
monitor/management/commands/check_data_quality.py
Normal file
@@ -0,0 +1,156 @@
|
||||
from django.core.management.base import BaseCommand
|
||||
from monitor.services.historical_data import HistoricalDataFetcher
|
||||
from monitor.models import BitcoinPrice
|
||||
from django.db.models import Min, Max, Count
|
||||
import logging
|
||||
|
||||
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Command(BaseCommand):
    """Print a quality report for the Bitcoin price rows stored in the database.

    The report covers the stored date range, record density, missing
    ``volume`` / ``market_cap`` values, basic price statistics, gaps between
    consecutive timestamps and a short list of recommendations.  With
    ``--fetch-sample`` it additionally prints statistics of a sample
    obtained from ``HistoricalDataFetcher.fetch_historical_data(days=30)``.
    """

    help = 'Check quality and statistics of Bitcoin price data in database'

    # Consecutive records further apart than this many hours count as a gap.
    GAP_THRESHOLD_HOURS = 24
    # Number of gaps listed individually before the remainder is summarised.
    MAX_GAPS_SHOWN = 3

    def add_arguments(self, parser):
        """Register the optional ``--fetch-sample`` flag on *parser*."""
        parser.add_argument(
            '--fetch-sample',
            action='store_true',
            help='Fetch sample data for comparison'
        )

    @staticmethod
    def _find_time_gaps(timestamps, threshold_hours):
        """Return gaps longer than *threshold_hours* between consecutive timestamps.

        *timestamps* must already be in ascending order.  ``None`` entries are
        skipped so that a row without a timestamp cannot crash the report.
        Each gap is a dict with the keys ``'from'``, ``'to'`` and
        ``'gap_days'``.
        """
        gaps = []
        previous = None
        for current in timestamps:
            if current is None:
                continue
            if previous is not None:
                gap_hours = (current - previous).total_seconds() / 3600
                if gap_hours > threshold_hours:
                    gaps.append({
                        'from': previous,
                        'to': current,
                        'gap_days': gap_hours / 24,
                    })
            previous = current
        return gaps

    def handle(self, *args, **options):
        """Run the quality check and write the report to ``self.stdout``.

        Reads ``options['fetch_sample']``; every other option is ignored.
        Returns early with a hint when the table holds no records.
        """
        self.stdout.write(self.style.HTTP_INFO("Bitcoin Data Quality Check"))
        self.stdout.write("=" * 50)

        # Get database statistics
        total_records = BitcoinPrice.objects.count()

        if total_records == 0:
            self.stdout.write(self.style.ERROR("No Bitcoin price data in database."))
            self.stdout.write("Run: python manage.py load_historical_data")
            return

        # Get date range
        date_range = BitcoinPrice.objects.aggregate(
            earliest=Min('timestamp'),
            latest=Max('timestamp')
        )
        earliest = date_range['earliest']
        latest = date_range['latest']

        # Default to 0 so the density check below is well defined even when
        # the aggregate yields no usable timestamps.
        days_span = 0
        if earliest and latest:
            days_span = (latest - earliest).days

            self.stdout.write(f"Data range: {earliest.strftime('%Y-%m-%d')} "
                              f"to {latest.strftime('%Y-%m-%d')}")
            self.stdout.write(f"Time span: {days_span} days")

        self.stdout.write(f"Total records: {total_records}")

        # Calculate records per day
        if days_span > 0:
            records_per_day = total_records / days_span
            self.stdout.write(f"Records per day: {records_per_day:.2f}")

            if records_per_day < 0.9:
                self.stdout.write(
                    self.style.WARNING("⚠️ Less than 1 record per day - data may be incomplete")
                )
            elif records_per_day > 24:
                self.stdout.write("📈 More than hourly data - good coverage")
            else:
                self.stdout.write("📊 Daily data coverage")

        # Check for missing values
        missing_volume = BitcoinPrice.objects.filter(volume__isnull=True).count()
        missing_market_cap = BitcoinPrice.objects.filter(market_cap__isnull=True).count()

        if missing_volume > 0:
            self.stdout.write(
                self.style.WARNING(f"Missing volume data: {missing_volume} records ({missing_volume/total_records*100:.1f}%)")
            )

        if missing_market_cap > 0:
            self.stdout.write(
                self.style.WARNING(f"Missing market cap: {missing_market_cap} records ({missing_market_cap/total_records*100:.1f}%)")
            )

        # Walk the table once, in timestamp order, collecting what both the
        # price statistics and the gap detection need (instead of querying
        # the same rows twice).
        price_list = []
        timestamps = []
        for record in BitcoinPrice.objects.order_by('timestamp').iterator():
            price_list.append(float(record.price_usd))
            timestamps.append(record.timestamp)

        # Get price statistics
        if price_list:
            min_price = min(price_list)
            max_price = max(price_list)
            avg_price = sum(price_list) / len(price_list)
            spread = max_price - min_price

            self.stdout.write("\n" + self.style.SUCCESS("Price Statistics"))
            self.stdout.write("-" * 30)
            self.stdout.write(f"Minimum price: ${min_price:.2f}")
            self.stdout.write(f"Maximum price: ${max_price:.2f}")
            self.stdout.write(f"Average price: ${avg_price:.2f}")
            if min_price != 0:
                self.stdout.write(f"Price range: ${spread:.2f} "
                                  f"({(spread / min_price * 100):.1f}%)")
            else:
                # A relative range is undefined against a zero minimum.
                self.stdout.write(f"Price range: ${spread:.2f} (n/a)")

        # Check for time gaps
        time_gaps = self._find_time_gaps(timestamps, self.GAP_THRESHOLD_HOURS)

        if time_gaps:
            self.stdout.write("\n" + self.style.WARNING("Time Gaps Detected"))
            self.stdout.write("-" * 30)
            for gap in time_gaps[:self.MAX_GAPS_SHOWN]:
                self.stdout.write(
                    f"Gap of {gap['gap_days']:.1f} days from "
                    f"{gap['from'].strftime('%Y-%m-%d')} to {gap['to'].strftime('%Y-%m-%d')}"
                )

            if len(time_gaps) > self.MAX_GAPS_SHOWN:
                self.stdout.write(f"... and {len(time_gaps) - self.MAX_GAPS_SHOWN} more gaps")

        # Compare with fresh data if requested
        if options['fetch_sample']:
            # Django's style palette has no ``INFO`` role; ``HTTP_INFO`` is the
            # role this command already uses for its headings.
            self.stdout.write("\n" + self.style.HTTP_INFO("Fetching sample data for comparison..."))

            fetcher = HistoricalDataFetcher()
            sample_data = fetcher.fetch_historical_data(days=30)

            if sample_data:
                self.stdout.write(f"Sample data points: {len(sample_data)}")

                sample_prices = [d['price_usd'] for d in sample_data]
                sample_min = min(sample_prices)
                sample_max = max(sample_prices)
                sample_avg = sum(sample_prices) / len(sample_prices)

                self.stdout.write(f"Sample min: ${sample_min:.2f}")
                self.stdout.write(f"Sample max: ${sample_max:.2f}")
                self.stdout.write(f"Sample avg: ${sample_avg:.2f}")

        # Recommendations
        self.stdout.write("\n" + self.style.HTTP_INFO("Recommendations"))
        self.stdout.write("-" * 30)

        if total_records < 100:
            self.stdout.write("1. Load more data: python manage.py load_historical_data --days 365")

        if missing_volume > total_records * 0.5:
            self.stdout.write("2. Consider fetching data with volume information")

        if time_gaps:
            self.stdout.write("3. Consider filling time gaps with additional data")

        if total_records >= 100 and not time_gaps and missing_volume < total_records * 0.1:
            self.stdout.write("✅ Data quality looks good!")

        self.stdout.write("\n" + self.style.SUCCESS("Quality check complete!"))
|
||||
Reference in New Issue
Block a user