Files
ppr-ng/backend/seed_data.py
2025-12-04 18:29:09 +00:00

175 lines
5.5 KiB
Python

#!/usr/bin/env python3
"""
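Seed reference data into the database.

Loads airport and aircraft records from headerless CSV files found in a
``db-init`` directory and bulk-inserts them in batches. A table that already
contains rows is left untouched.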
"""
import os
import csv
import sys
from pathlib import Path
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from app.core.config import settings
from app.models.ppr import Airport, Aircraft
def load_airports(db, csv_path, batch_size=1000):
    """Load airport reference data from a headerless CSV file.

    Each row is read positionally as ``icao, iata, name, country``. Rows with
    fewer than four columns are skipped, and an empty ``iata`` is stored as
    ``None``. Nothing is inserted when the airport table already holds rows.

    NOTE: every batch is committed as soon as it is written. If loading fails
    part-way through, the batches committed so far stay in the table and the
    "already exists" guard will skip the file on the next run.

    Args:
        db: SQLAlchemy session used for the existence check and the inserts.
        csv_path: Path of the airport CSV file.
        batch_size: Number of rows inserted per commit.

    Returns:
        The number of rows loaded, the existing row count when the table was
        already populated, or 0 when the file does not exist.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    if not os.path.exists(csv_path):
        print(f" ⚠ Airport data file not found: {csv_path}")
        return 0

    # Seeding is a one-off: never add to a table that already has data.
    existing_count = db.query(Airport).count()
    if existing_count > 0:
        print(f" ⚠ Airport data already exists ({existing_count} records), skipping")
        return existing_count

    print(f" Loading airports from {csv_path}...")

    loaded = 0
    batch = []
    try:
        # newline='' is what the csv module expects; without it, newlines
        # embedded in quoted fields are not interpreted correctly.
        with open(csv_path, 'r', encoding='utf-8', newline='') as f:
            for row in csv.reader(f):  # CSV has no headers
                if len(row) < 4:
                    continue  # Skip invalid rows

                icao, iata, name, country = (col.strip('"') for col in row[:4])
                batch.append(
                    Airport(
                        icao=icao,
                        iata=iata or None,
                        name=name,
                        country=country,
                    )
                )
                loaded += 1

                if len(batch) >= batch_size:
                    db.bulk_save_objects(batch)
                    db.commit()
                    batch = []
                    print(f" Loaded {loaded} airports...", end='\r')

        # Commit whatever is left over after the last full batch.
        if batch:
            db.bulk_save_objects(batch)
            db.commit()
    except Exception:
        # Leave the session usable for the caller before propagating.
        db.rollback()
        raise

    # Trailing spaces overwrite the remains of the '\r' progress line.
    print(f" ✓ Loaded {loaded} airport records" + " " * 20)
    return loaded
def load_aircraft(db, csv_path, batch_size=1000):
    """Load aircraft reference data from a headerless CSV file.

    Each row is read positionally as ``icao24, registration,
    manufacturer_icao, type_code, manufacturer_name, model``. Rows with fewer
    than six columns are skipped, and any empty value is stored as ``None``.
    Nothing is inserted when the aircraft table already holds rows.

    NOTE: every batch is committed as soon as it is written. If loading fails
    part-way through, the batches committed so far stay in the table and the
    "already exists" guard will skip the file on the next run.

    Args:
        db: SQLAlchemy session used for the existence check and the inserts.
        csv_path: Path of the aircraft CSV file.
        batch_size: Number of rows inserted per commit.

    Returns:
        The number of rows loaded, the existing row count when the table was
        already populated, or 0 when the file does not exist.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be at least 1, got {batch_size}")

    if not os.path.exists(csv_path):
        print(f" ⚠ Aircraft data file not found: {csv_path}")
        return 0

    # Seeding is a one-off: never add to a table that already has data.
    existing_count = db.query(Aircraft).count()
    if existing_count > 0:
        print(f" ⚠ Aircraft data already exists ({existing_count} records), skipping")
        return existing_count

    print(f" Loading aircraft from {csv_path}...")

    loaded = 0
    batch = []
    try:
        # newline='' is what the csv module expects; without it, newlines
        # embedded in quoted fields are not interpreted correctly.
        with open(csv_path, 'r', encoding='utf-8', newline='') as f:
            for row in csv.reader(f):  # CSV has no headers
                if len(row) < 6:
                    continue  # Skip invalid rows

                # Empty columns become NULL instead of empty strings.
                values = [col.strip('"') or None for col in row[:6]]
                batch.append(
                    Aircraft(
                        icao24=values[0],
                        registration=values[1],
                        manufacturer_icao=values[2],
                        type_code=values[3],
                        manufacturer_name=values[4],
                        model=values[5],
                    )
                )
                loaded += 1

                if len(batch) >= batch_size:
                    db.bulk_save_objects(batch)
                    db.commit()
                    batch = []
                    print(f" Loaded {loaded} aircraft...", end='\r')

        # Commit whatever is left over after the last full batch.
        if batch:
            db.bulk_save_objects(batch)
            db.commit()
    except Exception:
        # Leave the session usable for the caller before propagating.
        db.rollback()
        raise

    # Trailing spaces overwrite the remains of the '\r' progress line.
    print(f" ✓ Loaded {loaded} aircraft records" + " " * 20)
    return loaded
def _find_seed_csvs(base_paths):
    """Return ``(airport_csv, aircraft_csv)`` from the first directories holding them.

    Directories are tried in order; each file is taken from the first
    directory that contains it. A file found nowhere is returned as ``None``.
    """
    airport_csv = None
    aircraft_csv = None

    for base in base_paths:
        if not base.exists():
            continue

        potential_airport = base / 'airports_data_clean.csv'
        potential_aircraft = base / 'aircraft_data.csv'

        if airport_csv is None and potential_airport.exists():
            airport_csv = str(potential_airport)
        if aircraft_csv is None and potential_aircraft.exists():
            aircraft_csv = str(potential_aircraft)

        if airport_csv and aircraft_csv:
            break

    return airport_csv, aircraft_csv


def main():
    """Seed airport and aircraft reference data and report the result.

    Opens a session against ``settings.database_url``, looks for the CSV
    files in a fixed list of ``db-init`` directories, and loads whichever
    files are found. The session is closed whether or not seeding succeeds.

    Returns:
        0 when seeding ran to completion (including when files were missing
        or tables were already populated), 1 when any exception was raised.
    """
    print("Starting data seeding process...")

    db = None
    try:
        # Create database connection
        engine = create_engine(settings.database_url)
        Session = sessionmaker(bind=engine)
        db = Session()

        # Determine CSV paths - check multiple locations
        airport_csv, aircraft_csv = _find_seed_csvs([
            Path('/app/../db-init'),  # Docker mounted volume
            Path('/app/db-init'),     # If copied into container
            Path('./db-init'),        # Current directory
            Path('../db-init'),       # Parent directory
        ])

        # Load data
        airports_loaded = 0
        aircraft_loaded = 0

        if airport_csv:
            airports_loaded = load_airports(db, airport_csv)
        else:
            print(" ⚠ No airport CSV file found")

        if aircraft_csv:
            aircraft_loaded = load_aircraft(db, aircraft_csv)
        else:
            print(" ⚠ No aircraft CSV file found")

        print("")
        print("Seeding complete:")
        print(f" Airports: {airports_loaded:,}")
        print(f" Aircraft: {aircraft_loaded:,}")
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and turn it into an exit code.
        print(f"✗ Error during seeding: {e}")
        import traceback
        traceback.print_exc()
        return 1
    finally:
        # Release the session on the failure path too, not just on success.
        if db is not None:
            db.close()
# Script entry point: the return value of main() becomes the process exit code.
if __name__ == "__main__":
    raise SystemExit(main())