""" Generate a 10,000-row synthetic patient CSV for Signal volume testing. Uses canonical headers and synthetic patient IDs (SYN-00001 through SYN-10000). Realistic distribution across flags, payers, and devices. Usage: python3 test-data/generate_10k.py """ import csv import random from datetime import date, timedelta from pathlib import Path random.seed(99) TODAY = date.today() OUTPUT = Path(__file__).parent / "10k-patients.csv" DEVICE_OPTIONS = [ ("dexcom_g7", "sensor", 0.40), ("freestyle_libre_3", "sensor", 0.25), ("freestyle_libre_2", "sensor", 0.20), ("dexcom_g6", "sensor", 0.10), ("omnipod_5", "pod", 0.05), ] PAYER_OPTIONS = [ ("Medicare Part B", 0.50), ("Medicaid - GA", 0.10), ("Medicaid - PA", 0.10), ("BCBS - FL", 0.08), ("Aetna", 0.07), ("UnitedHealth", 0.06), ("Cigna", 0.05), ("Humana", 0.04), ] FLAG_DATE_RANGES = [ ("out_of_coverage", (TODAY - timedelta(days=600), TODAY - timedelta(days=400)), 0.30), ("visit_due", (TODAY - timedelta(days=400), TODAY - timedelta(days=250)), 0.25), ("refill_window", (TODAY - timedelta(days=30), TODAY - timedelta(days=20)), 0.20), ("ok", (TODAY - timedelta(days=10), TODAY - timedelta(days=1)), 0.25), ] devices = [d[0] for d in DEVICE_OPTIONS] dev_weights = [d[2] for d in DEVICE_OPTIONS] dev_comp = {d[0]: d[1] for d in DEVICE_OPTIONS} payers = [p[0] for p in PAYER_OPTIONS] pay_weights = [p[1] for p in PAYER_OPTIONS] flags = [f[0] for f in FLAG_DATE_RANGES] flag_ranges = {f[0]: f[1] for f in FLAG_DATE_RANGES} flag_weights= [f[2] for f in FLAG_DATE_RANGES] def random_date_in(bucket): start, end = bucket delta = (end - start).days return start + timedelta(days=random.randint(0, max(delta, 0))) rows_written = 0 with open(OUTPUT, "w", newline="") as f: writer = csv.writer(f) writer.writerow(["patient_id", "device_type", "shipment_date", "quantity", "payer", "component"]) for i in range(1, 10_001): pid = f"SYN-{i:05d}" device = random.choices(devices, weights=dev_weights)[0] comp = dev_comp[device] payer = random.choices(payers, weights=pay_weights)[0] flag = random.choices(flags, weights=flag_weights)[0] ship = random_date_in(flag_ranges[flag]) qty = random.choice([1, 2, 3, 6, 9, 14]) writer.writerow([pid, device, ship.isoformat(), qty, payer, comp]) rows_written += 1 print(f"Wrote {OUTPUT}") print(f"Rows: {rows_written:,}") print("Distribution targets: 30% Out of Coverage, 25% Visit Due, 20% Resupply Ready, 25% Active")