Signal/python-backend/core/persistence.py
Kisa 4a0e043a6d add phase 2 supabase persistence layer
- supabase_client.py: lazy singleton client (no-ops when env vars absent)
- persistence.py: persist_upload writes batch, source_files, normalized_records,
  mapping_decisions, report_runs; persist_export records export_files
- schema.sql: 11-table schema with RLS + WORM rules for audit/raw tables
- main.py: wire persist_upload/persist_export; add ExportRequest body model
  so export accepts {records, batch_id}; batch_id returned on upload response
- api.js: add exportFromBackend helper passing batch_id through
- requirements.txt: add supabase>=2.0.0
- smoke_test.py: update export call to new body format

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-29 06:50:34 -04:00

175 lines
5.6 KiB
Python

"""
Supabase persistence for Signal upload batches, scored records, and report runs.
All writes are best-effort: failures are logged but never surface to the API caller.
The core scoring pipeline works without Supabase (dev mode / env vars not set).
"""
import hashlib
import logging
from datetime import date
from core.supabase_client import get_client
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Slug of the organization row that _get_or_create_org() looks up or creates.
DEMO_ORG_SLUG = "gaboro-pilot"
# Cached id of that organization; stays None until _get_or_create_org() resolves it.
_demo_org_id: str | None = None
def _sha256(value: str) -> str:
return hashlib.sha256(value.encode()).hexdigest()
def _get_or_create_org() -> str | None:
    """Resolve the pilot organization's id, creating the row on first use.

    The id is cached in the module-level ``_demo_org_id`` so later calls skip
    the lookup. Returns None when no Supabase client is available, or when the
    lookup/insert raises (the failure is logged, not propagated).
    """
    global _demo_org_id

    # Fast path: already resolved earlier in this process.
    if _demo_org_id:
        return _demo_org_id

    supabase = get_client()
    if not supabase:
        return None

    try:
        lookup = (
            supabase.table("organizations")
            .select("id")
            .eq("slug", DEMO_ORG_SLUG)
            .execute()
        )
        if not lookup.data:
            # No row with this slug yet: create it and cache the new id.
            new_org = {
                "name": "Gaboro DME — Pilot",
                "slug": DEMO_ORG_SLUG,
            }
            created = supabase.table("organizations").insert(new_org).execute()
            _demo_org_id = created.data[0]["id"]
            logger.info(f"Created pilot org: {_demo_org_id}")
        else:
            _demo_org_id = lookup.data[0]["id"]
        return _demo_org_id
    except Exception as exc:
        logger.error(f"Failed to get/create org: {exc}")
        return None
def _flag_value(flag) -> str:
    """Return a coverage flag as plain text (its ``.value`` when present, else ``str``)."""
    return flag.value if hasattr(flag, "value") else str(flag)


def _normalized_rows(batch_id: str, shipment_records: list, coverage_results: list) -> list[dict]:
    """Build one ``normalized_records`` row per coverage result."""
    # Quantity is looked up by patient_id, defaulting to 1 when no shipment
    # record matches.
    # NOTE(review): if a patient can appear in several shipment records, the
    # last quantity is applied to every row for that patient — confirm that
    # this is intended.
    qty_by_patient = {sr.patient_id: sr.quantity for sr in shipment_records}
    return [
        {
            "batch_id": batch_id,
            # Only a hash of the patient id is stored, never the raw value.
            "patient_id_hash": _sha256(r.patient_id),
            "device_type": r.device_type,
            "shipment_date": r.last_shipment_date.isoformat(),
            "quantity": qty_by_patient.get(r.patient_id, 1),
            "payer": r.payer,
            "component": r.component,
            "coverage_status": _flag_value(r.flag),
            "days_remaining": r.days_until_coverage_end,
            "rule_version": r.rule_version,
        }
        for r in coverage_results
    ]


def _mapping_rows(batch_id: str, mapping_summary: dict) -> list[dict]:
    """Build ``mapping_decisions`` rows for mapped and unmapped CSV headers."""
    rows = [
        {
            "batch_id": batch_id,
            "raw_header": detail["raw_header"],
            "canonical_field": canonical,
            "confidence": detail["confidence"],
        }
        for canonical, detail in mapping_summary.get("mapped", {}).items()
    ]
    rows.extend(
        {
            "batch_id": batch_id,
            "raw_header": raw_header,
            "canonical_field": None,
            "confidence": "unmapped",
        }
        for raw_header in mapping_summary.get("unmapped_columns", [])
    )
    return rows


def persist_upload(
    filename: str,
    content_bytes: bytes,
    shipment_records: list,
    coverage_results: list,
    skipped_count: int,
    mapping_summary: dict,
) -> str | None:
    """Persist one upload batch and all related records to Supabase.

    Writes, in order: ``upload_batches``, ``source_files``,
    ``normalized_records``, ``mapping_decisions`` and ``report_runs``.
    The inserts are issued one after another; if a later one fails, rows
    written by earlier steps are left in place and None is returned.

    Args:
        filename: Name of the uploaded file.
        content_bytes: Raw bytes of the uploaded file (hashed and measured).
        shipment_records: Objects exposing ``patient_id`` and ``quantity``.
        coverage_results: Objects exposing ``patient_id``, ``device_type``,
            ``last_shipment_date``, ``payer``, ``component``, ``flag``,
            ``days_until_coverage_end`` and ``rule_version``.
        skipped_count: Number of input rows that were skipped.
        mapping_summary: Dict with an optional ``"mapped"`` entry
            (canonical field -> ``{"raw_header", "confidence"}``) and an
            optional ``"unmapped_columns"`` list of raw headers.

    Returns:
        The batch_id UUID string, or None if persistence is unavailable or
        any step raised (the error is logged, never propagated).
    """
    client = get_client()
    if not client:
        return None

    org_id = _get_or_create_org()
    if not org_id:
        return None

    try:
        # 1. Upload batch
        batch_res = client.table("upload_batches").insert({
            "org_id": org_id,
            "filename": filename,
            "row_count": len(coverage_results),
            "skipped_count": skipped_count,
            "status": "complete",
        }).execute()
        batch_id = batch_res.data[0]["id"]

        # 2. Source file metadata. Hash the raw bytes: decoding with
        #    errors="replace" first would make distinct non-UTF-8 files
        #    collide and would not match the file's real SHA-256.
        client.table("source_files").insert({
            "batch_id": batch_id,
            "filename": filename,
            "content_hash": hashlib.sha256(content_bytes).hexdigest(),
            "byte_size": len(content_bytes),
        }).execute()

        # 3. Normalized records — one row per scored patient
        norm_rows = _normalized_rows(batch_id, shipment_records, coverage_results)
        if norm_rows:
            client.table("normalized_records").insert(norm_rows).execute()

        # 4. Mapping decisions — how each CSV header was resolved
        mapping_rows = _mapping_rows(batch_id, mapping_summary)
        if mapping_rows:
            client.table("mapping_decisions").insert(mapping_rows).execute()

        # 5. Report run summary (every status other than "OK" counts as flagged)
        flagged = sum(1 for row in norm_rows if row["coverage_status"] != "OK")
        client.table("report_runs").insert({
            "batch_id": batch_id,
            "org_id": org_id,
            "status": "complete",
            "total_records": len(coverage_results),
            "flagged_count": flagged,
        }).execute()

        logger.info(f"Persisted batch {batch_id}: {len(coverage_results)} records, {flagged} flagged")
        return batch_id
    except Exception as e:
        # Best-effort: never surface to the caller, but keep the traceback so
        # a partially written batch can be diagnosed from the logs.
        logger.error(f"Persistence error on upload '{filename}': {e}", exc_info=True)
        return None
def persist_export(batch_id: str | None, filename: str, row_count: int) -> None:
"""Record that a work queue CSV was exported. Best-effort."""
if not batch_id:
return
client = get_client()
if not client:
return
try:
# Find the report_run for this batch
run_res = client.table("report_runs").select("id").eq("batch_id", batch_id).execute()
if not run_res.data:
return
run_id = run_res.data[0]["id"]
client.table("export_files").insert({
"report_run_id": run_id,
"filename": filename,
"row_count": row_count,
}).execute()
except Exception as e:
logger.error(f"Persistence error on export: {e}")