feat: AR-House initial commit
This commit is contained in:
@@ -0,0 +1,94 @@
|
||||
"""Migration: separate Zillow zpids from the case_number field.

ORIGINAL BUG:
The Zillow scraper was storing the zpid in the case_number column, which is
WRONG because case_number is only for real court cases (judicial
foreclosure, tax_deed). This polluted the dataset with false positives for
"judicial deals".

FIX:
1. Applies ALTER TABLE to add external_id (idempotent via init_db)
2. For each deal with source='zillow': move case_number → external_id
3. Also applies to hud_homestore (its HUD case# is a tracking number, not a
   judicial court case)

Idempotent: if it has already been run, the number of newly migrated rows
will be 0.
"""
|
||||
from __future__ import annotations
|
||||
import io, sys
|
||||
from pathlib import Path
|
||||
|
||||
# Force UTF-8 output with replacement of unencodable characters, so printing
# non-ASCII text (e.g. the arrows in this script's messages) cannot fail on a
# console that uses a narrower encoding.
if hasattr(sys.stdout, "reconfigure"):
    # Reconfigure the existing stream in place (Python 3.7+) instead of
    # stacking a second TextIOWrapper on the same buffer; settings that are
    # not passed here (such as line buffering) are left as they were.
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
elif hasattr(sys.stdout, "buffer"):
    # Fallback for stdout replacements that expose a binary buffer but no
    # reconfigure(); streams with neither are left untouched.
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8", errors="replace")

# Directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parent.parent

# Put ROOT first on the import path; presumably this is where the deals_db
# module imported below lives -- confirm against the repository layout.
sys.path.insert(0, str(ROOT))
|
||||
|
||||
from deals_db import init_db, _get_conn
|
||||
|
||||
|
||||
def _move_case_number_to_external_id(conn, source: str) -> int:
    """Move case_number into external_id for all pending rows of *source*.

    Only rows with a non-empty case_number and an empty (NULL or '')
    external_id are updated, so a second run finds nothing left to move.

    Returns the number of rows the UPDATE changed.
    """
    cursor = conn.execute(
        "UPDATE deals SET external_id = case_number, case_number = NULL "
        "WHERE source = ? "
        "AND case_number IS NOT NULL AND case_number != '' "
        "AND (external_id IS NULL OR external_id = '')",
        (source,),
    )
    return cursor.rowcount


def main(conn=None) -> int:
    """Run the case_number -> external_id migration and print a report.

    Args:
        conn: Optional already-open database connection to migrate. It must
            return rows addressable by column name (the code reads
            ``row["name"]``), e.g. a sqlite3 connection whose row_factory is
            sqlite3.Row. When omitted, init_db() is called and the
            connection comes from _get_conn(), exactly as before. When
            supplied, init_db() is NOT called; the column check below still
            guards against a missing external_id column.

    Returns:
        0 on success, 1 if the deals table has no external_id column.
    """
    if conn is None:
        init_db()  # ensures external_id column exists
        conn = _get_conn()

    cols = {r["name"] for r in conn.execute("PRAGMA table_info(deals)").fetchall()}
    if "external_id" not in cols:
        print("ERROR: external_id column missing after init_db()")
        return 1

    # Run both updates in one transaction: the connection's context manager
    # commits on success and rolls back if either statement raises. Without
    # an explicit commit the changes would be discarded when the process
    # exits, unless the connection happens to be in autocommit mode.
    with conn:
        # Step 1: Zillow zpids -- move case_number -> external_id
        moved_zillow = _move_case_number_to_external_id(conn, "zillow")
        # Step 2: HUD Homestore -- the HUD case# is a tracking number, NOT a
        # court case. It is moved to external_id and case_number becomes
        # NULL (HUD listings are REO, not judicial proceedings).
        moved_hud = _move_case_number_to_external_id(conn, "hud_homestore")

    print(f"Zillow deals needing migration: {moved_zillow}")
    if moved_zillow > 0:
        print(f" Migrated {moved_zillow} Zillow zpid values")

    print(f"HUD deals needing migration: {moved_hud}")
    if moved_hud > 0:
        print(f" Migrated {moved_hud} HUD case# values")

    # Step 3: Verify clerks NOT affected (their case_number IS a real court case)
    # NOTE(review): in LIKE, '_' matches any single character, so this
    # pattern also matches sources ending in e.g. '-clerk'. Kept as-is because
    # the set of source names is not visible here.
    clerk_with_case = conn.execute(
        "SELECT COUNT(*) FROM deals WHERE source LIKE '%_clerk' "
        "AND case_number IS NOT NULL AND case_number != ''"
    ).fetchone()[0]
    print(f"\nClerk deals with case_number (court cases, unchanged): {clerk_with_case}")

    # Final state: per source, how many rows carry each identifier.
    print()
    print("=== Final state ===")
    rows = conn.execute(
        "SELECT source, "
        "SUM(CASE WHEN case_number IS NOT NULL AND case_number != '' THEN 1 ELSE 0 END) AS with_case, "
        "SUM(CASE WHEN external_id IS NOT NULL AND external_id != '' THEN 1 ELSE 0 END) AS with_ext "
        "FROM deals GROUP BY source ORDER BY source"
    ).fetchall()
    print(f"{'source':<22} {'case_number':>12} {'external_id':>12}")
    for r in rows:
        # str() keeps a NULL source from crashing the '<22' format spec.
        print(f" {str(r['source']):<22} {r['with_case']:>12} {r['with_ext']:>12}")

    return 0
|
||||
|
||||
|
||||
# Script entry point: run the migration and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user