security: SECRET_KEY from env, CORS restricted to localhost

- Replace hardcoded secret_key with os.environ.get('SECRET_KEY') - RuntimeError if SECRET_KEY not set (fail fast) - Restrict CORS to localhost:8765 origins (was allow all with credentials) - Add .gitignore excluding db, env, __pycache__, backups Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-07-03 12:55:19 -04:00
commit 235a9abbfe
8 changed files with 7670 additions and 0 deletions
@@ -0,0 +1,21 @@
 __pycache__/
 *.pyc
 *.pyo
 .venv/
 venv/
 # Database (local data)
 *.db
 *.sqlite
 # Environment secrets
 .env
 *.env
 # Backup archives
 Backup/
 *.zip
 # OS
 .DS_Store
 Thumbs.db
@@ -0,0 +1,99 @@
@echo off
 :: ------------------------------------------------------------------
 :: Boat and Ship Finder launcher.
 :: Steps: locate Python, install the Flask dependencies when missing,
 :: check that Ollama answers, print the local and Tailscale URLs,
 :: open the browser and finally run server.py in this console.
 :: ------------------------------------------------------------------
 chcp 65001 >nul
 title Boat^&Ship-Finder
 echo.
 echo  ===================================================
 echo       Boat^&Ship-Finder - Broker Tool
 echo  ===================================================
 echo.
 :: Find Python. Assignments use the quoted form so that profile paths
 :: containing spaces or parentheses do not break the surrounding blocks.
 set "PYTHON="
 where python >nul 2>&1 && set "PYTHON=python"
 if not defined PYTHON (
    where python3 >nul 2>&1 && set "PYTHON=python3"
 )
 if not defined PYTHON (
    if exist "%LOCALAPPDATA%\Programs\Python\Python312\python.exe" set "PYTHON=%LOCALAPPDATA%\Programs\Python\Python312\python.exe"
 )
 if not defined PYTHON (
    if exist "%LOCALAPPDATA%\Programs\Python\Python311\python.exe" set "PYTHON=%LOCALAPPDATA%\Programs\Python\Python311\python.exe"
 )
 if not defined PYTHON (
    if exist "%LOCALAPPDATA%\Programs\Python\Python310\python.exe" set "PYTHON=%LOCALAPPDATA%\Programs\Python\Python310\python.exe"
 )
 if not defined PYTHON (
    if exist "C:\Python312\python.exe" set "PYTHON=C:\Python312\python.exe"
 )
 if not defined PYTHON (
    if exist "C:\Python311\python.exe" set "PYTHON=C:\Python311\python.exe"
 )
 if not defined PYTHON (
    echo [ERROR] No se encontro Python.
    echo.
    echo Descargalo de: https://www.python.org/downloads/
    echo Durante la instalacion marca: "Add Python to PATH"
    echo.
    pause & exit /b 1
 )
 echo [OK] Python: %PYTHON%
 :: Check for Flask and install the dependencies when it cannot be imported.
 :: The interpreter is always invoked quoted because it may be a full path.
 "%PYTHON%" -c "import flask" >nul 2>&1
 if errorlevel 1 (
    echo [INSTALANDO] Flask y dependencias...
    "%PYTHON%" -m pip install flask flask-cors requests beautifulsoup4 --quiet
    if errorlevel 1 (
       echo [ERROR] No se pudieron instalar las dependencias.
       pause
       exit /b 1
    )
    echo [OK] Dependencias instaladas.
 )
 :: Check Ollama. After the user is asked to start it, probe again so the
 :: status line reflects reality and is not printed unconditionally.
 curl -s http://localhost:11434/api/tags >nul 2>&1
 if not errorlevel 1 goto :ollama_ok
 echo.
 echo [AVISO] Ollama no esta corriendo.
 echo         Abre Ollama Desktop desde la barra de tareas.
 echo         Luego presiona cualquier tecla aqui.
 echo.
 pause >nul
 curl -s http://localhost:11434/api/tags >nul 2>&1
 if not errorlevel 1 goto :ollama_ok
 echo [AVISO] Ollama sigue sin responder. Las funciones de IA pueden fallar.
 goto :ollama_done
 :ollama_ok
 echo [OK] Ollama activo.
 :ollama_done
 :: Fixed port
 set PORT=8765
 set MARINE_PORT=8765
 echo [OK] Puerto: %PORT%
 :: Get the Tailscale IP, first from the tailscale CLI.
 :: NOTE(review): the ipconfig fallback only matches lines that contain the
 :: word tailscale, which may not be the line carrying the IPv4 address.
 :: Verify it on a machine without the CLI.
 set TSIP=
 for /f "tokens=*" %%i in ('tailscale ip -4 2^>nul') do set TSIP=%%i
 if not defined TSIP (
    for /f "tokens=2 delims=:" %%a in ('ipconfig ^| findstr /i "tailscale" 2^>nul') do set TSIP=%%a
 )
 echo.
 echo  ===================================================
 echo   Corriendo en puerto %PORT%
 echo.
 echo   Desde esta PC:    http://localhost:%PORT%
 if defined TSIP (
 echo   Desde tu celular: http://%TSIP%:%PORT%
 ) else (
 echo   Tailscale: no detectado
 )
 echo.
 echo   Presiona Ctrl+C para detener
 echo  ===================================================
 echo.
 :: Go to the folder that holds server.py: current folder first, then the
 :: folder of this script, then a Boat and Ship Finder sub-folder.
 :: Inside double quotes the ampersand is literal and must not be caret-escaped.
 if exist "server.py" goto :run
 if exist "%~dp0server.py" (
    cd /d "%~dp0"
    goto :run
 )
 if exist "Boat&Ship-Finder\server.py" (
    cd /d "Boat&Ship-Finder"
    goto :run
 )
 echo [ERROR] No se encontro server.py.
 pause
 exit /b 1
 :run
 :: Open the browser after 5 seconds, on the same port the server uses
 start "" cmd /c "timeout /t 5 /nobreak >nul & start http://localhost:%PORT%"
 :: Start the server
 "%PYTHON%" server.py
 pause
@@ -0,0 +1,151 @@
 # Boat&Ship-Finder — Guía de Instalación
 ## Broker Intelligence Platform + Ollama (Windows + Tailscale)
 ---
 ## LO QUE TIENES
 ```
 Boat&Ship-Finder/
 ├── INSTALAR.bat        ← Ejecuta esto PRIMERO (una sola vez)
 ├── INICIAR.bat         ← Ejecuta esto cada vez que quieras usar la app
 ├── server.py           ← Backend Python (Flask + Ollama)
 └── static/
    └── index.html      ← Frontend (dashboard completo)
 ```
 ---
 ## PASO 1 — Verificar que Ollama está corriendo
 1. Abre **Ollama Desktop** desde tu barra de tareas
 2. Verifica en el navegador: http://localhost:11434
   - Debes ver: `Ollama is running`
 Los modelos que usará el sistema:
 - **qwen2.5:72b** → Extracción y análisis principal
 - **llama3.1:8b** → Clasificación rápida
 - **nomic-embed-text** → Deduplicación semántica
 ---
 ## PASO 2 — Instalar (solo la primera vez)
 1. Pon todos los archivos en una carpeta (ej: `C:\Boat&Ship-Finder\`)
 2. Doble clic en **INSTALAR.bat**
 3. Espera que termine (instala Flask y dependencias Python)
 ---
 ## PASO 3 — Iniciar el servidor
 1. Doble clic en **INICIAR.bat**
 2. Verás en pantalla:
   ```
   Desde esta PC:    http://localhost:8765
   Desde tu celular: http://100.x.x.x:8765
   ```
 3. Abre esa URL en tu navegador o celular
 ---
 ## ACCESO DESDE CELULAR (Tailscale)
 1. Instala Tailscale en tu celular (App Store / Play Store)
 2. Inicia sesión con la misma cuenta que tu Windows
 3. Abre en el celular: `http://100.x.x.x:8765`
   (usa la IP que muestra INICIAR.bat)
 ---
 ## CÓMO USAR LA APP
 ### Buscar con IA
 - Clic en **"⚡ Buscar con IA Local"** (barra de búsqueda)
 - Escribe en lenguaje natural:
  - `"remolcador acero más de 30 metros en subasta"`
  - `"fishing vessel noruego buen precio"`
  - `"offshore support vessel government surplus Florida"`
 - La IA consulta +60 fuentes y extrae resultados
 ### Analizar un anuncio
 - Clic en **"📋 Analizar"**
 - Pega el texto de cualquier anuncio (periódico, email, clasificado)
 - La IA extrae automáticamente todos los datos técnicos
 ### Guardar favoritas
 - Clic en ☆ en cualquier tarjeta
 - Ver en la pestaña **"★ Guardadas"**
 ### Crear alertas
 - Pestaña **"🔔 Alertas"**
 - Define criterios (tipo, precio, estado)
 - El sistema notifica cuando encuentra coincidencias
 ---
 ## FUENTES MONITOREADAS (+60)
 ### Subastas USA
 GovPlanet, GovDeals, PropertyRoom, PublicSurplus, AuctionTime, IronPlanet, HiBid, Copart, BidSpotter
 ### Subastas Internacionales
 Ritchie Bros, Euro Auctions, Troostwijk, Surplex, BVA, Catawiki, ShipXchange
 ### Venta Especializada
 YachtWorld, Boats.com, BoatTrader, Apollo Duck, Rightboat, Boat24, Seaboats, NauticExpo
 ### Clasificados Globales
 Craigslist, eBay Marine, Facebook Marketplace, Kijiji, Gumtree, Subito.it, LeBonCoin, Wallapop, MercadoLibre, OLX
 ### Salvage & Wrecks
 Salvex, MarineWrecks, BoatBreakers, NavAuctions, Barnacle Marine
 ### Revistas & Noticias
 Boat International, Superyacht Times, The Triton, WorkBoat, Lloyd's List, TradeWinds, Maritime Executive, Splash247
 ### Registros
 USCG, UK Ship Register, Panama Registry, Marshall Islands, Liberian Registry, Bahamas Maritime
 ---
 ## CAMBIAR EL MODELO DE IA
 Edita `server.py`, sección `MODELS`:
 ```python
 MODELS = {
    'extract':  'qwen2.5:72b',      # Cambia por cualquier modelo que tengas
    'classify': 'llama3.1:8b',
    'embed':    'nomic-embed-text',
    'parse':    'qwen3-coder:latest'
 }
 ```
 Para ver tus modelos disponibles: http://localhost:11434/api/tags
 ---
 ## SOLUCIÓN DE PROBLEMAS
 | Problema | Solución |
 |----------|----------|
 | "Servidor desconectado" | Ejecuta INICIAR.bat |
 | "Ollama no responde" | Abre Ollama Desktop |
 | Sin resultados en búsqueda | Verifica que qwen2.5:72b está descargado |
 | No accede desde celular | Verifica que Tailscale está activo en ambos dispositivos |
 | Puerto 8000 ocupado | Cambia `port=8000` a `port=8001` en server.py |
 ---
 ## AGREGAR MÁS FUENTES
 En `server.py`, sección `SOURCES`, agrega:
 ```python
 "Mi categoría": [
    {"name": "NombreSitio", "url": "https://sitio.com", "type": "auction"},
 ],
 ```
 Tipos disponibles: `auction`, `broker`, `classifieds`, `salvage`, `news`, `magazine`, `registry`, `commercial`
@@ -0,0 +1 @@
 # Patch marker — not used, just to verify write access
@@ -0,0 +1,637 @@
 """
 scraper_test.py — Run the scrapers one by one without starting the Flask server.
 Usage:
    python scraper_test.py                    # run every scraper with the default query
    python scraper_test.py "catalina 30"      # custom query
    python scraper_test.py "beneteau" ebay    # eBay only
    python scraper_test.py "sailboat" yachtworld boattrader
 Available scrapers: ebay, yachtworld, boattrader, boats, hibid, craigslist
 """
 import sys
 import re
 import time
 import random
 import threading
 import requests
 import urllib3
 from bs4 import BeautifulSoup
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 # ── User-Agents ──────────────────────────────────────────────────────────────
 # Desktop browser User-Agent strings; callers pick one at random with
 # random.choice() for each request or browser context.
 USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0',
 ]
 # State used by polite_pause(): a counter, guarded by the lock, that cycles
 # through the sites listed below.
 _interleave_lock = threading.Lock()
 _interleave_idx  = 0
 _interleave_sites = ["https://miami.craigslist.org", "https://www.ebay.com"]
 def get_headers(referer=None):
    """Build browser-like HTTP request headers.

    The User-Agent is drawn at random from USER_AGENTS on every call.

    Args:
        referer: Optional URL; when truthy it is added as the ``Referer`` header.

    Returns:
        A new dict mapping header names to values.
    """
    headers = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9',
        'Accept-Encoding': 'gzip, deflate, br',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
    }
    if not referer:
        return headers
    headers['Referer'] = referer
    return headers
 def polite_pause(source_name: str):
    """Fetch one of the interleave sites, then sleep 2-4 seconds.

    The site is chosen round-robin from _interleave_sites using the
    lock-protected counter _interleave_idx. The request is best effort:
    any failure is ignored and the pause still happens.

    Args:
        source_name: Label used as the prefix of the progress message.
    """
    global _interleave_idx
    with _interleave_lock:
        slot = _interleave_idx % len(_interleave_sites)
        decoy_url = _interleave_sites[slot]
        _interleave_idx += 1
    try:
        requests.get(decoy_url, headers=get_headers(), timeout=5, verify=False)
    except Exception:
        # Best effort only: the response is never used.
        pass
    delay = random.uniform(2.0, 4.0)
    time.sleep(delay)
    print(f"[{source_name}] Pausa cortés lista — continuando...")
 def _extract_best_src(img_tag) -> str:
    candidates = [
        img_tag.get("src",""), img_tag.get("data-src",""),
        img_tag.get("data-lazy-src",""), img_tag.get("data-original",""),
        img_tag.get("data-lazy",""), img_tag.get("data-image",""),
    ]
    srcset = img_tag.get("srcset","") or img_tag.get("data-srcset","")
    if srcset:
        parts = [p.strip().split()[0] for p in srcset.split(",") if p.strip()]
        candidates.extend(parts)
    for c in candidates:
        c = c.strip()
        if c and c.startswith("http") and not c.startswith("data:"):
            return c
    return ""
 # ══════════════════════════════════════════════════════════════════════════════
 # SCRAPERS
 # ══════════════════════════════════════════════════════════════════════════════
 def scrape_ebay(query: str, filters: dict = None) -> list:
    """Search eBay with headless Chromium (Playwright) and parse the result cards.

    Args:
        query: Free-text search; repeated words are dropped before URL-encoding.
        filters: Optional dict. Only ``filters["type"]`` is read; when it matches
            a key of EBAY_CAT the ``_sacat`` category in the URL is replaced.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``price`` and ``source``.
        Errors are printed, not raised; whatever was collected so far is
        returned (an empty list when no HTML could be fetched).
    """
    filters = filters or {}
    src = {
        "name": "eBay Marine",
        "search_url": "https://www.ebay.com/sch/i.html?_nkw={query}&_sacat=26429&LH_BIN=1&_sop=10",
        "type": "classifieds",
        "category": "Clasificados USA",
    }
    results = []
    seen    = set()
    raw_url = src["search_url"]
    # dict.fromkeys removes repeated words while keeping first-seen order.
    clean_q = " ".join(dict.fromkeys(query.strip().split()))
    url     = raw_url.replace("{query}", requests.utils.quote(clean_q))
    # Category adjustment by vessel type
    vtype = filters.get("type","").lower()
    EBAY_CAT = {
        "sailboat":"36431","sail":"36431","velero":"36431",
        "motor":"36432","motorboat":"36432","yacht":"36432",
        "fishing":"36432","tug":"36432","barge":"36432",
    }
    if vtype and vtype in EBAY_CAT:
        url = re.sub(r'_sacat=\d+', f'_sacat={EBAY_CAT[vtype]}', url)
    print(f"[eBay] URL: {url}")
    try:
        # Imported here so the module still loads when Playwright is missing;
        # the ImportError is then reported by the outer except.
        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled","--no-sandbox"]
            )
            context = browser.new_context(
                viewport={"width":1280,"height":900},
                user_agent=random.choice(USER_AGENTS),
                locale="en-US", timezone_id="America/New_York",
                ignore_https_errors=True,
            )
            # Script injected into every page: hides navigator.webdriver and
            # defines a window.chrome object.
            context.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                "window.chrome={runtime:{}};"
            )
            page = context.new_page()
            try:
                page.goto(url, timeout=30000, wait_until="domcontentloaded")
                page.wait_for_timeout(random.randint(1500,2500))
                page.evaluate("window.scrollBy(0,600)")
                page.wait_for_timeout(800)
                html = page.content()
            except Exception as e:
                print(f"[eBay] Playwright error: {e}")
                # Empty HTML makes the function return [] below.
                html = ""
            finally:
                try: page.close()
                except: pass
            browser.close()
        if not html:
            print("[eBay] Sin HTML")
            return []
        soup  = BeautifulSoup(html, "html.parser")
        # New layout: one <li class="s-card"> per result.
        cards = soup.find_all("li", class_="s-card")
        print(f"[eBay] Tarjetas nuevo layout: {len(cards)}")
        if not cards:
            # Old layout li.s-item; only tried when no s-card element was found.
            for item in soup.find_all("li", class_="s-item"):
                try:
                    link = item.find("a", class_="s-item__link")
                    if not link: continue
                    href = link.get("href","")
                    # Keep only the canonical /itm/<id> part of the link.
                    # NOTE(review): this pattern requires "www." while the
                    # new-layout pattern below makes it optional — confirm intent.
                    m = re.search(r'(https?://www\.ebay\.com/itm/\d+)', href)
                    if not m: continue
                    href = m.group(1)
                    if href in seen: continue
                    seen.add(href)
                    title_el = (item.find("span", class_="BOLD") or
                                item.find("div",  class_="s-item__title") or
                                item.find("span", class_="s-item__title"))
                    title = (title_el or link).get_text(strip=True)
                    # Skip entries whose title starts with "Shop on eBay".
                    if not title or title.lower().startswith("shop on ebay"): continue
                    price_el = item.find("span", class_="s-item__price")
                    price    = price_el.get_text(strip=True) if price_el else ""
                    results.append({"title":title,"url":href,"price":price,"source":"eBay"})
                except Exception:
                    # A malformed card must not abort the whole parse.
                    continue
            print(f"[eBay] {len(results)} resultados (layout antiguo)")
            return results
        for card in cards:
            try:
                # Pick the first text-bearing link, ignoring the image link and
                # anything whose text starts with "Shop on eBay".
                title_link = None
                for a in card.find_all("a", class_="s-card__link"):
                    if "image-treatment" in (a.get("class") or []): continue
                    t = a.get_text(strip=True)
                    if t and not t.lower().startswith("shop on ebay"):
                        title_link = a; break
                if not title_link: continue
                href = title_link.get("href","")
                if "/itm/" not in href: continue
                m = re.search(r'(https?://(?:www\.)?ebay\.com/itm/\d+)', href)
                if not m: continue
                href = m.group(1)
                if href in seen: continue
                seen.add(href)
                # Remove the "Opens in a new window or tab" suffix from the link text.
                title = re.sub(r'\s*Opens in a new window or tab.*','',
                               title_link.get_text(strip=True), flags=re.I).strip()
                price_el = (card.find(class_="s-card__price") or
                            card.find(class_="s-item__price"))
                price = price_el.get_text(strip=True) if price_el else ""
                results.append({"title":title,"url":href,"price":price,"source":"eBay"})
            except Exception:
                # A malformed card must not abort the whole parse.
                continue
        print(f"[eBay] {len(results)} resultados")
    except Exception as e:
        print(f"[eBay] Error: {e}")
    return results
 def scrape_yachtworld(query: str, filters: dict = None) -> list:
    """Scrape a YachtWorld category page with headless Chromium (Playwright).

    NOTE(review): ``query`` is accepted for signature parity with the other
    scrapers but is never sent to YachtWorld; only ``filters["type"]`` decides
    which category page is loaded. Confirm whether keyword search is wanted.

    Args:
        query: Unused (see note above).
        filters: Optional dict. ``filters["type"]`` selects the sail or the
            power listing page; a missing, empty or None type means sail.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``price`` (always "")
        and ``source``. Listing links without any text are skipped, as the
        other scrapers do for title-less cards. Errors are printed, not
        raised.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    filters     = filters or {}
    results     = []
    seen        = set()
    # ``or ""`` tolerates an explicit None stored under "type".
    vessel_type = (filters.get("type") or "").lower()
    yw_type     = "sail" if vessel_type in ["sailboat","sail","velero","yacht",""] else "power"
    base_url = f"https://www.yachtworld.com/boats-for-sale/type-{yw_type}/"
    if vessel_type in ["sailboat","sail","velero",""]:
        base_url = "https://www.yachtworld.com/boats-for-sale/type-sail/class-sail-cruiser/"
    print(f"[YachtWorld] URL: {base_url}")
    try:
        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled','--no-sandbox']
            )
            context = browser.new_context(
                viewport={'width':1920,'height':1080},
                user_agent=random.choice(USER_AGENTS),
                locale='en-US', timezone_id='America/New_York',
                ignore_https_errors=True,
            )
            # Script injected into every page: hides navigator.webdriver and
            # defines a window.chrome object.
            context.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                "window.chrome={runtime:{}};"
            )
            page = context.new_page()
            try:
                page.goto(base_url, timeout=30000, wait_until='domcontentloaded')
                page.wait_for_timeout(random.randint(2000,4000))
                # Scroll in three steps before taking the HTML snapshot.
                for _ in range(3):
                    page.evaluate("window.scrollBy(0,400)")
                    page.wait_for_timeout(random.randint(400,800))
                html = page.content()
            except Exception as e:
                print(f"[YachtWorld] Error: {e}")
                html = ""
            finally:
                try:
                    page.close()
                except Exception:
                    # Closing is best effort; the browser is closed right after.
                    pass
            browser.close()
        if not html:
            print("[YachtWorld] Sin HTML")
            return []
        soup = BeautifulSoup(html,'html.parser')
        for a in soup.find_all('a', href=True):
            href = a['href']
            if '/boat-details/' not in href and '/yacht/' not in href:
                continue
            # urljoin resolves relative and protocol-relative links correctly;
            # absolute links are returned unchanged.
            href = urljoin(base_url, href)
            if href in seen:
                continue
            title = a.get_text(strip=True)
            if not title:
                # Image-only anchor: do not mark the URL as seen, so a later
                # text link to the same listing can still provide the title.
                continue
            seen.add(href)
            results.append({"title":title,"url":href,"price":"","source":"YachtWorld"})
        print(f"[YachtWorld] {len(results)} resultados")
    except Exception as e:
        print(f"[YachtWorld] Error: {e}")
    return results
 def scrape_boattrader(query: str, filters: dict = None) -> list:
    """Search BoatTrader with headless Chromium (Playwright) and parse the cards.

    Args:
        query: Free-text search, stripped and URL-encoded into the search URL.
        filters: Accepted for signature parity; not read by this scraper.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``price`` and ``source``.
        Errors are printed, not raised.
    """
    filters = filters or {}
    search_template = "https://www.boattrader.com/boats/?query={query}"
    found   = []
    visited = set()
    url = search_template.replace("{query}", requests.utils.quote(query.strip()))
    print(f"[BoatTrader] URL: {url}")
    try:
        from playwright.sync_api import sync_playwright
        with sync_playwright() as pw:
            browser = pw.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled","--no-sandbox"]
            )
            ctx = browser.new_context(
                viewport={"width":1280,"height":900},
                user_agent=random.choice(USER_AGENTS),
                locale="en-US", timezone_id="America/New_York",
                ignore_https_errors=True,
            )
            # Script injected into every page: hides navigator.webdriver and
            # defines a window.chrome object.
            ctx.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                "window.chrome={runtime:{}};"
            )
            tab = ctx.new_page()
            try:
                tab.goto(url, timeout=35000, wait_until="domcontentloaded")
                tab.wait_for_timeout(random.randint(4000,6000))
                tab.evaluate("window.scrollBy(0,600)")
                tab.wait_for_timeout(1500)
                markup = tab.content()
            except Exception as e:
                print(f"[BoatTrader] Error: {e}")
                markup = ""
            finally:
                try:
                    tab.close()
                except:
                    pass
            browser.close()
        if not markup:
            print("[BoatTrader] Sin HTML")
            return []
        soup = BeautifulSoup(markup,"html.parser")
        # Prefer <li class="lib-card">; otherwise accept any element whose
        # class matches lib-card as a whole word.
        listing_cards = soup.find_all("li", class_="lib-card")
        if not listing_cards:
            listing_cards = soup.find_all(class_=re.compile(r'\blib-card\b'))
        print(f"[BoatTrader] Cards encontradas: {len(listing_cards)}")
        for card in listing_cards:
            try:
                anchor = card.find("a", href=re.compile(r'^/boat/[\w-]+-\d+/$'))
                if not anchor:
                    continue
                listing_url = "https://www.boattrader.com" + anchor["href"]
                if listing_url in visited:
                    continue
                visited.add(listing_url)
                heading = card.find(class_=re.compile(r'listingTitle',re.I))
                if heading:
                    title = heading.get_text(strip=True)
                else:
                    title = anchor.get_text(strip=True)
                price = ""
                price_node = card.find(class_=re.compile(r'listingPrice',re.I))
                if price_node:
                    # Keep only the first dollar amount found in the price node.
                    amount = re.search(r'\$\s*([\d,]+)', price_node.get_text(" ",strip=True))
                    if amount:
                        price = f"${amount.group(1)}"
                found.append({"title":title,"url":listing_url,"price":price,"source":"BoatTrader"})
            except Exception:
                # A malformed card must not abort the whole parse.
                continue
        print(f"[BoatTrader] {len(found)} resultados")
    except Exception as e:
        print(f"[BoatTrader] Error: {e}")
    return found
 def scrape_boats(query: str, filters: dict = None) -> list:
    """Search Boats.com with headless Chromium (Playwright) and parse the cards.

    Args:
        query: Free-text search, stripped and URL-encoded into the search URL.
        filters: Accepted for signature parity; not read by this scraper.

    Returns:
        A list of dicts with keys ``title``, ``url``, ``price`` and ``source``.
        Errors are printed, not raised; an error while parsing stops the
        parse and returns what was collected so far.
    """
    filters = filters or {}
    search_template = "https://www.boats.com/boats-for-sale/?query={query}"
    found   = []
    visited = set()
    url = search_template.replace("{query}", requests.utils.quote(query.strip()))
    print(f"[Boats.com] URL: {url}")
    try:
        from playwright.sync_api import sync_playwright
        with sync_playwright() as pw:
            browser = pw.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled","--no-sandbox"]
            )
            ctx = browser.new_context(
                viewport={"width":1280,"height":900},
                user_agent=random.choice(USER_AGENTS),
                locale="en-US", timezone_id="America/New_York",
                ignore_https_errors=True,
            )
            # Script injected into every page: hides navigator.webdriver and
            # defines a window.chrome object.
            ctx.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                "window.chrome={runtime:{}};"
            )
            tab = ctx.new_page()
            try:
                tab.goto(url, timeout=35000, wait_until="domcontentloaded")
                tab.wait_for_timeout(random.randint(4000,6000))
                tab.evaluate("window.scrollBy(0,600)")
                tab.wait_for_timeout(1500)
                markup = tab.content()
            except Exception as e:
                print(f"[Boats.com] Error: {e}")
                markup = ""
            finally:
                try:
                    tab.close()
                except:
                    pass
            browser.close()
        if not markup:
            print("[Boats.com] Sin HTML")
            return []
        soup = BeautifulSoup(markup,"html.parser")
        # Standard listings first, then the "enhanced oem" ones; the shared
        # ``visited`` set removes any overlap between the two groups.
        standard_cards = soup.select("li[data-listing-id]")
        enhanced_cards = soup.select("li.enhanced.oem")
        for card in list(standard_cards) + list(enhanced_cards):
            anchor = card.find("a", href=re.compile(r'^/'))
            if not anchor:
                continue
            listing_url = "https://www.boats.com" + anchor["href"]
            if listing_url in visited:
                continue
            visited.add(listing_url)
            year_node = card.select_one("div.year")
            name_node = card.select_one("h2")
            year = year_node.get_text(strip=True) if year_node else ""
            name = name_node.get_text(strip=True) if name_node else ""
            if year:
                title = f"{year} {name}".strip()
            else:
                title = name
            if not title:
                continue
            price = ""
            price_node = card.select_one("div.price")
            if price_node:
                # Keep only the first dollar amount found in the price node.
                amount = re.search(r'\$\s*([\d,]+)', price_node.get_text(" ",strip=True))
                if amount:
                    price = f"${amount.group(1)}"
            found.append({"title":title,"url":listing_url,"price":price,"source":"Boats.com"})
        print(f"[Boats.com] {len(found)} resultados")
    except Exception as e:
        print(f"[Boats.com] Error: {e}")
    return found
 def scrape_craigslist(query: str, filters: dict = None) -> list:
    """Search the Craigslist boats section of three randomly chosen cities.

    All pages are fetched with one headless Chromium context (Playwright)
    and parsed afterwards, once the browser has been closed.

    Args:
        query: Free-text search, stripped and URL-encoded into each city URL.
        filters: Accepted for signature parity; not read by this scraper.

    Returns:
        A list of dicts with keys ``title`` (at most 120 characters), ``url``,
        ``price`` and ``source`` ("Craigslist <city>"). Errors are printed,
        not raised.
    """
    filters = filters or {}
    found   = []
    visited = set()
    CITIES  = ["miami", "tampa", "sfbay", "losangeles", "seattle",
               "boston", "newyork", "chicago", "sandiego"]
    encoded_query = requests.utils.quote(query.strip())
    print(f"[Craigslist] Query: '{query}' - probando 3 ciudades al azar")
    try:
        from playwright.sync_api import sync_playwright
        snapshots = []
        chosen_cities = random.sample(CITIES, min(3, len(CITIES)))
        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=True, args=["--no-sandbox"])
            ctx = browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                locale="en-US", ignore_https_errors=True,
            )
            for city in chosen_cities:
                target = f"https://{city}.craigslist.org/search/boa?query={encoded_query}&sort=rel"
                print(f"[Craigslist] >> {target}")
                tab = ctx.new_page()
                try:
                    tab.goto(target, timeout=25000, wait_until="domcontentloaded")
                    tab.wait_for_timeout(2500)
                    snapshots.append((city, tab.content()))
                except Exception as e:
                    # A failing city is reported and skipped; the others continue.
                    print(f"[Craigslist] {city} error: {e}")
                finally:
                    try:
                        tab.close()
                    except:
                        pass
            browser.close()
        for city, markup in snapshots:
            soup = BeautifulSoup(markup, "html.parser")
            # Result cards are the elements that carry a data-pid attribute.
            city_cards = soup.find_all(attrs={"data-pid": True})
            print(f"[Craigslist] {city}: {len(city_cards)} cards en HTML")
            for card in city_cards:
                try:
                    main_link = card.find("a", class_="main")
                    if not main_link:
                        continue
                    listing_url = main_link.get("href","")
                    if not listing_url or listing_url in visited:
                        continue
                    visited.add(listing_url)
                    # Title comes from the card's title attribute, falling
                    # back to the text of its span.label.
                    title = card.get("title","")
                    if not title:
                        label = card.find("span", class_="label")
                        if label:
                            title = label.get_text(strip=True)
                        else:
                            title = ""
                    if not title:
                        continue
                    price_node = card.find("span", class_="priceinfo")
                    price = price_node.get_text(strip=True) if price_node else ""
                    entry = {
                        "title": title[:120], "url": listing_url,
                        "price": price, "source": f"Craigslist {city}",
                    }
                    found.append(entry)
                except Exception:
                    # A malformed card must not abort the whole parse.
                    continue
        print(f"[Craigslist] {len(found)} resultados totales")
    except Exception as e:
        print(f"[Craigslist] Error: {e}")
    return found
 def scrape_hibid(query: str, filters: dict = None) -> list:
    """Search HiBid lots with headless Chromium (Playwright) and parse the tiles.

    The word "boat" is appended to the query before it is URL-encoded.

    Args:
        query: Free-text search.
        filters: Accepted for signature parity; not read by this scraper.

    Returns:
        A list of dicts with keys ``title`` (at most 100 characters), ``url``,
        ``price`` and ``source``. Titles of 4 characters or fewer are dropped.
        Errors, including a failed page load, are printed, not raised.
    """
    filters = filters or {}
    src     = {"name":"HiBid","type":"auction","category":"Subastas USA"}
    found   = []
    encoded = requests.utils.quote((query.strip() + " boat"))
    url     = f"https://www.hibid.com/lots?q={encoded}"
    print(f"[HiBid] URL: {url}")
    try:
        from playwright.sync_api import sync_playwright
        with sync_playwright() as pw:
            browser = pw.chromium.launch(headless=True, args=["--no-sandbox"])
            context = browser.new_context(
                user_agent=random.choice(USER_AGENTS),
                viewport={"width":1280,"height":900},
                locale="en-US", ignore_https_errors=True,
            )
            # Script injected into every page: hides navigator.webdriver.
            context.add_init_script(
                "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
            )
            tab = context.new_page()
            try:
                # No local except here: a navigation failure propagates to the
                # outer handler, which prints it and returns the empty list.
                tab.goto(url, timeout=30000, wait_until="domcontentloaded")
                tab.wait_for_timeout(4000)
                markup = tab.content()
            finally:
                try:
                    tab.close()
                except:
                    pass
            browser.close()
        soup = BeautifulSoup(markup,"html.parser")
        visited = set()
        card_selector = ".lot-tile, [class*=lot-item], [class*=LotTile], [class*=lotCard]"
        lot_cards = soup.select(card_selector)
        print(f"[HiBid] Cards encontradas: {len(lot_cards)}")
        for card in lot_cards:
            try:
                anchor = card.find("a", href=True)
                if not anchor:
                    continue
                lot_url = anchor["href"]
                if not lot_url.startswith("http"):
                    lot_url = "https://www.hibid.com" + lot_url
                if lot_url in visited:
                    continue
                visited.add(lot_url)
                heading = card.select_one("h3, .lot-title, [class*=lot-title], [class*=lotTitle]")
                if heading:
                    raw_title = heading.get_text(strip=True)
                else:
                    raw_title = anchor.get_text(strip=True)
                title = raw_title[:100]
                price_node = card.select_one(".high-bid, .lot-price, [class*=bid], [class*=price]")
                price = price_node.get_text(strip=True) if price_node else ""
                if not title or len(title) <= 4:
                    continue
                found.append({"title":title,"url":lot_url,"price":price,"source":"HiBid"})
            except Exception:
                # A malformed card must not abort the whole parse.
                continue
        print(f"[HiBid] {len(found)} resultados")
    except Exception as e:
        print(f"[HiBid] Error: {e}")
    return found
 # ══════════════════════════════════════════════════════════════════════════════
 # MAIN RUNNER
 # ══════════════════════════════════════════════════════════════════════════════
 # Command-line name -> scraper function. Each function is called as
 # fn(query, filters) by run_test(), which runs every entry, in this order,
 # when no targets are given.
 SCRAPER_MAP = {
    "ebay":        scrape_ebay,
    "yachtworld":  scrape_yachtworld,
    "boattrader":  scrape_boattrader,
    "boats":       scrape_boats,
    "hibid":       scrape_hibid,
    "craigslist":  scrape_craigslist,
 }
 def run_test(query: str, targets: list = None, filters: dict = None):
    """Run the selected scrapers one after another and print a report.

    For each scraper the elapsed time and up to three sample results are
    printed, followed by a summary with the total result count and the
    names of the scrapers that returned at least one result.

    Args:
        query: Search text passed to every scraper.
        targets: Scraper names (keys of SCRAPER_MAP, case-insensitive).
            None or an empty list means every scraper. Unknown names are
            reported and skipped.
        filters: Optional dict passed through to every scraper.
    """
    targets = targets or list(SCRAPER_MAP.keys())
    filters = filters or {}

    def safe(s):
        # Replace non-ASCII characters so printing cannot fail on consoles
        # with a limited encoding.
        return s.encode('ascii','replace').decode('ascii')

    heavy_rule = "="*60
    light_rule = '-'*50
    grand_total = 0
    working     = []
    print("\n" + heavy_rule)
    print(f"  PRUEBA DE SCRAPERS  |  query: {query!r}")
    print(heavy_rule + "\n")
    for name in targets:
        scraper = SCRAPER_MAP.get(name.lower())
        if not scraper:
            print(f"[!] Scraper desconocido: {name}")
            continue
        print(f"\n{light_rule}")
        print(f"  >> {name.upper()}")
        print(f"{light_rule}")
        started = time.time()
        hits    = scraper(query, filters)
        elapsed = time.time() - started
        if not hits:
            print(f"\n[!!] {name}: 0 resultados en {elapsed:.1f}s")
        else:
            working.append(name)
            print(f"\n[OK] {name}: {len(hits)} resultados en {elapsed:.1f}s")
            # Show at most the first three results as a sample.
            for pos, hit in enumerate(hits[:3], 1):
                print(f"   {pos}. {safe(hit['title'][:70])}")
                if hit.get('price'):
                    print(f"      $ {safe(hit['price'])}")
                print(f"      > {hit['url'][:80]}")
        grand_total += len(hits)
    print("\n" + heavy_rule)
    print(f"  RESUMEN: {grand_total} resultados totales")
    print(f"  Funcionando: {', '.join(working) if working else 'ninguno'}")
    print(heavy_rule)
 if __name__ == "__main__":
    # Command line: [query] [scraper ...]
    # The first argument is the query unless it is itself a scraper name, in
    # which case the default query is used and every argument is a scraper.
    args    = sys.argv[1:]
    query   = "sailboat velero"  # default
    targets = []
    if args:
        if args[0].lower() not in SCRAPER_MAP:
            query = args[0]
            names = args[1:]
        else:
            names = args
        # Unknown names are passed through on purpose: run_test() reports them
        # as "Scraper desconocido". Dropping them here would make a typo
        # silently run every scraper.
        targets = [a.lower() for a in names]
    run_test(query, targets if targets else None)
@@ -0,0 +1,15 @@
@echo off
 :: Stops the running server using the PID stored in the .server.pid file
 :: that sits next to this script.
 :: The assignment is quoted because the install folder may contain an
 :: ampersand, which would otherwise split the command.
 set "PID_FILE=%~dp0.server.pid"
 if not exist "%PID_FILE%" (
    echo No hay ninguna instancia de Boat^&Ship-Finder corriendo.
    pause
    exit /b 0
 )
 :: Clear PID first: set /p keeps any previous value when the file is empty.
 set "PID="
 set /p PID=<"%PID_FILE%"
 if not defined PID (
    echo [ERROR] El archivo PID esta vacio.
    del "%PID_FILE%" >nul 2>&1
    pause
    exit /b 1
 )
 echo Cerrando Boat^&Ship-Finder (PID %PID%)...
 taskkill /F /PID %PID% >nul 2>&1
 if errorlevel 1 (
    echo [AVISO] No se pudo cerrar el proceso. Puede que ya estuviera cerrado.
 ) else (
    echo Servidor cerrado.
 )
 del "%PID_FILE%" >nul 2>&1
 pause
		`@@ -0,0 +1 @@`
							`# Patch marker — not used, just to verify write access`