# BoatAndShip-Finder/server.py

"""
Boat&Ship-Finder - Backend Server
Requiere: pip install flask flask-cors requests beautifulsoup4 playwright
"""

from flask import Flask, jsonify, request, send_from_directory, session
import hashlib as _hashlib
from flask_cors import CORS
import requests
import json
import sqlite3
import os
import re
import time
import hashlib
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from bs4 import BeautifulSoup
import threading
import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Flask application; static assets are served from the local 'static' folder.
app = Flask(__name__, static_folder='static')

# The secret key is read from the environment. The module refuses to import
# when SECRET_KEY is unset or empty instead of falling back to a built-in value.
_secret = os.environ.get('SECRET_KEY')
if not _secret:
    raise RuntimeError("SECRET_KEY not set — add SECRET_KEY=<random> to your environment")
app.secret_key = _secret

# Cross-origin requests are accepted only from the two local origins listed
# below, with credentials allowed.
CORS(app,
     origins=["http://localhost:8765", "http://127.0.0.1:8765"],
     supports_credentials=True)

# SQLite database file (a relative path) used by init_db() and get_db().
DB_PATH = 'marine.db'
# Local Ollama text-generation endpoint used by ollama_generate().
OLLAMA_URL = 'http://localhost:11434/api/generate'

# ── Ollama models per task ────────────────────────────────────────────────────
# Maps a task key to the Ollama model tag used for it. 'classify' is the
# default model of ollama_generate(); 'extract' is used by
# extract_vessel_from_text().
MODELS = {
    'extract':  'qwen2.5:32b',           # Spec extraction (faster than the 72b model, just as accurate)
    'classify': 'llama3.1:8b',           # Fast classification
    'embed':    'nomic-embed-text:latest', # Embeddings for deduplication
    'parse':    'qwen3-coder:latest'      # Structured parsing
}

# ── Global sources by category ────────────────────────────────────────────────
# Maps a display category (the Spanish/English group names below are runtime
# keys) to a list of source descriptors. Every descriptor carries:
#   "name" - human-readable source name
#   "url"  - landing page of the source
#   "type" - one of: auction, broker, commercial, classifieds, salvage,
#            news, magazine, registry
# Some descriptors additionally carry:
#   "search_url" - search/listing page; most contain a "{query}" placeholder,
#                  a few are static listing URLs without one
#   "category"   - a finer-grained label that differs from the enclosing group
#                  key (how it is consumed is not visible in this section)
# NOTE(review): some names repeat across groups with different URLs
# (e.g. "Copart Marine" under both auctions and salvage) — confirm consumers
# do not key on "name" alone.
SOURCES = {
    "Subastas USA": [
        {"name": "GovPlanet",       "url": "https://www.govplanet.com/boats",           "type": "auction"},
        {"name": "GovDeals",        "url": "https://www.govdeals.com",                  "type": "auction"},
        {"name": "PropertyRoom",    "url": "https://www.propertyroom.com/boats",        "type": "auction"},
        {"name": "PublicSurplus",   "url": "https://www.publicsurplus.com",             "type": "auction"},
        {"name": "AuctionTime",     "url": "https://www.auctiontime.com/boats",         "type": "auction"},
        {"name": "IronPlanet",      "url": "https://www.ironplanet.com/boats",          "type": "auction"},
        {"name": "HiBid",           "url": "https://www.hibid.com/boats",               "type": "auction"},
        {"name": "Copart Marine",   "url": "https://www.copart.com/boats",              "type": "auction"},
        {"name": "BidSpotter",      "url": "https://www.bidspotter.com/boats",          "type": "auction"},
        {"name": "32auctions",      "url": "https://www.32auctions.com",                "type": "auction"},
    ],
    "Subastas Internacionales": [
        {"name": "Ritchie Bros",    "url": "https://www.rbauction.com/boats",           "type": "auction"},
        {"name": "Euro Auctions",   "url": "https://www.euroauctions.com",              "type": "auction"},
        {"name": "Troostwijk",      "url": "https://www.troostwijkauctions.com",        "type": "auction"},
        {"name": "Surplex",         "url": "https://www.surplex.com/marine",            "type": "auction"},
        {"name": "BVA Auctions",    "url": "https://www.bva-auctions.com",              "type": "auction"},
        {"name": "Catawiki Marine", "url": "https://www.catawiki.com/boats",            "type": "auction"},
        {"name": "Barnebys",        "url": "https://www.barnebys.com/boats",            "type": "auction"},
        {"name": "ShipXchange",     "url": "https://www.shipxchange.com",               "type": "auction"},
    ],
    "Venta Especializada": [
        {"name": "YachtWorld",      "url": "https://www.yachtworld.com",                "type": "broker"},
        {"name": "Boats.com",       "url": "https://www.boats.com",                     "type": "broker",
         "search_url": "https://www.boats.com/boats-for-sale/?query={query}"},
        {"name": "BoatTrader",      "url": "https://www.boattrader.com",                "type": "broker",
         "search_url": "https://www.boattrader.com/boats/?query={query}"},
        {"name": "Apollo Duck",     "url": "https://www.apolloduck.com",                "type": "broker",
         "search_url": "https://www.apolloduck.com/search.phtml?search={query}&sr=1&q=1"},
        {"name": "Rightboat",       "url": "https://www.rightboat.com",                 "type": "broker",
         "search_url": "https://www.rightboat.com/boats-for-sale/?q={query}"},
        {"name": "Boat24",          "url": "https://www.boat24.com",                    "type": "broker",
         "search_url": "https://www.boat24.com/en/usedboats/"},
        {"name": "Inautia",         "url": "https://www.inautia.com",                   "type": "broker",
         "search_url": "https://www.inautia.com/boats/?q={query}"},
        # ── US Brokers ────────────────────────────────────────────────────────
        {"name": "HMY Yachts",      "url": "https://hmy.com",                           "type": "broker",
         "search_url": "https://www.hmy.com/yachts-for-sale/?SaleClassCode=used",
         "category": "Brokers USA"},
        {"name": "Denison Yachting","url": "https://www.denisonyachtsales.com",          "type": "broker",
         "search_url": "https://www.denisonyachtsales.com/yachts-for-sale/?search={query}",
         "category": "Brokers USA"},
        {"name": "United Yacht",    "url": "https://www.unitedyacht.com",               "type": "broker",
         "search_url": "https://www.unitedyacht.com/yachts-for-sale/",
         "category": "Brokers USA"},
        {"name": "Northrop & Johnson","url": "https://www.n-j.com",                     "type": "broker",
         "search_url": "https://www.n-j.com/yachts-for-sale/",
         "category": "Brokers USA"},
        {"name": "Worth Ave Yachts","url": "https://www.worthavenueyachts.com",         "type": "broker",
         "search_url": "https://www.worthavenueyachts.com/yachts-for-sale/",
         "category": "Brokers USA"},
        {"name": "Bluewater Yachting","url": "https://www.bluewateryachting.com",       "type": "broker",
         "category": "Brokers USA"},
        {"name": "Galati Yachts",   "url": "https://www.galatiyachts.com",              "type": "broker",
         "search_url": "https://www.galatiyachts.com/boat-search/?q={query}",
         "category": "Brokers USA"},
        {"name": "Fraser Yachts",   "url": "https://www.fraseryachts.com",              "type": "broker",
         "search_url": "https://www.fraseryachts.com/en/yachts-for-sale/?search={query}",
         "category": "Brokers INT"},
        {"name": "Burgess Yachts",  "url": "https://www.burgessyachts.com",             "type": "broker",
         "search_url": "https://www.burgessyachts.com/en/yacht-sale?q={query}",
         "category": "Brokers INT"},
        {"name": "Ocean Alexander", "url": "https://www.oceanalexander.com",            "type": "broker",
         "search_url": "https://www.oceanalexander.com/find-a-boat/?q={query}",
         "category": "Brokers USA"},
        {"name": "Merle Wood",      "url": "https://www.merlewood.com",                 "type": "broker",
         "search_url": "https://www.merlewood.com/yachts-for-sale/",
         "category": "Brokers INT"},
        # ── Other ─────────────────────────────────────────────────────────────
        {"name": "NauticExpo",      "url": "https://www.nauticexpo.com",                "type": "broker"},
        {"name": "Seaboats",        "url": "https://www.seaboats.net",                  "type": "broker"},
        {"name": "YachtBroker",     "url": "https://www.yachtbroker.com",               "type": "broker"},
    ],
    "Comercial / Industrial": [
        {"name": "WorkBoat",        "url": "https://www.workboat.com/classifieds",      "type": "commercial"},
        {"name": "TradeABoat",      "url": "https://www.tradeaboat.com.au",             "type": "broker"},
        {"name": "Boatpoint",       "url": "https://www.boatpoint.com.au",              "type": "broker"},
        {"name": "Boats & Outboards","url": "https://www.boatsandoutboards.co.uk",      "type": "broker"},
        {"name": "Commercial Vessel","url": "https://www.commercialvessel.com",         "type": "commercial"},
        {"name": "ShipServ",        "url": "https://www.shipserv.com",                  "type": "commercial"},
        {"name": "Marine Classifieds","url": "https://www.marineclassifieds.com",       "type": "classifieds"},
        {"name": "Barcos.net",      "url": "https://www.barcos.net",                    "type": "broker"},
        # ── Offshore / DP / OSV ───────────────────────────────────────────────
        {"name": "Offshore Vessel Exchange","url": "https://www.offshorevessel.exchange","type": "commercial",
         "search_url": "https://www.offshorevessel.exchange/?s={query}",
         "category": "Offshore / DP"},
        {"name": "MarineTraffic Vessels For Sale","url": "https://www.marinetraffic.com/en/ads/p/list","type": "commercial",
         "search_url": "https://www.marinetraffic.com/en/ads/p/list?search={query}",
         "category": "Offshore / DP"},
        {"name": "YachtWorld Commercial","url": "https://www.yachtworld.com","type": "commercial",
         "search_url": "https://www.yachtworld.com/boats-for-sale/type-commercial/?query={query}",
         "category": "Offshore / DP"},
        {"name": "Apollo Duck Workboats","url": "https://www.apolloduck.com","type": "commercial",
         "search_url": "https://www.apolloduck.com/search.phtml?search={query}&sr=1&q=1",
         "category": "Offshore / DP"},
        {"name": "Seawork Classifieds","url": "https://www.seawork.com","type": "commercial",
         "search_url": "https://www.seawork.com/classifieds/",
         "category": "Offshore / DP"},
        {"name": "ShipXchange OSV", "url": "https://www.shipxchange.com",               "type": "commercial",
         "search_url": "https://www.shipxchange.com/en/vessel-types/offshore-support-vessel",
         "category": "Offshore / DP"},
        {"name": "Vessel Sales & Acquisitions","url": "https://www.vsl.no",             "type": "commercial",
         "search_url": "https://www.vsl.no/vessels-for-sale/",
         "category": "Offshore / DP"},
    ],
    "Clasificados Generales": [
        {"name": "Craigslist Boats", "url": "https://www.craigslist.org/search/boa",   "type": "classifieds"},
        {"name": "eBay Motors Marine","url": "https://www.ebay.com/b/Boats/26429",     "type": "classifieds",
         "search_url": "https://www.ebay.com/sch/i.html?_nkw={query}&_sacat=26429&LH_BIN=1&_sop=10"},
        {"name": "Facebook Marketplace","url": "https://www.facebook.com/marketplace/boats","type": "classifieds"},
        {"name": "BoatCrazy",       "url": "https://boatcrazy.com",                    "type": "classifieds",
         "search_url": "https://boatcrazy.com/boats?q={query}",
         "category": "Clasificados USA"},
        {"name": "Kijiji Marine",   "url": "https://www.kijiji.ca/b-boats",            "type": "classifieds"},
        {"name": "Gumtree Boats",   "url": "https://www.gumtree.com/boats",            "type": "classifieds"},
        {"name": "Subito.it Barche", "url": "https://www.subito.it/barche",            "type": "classifieds"},
        {"name": "LeBonCoin Bateaux","url": "https://www.leboncoin.fr/bateaux",        "type": "classifieds"},
        {"name": "Wallapop Barcos", "url": "https://es.wallapop.com/barcos",           "type": "classifieds"},
        {"name": "MercadoLibre",    "url": "https://www.mercadolibre.com/barcos",      "type": "classifieds"},
        {"name": "OLX Marine",      "url": "https://www.olx.com/boats",                "type": "classifieds"},
    ],
    "Salvage & Wrecks": [
        {"name": "Cooper Capital Salvage", "url": "https://www.cooperss.com",          "type": "salvage",
         "search_url": "https://www.cooperss.com/",
         "category": "Salvage USA"},
        {"name": "Salvex",          "url": "https://www.salvex.com",                   "type": "salvage",
         "search_url": "https://www.salvex.com/search/?q={query}&cat=30",
         "category": "Salvage USA"},
        {"name": "Copart Marine",   "url": "https://www.copart.com",                   "type": "salvage",
         "search_url": "https://www.copart.com/public/data/lotSearchResults/?query={query}&vehicleType=BOAT",
         "category": "Salvage USA"},
        {"name": "IAA Watercraft",  "url": "https://www.iaai.com",                     "type": "salvage",
         "search_url": "https://www.iaai.com/Search?SearchText={query}&vehicleType=Watercraft",
         "category": "Salvage USA"},
        {"name": "Ritchie Bros Marine","url": "https://www.rbauction.com",             "type": "auction",
         "search_url": "https://www.rbauction.com/used-equipment?q={query}&searchType=MODEL&equipmentCategory=marine",
         "category": "Salvage USA"},
        {"name": "NavAuctions",     "url": "https://www.navauctions.com",              "type": "salvage"},
        {"name": "MarineWrecks",    "url": "https://www.marinewrecks.com",             "type": "salvage"},
        {"name": "BoatBreakers",    "url": "https://www.boatbreakers.com",             "type": "salvage"},
        {"name": "Barnacle Marine", "url": "https://www.barnaclemarine.com",           "type": "salvage"},
        # NOTE(review): the name says "AU" but the domain ends in "nz" — confirm which is intended.
        {"name": "Boat Breakers AU","url": "https://www.boatbreakersnz.com",           "type": "salvage"},
    ],
    "Revistas & Noticias": [
        {"name": "Trade Only Today","url": "https://www.tradeonlytoday.com",           "type": "news"},
        {"name": "Nautical News",   "url": "https://www.nauticalnews.com",             "type": "news"},
        {"name": "Boat International","url": "https://www.boatinternational.com/yachts","type": "magazine"},
        {"name": "Superyacht Times","url": "https://www.superyachttimes.com",          "type": "magazine"},
        {"name": "The Triton",      "url": "https://www.the-triton.com/classifieds",   "type": "magazine"},
        {"name": "Passagemaker",    "url": "https://www.passagemaker.com",             "type": "magazine"},
        {"name": "WorkBoat Mag",    "url": "https://www.workboat.com",                 "type": "magazine"},
        {"name": "Lloyd's List",    "url": "https://lloydslist.maritimeintelligence.informa.com", "type": "news"},
        {"name": "Tradewinds",      "url": "https://www.tradewindsnews.com",           "type": "news"},
        {"name": "Maritime Executive","url": "https://www.maritime-executive.com",     "type": "news"},
        {"name": "Splash247",       "url": "https://splash247.com",                    "type": "news"},
        {"name": "Bairdmaritime",   "url": "https://www.bairdmaritime.com",            "type": "news"},
    ],
    "Registros & Gobierno": [
        {"name": "USCG Docs",       "url": "https://www.dco.uscg.mil/nvdc",           "type": "registry"},
        {"name": "UK Ship Register","url": "https://www.ukshipregister.co.uk",        "type": "registry"},
        {"name": "Panama Registry", "url": "https://www.segumar.com",                 "type": "registry"},
        {"name": "Marshall Islands","url": "https://www.register-iri.com",            "type": "registry"},
        {"name": "Liberian Registry","url": "https://www.liscr.com",                  "type": "registry"},
        {"name": "Bahamas Maritime","url": "https://www.bahamasmaritime.com",         "type": "registry"},
        {"name": "IHS Sea-web",     "url": "https://maritime.ihs.com",                "type": "registry"},
    ],
}

# ── Database ──────────────────────────────────────────────────────────────────
def init_db(db_path=None):
    """Create the SQLite schema (tables and indexes) if it is not there yet.

    Every statement in the script uses ``IF NOT EXISTS``, so the function can
    be called repeatedly against the same database file.

    Args:
        db_path: Path of the database file to initialise. When omitted, the
            module-level ``DB_PATH`` is used, which is the original behaviour.

    Raises:
        sqlite3.Error: If the database cannot be opened or the script fails.
            The connection is closed before the error propagates.
    """
    schema_sql = """
        CREATE TABLE IF NOT EXISTS vessels (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            name        TEXT,
            vessel_type TEXT,
            loa_m       REAL,
            beam_m      REAL,
            draft_m     REAL,
            year_built  INTEGER,
            hull        TEXT,
            propulsion  TEXT,
            status      TEXT,
            price_usd   REAL,
            currency    TEXT DEFAULT 'USD',
            location    TEXT,
            country     TEXT,
            source_name TEXT,
            source_url  TEXT,
            description TEXT,
            images      TEXT,
            flags       TEXT,
            score       REAL DEFAULT 0,
            fingerprint TEXT UNIQUE,
            raw_data    TEXT,
            created_at  TEXT DEFAULT (datetime('now')),
            updated_at  TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS saved_vessels (
            id         INTEGER PRIMARY KEY AUTOINCREMENT,
            vessel_id  INTEGER REFERENCES vessels(id),
            notes      TEXT,
            saved_at   TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS search_history (
            id         INTEGER PRIMARY KEY AUTOINCREMENT,
            query      TEXT,
            filters    TEXT,
            results    INTEGER,
            searched_at TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS custom_sources (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            name        TEXT NOT NULL,
            category    TEXT DEFAULT 'Custom',
            search_url  TEXT NOT NULL,
            source_type TEXT DEFAULT 'broker',
            active      INTEGER DEFAULT 1,
            added_by    TEXT,
            last_status TEXT DEFAULT 'unknown',
            created_at  TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS users (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            username    TEXT UNIQUE NOT NULL,
            password    TEXT NOT NULL,
            role        TEXT DEFAULT 'user',
            created_at  TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS collections (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            name        TEXT NOT NULL,
            description TEXT,
            color       TEXT DEFAULT '#00b4ff',
            icon        TEXT DEFAULT '📁',
            created_at  TEXT DEFAULT (datetime('now'))
        );
        CREATE TABLE IF NOT EXISTS collection_vessels (
            id            INTEGER PRIMARY KEY AUTOINCREMENT,
            collection_id INTEGER REFERENCES collections(id),
            vessel_id     INTEGER REFERENCES vessels(id),
            notes         TEXT,
            added_at      TEXT DEFAULT (datetime('now')),
            UNIQUE(collection_id, vessel_id)
        );
        CREATE TABLE IF NOT EXISTS alerts (
            id          INTEGER PRIMARY KEY AUTOINCREMENT,
            name        TEXT,
            filters     TEXT,
            last_match  INTEGER DEFAULT 0,
            active      INTEGER DEFAULT 1,
            created_at  TEXT DEFAULT (datetime('now'))
        );
        CREATE INDEX IF NOT EXISTS idx_vessels_type   ON vessels(vessel_type);
        CREATE INDEX IF NOT EXISTS idx_vessels_status ON vessels(status);
        CREATE INDEX IF NOT EXISTS idx_vessels_price  ON vessels(price_usd);
        CREATE INDEX IF NOT EXISTS idx_vessels_score  ON vessels(score DESC);
    """
    # DB_PATH is only looked up when no explicit path is supplied.
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    try:
        conn.executescript(schema_sql)
        conn.commit()
    finally:
        # Close the connection even when the script raises, so a failed
        # initialisation does not leave the database file handle open.
        conn.close()

def get_db(db_path=None):
    """Open a new SQLite connection whose rows can be read by column name.

    Args:
        db_path: Database to open. When omitted, the module-level ``DB_PATH``
            is used, which is the original behaviour. Passing a path (or
            ``":memory:"``) lets callers target another database.

    Returns:
        A fresh ``sqlite3.Connection`` with ``row_factory`` set to
        ``sqlite3.Row``. The caller is responsible for closing it.
    """
    # DB_PATH is only looked up when no explicit path is supplied.
    conn = sqlite3.connect(DB_PATH if db_path is None else db_path)
    conn.row_factory = sqlite3.Row
    return conn

# ── Ollama helpers ─────────────────────────────────────────────────────────────
# Caps the number of Ollama requests in flight at once to three.
_ollama_sem = threading.Semaphore(3)


def ollama_generate(prompt: str, model: str = None, json_mode: bool = False) -> str:
    """Send one non-streaming generation request to Ollama and return its text.

    Args:
        prompt: Prompt text forwarded to the model.
        model: Ollama model tag; falls back to ``MODELS['classify']`` when falsy.
        json_mode: When true, asks Ollama for JSON output via ``"format": "json"``.

    Returns:
        The ``"response"`` field of the reply, or ``""`` when the request,
        the HTTP status check or the JSON decoding fails. Failures are
        printed, never raised.
    """
    request_body = {
        "model": model if model else MODELS['classify'],
        "prompt": prompt,
        "stream": False,
        "options": {"temperature": 0.1, "num_predict": 2048},
    }
    if json_mode:
        request_body["format"] = "json"

    answer = ""
    # The semaphore is held for the whole round trip, including error handling.
    with _ollama_sem:
        try:
            reply = requests.post(OLLAMA_URL, json=request_body, timeout=120)
            reply.raise_for_status()
            answer = reply.json().get("response", "")
        except Exception as err:
            print(f"[Ollama] Error: {err}")
    return answer

def ollama_models() -> list:
    """Return the names of the models reported by the local Ollama server.

    This is a best-effort probe: any ordinary failure (server unreachable,
    timeout, non-JSON body, entry without a ``"name"`` key) yields an empty
    list instead of raising.

    Returns:
        A list of model name strings, or ``[]`` on failure.
    """
    try:
        r = requests.get("http://localhost:11434/api/tags", timeout=5)
        return [m["name"] for m in r.json().get("models", [])]
    except Exception:
        # Deliberately broad, but not a bare ``except:`` — KeyboardInterrupt
        # and SystemExit must still be able to stop the process.
        return []

def _parse_vessel_json(response) -> dict:
    """Decode a model reply into a vessel dict; return {} if unusable or skipped."""
    if not response or not isinstance(response, str):
        return {}

    def _candidates():
        # First the reply as-is, then the widest {...} span inside it, for
        # replies that wrap the JSON object in extra text.
        yield response
        match = re.search(r'\{.*\}', response, re.DOTALL)
        if match:
            yield match.group()

    for candidate in _candidates():
        try:
            data = json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass.
            continue
        if not isinstance(data, dict):
            # Valid JSON but not an object (list, number, ...): try the next form.
            continue
        return {} if data.get("skip") else data
    return {}


def extract_vessel_from_text(text: str, source: str) -> dict:
    """Use Ollama to extract structured vessel data from raw listing text.

    Only the first 3000 characters of ``text`` are sent to the model. The
    prompt (in Spanish) asks for a single JSON object, or ``{"skip": true}``
    when the text is not a listing for a specific vessel.

    Args:
        text: Raw text of the listing; ``None`` is treated as empty.
        source: Name of the source the text came from, embedded in the prompt.

    Returns:
        The decoded JSON object as a dict, or ``{}`` when the model asked to
        skip the text, returned nothing, or returned something that cannot be
        decoded into a JSON object.
    """
    snippet = (text or "")[:3000]
    prompt = f"""Eres un experto en inteligencia de mercado marítimo.
Analiza este texto de un anuncio de embarcación y extrae los datos disponibles.
Fuente: {source}

TEXTO:
{snippet}

Responde SOLO con JSON válido. Si el texto NO es un listing de embarcación específica responde {{"skip": true}}.

{{
  "skip": false,
  "name": "nombre del barco o descripción corta",
  "vessel_type": "Yacht|Motor|Sailboat|Fishing|Tug|Barge|Offshore|Ferry|Salvage|Other",
  "loa_m": número o null,
  "beam_m": número o null,
  "draft_m": número o null,
  "year_built": número o null,
  "hull": "Fiberglass|Steel|Aluminum|Wood|Composite|Unknown",
  "propulsion": "Diesel|Gasoline|Electric|Sail|None|Unknown",
  "status": "active|auction|salvage|abandoned|sold",
  "price_usd": número o null,
  "currency": "USD|EUR|GBP|CAD|AUD|etc",
  "location": "ciudad, país",
  "country": "código ISO 2 letras",
  "description": "resumen en español máximo 200 caracteres",
  "flags": ["below_market","rare","auction","salvage_value","motivated_seller","commercial","government_surplus"],
  "score": número del 0 al 100 según oportunidad para un broker
}}"""

    response = ollama_generate(prompt, model=MODELS['extract'], json_mode=True)
    return _parse_vessel_json(response)

# ── Direct source scrapers — no search engine middleman ──────────────────

import random

USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_3) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.3 Safari/605.1.15',
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0',
]

def get_headers(referer=None):
    """Build browser-like HTTP request headers with a random User-Agent.

    Args:
        referer: Optional URL to send as the ``Referer`` header. The header
            is omitted when this is falsy.

    Returns:
        dict: A freshly built header mapping; a new User-Agent is drawn from
        USER_AGENTS on every call.
    """
    # Function-scope import keeps this helper self-contained.
    from importlib.util import find_spec

    # requests/urllib3 only decode Brotli bodies when the optional `brotli`
    # (or `brotlicffi`) package is installed. Advertising "br" without a
    # decoder lets servers reply with Brotli, which would then reach the HTML
    # parser as raw compressed bytes. Only offer it when it can be decoded.
    try:
        can_decode_br = any(
            find_spec(mod) is not None for mod in ('brotli', 'brotlicffi')
        )
    except (ImportError, ValueError):
        can_decode_br = False
    accept_encoding = 'gzip, deflate, br' if can_decode_br else 'gzip, deflate'

    h = {
        'User-Agent': random.choice(USER_AGENTS),
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
        'Accept-Language': 'en-US,en;q=0.9,es;q=0.8,fr;q=0.7',
        'Accept-Encoding': accept_encoding,
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'none',
        'Cache-Control': 'max-age=0',
    }
    if referer:
        h['Referer'] = referer
    return h

# Default header set built once at import time: the randomly chosen
# User-Agent therefore stays fixed for the life of the process. Call
# get_headers() directly when a fresh User-Agent or a Referer is needed.
HEADERS = get_headers()


# Sites that are scraped directly rather than through a search engine.
# Every entry is a dict with:
#   name        display name of the source
#   category    grouping label (runtime string, kept in Spanish)
#   search_url  URL template; "{query}" is filled with the URL-quoted search
#               terms. Entries without "{query}" always fetch the same page.
#   type        source kind: classifieds | auction | salvage | broker | commercial
#   supports_filters (optional) when True, scrape_direct_source also expands
#               the {loa_min_ft}, {loa_max_ft}, {price_min} and {price_max}
#               placeholders before the query is inserted.
DIRECT_SOURCES = [
    # ── Craigslist ─────────────────────────────────────────────────────────────
    # Single multi-city entry (uses scrape_craigslist internally — Playwright, 3+ cities)
    # NOTE(review): same search_url as "Craigslist SF" below.
    {"name":"Craigslist",            "category":"Clasificados USA",    "search_url":"https://sfbay.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    # Individual cities — each makes one targeted request via scrape_direct_source
    {"name":"Craigslist Miami",      "category":"Clasificados USA",    "search_url":"https://miami.craigslist.org/search/boa?query={query}",                  "type":"classifieds"},
    {"name":"Craigslist Tampa",      "category":"Clasificados USA",    "search_url":"https://tampa.craigslist.org/search/boa?query={query}",                   "type":"classifieds"},
    # NOTE(review): uses the miami subdomain (same as "Craigslist Miami") with sort=date — confirm intended.
    {"name":"Craigslist Fort Laud",  "category":"Clasificados USA",    "search_url":"https://miami.craigslist.org/search/boa?query={query}&sort=date",         "type":"classifieds"},
    {"name":"Craigslist New Orleans","category":"Clasificados USA",    "search_url":"https://neworleans.craigslist.org/search/boa?query={query}",              "type":"classifieds"},
    {"name":"Craigslist Houston",    "category":"Clasificados USA",    "search_url":"https://houston.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist Seattle",    "category":"Clasificados USA",    "search_url":"https://seattle.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist LA",         "category":"Clasificados USA",    "search_url":"https://losangeles.craigslist.org/search/boa?query={query}",              "type":"classifieds"},
    {"name":"Craigslist SF",         "category":"Clasificados USA",    "search_url":"https://sfbay.craigslist.org/search/boa?query={query}",                   "type":"classifieds"},
    {"name":"Craigslist Jacksonville","category":"Clasificados USA",   "search_url":"https://jacksonville.craigslist.org/search/boa?query={query}",            "type":"classifieds"},
    {"name":"Craigslist Sarasota",   "category":"Clasificados USA",    "search_url":"https://sarasota.craigslist.org/search/boa?query={query}",                "type":"classifieds"},
    {"name":"Craigslist Chicago",    "category":"Clasificados USA",    "search_url":"https://chicago.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist Boston",     "category":"Clasificados USA",    "search_url":"https://boston.craigslist.org/search/boa?query={query}",                  "type":"classifieds"},
    {"name":"Craigslist Atlanta",    "category":"Clasificados USA",    "search_url":"https://atlanta.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist Baltimore",  "category":"Clasificados USA",    "search_url":"https://baltimore.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Norfolk",    "category":"Clasificados USA",    "search_url":"https://norfolk.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist San Diego",  "category":"Clasificados USA",    "search_url":"https://sandiego.craigslist.org/search/boa?query={query}",                "type":"classifieds"},
    {"name":"Craigslist Portland OR","category":"Clasificados USA",    "search_url":"https://portland.craigslist.org/search/boa?query={query}",                "type":"classifieds"},
    {"name":"Craigslist Minneapolis","category":"Clasificados USA",    "search_url":"https://minneapolis.craigslist.org/search/boa?query={query}",             "type":"classifieds"},
    {"name":"Craigslist Detroit",    "category":"Clasificados USA",    "search_url":"https://detroit.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist Cleveland",  "category":"Clasificados USA",    "search_url":"https://cleveland.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Charlotte",  "category":"Clasificados USA",    "search_url":"https://charlotte.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Denver",     "category":"Clasificados USA",    "search_url":"https://denver.craigslist.org/search/boa?query={query}",                  "type":"classifieds"},
    {"name":"Craigslist Phoenix",    "category":"Clasificados USA",    "search_url":"https://phoenix.craigslist.org/search/boa?query={query}",                 "type":"classifieds"},
    {"name":"Craigslist Annapolis",  "category":"Clasificados USA",    "search_url":"https://annapolis.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist New Jersey", "category":"Clasificados USA",    "search_url":"https://newjersey.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Galveston",  "category":"Clasificados USA",    "search_url":"https://galveston.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Pensacola",  "category":"Clasificados USA",    "search_url":"https://pensacola.craigslist.org/search/boa?query={query}",               "type":"classifieds"},
    {"name":"Craigslist Mobile AL",  "category":"Clasificados USA",    "search_url":"https://mobile.craigslist.org/search/boa?query={query}",                  "type":"classifieds"},
    {"name":"Craigslist Key West",   "category":"Clasificados USA",    "search_url":"https://keys.craigslist.org/search/boa?query={query}",                    "type":"classifieds"},
    {"name":"Craigslist Corpus",     "category":"Clasificados USA",    "search_url":"https://corpuschristi.craigslist.org/search/boa?query={query}",           "type":"classifieds"},
    {"name":"Craigslist Beaumont",   "category":"Clasificados USA",    "search_url":"https://beaumont.craigslist.org/search/boa?query={query}",                "type":"classifieds"},
    {"name":"Craigslist Baton Rouge","category":"Clasificados USA",    "search_url":"https://batonrouge.craigslist.org/search/boa?query={query}",              "type":"classifieds"},
    # NOTE: gulfcoast.craigslist.org (Biloxi) no longer exists — replaced with Mobile AL

    # ── eBay ──────────────────────────────────────────────────────────────────
    {"name":"eBay Marine",           "category":"Clasificados USA",    "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}&_sacat=26429&LH_BIN=1&_sop=10",          "type":"classifieds"},
    {"name":"eBay Auction",          "category":"Subastas USA",        "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}&_sacat=26429&LH_Auction=1",               "type":"auction"},
    {"name":"eBay Motors Sail",      "category":"Clasificados USA",    "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}&_sacat=36431&LH_BIN=1&_sop=10",          "type":"classifieds"},
    {"name":"eBay Boats Complete",   "category":"Clasificados USA",    "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}+boat&_sacat=26429&LH_BIN=1&_sop=15",     "type":"classifieds"},
    # NOTE(review): identical search_url to "eBay Salvage" in the Salvage section below.
    {"name":"eBay Salvage Boats",    "category":"Salvage / Subastas",  "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}+salvage+boat&_sacat=26429&LH_Auction=1", "type":"salvage"},

    # ── Government auctions ───────────────────────────────────────────────────
    {"name":"GovDeals",              "category":"Subastas Gobierno",   "search_url":"https://www.govdeals.com/index.cfm?fa=Main.AdvSearchResultsNew&kWord={query}&category=70", "type":"auction"},
    {"name":"PublicSurplus",         "category":"Subastas Gobierno",   "search_url":"https://www.publicsurplus.com/sms/browse/home?search={query}",                              "type":"auction"},
    {"name":"PropertyRoom",          "category":"Subastas Gobierno",   "search_url":"https://www.propertyroom.com/s?q={query}+boat",                                             "type":"auction"},
    # GovPlanet: correct URL confirmed working (Recreational Marine category)
    {"name":"GovPlanet",             "category":"Subastas Gobierno",   "search_url":"https://www.govplanet.com/Recreational+Marine",                                            "type":"auction"},
    # IronPlanet: correct URL confirmed working (Commercial Marine Vessels)
    {"name":"IronPlanet",            "category":"Subastas Gobierno",   "search_url":"https://www.ironplanet.com/Commercial+Marine+Vessels",                                     "type":"auction"},
    # HiBid: React SPA — scrape_hibid uses Playwright
    {"name":"HiBid",                 "category":"Subastas USA",        "search_url":"https://www.hibid.com/lots?q={query}+boat",                                                "type":"auction"},
    {"name":"AuctionTime",           "category":"Subastas USA",        "search_url":"https://www.auctiontime.com/listings/search?q={query}+boat",                               "type":"auction"},
    {"name":"BidSpotter",            "category":"Subastas USA",        "search_url":"https://www.bidspotter.com/en-us/auction-catalogues?q={query}+boat",                       "type":"auction"},
    # Copart: Playwright scraper handles JS-rendered lots
    {"name":"Copart Marine",         "category":"Subastas USA",        "search_url":"https://www.copart.com/vehicleFinderSection/?searchStr={query}&vehicleType=BOAT",          "type":"auction"},

    # ── Salvage ───────────────────────────────────────────────────────────────
    {"name":"Salvex Marine",         "category":"Salvage / Subastas",  "search_url":"https://salvex.com/listings/?q={query}&cat=marine",                                        "type":"salvage"},
    {"name":"Barnacle Marine",       "category":"Salvage / Subastas",  "search_url":"https://www.barnaclemarine.com/?s={query}",                                                 "type":"salvage"},
    # NOTE(review): identical search_url to "eBay Salvage Boats" in the eBay section above.
    {"name":"eBay Salvage",          "category":"Salvage / Subastas",  "search_url":"https://www.ebay.com/sch/i.html?_nkw={query}+salvage+boat&_sacat=26429&LH_Auction=1",     "type":"salvage"},
    {"name":"Cooper Capital Salvage","category":"Salvage USA",         "search_url":"https://www.cooperss.com/",                                                                  "type":"salvage"},
    {"name":"IAA Watercraft",        "category":"Salvage USA",         "search_url":"https://www.iaai.com/Search?SearchText={query}&vehicleType=Watercraft",                     "type":"salvage"},

    # ── Specialized sales — main sites ───────────────────────────────────────
    {"name":"YachtWorld",            "category":"Venta Especializada", "search_url":"https://www.yachtworld.com/boats-for-sale/",                             "type":"broker"},
    {"name":"BoatTrader",            "category":"Venta Especializada", "search_url":"https://www.boattrader.com/boats/?query={query}",                        "type":"broker"},
    {"name":"Boats.com",             "category":"Venta Especializada", "search_url":"https://www.boats.com/boats-for-sale/?query={query}",                    "type":"broker"},
    {"name":"Apollo Duck",           "category":"Venta Especializada", "search_url":"https://www.apolloduck.com/search.phtml?search={query}&sr=1&q=1",        "type":"broker"},
    {"name":"Rightboat",             "category":"Venta Especializada", "search_url":"https://www.rightboat.com/boats-for-sale/?q={query}",                    "type":"broker"},
    # Boat24: 403 on requests — scrape_eu_broker uses Playwright
    {"name":"Boat24",                "category":"Venta Especializada", "search_url":"https://www.boat24.com/en/boats/?q={query}",                             "type":"broker"},
    # YachtMarket: uses scrape_eu_broker (Playwright) in case of blocks
    {"name":"YachtMarket",           "category":"Venta Especializada", "search_url":"https://www.yachtmarket.com/boats-for-sale/?q={query}",                  "type":"broker"},

    # ── SailboatListings (dedicated thread also runs in parallel) ────────────
    # These two entries carry length/price placeholders and set supports_filters.
    {"name":"SailboatListings",      "category":"Veleros Global",      "search_url":"https://www.sailboatlistings.com/cgi-bin/saildata/db.cgi?db=default&uid=default&sb=33&so=descend&websearch=1&manufacturer=&model=&length-gt={loa_min_ft}&length-lt={loa_max_ft}&year-lt=---&year-gt=---&price-lt={price_max}&type=&material=&hull=&state=&keyword={query}&view_records=+Show+Matching+Boats+", "type":"broker", "supports_filters": True},
    {"name":"SailboatListings View", "category":"Veleros Global",      "search_url":"https://www.sailboatlistings.com/cgi-bin/saildata/db.cgi?db=default&uid=default&sb=33&so=descend&websearch=1&manufacturer=&model=&length-gt={loa_min_ft}&length-lt={loa_max_ft}&year-lt=---&year-gt=---&price-lt={price_max}&type=Sail&material=&hull=&state=&keyword=&view_records=+Show+Matching+Boats+", "type":"broker", "supports_filters": True},
    # Forums: Playwright scraper handles vBulletin/XenForo FS sections
    {"name":"TheHullTruth",          "category":"Veleros Global",      "search_url":"https://www.thehulltruth.com/boating-forum/search.php?do=process&query={query}&prefixid=FS&type=post", "type":"classifieds"},
    {"name":"Cruisers Forum",        "category":"Veleros Global",      "search_url":"https://www.cruisersforum.com/forums/f152/",                             "type":"classifieds"},

    # ── Commercial / Offshore ─────────────────────────────────────────────────
    {"name":"WorkBoat Classifieds",  "category":"Comercial Offshore",  "search_url":"https://www.workboat.com/classifieds/?keywords={query}",                "type":"commercial"},
    {"name":"Commercial Vessel",     "category":"Comercial Offshore",  "search_url":"https://www.commercialvessel.com/search?keywords={query}",              "type":"commercial"},
    {"name":"OSV Broker",            "category":"Comercial Offshore",  "search_url":"https://www.osvbroker.com/?s={query}",                                  "type":"commercial"},
    {"name":"Marine Classifieds",    "category":"Comercial Offshore",  "search_url":"https://www.marineclassifieds.com/search.php?search={query}",           "type":"commercial"},
    {"name":"Seaboats",              "category":"Comercial Global",    "search_url":"https://www.seaboats.net/search.php?q={query}&cat=0",                   "type":"commercial"},
    {"name":"Seaboats Offshore",     "category":"Comercial Offshore",  "search_url":"https://www.seaboats.net/search.php?q={query}&cat=offshore+support+vessels", "type":"commercial"},
    {"name":"Seaboats Tug",          "category":"Comercial Offshore",  "search_url":"https://www.seaboats.net/search.php?q={query}&cat=tugs+%26+pushboats",  "type":"commercial"},
    {"name":"Seaboats Barge",        "category":"Comercial Offshore",  "search_url":"https://www.seaboats.net/search.php?q={query}&cat=barges+%26+lighters", "type":"commercial"},
    {"name":"Seaboats Fishing",      "category":"Comercial Offshore",  "search_url":"https://www.seaboats.net/search.php?q={query}&cat=fishing+vessels",     "type":"commercial"},
    # NOTE(review): same search_url as the "Apollo Duck" broker entry above; only category/type differ.
    {"name":"Apollo Duck Workboats", "category":"Comercial Offshore",  "search_url":"https://www.apolloduck.com/search.phtml?search={query}&sr=1&q=1",       "type":"commercial"},
    {"name":"YachtWorld Commercial", "category":"Comercial Offshore",  "search_url":"https://www.yachtworld.com/boats-for-sale/type-commercial/",             "type":"commercial"},

    # ── Australia / Pacific ──────────────────────────────────────────────────
    # Trade a Boat AU: server-rendered, correct URL confirmed working
    {"name":"Trade a Boat AU",       "category":"Australia / Pacifico","search_url":"https://www.tradeaboat.com.au/search/Boats?category=Sail&keywords={query}", "type":"broker"},
    # Boatsales.com.au (Boatpoint redirects here): scrape_eu_broker via Playwright
    {"name":"Boatsales AU",          "category":"Australia / Pacifico","search_url":"https://www.boatsales.com.au/boats-for-sale/?q={query}",                "type":"broker"},

    # ── United Kingdom ────────────────────────────────────────────────────────
    # Boats & Outboards UK: 403 on requests — scrape_eu_broker uses Playwright
    {"name":"Boats & Outboards UK",  "category":"Reino Unido",         "search_url":"https://www.boatsandoutboards.co.uk/boats-for-sale/?q={query}",         "type":"broker"},
    # Apollo Duck UK: use same apolloduck.com (no separate UK subdomain)
    {"name":"Apollo Duck UK",        "category":"Reino Unido",         "search_url":"https://www.apolloduck.com/search.phtml?search={query}&sr=1&q=1&country=GB", "type":"broker"},

    # ── France ────────────────────────────────────────────────────────────────
    # Annonces Bateau: 403 on requests — scrape_eu_broker uses Playwright
    {"name":"Annonces Bateau",       "category":"Francia",             "search_url":"https://www.annoncesbateau.com/bateaux/annonces-bateaux?keyword={query}", "type":"broker"},

    # ── Spain / Mediterranean ────────────────────────────────────────────────
    # Inautia ES: 403 on requests — scrape_eu_broker uses Playwright
    {"name":"Inautia ES",            "category":"Espana / Global",     "search_url":"https://www.inautia.es/barca?q={query}",                                "type":"broker"},
    {"name":"Barcos.net",            "category":"Espana / Global",     "search_url":"https://www.barcos.net/busqueda/?q={query}",                             "type":"broker"},

    # ── Europe / Global ───────────────────────────────────────────────────────
    # YachtAll: 403 on requests — scrape_eu_broker uses Playwright
    {"name":"YachtAll",              "category":"Clasificados EU",     "search_url":"https://yachtall.com/yachts/?search={query}",                           "type":"broker"},

    # ── Brokers USA ───────────────────────────────────────────────────────────
    {"name":"HMY Yachts",            "category":"Brokers USA",         "search_url":"https://www.hmy.com/yachts-for-sale/?SaleClassCode=used",                "type":"broker"},
    {"name":"Denison Yachting",      "category":"Brokers USA",         "search_url":"https://www.denisonyachtsales.com/yachts-for-sale/?search={query}",      "type":"broker"},
    {"name":"BoatCrazy",             "category":"Brokers USA",         "search_url":"https://boatcrazy.com/boats?q={query}",                                  "type":"classifieds"},
    # Galati Yachts: server-rendered WP site — scrape_galati uses requests
    {"name":"Galati Yachts",         "category":"Brokers USA",         "search_url":"https://www.galatiyachts.com/yachts-for-sale/?keywords={query}",         "type":"broker"},
    {"name":"United Yacht Sales",    "category":"Brokers USA",         "search_url":"https://www.unitedyacht.com/yachts-for-sale/",                           "type":"broker"},
    # Worth Ave Yachts: hybrid server-rendered — scrape_luxury_broker uses Playwright
    {"name":"Worth Ave Yachts",      "category":"Brokers USA",         "search_url":"https://www.worthavenueyachts.com/yachts-for-sale/",                     "type":"broker"},

    # ── International brokers ─────────────────────────────────────────────────
    # Fraser Yachts: Vue/JS SPA — scrape_luxury_broker uses Playwright
    {"name":"Fraser Yachts",         "category":"Brokers Internacional","search_url":"https://www.fraseryachts.com/en/yachts-for-sale/",                      "type":"broker"},
    # Burgess Yachts: JS-loaded — scrape_luxury_broker uses Playwright
    {"name":"Burgess Yachts",        "category":"Brokers Internacional","search_url":"https://www.burgessyachts.com/en/yachts/sale/",                         "type":"broker"},
    # Northrop & Johnson: JS-loaded — scrape_luxury_broker uses Playwright
    # NOTE(review): host is njcharters.com — confirm this is the intended sales listing page.
    {"name":"Northrop & Johnson",    "category":"Brokers Internacional","search_url":"https://www.njcharters.com/yachts-for-sale/",                           "type":"broker"},
    {"name":"Merle Wood",            "category":"Brokers Internacional","search_url":"https://www.merlewood.com/yachts-for-sale/",                            "type":"broker"},

    # ── Canada ────────────────────────────────────────────────────────────────
    {"name":"Kijiji Boats CA",       "category":"Canada",              "search_url":"https://www.kijiji.ca/b-boats/{query}/k0c132",                           "type":"classifieds"},
]

# Web search query templates — used to find listings on ANY site, including
# ones that block direct scraping (YachtWorld, Boats.com, Apollo Duck, etc.).
# "{query}" is replaced at runtime by build_web_queries(), which also appends
# price / LOA context to the templates that do not start with "site:".
# Every template must be unique: a repeated entry costs an extra search
# request for identical results.
WEB_SEARCH_TEMPLATES = [
    # Generic, multilingual
    '"{query}" boat for sale',
    '"{query}" sailboat for sale',
    '"{query}" vessel for sale',
    '"{query}" yacht for sale',
    '"{query}" barco venta',
    '"{query}" bateau vendre occasion',
    # Site-restricted: brokers and forums
    'site:yachtworld.com {query} for sale sail cruiser',
    'site:boats.com {query} sailboat for sale',
    'site:apolloduck.com {query} for sale',
    'site:rightboat.com {query} for sale',
    'site:boat24.com {query} for sale',
    'site:yachtall.com {query} sailboat',
    'site:annoncesbateau.com {query} voilier',
    'site:cruisersforum.com {query} for sale',
    'site:thehulltruth.com {query} for sale fs',
    # Auctions / government surplus
    'site:govplanet.com {query} vessel',
    'site:ironplanet.com {query} boat vessel',
    'site:govdeals.com {query} vessel boat',
    'site:publicsurplus.com {query} vessel',
    'site:hibid.com {query} boat',
    'site:copart.com {query} boat vessel',
    'site:rbauction.com {query} boat',
    '"{query}" boat auction government surplus',
    '"{query}" vessel auction salvage',
    # Salvage specific
    'site:salvex.com {query} marine vessel',
    'site:copart.com {query} boat salvage',
    'site:iaai.com {query} boat',
    'site:boatbreakers.com {query}',
    '"{query}" salvage boat for sale',
    '"{query}" insurance total loss boat',
    '"{query}" wrecked boat for sale parts',
    '"{query}" boat salvage title for sale',
    # Commercial marketplaces
    'site:seaboats.net {query}',
    'site:workboat.com {query} for sale',
    'site:commercialvessel.com {query}',
    # Offshore / commercial
    'site:osvbroker.com {query}',
    'site:marineclassifieds.com {query} for sale',
    'site:apolloduck.com {query} offshore tug barge',
    '"{query}" offshore supply vessel for sale',
    '"{query}" OSV for sale broker',
    '"{query}" crew boat for sale',
    '"{query}" workboat for sale',
    '"{query}" tug for sale',
    '"{query}" barge for sale',
    '"{query}" supply vessel for sale',
    '"{query}" fishing vessel for sale',
    '"{query}" commercial vessel for sale',
    # Australia / Pacific
    'site:tradeaboat.com.au {query} for sale',
    'site:boatpoint.com.au {query} for sale',
    # Europe classifieds (the annoncesbateau.com template is listed once, above)
    'site:boatsandoutboards.co.uk {query} for sale',
    'site:inautia.com {query} barco venta',
]

def build_web_queries(base_query: str, filters: dict) -> list:
    """Expand WEB_SEARCH_TEMPLATES into concrete search strings for *base_query*.

    Templates aimed at a vessel segment that does not match the search are
    dropped: salvage templates unless the search is a salvage one, offshore
    templates for sailboat-only searches, and sailboat templates for
    offshore/salvage-only searches. Templates that do not start with
    ``site:`` get LOA / price context appended when those filters are set.

    Args:
        base_query: Free-text search terms substituted for ``{query}``.
        filters: Search filters; the keys read are ``max_price``, ``min_loa``
            (metres, converted to feet), ``type`` and ``status``. ``None`` is
            treated as an empty dict.

    Returns:
        list: Query strings in template order, with duplicates removed.
    """
    filters = filters or {}

    price_ctx = ""
    loa_ctx = ""
    if filters.get("max_price"):
        price_ctx = f" under ${filters['max_price']}"
    if filters.get("min_loa"):
        # A malformed length filter should not abort the whole search; the
        # LOA hint is simply left out.
        try:
            ft = int(float(filters["min_loa"]) / 0.3048)
        except (TypeError, ValueError, OverflowError):
            ft = None
        if ft is not None:
            loa_ctx = f" {ft}ft+"
    suffix = loa_ctx + price_ctx

    vtype = (filters.get("type", "") or "").lower()
    status = (filters.get("status", "") or "").lower()
    base_l = base_query.lower()

    # Substrings that mark a template as belonging to one vessel segment.
    salvage_kw = ("salvage", "copart", "iaai", "boatbreakers", "insurance",
                  "total loss", "wrecked", "salvage title")
    offshore_kw = ("workboat", "commercial", "osvbroker", "offshore", "osv",
                   "crew boat", "supply vessel", "tug", "barge", "fishing vessel")
    sail_kw = ("sailboat", "yachtall", "annoncesbateau", "voilier",
               "cruisersforum", "sail cruiser")

    # NOTE(review): these are plain substring tests on the query, so e.g.
    # "tug" also matches inside longer words.
    is_salvage = status == "salvage" or "salvage" in base_l
    is_offshore = (
        vtype in {"offshore", "tug", "barge", "ferry", "fishing", "commercial"}
        or any(k in base_l for k in ("tug", "barge", "osv", "crew boat", "workboat"))
    )
    is_sail = (
        vtype in {"sailboat", "velero", "sail"}
        or any(k in base_l for k in ("sail", "velero", "ketch", "sloop"))
    )

    queries = []
    for tmpl in WEB_SEARCH_TEMPLATES:
        tmpl_l = tmpl.lower()
        # Salvage templates only make sense for salvage searches.
        if not is_salvage and any(k in tmpl_l for k in salvage_kw):
            continue
        # Offshore templates are skipped for clearly sail-only searches.
        if is_sail and not is_offshore and any(k in tmpl_l for k in offshore_kw):
            continue
        # Sailboat templates are skipped for offshore/salvage-only searches.
        if (is_offshore or is_salvage) and not is_sail and any(k in tmpl_l for k in sail_kw):
            continue

        q = tmpl.replace("{query}", base_query)
        if suffix and not q.startswith("site:"):
            q += suffix
        queries.append(q)

    # Drop repeated queries while keeping first-seen order, so the same
    # search is never issued twice.
    return list(dict.fromkeys(queries))

# Search engines tried in order by web_search(). Each entry provides the
# result-page URL template ("{query}" is filled with the URL-quoted search
# string) and the CSS selectors for result links and their snippets.
SEARCH_ENGINES = [
    {"name": name, "url": url, "link_sel": link_sel, "snippet_sel": snippet_sel}
    for name, url, link_sel, snippet_sel in (
        ("DuckDuckGo", "https://html.duckduckgo.com/html/?q={query}",
         "a.result__a", "a.result__snippet"),
        ("Bing", "https://www.bing.com/search?q={query}&count=20",
         "h2 a", ".b_caption p"),
    )
]

def web_search(query: str, max_results: int = 8) -> list[dict]:
    """Query the engines in SEARCH_ENGINES and collect external result links.

    Engines are tried in order until ``max_results`` links are gathered. A
    failing engine is logged and skipped, so the function is best-effort and
    does not raise for network or parsing errors.

    Args:
        query: Search string; it is URL-quoted before being sent.
        max_results: Maximum number of results to return across all engines.

    Returns:
        list[dict]: One dict per result with the keys ``url``, ``title``,
        ``snippet``, ``price_text``, ``img_url``, ``location``, ``source``
        (host without "www."), ``source_type`` and ``category``.
    """
    results = []
    seen = set()
    # URL fragments identifying search engines, social networks and other
    # pages that are not listings.
    skip = ["google.","bing.","duckduckgo.","yahoo.","wikipedia.","youtube.",
            "facebook.com/login","instagram.","twitter.","linkedin.",
            "pinterest.","reddit.com/r/",".pdf","amazon.com/s?"]

    for engine in SEARCH_ENGINES:
        try:
            url = engine["url"].format(query=requests.utils.quote(query))
            time.sleep(1.0)  # pause before every engine request
            # TLS verification is disabled here, as elsewhere in this file.
            r = requests.get(url, headers=get_headers(), timeout=20, verify=False)
            if r.status_code != 200:
                print(f"[web_search:{engine['name']}] HTTP {r.status_code}")
                continue
            soup = BeautifulSoup(r.text, "html.parser")
            links = soup.select(engine["link_sel"])
            snippets = soup.select(engine["snippet_sel"])

            # Look at up to twice as many links as needed, since some are
            # filtered out below.
            for i, link in enumerate(links[:max_results * 2]):
                href = link.get("href", "")
                # DuckDuckGo wraps targets in a redirect; the real URL is in
                # the "uddg" query parameter.
                if "duckduckgo.com" in href:
                    m = re.search(r'uddg=([^&]+)', href)
                    if m:
                        href = requests.utils.unquote(m.group(1))
                if not href.startswith("http"):
                    continue
                if any(s in href for s in skip):
                    continue
                if href in seen:
                    continue
                seen.add(href)

                title = link.get_text(strip=True)
                # NOTE(review): snippets are paired with links by position,
                # which assumes both selectors match in the same order.
                snippet = snippets[i].get_text(strip=True) if i < len(snippets) else ""
                try:
                    source = href.split("/")[2].replace("www.", "")
                except IndexError:
                    # URL without a "//host" part.
                    source = "web"
                results.append({
                    "url": href, "title": title, "snippet": snippet,
                    "price_text": "", "img_url": "",
                    "location": "", "source": source,
                    "source_type": "broker", "category": "Web Search"
                })
                if len(results) >= max_results:
                    break
        except Exception as e:
            # Best-effort: one failing engine must not abort the search, but
            # the failure is reported instead of being silently dropped.
            print(f"[web_search:{engine['name']}] {type(e).__name__}: {e}")
        if len(results) >= max_results:
            break
    return results


def scrape_direct_source(source: dict, query: str, filters: dict = None) -> list[dict]:
    """Scrape one source's search page and return the links that look like listings.

    No per-site CSS selectors are used: anchors on the page are collected
    together with a nearby price and thumbnail, then ranked by a heuristic
    score so that real listings outrank navigation links.

    Args:
        source: Source config. Reads ``search_url`` (a template containing
            ``{query}`` and, when ``supports_filters`` is truthy, the
            ``{loa_min_ft}``/``{loa_max_ft}``/``{price_min}``/``{price_max}``
            placeholders), plus ``name``, ``type`` and ``category``.
        query: Free-text search query; repeated words are collapsed.
        filters: Optional UI filters; ``min_loa`` (metres), ``min_price`` and
            ``max_price`` are read.

    Returns:
        Up to 20 result dicts (``url``, ``title``, ``snippet``,
        ``price_text``, ``img_url``, ``location``, ``source``,
        ``source_type``, ``category``). HTTP failures and exceptions are
        printed and produce whatever was collected so far, normally ``[]``.
    """
    # Function-scope import, in the same way playwright is imported elsewhere.
    from urllib.parse import urljoin

    if filters is None:
        filters = {}
    results = []
    try:
        # Build URL — expand filter placeholders if source supports them
        raw_url = source["search_url"]
        if source.get("supports_filters"):
            min_loa_m = float(filters.get("min_loa") or 0)
            max_price = filters.get("max_price") or ""
            min_price = filters.get("min_price") or ""
            loa_min_ft = int(min_loa_m / 0.3048) if min_loa_m else ""
            loa_max_ft = ""  # no max LOA filter in current UI
            for placeholder, value in (
                ("{loa_min_ft}", loa_min_ft),
                ("{loa_max_ft}", loa_max_ft),
                ("{price_min}", min_price),
                ("{price_max}", max_price),
            ):
                raw_url = raw_url.replace(placeholder, str(value))

        # Collapse repeated words ("boat for sale for sale" -> "boat for sale")
        # while keeping first-seen order.
        clean_q = " ".join(dict.fromkeys(query.split()))
        url = raw_url.format(query=requests.utils.quote(clean_q))
        time.sleep(1.0)
        domain = url.split("/")[2]
        headers = get_headers(referer=f"https://{domain}/")
        r = requests.get(url, headers=headers, timeout=25, verify=False)

        # Retry once with fresh headers if the first attempt looks blocked
        if r.status_code in [403, 429, 503]:
            time.sleep(2)
            headers = get_headers()
            r = requests.get(url, headers=headers, timeout=25, verify=False)

        if r.status_code not in [200, 206]:
            print(f"[{source['name']}] HTTP {r.status_code}")
            return []

        soup = BeautifulSoup(r.text, "html.parser")
        for tag in soup(["script", "style", "nav", "footer", "header", "aside",
                         "noscript", "meta", "link"]):
            tag.decompose()

        skip_words = ["login", "register", "signup", "about", "contact", "help",
                      "privacy", "terms", "facebook.com", "twitter.com", "instagram.com"]
        # Accept the currency marker either before ("$25,000") or after
        # ("25,000 USD") the amount; the amount must start with a digit.
        price_re = re.compile(
            r'(?:USD|EUR|GBP|CAD|\$|€|£)\s*\d[\d,]*(?:\.\d+)?'
            r'|\d[\d,]*(?:\.\d+)?\s*(?:USD|EUR|GBP|CAD|\$|€|£)'
        )

        seen = set()
        unique = []
        for a in soup.find_all("a", href=True)[:80]:
            href = a["href"].strip()
            if not href or href.startswith("#") or href.startswith("javascript"):
                continue
            if not href.startswith("http"):
                # Resolve relative and protocol-relative links against the page URL.
                href = urljoin(url, href)
                if not href.startswith("http"):
                    continue  # mailto:, tel: and similar non-web schemes
            if any(s in href.lower() for s in skip_words):
                continue
            text = a.get_text(strip=True)[:150]
            if not text or len(text) <= 8:
                continue
            if href in seen:
                continue  # first occurrence of a URL wins
            seen.add(href)

            price = ""
            img = ""
            parent = a.find_parent()
            if parent:
                pm = price_re.search(parent.get_text(" ", strip=True))
                if pm:
                    price = pm.group()
                # Traverse up to 4 levels to find a thumbnail image
                node = parent
                for _ in range(4):
                    if node is None:
                        break
                    im = node.find("img")
                    if im:
                        src = _extract_best_src(im)
                        if src:
                            if not src.startswith("http"):
                                src = urljoin(url, src)
                            if src.startswith("http") and len(src) > 20:
                                img = src
                                break
                    node = node.parent
            unique.append({"url": href, "title": text, "price": price, "img": img})

        if not unique:
            print(f"[{source['name']}] No links found")
            return []

        # ── Heuristic listing filter (no AI needed) ──────────────────────────
        # Score each link — higher = more likely to be an actual vessel listing
        BOAT_KW = ["boat", "yacht", "vessel", "sail", "ketch", "sloop", "cutter", "schooner",
                   "yawl", "catamaran", "trimaran", "motor", "tug", "barge", "cruiser", "skiff",
                   "fishing", "trawler", "offshore", "cabin", "dinghy", "pontoon", "runabout"]

        def listing_score(lnk):
            url_l = lnk["url"].lower()
            title_l = lnk["title"].lower()
            sc = 0
            if lnk["price"]:
                sc += 4  # price is strong signal
            if lnk["img"]:
                sc += 1  # has photo
            if re.search(r'/\d{5,}', url_l):
                sc += 3  # 5+ digit ID
            if re.search(r'/(view|detail|listing|item|vessel|boat|ship|for-sale)[-/]', url_l):
                sc += 2
            if re.search(r'-for-sale[/-]?$', url_l):
                sc += 2
            if re.search(r'\b(19[5-9]\d|20[0-2]\d)\b', title_l):
                sc += 3  # year in title
            if re.search(r'\d{2,3}\s*(?:\'|ft|feet|meter)', title_l):
                sc += 2  # size
            if any(k in title_l for k in BOAT_KW):
                sc += 1
            if re.search(r'\b(for sale|en vente|vendre|en venta)\b', title_l):
                sc += 1
            if len(lnk["title"]) > 15:
                sc += 1  # nav links are short
            return sc

        scored = [(listing_score(lnk), lnk) for lnk in unique[:30]]
        scored.sort(key=lambda x: x[0], reverse=True)

        # Keep links with score >= 3, or fall back to top-5 if nothing qualifies
        good = [lnk for sc, lnk in scored if sc >= 3]
        if not good:
            good = [lnk for _, lnk in scored[:5]]  # best guesses from this source

        for lnk in good[:20]:
            results.append({
                "url":         lnk["url"],
                "title":       lnk["title"],
                "snippet":     f"Price: {lnk['price']}",
                "price_text":  lnk["price"],
                "img_url":     lnk["img"],
                "location":    "",
                "source":      source["name"],
                "source_type": source["type"],
                "category":    source["category"],
            })

        print(f"[{source['name']}] {len(results)} listings found")
    except Exception as e:
        print(f"[{source['name']}] Error: {e}")
    return results


# Interleave queue for polite scraping.
# polite_pause() walks this list round-robin and requests one entry between
# consecutive page fetches from the same source.
_interleave_lock = threading.Lock()  # guards reads/updates of _interleave_idx
_interleave_sites = [
    "https://miami.craigslist.org",
    "https://www.seaboats.net",
    "https://www.barcos.net",
    "https://www.ebay.com",
    "https://boston.craigslist.org",
    "https://seattle.craigslist.org",
]
_interleave_idx = 0  # running counter; used modulo len(_interleave_sites)

def polite_pause(source_name: str):
    """Pause between two page fetches of the same source.

    Picks the next entry of ``_interleave_sites`` (round-robin, under
    ``_interleave_lock``), issues one throw-away GET to it, then sleeps for a
    random 2-5 seconds. Failures of the throw-away request are ignored.

    Args:
        source_name: Label used only in the progress message.
    """
    global _interleave_idx

    # Reserve the next slot atomically so concurrent callers get different sites.
    with _interleave_lock:
        slot = _interleave_idx % len(_interleave_sites)
        other_site = _interleave_sites[slot]
        _interleave_idx += 1

    try:
        requests.get(other_site, headers=get_headers(), timeout=5, verify=False)
    except Exception:
        # The response is irrelevant; an unreachable site must not stop scraping.
        pass

    # Random human-like delay: 2-5 seconds
    delay_seconds = random.uniform(2.0, 5.0)
    time.sleep(delay_seconds)
    print(f"[{source_name}] Polite pause done — continuing...")

def _sbl_image_url(container, sid: str, use_data_src: bool = False) -> str:
    """Return a full-size SailboatListings image URL for a listing element.

    Takes the first ``<img>`` inside ``container``, makes its ``src``
    absolute, rewrites thumbnail/medium paths to the full-size path, and
    falls back to the conventional ``photo1.jpg`` URL built from ``sid``.
    ``use_data_src`` additionally consults ``data-src`` when ``src`` is empty.
    """
    img_src = ""
    img_tag = container.find("img")
    if img_tag:
        img_src = img_tag.get("src", "") or ""
        if not img_src and use_data_src:
            img_src = img_tag.get("data-src", "") or ""
        if img_src and not img_src.startswith("http"):
            img_src = "https://www.sailboatlistings.com" + img_src
    # Upgrade /sailimg/t/ (thumbnail) or /sailimg/m/ (medium) → /sailimg/ (full)
    for thumb in ("/sailimg/t/", "/sailimg/m/"):
        if thumb in img_src:
            img_src = img_src.replace(thumb, "/sailimg/")
            break
    return img_src or f"https://www.sailboatlistings.com/sailimg/{sid}/photo1.jpg"


def scrape_sailboatlistings(query: str, filters: dict, max_pages: int = 8) -> list[dict]:
    """Multi-page scraper for SailboatListings.com.

    Captures MAIN listings (``sailboat=XXXXX`` links) with their structured
    label/value fields, plus SIDEBAR featured listings (``/view/XXXXX``) as a
    bonus. Paging stops on HTTP errors, rate limiting, an empty page, a
    "no records" page, or any exception.

    Args:
        query: Keyword passed to the site's search form.
        filters: UI filters. Reads ``min_loa``/``max_loa`` (metres),
            ``max_price``, ``type``, ``hull``, ``year_min`` and ``year_max``;
            missing or ``None`` values mean "no restriction".
        max_pages: Maximum number of result pages to request.

    Returns:
        A list of result dicts (``url``, ``title``, ``snippet``,
        ``price_text``, ``img_url``, ``location``, ``source``,
        ``source_type``, ``category``, ``fields``). ``fields`` holds the
        structured label/value pairs for main listings and is empty for
        sidebar listings.
    """
    # Same function as requests.utils.quote; imported locally from the stdlib.
    from urllib.parse import quote

    def _q(value) -> str:
        """URL-quote any filter value for safe use in the query string."""
        return quote(str(value))

    results = []
    seen_urls = set()

    min_loa_m = float(filters.get("min_loa") or 0)
    max_loa_m = float(filters.get("max_loa") or 0)
    max_price = filters.get("max_price") or ""
    loa_min_ft = int(min_loa_m / 0.3048) if min_loa_m else ""
    loa_max_ft = int(max_loa_m / 0.3048) if max_loa_m else ""

    # `or ""` keeps a None filter value from crashing on .lower()
    vessel_type = str(filters.get("type") or "").lower()
    sbl_type_map = {
        "sailboat": "Sail", "sail": "Sail",
        "yacht":    "cruiser",
        "motor":    "powerboat", "motorboat": "powerboat",
        "fishing":  "fishing",
        "tug": "", "barge": "", "offshore": "", "ferry": "", "commercial": "",
    }
    # Default "" → search ALL types on SailboatListings
    sbl_type = sbl_type_map.get(vessel_type, "")
    hull = str(filters.get("hull") or "").lower()
    sbl_hull_map = {
        "fiberglass": "fiberglass", "steel": "steel",
        "aluminum": "aluminum", "wood": "wood",
    }
    sbl_material = sbl_hull_map.get(hull, "")

    # "---" is sent when no year bound is selected
    year_min = filters.get("year_min") or "---"
    year_max = filters.get("year_max") or "---"

    base_url = (
        "https://www.sailboatlistings.com/cgi-bin/saildata/db.cgi"
        "?db=default&uid=default&sb=33&so=descend&websearch=1"
        "&manufacturer=&model="
        f"&length-gt={loa_min_ft}&length-lt={loa_max_ft}"
        f"&year-lt={_q(year_max)}&year-gt={_q(year_min)}&price-lt={_q(max_price)}"
        f"&type={sbl_type}&material={sbl_material}&hull=&state="
        f"&keyword={quote(query)}"
        "&view_records=+Show+Matching+Boats+"
    )

    for page in range(1, max_pages + 1):
        if page > 1:
            polite_pause("SailboatListings")

        try:
            url = base_url if page == 1 else base_url + f"&nh={page}"
            r = requests.get(url, headers=get_headers(), timeout=25, verify=False)

            if r.status_code == 429:
                print(f"[SailboatListings] Rate limited on page {page} — stopping")
                break
            if r.status_code != 200:
                print(f"[SailboatListings] Page {page} HTTP {r.status_code}")
                break

            soup = BeautifulSoup(r.text, "html.parser")
            body_lower = soup.get_text().lower()

            # The look-behind keeps "10 matches"/"120 matches" from being
            # mistaken for an empty result page.
            if "no records" in body_lower or re.search(r'(?<!\d)0 matches', body_lower):
                print(f"[SailboatListings] No more results at page {page}")
                break

            page_results = 0

            # ── MAIN LISTINGS (sailboat=XXXXX) — full structured data ──
            for header_link in soup.find_all("a", class_="sailheader"):
                href = header_link.get("href", "")
                m = re.search(r'sailboat=(\d+)', href)
                if not m:
                    continue
                sid = m.group(1)
                canonical = f"https://www.sailboatlistings.com/view/{sid}"
                if canonical in seen_urls:
                    continue
                seen_urls.add(canonical)

                title = header_link.get_text(strip=True)

                # Parent table contains all structured sailvb/sailvk spans
                listing_table = header_link.find_parent("table")
                if not listing_table:
                    continue

                # Extract structured fields: each label span is followed by its value span
                fields = {}
                for label_span in listing_table.find_all("span", class_="sailvb"):
                    label = label_span.get_text(strip=True).rstrip(":").strip()
                    value_span = label_span.find_next("span", class_="sailvk")
                    if value_span:
                        fields[label] = value_span.get_text(strip=True)

                # Build context string from structured fields
                context = " | ".join(f"{k}: {v}" for k, v in fields.items())

                results.append({
                    "url": canonical,
                    "title": title or context[:80],
                    "snippet": context,
                    "price_text": fields.get("Asking", ""),
                    "img_url": _sbl_image_url(listing_table, sid, use_data_src=True),
                    "location": fields.get("Location", ""),
                    "source": "SailboatListings",
                    "source_type": "broker",
                    "category": "Veleros Global",
                    "fields": fields,  # pass structured fields for direct extraction
                })
                page_results += 1

            # ── SIDEBAR FEATURED (/view/XXXXX) — less data but more listings ──
            for a in soup.find_all("a", class_="featured"):
                href = a.get("href", "")
                view_m = re.search(r'/view/(\d+)', href)
                if not view_m:
                    continue
                sid = view_m.group(1)
                canonical = f"https://www.sailboatlistings.com/view/{sid}"
                if canonical in seen_urls:
                    continue
                seen_urls.add(canonical)

                link_text = a.get_text(" ", strip=True)
                # Extract price from link text: "45' Alden 45 Falmouth, Maine Asking $355,000"
                price_m = re.search(r'Asking\s*\$([\d,]+)', link_text)
                price_text = f"${price_m.group(1)}" if price_m else ""

                # Extract location from featurespec span (text before "Asking")
                location = ""
                spec_span = a.find("span", class_="featurespec")
                if spec_span:
                    spec_text = spec_span.get_text(" ", strip=True)
                    loc_m = re.search(r'^(.+?)\s*Asking', spec_text)
                    if loc_m:
                        location = loc_m.group(1).strip()

                if "Asking" in link_text:
                    title = link_text.split("Asking")[0].strip()
                else:
                    title = link_text

                results.append({
                    "url": canonical,
                    "title": title,
                    "snippet": link_text,
                    "price_text": price_text,
                    "img_url": _sbl_image_url(a, sid),
                    "location": location,
                    "source": "SailboatListings",
                    "source_type": "broker",
                    "category": "Veleros Global",
                    "fields": {},  # no structured fields for sidebar listings
                })
                page_results += 1

            print(f"[SailboatListings] Page {page}: {page_results} listings (total: {len(results)})")
            if page_results == 0:
                break

        except Exception as e:
            print(f"[SailboatListings] Error page {page}: {e}")
            break

    print(f"[SailboatListings] Done — {len(results)} listings total")
    return results

def _sbl_parse_feet(val):
    """Parse the leading number of a feet value such as "30'" or "5.25'".

    Returns the number as a float, or ``None`` when ``val`` is empty, does not
    start with a number, or the digits do not form a valid float.
    """
    if not val:
        return None
    m = re.match(r'([\d.]+)', val)
    if not m:
        return None
    try:
        return float(m.group(1))
    except ValueError:
        return None


def scrape_and_extract_sailboatlistings(query: str, filters: dict, search_id: str, max_pages: int = 8):
    """Scrape SailboatListings and save each vessel as soon as it is parsed.

    Specs are taken from the structured ``fields`` of main listings, or pulled
    out of the snippet/title with regexes for sidebar listings. Each vessel is
    scored algorithmically and passed to ``save_vessel`` immediately;
    ``search_state['found']`` and ``search_state['log']`` are updated for every
    vessel that was saved.

    Args:
        query: Keyword forwarded to ``scrape_sailboatlistings``.
        filters: UI filters; ``min_loa`` (metres) and ``max_price`` are also
            re-applied here to the parsed values.
        search_id: Id of the search this run belongs to. Processing stops when
            ``search_state`` reports a different id or a cancellation.
        max_pages: Maximum number of result pages to scrape.

    Returns:
        None. A listing that raises while being processed is reported and
        skipped; the remaining listings are still handled.
    """
    print(f"[SBL-Thread] Starting SailboatListings extraction...")
    raw_results = scrape_sailboatlistings(query, filters, max_pages)

    if not raw_results:
        print("[SBL-Thread] No results from SailboatListings")
        return

    sbl_min_loa   = float(filters.get("min_loa")   or 0)
    sbl_max_price = float(filters.get("max_price") or 0)
    saved = 0

    for raw in raw_results:
        if search_state.get('search_id') != search_id or search_state.get('cancelled'):
            print("[SBL-Thread] Search cancelled — stopping")
            return

        try:
            snippet  = raw.get("snippet", "")
            title    = raw.get("title", "")
            fields   = raw.get("fields", {})  # structured fields from main listings
            src      = snippet + " " + title

            # ── Use structured fields directly when available (main listings) ──
            if fields:
                loa_ft   = _sbl_parse_feet(fields.get("Length"))
                beam_ft  = _sbl_parse_feet(fields.get("Beam"))
                draft_ft = _sbl_parse_feet(fields.get("Draft"))
                year_m   = re.search(r'(\d{4})', fields.get("Year", ""))
                price_r  = re.search(r'\$\s*([\d,]{3,})', fields.get("Asking", ""))
                location = fields.get("Location", "")
                hull_val = fields.get("Hull", "").lower()
            else:
                # Fallback: regex for sidebar/featured listings
                length_r = re.search(r'Length:\s*([\d.]+)',  src, re.IGNORECASE)
                beam_r   = re.search(r'Beam:\s*([\d.]+)',    src, re.IGNORECASE)
                draft_r  = re.search(r'Draft:\s*([\d.]+)',   src, re.IGNORECASE)
                year_m   = re.search(r'Year:\s*(\d{4})',     src, re.IGNORECASE)
                price_r  = re.search(r'(?:Asking|Price):?\s*\$\s*([\d,]{3,})', src, re.IGNORECASE)
                if not price_r:
                    price_r = re.search(r'\$\s*([\d,]{4,})', src)
                loa_ft   = _sbl_parse_feet(length_r.group(1)) if length_r else None
                beam_ft  = _sbl_parse_feet(beam_r.group(1)) if beam_r else None
                draft_ft = _sbl_parse_feet(draft_r.group(1)) if draft_r else None
                location = raw.get("location", "")
                hull_val = ""
                loc_r = re.search(r'Location:\s*([^\n\r]{3,60}?)(?:\s{2,}|$)', src, re.IGNORECASE)
                if loc_r:
                    location = loc_r.group(1).strip()
                hull_r2 = re.search(r'Hull:\s*([^\n\r]{3,50}?)(?:\s{2,}|$)', src, re.IGNORECASE)
                if hull_r2:
                    hull_val = hull_r2.group(1).lower()

            # Fallback: extract LOA from title e.g. "35' Pearson 35"
            if not loa_ft:
                tm = re.search(r'^(\d{2,3}(?:\.\d)?)\s*(?:\'|ft|feet)', title, re.IGNORECASE)
                if tm:
                    loa_ft = float(tm.group(1))
            loa_m     = round(loa_ft  * 0.3048, 1) if loa_ft  else None
            beam_m    = round(beam_ft * 0.3048, 1) if beam_ft else None
            draft_m   = round(draft_ft * 0.3048, 1) if draft_ft else None
            year      = int(year_m.group(1)) if year_m else None
            location  = location or raw.get("location", "")

            price_usd = None
            if price_r:
                try:
                    price_usd = float(price_r.group(1).replace(",", ""))
                except ValueError:
                    pass  # e.g. only commas matched; fall through to price_text
            if not price_usd and raw.get("price_text"):
                pm = re.search(r'[\d,]+', raw["price_text"].replace("$", ""))
                if pm:
                    try:
                        price_usd = float(pm.group().replace(",", ""))
                    except ValueError:
                        pass  # leave the price unknown

            # Skip only if absolutely no data
            if not loa_m and not year and not price_usd:
                continue

            # Apply filters (with a small tolerance for ft→m rounding and price)
            if sbl_min_loa and loa_m and loa_m < (sbl_min_loa - 0.15):
                continue
            if sbl_max_price and price_usd and price_usd > sbl_max_price * 1.01:
                continue

            # Hull normalisation
            hull_txt = hull_val
            hull = ("Fiberglass" if "fiber" in hull_txt or "glass" in hull_txt else
                    "Steel"      if "steel" in hull_txt else
                    "Aluminum"   if "alum"  in hull_txt else
                    "Wood"       if "wood"  in hull_txt else
                    "Composite"  if "comp"  in hull_txt else "Unknown")

            # Algorithmic score (fast, no AI)
            score = 50
            if loa_m:
                score += min(15, int((loa_m - 13) * 1.5)) if loa_m >= 13 else 0
            if year:
                score += min(10, max(0, (year - 1980) // 3))
            if price_usd and loa_m:
                price_per_ft = price_usd / (loa_m / 0.3048)
                if price_per_ft < 500:
                    score += 15
                elif price_per_ft < 1000:
                    score += 8

            flags = []
            if price_usd and loa_m and (price_usd / (loa_m / 0.3048)) < 600:
                flags.append("below_market")

            # Only mention values that are actually known, so the text never
            # contains "Noneft".
            desc_parts = [f"Velero {title}"]
            if loa_ft:
                desc_parts.append(f"LOA: {loa_ft}ft")
            if location:
                desc_parts.append(location)
            description = ". ".join(desc_parts).strip(".")

            data = {
                "name":        title or "SailboatListings boat",
                "vessel_type": "Sailboat",
                "loa_m":       loa_m,
                "beam_m":      beam_m,
                "draft_m":     draft_m,
                "year_built":  year,
                "hull":        hull,
                "propulsion":  "Sail",
                "status":      "active",
                "price_usd":   price_usd,
                "currency":    "USD",
                "location":    location,
                "country":     "US",
                "description": description,
                "flags":       flags,
                "score":       min(100, score),
                "images":      [raw["img_url"]] if raw.get("img_url") else [],
                "source_url":  raw["url"],
                "source_name": "SailboatListings",
            }

            vid = save_vessel(data)
            if vid > 0:
                search_state['found'] += 1
                saved += 1
                details = []
                if loa_ft:
                    details.append(f"{loa_ft}ft")
                if price_usd:
                    details.append(f"${price_usd:,.0f}")
                detail_txt = f" ({', '.join(details)})" if details else ""
                msg = f"✓ {title}{detail_txt} — SailboatListings"
                print(f"[SBL-Thread] {msg}")
                search_state['log'].append(msg)

        except Exception as e:
            print(f"[SBL-Thread] Error on {raw.get('title','?')}: {e}")

    print(f"[SBL-Thread] Done — {saved}/{len(raw_results)} vessels saved")

def stealth_fetch(url: str, max_chars: int = 3000) -> tuple:
    """Fetch a page with headless Chromium (Playwright), mimicking a human visitor.

    Techniques used:
    - Realistic viewport, locale, timezone and a random user agent
    - ``navigator.webdriver`` / plugins / ``window.chrome`` masking
    - Random human-like delays
    - Cookie banners accepted automatically when a known button exists
    - Two small scroll steps before the HTML is captured

    Args:
        url: Page to load.
        max_chars: Maximum length of the returned text.

    Returns:
        ``(text, image_urls)``: whitespace-normalised page text (truncated to
        ``max_chars``) and up to 12 image URLs. Any failure, including
        Playwright not being installed, is printed and yields ``("", [])``.
    """
    text = ""
    images = []
    try:
        from playwright.sync_api import sync_playwright
        from bs4 import BeautifulSoup as BS

        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--disable-dev-shm-usage',
                    '--no-sandbox',
                    '--disable-web-security',
                    '--disable-features=IsolateOrigins,site-per-process',
                ]
            )
            try:
                context = browser.new_context(
                    viewport={'width': 1366, 'height': 768},
                    user_agent=random.choice(USER_AGENTS),
                    locale='en-US',
                    timezone_id='America/New_York',
                    java_script_enabled=True,
                    ignore_https_errors=True,
                    extra_http_headers={
                        'Accept-Language': 'en-US,en;q=0.9',
                        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
                        'Cache-Control': 'no-cache',
                        'Pragma': 'no-cache',
                    }
                )
                # Remove webdriver property
                context.add_init_script("""
                Object.defineProperty(navigator, 'webdriver', {get: () => undefined});
                Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5]});
                window.chrome = {runtime: {}};
            """)
                page = context.new_page()

                # Navigate with realistic timeout
                page.goto(url, timeout=30000, wait_until='domcontentloaded')

                # Random delay like a human reading
                page.wait_for_timeout(random.randint(1500, 3000))

                # Accept cookies if button exists
                for selector in ['button:has-text("Accept")', 'button:has-text("Accept All")',
                                 '#onetrust-accept-btn-handler', '.cookie-accept']:
                    try:
                        page.click(selector, timeout=1000)
                        page.wait_for_timeout(500)
                        break
                    except Exception:
                        # Button not present on this page: try the next selector.
                        pass

                # Scroll down naturally
                page.evaluate("window.scrollBy(0, 300)")
                page.wait_for_timeout(random.randint(500, 1200))
                page.evaluate("window.scrollBy(0, 300)")
                page.wait_for_timeout(random.randint(300, 800))

                # Get content
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails.
                browser.close()

        soup = BS(html, 'html.parser')

        # Extract images — check all lazy-load attributes
        skip_words = ['logo', 'icon', 'avatar', 'banner', 'pixel', 'sprite',
                      'placeholder', 'blank', 'loading', 'spacer', '1x1']
        # "ad"/"ads" only counts as its own path/file token; a plain substring
        # test would also reject "uploads", "download", "thread", ...
        ad_token = re.compile(r'(?:^|[/_.\-])ads?(?:[/_.\-?]|$)')
        seen_imgs = set()
        for img in soup.find_all('img'):
            src = _extract_best_src(img)
            if not src or src in seen_imgs:
                continue
            src_l = src.lower()
            if any(s in src_l for s in skip_words) or ad_token.search(src_l):
                continue
            seen_imgs.add(src)
            images.append(src)
            if len(images) >= 12:
                break

        for tag in soup(['script', 'style', 'nav', 'footer', 'header', 'aside']):
            tag.decompose()
        text = ' '.join(soup.get_text(' ', strip=True).split())[:max_chars]

    except Exception as e:
        print(f"[Stealth] Error: {e}")
    return text, images


# Sites that need stealth scraping (Cloudflare protected).
# smart_fetch() routes a URL through stealth_fetch() when any of these strings
# occurs as a substring of the URL's host (with "www." removed).
STEALTH_REQUIRED = [
    'yachtworld.com', 'boats.com', 'boattrader.com',
    'rightboat.com', 'boat24.com', 'yachtall.com',
    'botentekoop.nl', 'leboncoin.fr', 'annoncesbateau.com',
    'thehulltruth.com', 'cruisersforum.com',
]

def smart_fetch(url: str, max_chars: int = 3000) -> tuple:
    """Fetch a page, choosing the stealth browser only for protected sites.

    The host part of ``url`` (without ``www.``) is compared against
    ``STEALTH_REQUIRED``; on a match the page is loaded via ``stealth_fetch``
    with ``max_chars``, otherwise via ``fetch_page_with_images``.

    Returns:
        Whatever the chosen fetcher returns: a ``(text, image_urls)`` tuple.
    """
    host = ""
    if '//' in url:
        host = url.split('/')[2].replace('www.', '')

    for protected_site in STEALTH_REQUIRED:
        if protected_site in host:
            print(f"[Fetch] Using stealth for {host}")
            return stealth_fetch(url, max_chars)

    return fetch_page_with_images(url)


def scrape_yachtworld(query: str, filters: dict, max_pages: int = 5) -> list:
    """Dedicated YachtWorld scraper driven by headless Chromium (Playwright).

    Builds a filtered browse URL from ``filters`` and walks its result pages,
    collecting every link to a boat detail page. ``query`` is not used: the
    URL is built from the filters alone.

    Args:
        query: Unused; kept so all scrapers share the same signature.
        filters: UI filters. Reads ``type``, ``min_loa`` (metres) and
            ``max_price``; missing or ``None`` values mean "no restriction".
        max_pages: Maximum number of result pages to load.

    Returns:
        A list of result dicts (``url``, ``title``, ``snippet``,
        ``price_text``, ``img_url``, ``location``, ``source``,
        ``source_type``, ``category``). Errors, including Playwright not
        being installed, are printed and end the run with the results
        collected so far.
    """
    results = []
    seen = set()

    # Build YachtWorld filtered URL; `or ""` keeps None values from crashing
    vessel_type = str(filters.get("type") or "").lower()
    min_loa = filters.get("min_loa") or ""
    max_price = filters.get("max_price") or ""

    if vessel_type in ["sailboat", "sail", "velero", ""]:
        base_url = "https://www.yachtworld.com/boats-for-sale/type-sail/class-sail-cruiser/"
    else:
        yw_type = "sail" if vessel_type == "yacht" else "power"
        base_url = f"https://www.yachtworld.com/boats-for-sale/type-{yw_type}/"
    if min_loa:
        ft = int(float(min_loa) / 0.3048)
        base_url += f"length-{ft}/"
    if max_price:
        base_url += f"price-0,{max_price}/"

    print(f"[YachtWorld] Scraping: {base_url}")

    try:
        from playwright.sync_api import sync_playwright
        from bs4 import BeautifulSoup as BS

        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=['--disable-blink-features=AutomationControlled', '--no-sandbox']
            )
            try:
                context = browser.new_context(
                    viewport={'width': 1920, 'height': 1080},
                    user_agent=random.choice(USER_AGENTS),
                    locale='en-US',
                    timezone_id='America/New_York',
                    ignore_https_errors=True,
                )
                context.add_init_script(
                    "Object.defineProperty(navigator, 'webdriver', {get: () => undefined});"
                    "window.chrome = {runtime: {}};"
                )

                for page_num in range(1, max_pages + 1):
                    if search_state.get('cancelled'):
                        break

                    page_url = base_url if page_num == 1 else base_url + f"?page={page_num}"
                    page = context.new_page()
                    try:
                        try:
                            page.goto(page_url, timeout=30000, wait_until='domcontentloaded')
                            page.wait_for_timeout(random.randint(2000, 4000))

                            # Scroll to load lazy content
                            for _ in range(3):
                                page.evaluate("window.scrollBy(0, 400)")
                                page.wait_for_timeout(random.randint(400, 800))

                            html = page.content()
                        finally:
                            # Close the tab whether or not loading succeeded.
                            page.close()

                        soup = BS(html, 'html.parser')

                        # YachtWorld listing cards
                        page_count = 0
                        for a in soup.find_all('a', href=True):
                            href = a['href']
                            if '/boat-details/' not in href and '/yacht/' not in href:
                                continue
                            if not href.startswith('http'):
                                href = 'https://www.yachtworld.com' + href
                            if href in seen:
                                continue
                            seen.add(href)
                            title = a.get_text(strip=True)
                            parent = a.find_parent() or a
                            ctx = parent.get_text(' ', strip=True)[:300]
                            img = ""
                            for im in parent.find_all('img'):
                                src = im.get('src') or im.get('data-src', '')
                                if src and 'rendered_yacht' in src:
                                    img = src
                                    break
                            results.append({
                                "url": href, "title": title,
                                "snippet": ctx, "price_text": "",
                                "img_url": img, "location": "",
                                "source": "YachtWorld",
                                "source_type": "broker",
                                "category": "Brokers Especializados",
                            })
                            page_count += 1

                        print(f"[YachtWorld] Page {page_num}: {page_count} listings")
                        if page_count == 0:
                            break

                        # Polite pause between pages
                        if page_num < max_pages:
                            polite_pause("YachtWorld")

                    except Exception as e:
                        print(f"[YachtWorld] Page {page_num} error: {e}")
                        break
            finally:
                # Always release the browser, even after an unexpected error.
                browser.close()
    except Exception as e:
        print(f"[YachtWorld] Fatal error: {e}")

    print(f"[YachtWorld] Total: {len(results)} listings")
    return results

def fetch_page_text(url: str, max_chars: int = 2000) -> str:
    """Download *url* and return its visible text, whitespace-collapsed.

    Script, style and page-chrome elements are removed before the text is
    extracted. The result is truncated to *max_chars* characters. Any
    failure (network error, non-200 status, parse error) yields "".
    """
    noise_tags = ["script","style","nav","footer","header","aside","noscript"]
    try:
        response = requests.get(url, headers=get_headers(), timeout=15, verify=False)
        if response.status_code == 200:
            document = BeautifulSoup(response.text, "html.parser")
            for node in document(noise_tags):
                node.decompose()
            words = document.get_text(" ", strip=True).split()
            return " ".join(words)[:max_chars]
    except Exception:
        # Best-effort helper: callers treat "" as "no text available".
        pass
    return ""

def _extract_best_src(img_tag) -> str:
    """Extract the best image URL from an <img> tag, handling lazy-load patterns."""
    candidates = [
        img_tag.get("src",""),
        img_tag.get("data-src",""),
        img_tag.get("data-lazy-src",""),
        img_tag.get("data-original",""),
        img_tag.get("data-lazy",""),
        img_tag.get("data-image",""),
        img_tag.get("data-full",""),
        img_tag.get("data-url",""),
        img_tag.get("data-hi-res-src",""),
    ]
    # Also check srcset — take the largest variant
    srcset = img_tag.get("srcset","") or img_tag.get("data-srcset","")
    if srcset:
        parts = [p.strip().split()[0] for p in srcset.split(",") if p.strip()]
        candidates.extend(parts)
    for c in candidates:
        c = c.strip()
        if c and c.startswith("http") and not c.startswith("data:"):
            return c
    return ""

def fetch_page_with_images(url: str) -> tuple:
    """Fetch page text AND images. Returns (text, [image_urls]).

    The text is the visible page text with script/style/page-chrome elements
    removed, whitespace-collapsed and capped at 3000 characters. At most 10
    image URLs are returned, de-duplicated, skipping obvious decoration
    (logos, icons, tracking pixels, ads) and images declared narrower than
    100px.

    On a non-200 response or any exception the text falls back to
    ``fetch_page_text(url)``.
    """
    text = ""
    images = []
    base_url = "/".join(url.split("/")[:3])
    try:
        r = requests.get(url, headers=get_headers(referer=url), timeout=18, verify=False)
        if r.status_code != 200:
            return fetch_page_text(url), []
        soup = BeautifulSoup(r.text, "html.parser")
        # Extract images before stripping tags
        skip_words = ["logo","icon","avatar","banner","pixel","track","advert","adserv",
                      "sprite","button","placeholder","blank","loading","spacer","1x1",
                      "transparent"]
        # "ad"/"ads" must match as a standalone token only. A plain substring
        # test would also reject "uploads", "download", "header", ... and
        # discard most legitimate photos.
        ad_token = re.compile(r'(?<![a-z])ads?(?![a-z])')
        seen_imgs = set()
        for img in soup.find_all("img"):
            src = _extract_best_src(img)
            if not src:
                # _extract_best_src may return only absolute URLs, so look at
                # the raw attributes for protocol- or site-relative paths.
                for attr in ("src", "data-src", "data-lazy-src", "data-original"):
                    raw = (img.get(attr) or "").strip()
                    if raw.startswith("/"):
                        src = raw
                        break
            if not src:
                continue
            # Normalise relative URLs
            if src.startswith("//"):
                src = "https:" + src
            elif src.startswith("/"):
                src = base_url + src
            if not src.startswith("http"):
                continue
            lowered = src.lower()
            if any(s in lowered for s in skip_words) or ad_token.search(lowered):
                continue
            if src in seen_imgs:
                continue
            try:
                w = int(str(img.get("width","0")).replace("px","") or 0)
                if 0 < w < 100:
                    continue
            except (ValueError, TypeError):
                # Non-numeric width such as "100%" — treat as unknown size.
                pass
            seen_imgs.add(src)
            images.append(src)
            if len(images) >= 10:
                break
        for tag in soup(["script","style","nav","footer","header","aside","noscript"]):
            tag.decompose()
        text = " ".join(soup.get_text(" ", strip=True).split())[:3000]
    except Exception:
        # Best effort: keep whatever images were collected, retry text only.
        text = fetch_page_text(url)
    return text, images

# ══════════════════════════════════════════════════════════════════════════════
# DEDICATED SOURCE SCRAPERS
# Each function handles one site's quirks. scrape_source_router dispatches here.
# ══════════════════════════════════════════════════════════════════════════════

def scrape_ebay(src: dict, query: str, filters: dict) -> list[dict]:
    """
    eBay Marine scraper — uses Playwright (Akamai blocks plain requests).
    Handles all eBay entries: Marine, Auction, Sail, Salvage, etc.

    New eBay layout (2024+) uses:
      - <a class="s-card__link">  for item links
      - Text title in nearby spans/divs
      - <img> with i.ebayimg.com CDN URLs (s-l500 quality)

    Args:
        src:     Source descriptor. ``search_url`` (containing a ``{query}``
                 placeholder) is required; ``name``, ``type`` and
                 ``category`` are copied into every result when present.
        query:   Free-text search. Repeated words are collapsed.
        filters: Optional filter dict (may be None or empty). Only ``type``
                 is read, to choose the eBay category id written into the
                 URL's ``_sacat`` parameter.

    Returns:
        A list of listing dicts with the keys url, title, snippet,
        price_text, img_url, location, source, source_type and category.
        Scraping errors are printed and result in an empty/partial list.
    """
    results = []
    seen    = set()
    # Resolve the label once: indexing src['name'] inside a log line or an
    # except handler would raise KeyError for a descriptor without a name.
    source_name = src.get("name", "eBay")

    raw_url = src.get("search_url", "")
    if not raw_url:
        return []

    clean_q = " ".join(dict.fromkeys(query.strip().split()))
    url = raw_url.replace("{query}", requests.utils.quote(clean_q))

    # ── Adjust eBay category based on vessel type filter ──────────────────────
    # 26429=All Boats  36431=Sailboats  36432=Powerboats  26430=PWC  63613=Kayaks
    # Tolerate filters=None and filters["type"]=None.
    vtype = str((filters or {}).get("type") or "").lower()
    EBAY_CAT = {
        "sailboat": "36431", "sail": "36431", "velero": "36431",
        "motor":    "36432", "motorboat": "36432", "yacht": "36432",
        "fishing":  "36432", "tug": "36432", "barge": "36432",
        "offshore": "36432", "ferry": "36432",
    }
    if vtype in EBAY_CAT:
        url = re.sub(r'_sacat=\d+', f'_sacat={EBAY_CAT[vtype]}', url)

    try:
        from playwright.sync_api import sync_playwright
        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"]
            )
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US",
                    timezone_id="America/New_York",
                    ignore_https_errors=True,
                )
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};"
                )
                page = context.new_page()
                try:
                    page.goto(url, timeout=30000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(1500, 2500))
                    # Scroll a bit to trigger lazy images
                    page.evaluate("window.scrollBy(0,600)")
                    page.wait_for_timeout(800)

                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] Playwright nav error: {e}")
                    html = ""
                finally:
                    try:
                        page.close()
                    except Exception:
                        pass
            finally:
                # Close the browser even if context/page creation failed.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")

        # ── New layout (2024+): li.s-card ─────────────────────────────────────
        cards = soup.find_all("li", class_="s-card")

        # ── Old layout fallback: li.s-item ────────────────────────────────────
        if not cards:
            return _parse_ebay_old_layout(soup, src)

        for card in cards:
            try:
                # Title + URL  —  a.s-card__link WITHOUT image-treatment class
                title_link = None
                for a in card.find_all("a", class_="s-card__link"):
                    if "image-treatment" in (a.get("class") or []):
                        continue
                    t = a.get_text(strip=True)
                    if t and not t.lower().startswith("shop on ebay"):
                        title_link = a
                        break
                if not title_link:
                    continue

                href = title_link.get("href", "")
                if "/itm/" not in href:
                    continue
                # Canonicalise to the bare item URL so tracking parameters
                # do not defeat de-duplication.
                m = re.search(r'(https?://(?:www\.)?ebay\.com/itm/\d+)', href)
                if not m:
                    continue
                href = m.group(1)
                if href in seen:
                    continue
                seen.add(href)

                # Clean title — strip eBay UI noise appended to link text
                title = title_link.get_text(strip=True)
                title = re.sub(r'\s*Opens in a new window or tab.*', '',
                               title, flags=re.IGNORECASE).strip()
                if not title:
                    # The link text was nothing but UI noise.
                    continue

                # Price ── .s-card__price
                price_tag = (card.find(class_="s-card__price") or
                             card.find(class_="s-item__price"))
                price = price_tag.get_text(strip=True) if price_tag else ""

                # Image ── img inside a.s-card__link.image-treatment
                img = ""
                img_link = card.find("a", class_="image-treatment")
                if img_link:
                    im = img_link.find("img")
                    if im:
                        raw = (_extract_best_src(im) or
                               im.get("src","") or im.get("data-src",""))
                        if raw:
                            # Upgrade whatever thumbnail size to s-l500.
                            img = re.sub(r's-l\d+\.(jpg|webp|jpeg)',
                                         r's-l500.\1', raw)
                # Fallback: any ebayimg.com src in the card
                if not img:
                    for im in card.find_all("img"):
                        raw = (_extract_best_src(im) or im.get("src",""))
                        if raw and "ebayimg.com" in raw:
                            img = re.sub(r's-l\d+\.(jpg|webp|jpeg)',
                                         r's-l500.\1', raw)
                            break

                # Location ── "Located in: XXX" — stop before "Delivery"
                location = ""
                card_text = card.get_text(" ", strip=True)
                lm = re.search(
                    r'[Ll]ocated in[:\s]+([A-Za-z][^,\|•\n$\d]{2,30})',
                    card_text)
                if lm:
                    loc_raw = lm.group(1).strip()
                    # Trim trailing noise like "Delivery or pickup..."
                    loc_raw = re.split(r'\s+[Dd]elivery|\s+[Ss]hipping',
                                       loc_raw)[0].strip()
                    location = loc_raw

                results.append({
                    "url":         href,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type",     "classifieds"),
                    "category":    src.get("category", "Clasificados USA"),
                })
            except Exception:
                # One malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings (new layout)")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def _parse_ebay_old_layout(soup, src: dict) -> list[dict]:
    """Fallback for the classic eBay li.s-item layout."""
    results = []
    seen    = set()
    for item in soup.find_all("li", class_="s-item"):
        try:
            link_tag = item.find("a", class_="s-item__link")
            if not link_tag: continue
            href = link_tag.get("href","")
            if "/itm/" not in href: continue
            m = re.search(r'(https?://www\.ebay\.com/itm/\d+)', href)
            if m: href = m.group(1)
            if href in seen: continue
            seen.add(href)

            title_tag = (item.find("span", class_="BOLD") or
                         item.find("div",  class_="s-item__title") or
                         item.find("span", class_="s-item__title"))
            title = (title_tag or link_tag).get_text(strip=True)
            if not title or title.lower().startswith("shop on ebay"): continue

            price_tag = item.find("span", class_="s-item__price")
            price = price_tag.get_text(strip=True) if price_tag else ""

            img = ""
            img_tag = item.find("img")
            if img_tag:
                img = (_extract_best_src(img_tag) or img_tag.get("src",""))
                if img: img = re.sub(r's-l\d+\.(jpg|webp|jpeg)', r's-l500.\1', img)

            loc_tag = (item.find("span", class_="s-item__location") or
                       item.find("span", class_="s-item__itemLocation"))
            location = ""
            if loc_tag:
                location = (loc_tag.get_text(strip=True)
                                   .replace("Located in: ","").strip())

            results.append({
                "url": href, "title": title, "snippet": f"{price} {location}".strip(),
                "price_text": price, "img_url": img, "location": location,
                "source": src.get("name","eBay"), "source_type": src.get("type","classifieds"),
                "category": src.get("category","Clasificados USA"),
            })
        except Exception:
            continue
    print(f"[{src.get('name','eBay')}] {len(results)} listings (old layout)")
    return results


def scrape_boattrader(src: dict, query: str, filters: dict) -> list[dict]:
    """
    BoatTrader scraper — uses Playwright (Cloudflare Turnstile on plain requests).

    Card structure (stable classes):
      li.lib-card                        — card root
      a[href^="/boat/...-<ID>/"]         — listing URL
      [class*=listingTitle]              — title element
      [class*=listingPrice]              — price element
      img                                — photo
      city, STATE ZIP pattern in text    — location

    Args:
        src:     Source descriptor. ``search_url`` (containing a ``{query}``
                 placeholder) is required; ``name``, ``type`` and
                 ``category`` are copied into every result when present.
        query:   Free-text search. Repeated words are collapsed.
        filters: Accepted for signature parity with the other scrapers;
                 not read by this function.

    Returns:
        A list of listing dicts with the keys url, title, snippet,
        price_text, img_url, location, source, source_type and category.
        Scraping errors are printed and result in an empty/partial list.
    """
    results = []
    seen    = set()
    # Resolve the label once: indexing src['name'] inside a log line or an
    # except handler would raise KeyError for a descriptor without a name.
    source_name = src.get("name", "BoatTrader")

    raw_url = src.get("search_url", "")
    if not raw_url:
        return []

    clean_q = " ".join(dict.fromkeys(query.strip().split()))
    url = raw_url.replace("{query}", requests.utils.quote(clean_q))

    try:
        from playwright.sync_api import sync_playwright
        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"]
            )
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US",
                    timezone_id="America/New_York",
                    ignore_https_errors=True,
                )
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};"
                )
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    # BoatTrader needs time to hydrate React and load listing cards
                    page.wait_for_timeout(random.randint(4000, 6000))
                    page.evaluate("window.scrollBy(0, 600)")
                    page.wait_for_timeout(1500)
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] Playwright nav error: {e}")
                    html = ""
                finally:
                    try:
                        page.close()
                    except Exception:
                        pass
            finally:
                # Close the browser even if context/page creation failed.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")

        # ── Card root: li.lib-card ─────────────────────────────────────────────
        cards = soup.find_all("li", class_="lib-card")
        if not cards:
            # Fallback: any element with lib-card class
            cards = soup.find_all(class_=re.compile(r'\blib-card\b'))

        for card in cards:
            try:
                # Link ── /boat/YEAR-MAKE-...-ID/
                link_tag = card.find(
                    "a", href=re.compile(r'^/boat/[\w-]+-\d+/$'))
                if not link_tag:
                    continue
                href = "https://www.boattrader.com" + link_tag["href"]
                if href in seen:
                    continue
                seen.add(href)

                # Title ── element whose class contains 'listingTitle'
                title_el = card.find(
                    class_=re.compile(r'listingTitle', re.I))
                if title_el:
                    title = title_el.get_text(strip=True)
                else:
                    # Fallback: build from URL slug (2026-catalina-34-123 → 2026 Catalina 34)
                    slug = link_tag["href"].strip("/").split("/")[-1]
                    title = slug.rsplit("-", 1)[0].replace("-", " ").title()
                if not title:
                    continue

                # Price ── element whose class contains 'listingPrice'
                price_el = card.find(
                    class_=re.compile(r'listingPrice', re.I))
                price = ""
                if price_el:
                    raw_price = price_el.get_text(" ", strip=True)
                    # Extract only the first dollar amount — ignore "/mo*" noise
                    pm = re.search(r'\$\s*([\d,]+)', raw_price)
                    if pm:
                        price = f"${pm.group(1)}"

                # Image ── first <img> with an absolute, non-SVG src
                img = ""
                for im in card.find_all("img"):
                    raw = (_extract_best_src(im) or
                           im.get("src","") or im.get("data-src",""))
                    if raw and raw.startswith("http") and not raw.endswith(".svg"):
                        img = raw
                        break

                # Location ── "City, ST ZIP" pattern in card text
                # Use listingCaption element if available (more precise)
                location = ""
                caption_el = card.find(class_=re.compile(r'listingCaption|listingLocation', re.I))
                search_text = caption_el.get_text(" ", strip=True) if caption_el else card.get_text(" ", strip=True)
                lm = re.search(
                    r'\b([A-Z][a-zA-Z\s]{2,20},\s+[A-Z]{2}(?:\s+\d{5})?)',
                    search_text)
                if lm:
                    location = lm.group(1).strip()

                results.append({
                    "url":         href,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type",     "broker"),
                    "category":    src.get("category", "Venta Especializada"),
                })
            except Exception:
                # One malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_apolloduck(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Apollo Duck scraper — plain requests + BS4 (no JS needed).

    Two card types on the listing page:
      Sidebar cards: div.eastSDFPPanel  → a.SidebarTitle, a.SidebarPrice, img
      Featured cards: div._FeatureAdPanel → a._FeatureTitle, span._FeaturePrice,
                       img, td._PanelSpecData (location)

    Listing URL pattern: https://www.apolloduck.com/boat/{slug}/{id}

    Args:
        src:     Source descriptor. ``search_url`` is used only when the
                 query is empty; ``name``, ``type`` and ``category`` are
                 copied into every result when present.
        query:   Free-text search. A trailing "for sale" phrase (English,
                 Spanish, French or German) is removed before searching.
        filters: Accepted for signature parity with the other scrapers;
                 not read by this function.

    Returns:
        A list of listing dicts with the keys url, title, snippet,
        price_text, img_url, location, source, source_type and category.
        Request/parse errors are printed and result in an empty/partial list.
    """
    from urllib.parse import urljoin

    site_root = "https://www.apolloduck.com/"
    results = []
    seen    = set()
    # Resolve the label once: indexing src['name'] inside a log line or an
    # except handler would raise KeyError for a descriptor without a name.
    source_name = src.get("name", "Apollo Duck")

    # Use Apollo Duck keyword search — returns results filtered by query.
    # Strip trailing "for sale" / "en venta" / "a vendre" since Apollo Duck
    # searches listing titles and those phrases rarely appear there.
    stripped_q = re.sub(
        r'\s*(for\s+sale|en\s+venta|à\s+vendre|zu\s+verkaufen)\s*$',
        '', query.strip(), flags=re.I).strip()
    clean_q = requests.utils.quote(stripped_q or query.strip())
    if clean_q:
        url = f"https://www.apolloduck.com/search.phtml?search={clean_q}&sr=1&q=1"
    else:
        raw_url = src.get("search_url", "") or "https://www.apolloduck.com/boats/used-boats-for-sale"
        url = raw_url.replace("{query}", clean_q)
    is_search = bool(clean_q)   # only featured cards are query-filtered

    try:
        headers = {
            "User-Agent": random.choice(USER_AGENTS),
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }
        resp = requests.get(url, headers=headers, timeout=20, allow_redirects=True)
        resp.raise_for_status()
        resp.encoding = resp.apparent_encoding or "utf-8"
        soup = BeautifulSoup(resp.text, "html.parser")

        def _parse_card(card, title_sel, price_sel, is_featured=False):
            """Common extraction for both card types."""
            title_el = card.select_one(title_sel)
            if not title_el:
                return
            title = title_el.get_text(strip=True)
            if not title:
                return

            # URL — from title link or image link
            href = (title_el.get("href", "") or "").strip()
            if not href:
                a = card.find("a", href=re.compile(r'/boat/'))
                href = a["href"].strip() if a else ""
            if not href:
                return
            # urljoin keeps absolute URLs and resolves "/path", "path" and
            # "//host/path" forms against the site root.
            full_url = urljoin(site_root, href)
            if full_url in seen:
                return
            seen.add(full_url)

            # Price
            price_el = card.select_one(price_sel)
            price = price_el.get_text(strip=True) if price_el else ""

            # Image
            img = ""
            for im in card.find_all("img"):
                raw = (im.get("src") or im.get("data-src") or
                       im.get("data-lazy-src") or "").strip()
                if raw.startswith("//"):
                    raw = "https:" + raw
                if raw.startswith("http") and not raw.endswith(".svg"):
                    img = raw
                    break
                # srcset fallback: first candidate. Split on "," before
                # whitespace so a descriptor-less entry does not keep a
                # trailing comma, and a blank srcset does not raise.
                ss = im.get("srcset", "") or ""
                first_entry = ss.split(",")[0].split()
                if first_entry:
                    img = urljoin(site_root, first_entry[0])
                    break

            # Location — only featured cards have it
            location = ""
            if is_featured:
                for lbl in card.select("td._PanelSpecLabel"):
                    if "location" in lbl.get_text(strip=True).lower():
                        loc_td = lbl.find_next_sibling("td")
                        if loc_td:
                            location = loc_td.get_text(strip=True)
                            break

            results.append({
                "url":         full_url,
                "title":       title[:120],
                "snippet":     f"{price} {location}".strip(),
                "price_text":  price,
                "img_url":     img,
                "location":    location,
                "source":      source_name,
                "source_type": src.get("type",     "broker"),
                "category":    src.get("category", "Venta Especializada"),
            })

        # Featured cards — always query-filtered on search results (~60-100/page)
        for card in soup.select("div._FeatureAdPanel"):
            _parse_card(card, "a._FeatureTitle", "span._FeaturePrice",
                        is_featured=True)

        # Sidebar cards — only when browsing a category (NOT on keyword search,
        # because sidebar is always the same 101 generic listings regardless of query)
        if not is_search:
            for card in soup.select("div.eastSDFPPanel"):
                _parse_card(card, "a.SidebarTitle", "a.SidebarPrice")

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_boatsdotcom(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Boats.com scraper — uses Playwright (same Boats Group infrastructure as BoatTrader).

    Two card types:
      Sponsored/real: li[data-listing-id]  → h2+div.year, div.price,
                       div.img-container img, div.country
      OEM specs:      li.enhanced.oem       → h2+div.year, div.price,
                       div.img-container img  (no location)

    Listing URL pattern: https://www.boats.com/{type}/{year}-{make}-{id}/

    Args:
        src:     Source descriptor. ``search_url`` (containing a ``{query}``
                 placeholder) is optional and defaults to the site search;
                 ``name``, ``type`` and ``category`` are copied into every
                 result when present.
        query:   Free-text search.
        filters: Accepted for signature parity with the other scrapers;
                 not read by this function.

    Returns:
        A list of listing dicts with the keys url, title, snippet,
        price_text, img_url, location, source, source_type and category.
        Scraping errors are printed and result in an empty/partial list.
    """
    results = []
    seen    = set()
    # Resolve the label once: indexing src['name'] inside a log line or an
    # except handler would raise KeyError for a descriptor without a name.
    source_name = src.get("name", "Boats.com")

    raw_url = src.get("search_url", "") or "https://www.boats.com/boats-for-sale/?query={query}"
    clean_q = requests.utils.quote(query.strip())
    url = raw_url.replace("{query}", clean_q)

    try:
        from playwright.sync_api import sync_playwright
        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"]
            )
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US",
                    timezone_id="America/New_York",
                    ignore_https_errors=True,
                )
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};"
                )
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(4000, 6000))
                    page.evaluate("window.scrollBy(0, 600)")
                    page.wait_for_timeout(1500)
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] Playwright nav error: {e}")
                    html = ""
                finally:
                    try:
                        page.close()
                    except Exception:
                        pass
            finally:
                # Close the browser even if context/page creation failed.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")

        # Site-relative links only: "^/" alone would also accept
        # protocol-relative "//host/..." hrefs and build a broken URL.
        relative_href = re.compile(r'^/(?!/)')

        def _extract_card(card, has_location=True):
            """Append one result for *card* unless it is unusable or a duplicate."""
            # URL
            a = card.find("a", href=relative_href)
            if not a:
                return
            href = "https://www.boats.com" + a["href"]
            if href in seen:
                return
            seen.add(href)

            # Title = year + model name
            year_el  = card.select_one("div.year")
            name_el  = card.select_one("h2")
            year  = year_el.get_text(strip=True)  if year_el  else ""
            name  = name_el.get_text(strip=True)  if name_el  else ""
            title = f"{year} {name}".strip() if year else name
            if not title:
                return

            # Price — first dollar amount, else the raw text (e.g. a
            # non-USD price or a "call for price" label), capped at 30 chars
            price_el = card.select_one("div.price")
            price    = ""
            if price_el:
                raw_p = price_el.get_text(" ", strip=True)
                pm    = re.search(r'\$\s*([\d,]+)', raw_p)
                price = f"${pm.group(1)}" if pm else raw_p[:30]

            # Image
            img = ""
            img_container = card.select_one("div.img-container")
            if img_container:
                im = img_container.find("img")
                if im:
                    img = (_extract_best_src(im) or im.get("src","")
                           or im.get("data-src",""))

            # Location
            location = ""
            if has_location:
                loc_el = card.select_one("div.country")
                if loc_el:
                    location = loc_el.get_text(strip=True)

            results.append({
                "url":         href,
                "title":       title[:120],
                "snippet":     f"{price} {location}".strip(),
                "price_text":  price,
                "img_url":     img,
                "location":    location,
                "source":      source_name,
                "source_type": src.get("type",     "broker"),
                "category":    src.get("category", "Venta Especializada"),
            })

        # Sponsored/real marketplace listings
        for card in soup.select("li[data-listing-id]"):
            _extract_card(card, has_location=True)

        # OEM spec sheets
        for card in soup.select("li.enhanced.oem"):
            _extract_card(card, has_location=False)

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_craigslist(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Craigslist boats scraper — Playwright page fetch + BS4 parsing.

    Card root : div[data-pid]  (class="cl-search-result")
    Title     : a.posting-title span.label
    URL       : a.main[href]  (full absolute URL with regional subdomain)
    Price     : span.priceinfo
    Location  : span.result-location
    Image     : img[data-image-index="0"] inside div.cl-gallery

    Args:
        src:     Source descriptor; ``name``, ``type`` and ``category`` are
                 copied into every result when present.
        query:   Free-text search, sent to each sampled city's boat section.
        filters: Accepted for signature parity with the other scrapers;
                 not read by this function.

    Returns:
        A list of listing dicts with the keys url, title, snippet,
        price_text, img_url, location, source, source_type and category,
        de-duplicated by URL across cities. Results vary between calls
        because the cities are sampled at random. Scraping errors are
        printed and result in an empty/partial list.
    """
    results = []
    seen    = set()
    # Resolve the label once: indexing src['name'] inside a log line or an
    # except handler would raise KeyError for a descriptor without a name.
    source_name = src.get("name", "Craigslist Boats")

    # Craigslist has no national search — sample from a list of major US
    # metro areas instead
    CITIES = ["sfbay", "losangeles", "seattle", "miami", "boston",
              "newyork", "chicago", "houston", "dallas", "denver",
              "phoenix", "atlanta", "portland", "sandiego", "tampa",
              "minneapolis", "stlouis", "nashville", "raleigh", "saltlakecity"]
    qs = requests.utils.quote(query.strip())

    try:
        from playwright.sync_api import sync_playwright
        all_html_parts = []
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US",
                    ignore_https_errors=True,
                )
                # Fetch 3 random cities to keep runtime reasonable
                for city in random.sample(CITIES, min(3, len(CITIES))):
                    city_url = f"https://{city}.craigslist.org/search/boa?query={qs}&sort=rel"
                    page = ctx.new_page()
                    try:
                        page.goto(city_url, timeout=25000, wait_until="domcontentloaded")
                        page.wait_for_timeout(2500)
                        all_html_parts.append(page.content())
                    except Exception as e:
                        # A single city failing is not fatal, but say so
                        # instead of silently returning fewer results.
                        print(f"[{source_name}] {city} fetch error: {e}")
                    finally:
                        try:
                            page.close()
                        except Exception:
                            pass
            finally:
                # Close the browser even if context/page creation failed.
                browser.close()

        if not all_html_parts:
            return []

        # Parse all city HTMLs
        for html in all_html_parts:
            soup = BeautifulSoup(html, "html.parser")
            for card in soup.find_all(attrs={"data-pid": True}):
                try:
                    # URL — from the a.main link (absolute)
                    a_main = card.find("a", class_="main")
                    if not a_main:
                        continue
                    listing_url = a_main.get("href", "")
                    if not listing_url or listing_url in seen:
                        continue
                    seen.add(listing_url)

                    # Title — from card title attr or span.label
                    title = card.get("title", "")
                    if not title:
                        span = card.find("span", class_="label")
                        title = span.get_text(strip=True) if span else ""
                    if not title:
                        continue

                    # Price
                    price_el = card.find("span", class_="priceinfo")
                    price    = price_el.get_text(strip=True) if price_el else ""

                    # Location
                    loc_el   = card.find("span", class_="result-location")
                    location = loc_el.get_text(strip=True) if loc_el else ""

                    # Image — first img with data-image-index="0"
                    img = ""
                    im  = card.find("img", attrs={"data-image-index": "0"})
                    if im:
                        img = im.get("src", "") or im.get("data-src", "")
                    if not img:
                        im = card.find("img")
                        if im:
                            img = im.get("src", "") or im.get("data-src", "")

                    results.append({
                        "url":         listing_url,
                        "title":       title[:120],
                        "snippet":     f"{price} {location}".strip(),
                        "price_text":  price,
                        "img_url":     img,
                        "location":    location,
                        "source":      source_name,
                        "source_type": src.get("type",     "classifieds"),
                        "category":    src.get("category", "Clasificados Generales"),
                    })
                except Exception:
                    # One malformed card must not abort the whole page.
                    continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_rightboat(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Rightboat scraper — Playwright (JS-rendered, Tailwind CSS).

    Card root : element with data-tracking-bound="true" that carries the href
    Image     : first img whose class contains "object-cover"
    Title     : image alt text, else first h1-h4, else rebuilt from the URL slug
    Price     : first <p> with a "font-bold" class holding a currency symbol,
                else the first currency amount found in the card text
    Location  : text of the element wrapping the fa-location icon

    Args:
        src: Source config; reads "search_url" (may contain "{query}"),
            "name", "type" and "category".
        query: Free-text search, URL-quoted into the search URL.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing URL. Failures are logged and the
        listings collected so far (possibly none) are returned.
    """
    # Resolve once so the log lines cannot raise KeyError on a nameless source.
    source_name = src.get("name", "Rightboat")
    results = []
    seen    = set()

    raw_url = (src.get("search_url", "")
               or "https://www.rightboat.com/boats-for-sale/?q={query}")
    url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

    try:
        from playwright.sync_api import sync_playwright

        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"]
            )
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US",
                    ignore_https_errors=True,
                )
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                )
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(5000, 7000))
                    # Scroll a little so lazily rendered cards get a chance to load.
                    page.evaluate("window.scrollBy(0, 800)")
                    page.wait_for_timeout(1500)
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] Playwright nav error: {e}")
                    html = ""
            finally:
                # Closing the browser also closes its contexts and pages.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        cards = soup.find_all(attrs={"data-tracking-bound": "true"})

        for card in cards:
            try:
                # URL — the card element itself carries the href
                href = card.get("href", "")
                if not href or "/boats-for-sale/" not in href:
                    continue
                listing_url = ("https://www.rightboat.com" + href
                               if href.startswith("/") else href)
                if listing_url in seen:
                    continue
                seen.add(listing_url)

                # Image — first object-cover img (main photo)
                img = ""
                im = card.find("img", class_=re.compile(r'object-cover'))
                if im:
                    img = im.get("src", "") or im.get("data-src", "")

                # Title — img alt first, then a heading, then the URL slug
                title = ""
                if im:
                    title = im.get("alt", "").strip()
                if not title:
                    h_el = card.find(re.compile(r'^h[1-4]$'))
                    title = h_el.get_text(strip=True) if h_el else ""
                if not title:
                    # Slug is /boats-for-sale/make/model/rbXXX. Only look at
                    # what follows the marker so absolute URLs do not leak
                    # the scheme/host into the title.
                    slug = href.split("/boats-for-sale/", 1)[1]
                    slug = slug.split("?", 1)[0].strip("/")
                    parts = slug.split("/")
                    if len(parts) >= 2:
                        title = " ".join(parts[:-1]).replace("-", " ").title()
                if not title:
                    continue

                # Price — bold <p> with a currency symbol, or regex fallback
                price = ""
                price_el = card.find("p", class_=re.compile(r'font-bold'))
                if price_el:
                    pt = price_el.get_text(strip=True)
                    if re.search(r'[\$£€]', pt):
                        price = pt
                if not price:
                    pm = re.search(r'[\$£€]\s*[\d,]+', card.get_text())
                    if pm:
                        price = pm.group(0)

                # Location — text of the element that wraps the pin icon
                location = ""
                pin_icon = card.find("i", class_=re.compile(r'fa-location'))
                if pin_icon:
                    row = pin_icon.find_parent()
                    if row:
                        location = row.get_text(" ", strip=True).strip()

                results.append({
                    "url":         listing_url,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type",     "broker"),
                    "category":    src.get("category", "Venta Especializada"),
                })
            except Exception:
                # Best effort: a malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_cooperss(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Cooper Capital Specialty Salvage (cooperss.com).
    Salvage / insurance-loss vessels.

    Structure (paired divs, same index):
      div.listing-thumb  — image + link (assets/detail/?name=marine&id=N)
      div.listing-detail — h5.blue (name) + table (Year,Size,Location,Min Bid…)

    Args:
        src: Source config; reads "name" and "category".
        query: Unused — the site home page is always fetched as-is.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing URL. Failures are logged and the
        listings collected so far (possibly none) are returned.
    """
    # Resolve once so the log lines cannot raise KeyError on a nameless source.
    source_name = src.get("name", "Cooper Salvage")
    results = []
    seen    = set()
    base    = "https://www.cooperss.com"

    def _is_original(el) -> bool:
        # Skip elements tagged "slick-cloned" so each listing is paired once.
        return "slick-cloned" not in (el.get("class") or [])

    try:
        headers = {"User-Agent": random.choice(USER_AGENTS),
                   "Accept-Language": "en-US,en;q=0.9"}
        resp = requests.get(base + "/", headers=headers, timeout=20)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        thumbs  = [el for el in soup.find_all(class_="listing-thumb")
                   if _is_original(el)]
        details = [el for el in soup.find_all(class_="listing-detail")
                   if _is_original(el)]

        # Thumb and detail blocks are matched purely by position.
        for thumb, detail in zip(thumbs, details):
            try:
                # URL
                a = thumb.find("a", href=True)
                if not a:
                    continue
                href = a["href"]
                if not href.startswith("http"):
                    href = base + "/" + href.lstrip("/")
                if href in seen:
                    continue
                seen.add(href)

                # Image
                img_tag = thumb.find("img")
                img = img_tag.get("src", "") if img_tag else ""
                if img and not img.startswith("http"):
                    img = base + "/" + img.lstrip("/")

                # Title — h5.blue (vessel name); nested links (video button)
                # are removed first so their text does not leak into the name.
                h5 = detail.find("h5", class_="blue")
                if h5 is None:
                    continue
                for tag in h5.find_all("a"):
                    tag.decompose()
                title = h5.get_text(strip=True)
                if not title:
                    continue

                # Detail table: every two-cell row becomes label -> value.
                rows = {}
                for tr in detail.find_all("tr"):
                    tds = tr.find_all("td")
                    if len(tds) == 2:
                        rows[tds[0].get_text(strip=True)] = tds[1].get_text(strip=True)

                year      = rows.get("Year", "")
                size      = rows.get("Size", "")
                location  = rows.get("Location", "")
                min_bid   = rows.get("Minimum Bid", "")
                loss_type = rows.get("Type of Loss", "")
                deadline  = rows.get("Bid Deadline", "")

                if year:
                    title = f"{year} {title}".strip()

                # Tolerate a cell that already carries the currency symbol
                # so the output never reads "$$".
                amount = min_bid.lstrip("$").strip()
                price = f"Min Bid ${amount}" if amount else ""

                snippet_parts = [
                    part for part in (
                        price,
                        loss_type,
                        location,
                        f"Deadline: {deadline}" if deadline else "",
                    ) if part
                ]

                results.append({
                    "url":         href,
                    "title":       title[:120],
                    "snippet":     " | ".join(snippet_parts),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "size_m":      size,
                    "source":      source_name,
                    "source_type": "salvage",
                    "category":    src.get("category", "Salvage & Wrecks"),
                })
            except Exception:
                # Best effort: a malformed pair must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_inautia(src: dict, query: str, filters: dict) -> list[dict]:
    """
    iNautia scraper — same Boats Group platform as BoatTrader/Boats.com.

    Card: div[data-grid-index]
    Link: a.grid-listing-link[href]  →  /boat/YEAR-MAKE-MODEL-ID/
    Title: [class*=listingTitle], else rebuilt from the URL slug
    Price: data-ssr-meta="make|type|len||price_eur"  (5th field),
           else the first currency amount in [class*=listingPrice]
    Location: [class*=listingBody]
    Image: first http(s), non-SVG img in the card

    Args:
        src: Source config; reads "search_url" (may contain "{query}"),
            "name", "type" and "category".
        query: Free-text search, URL-quoted into the search URL.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing URL. Failures are logged and the
        listings collected so far (possibly none) are returned.
    """
    # Resolve once so the log lines cannot raise KeyError on a nameless source.
    source_name = src.get("name", "iNautia")
    results = []
    seen    = set()

    raw_url = (src.get("search_url", "")
               or "https://www.inautia.com/boats/?q={query}")
    url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

    try:
        from playwright.sync_api import sync_playwright

        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"])
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US", ignore_https_errors=True)
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};")
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(4000, 6000))
                    page.evaluate("window.scrollBy(0,600)")
                    page.wait_for_timeout(1500)
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] nav error: {e}")
                    html = ""
            finally:
                # Closing the browser also closes its contexts and pages.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        cards = soup.find_all(attrs={"data-grid-index": True})

        for card in cards:
            try:
                link_tag = card.find("a", class_=re.compile(r'grid-listing-link'))
                if not link_tag:
                    continue
                href = link_tag.get("href", "")
                if not href:
                    continue
                full_url = ("https://www.inautia.com" + href
                            if href.startswith("/") else href)
                if full_url in seen:
                    continue
                seen.add(full_url)

                # Title
                title_el = card.find(class_=re.compile(r'listingTitle', re.I))
                title = title_el.get_text(strip=True) if title_el else ""
                if not title:
                    # Drop the trailing "-ID" segment of the slug.
                    slug = href.strip("/").split("/")[-1]
                    title = slug.rsplit("-", 1)[0].replace("-", " ").title()
                if not title:
                    continue

                # Price from data-ssr-meta (make|type|length||price_eur)
                price = ""
                meta = link_tag.get("data-ssr-meta", "")
                if meta:
                    parts = meta.split("|")
                    if len(parts) >= 5 and parts[4]:
                        try:
                            price = f"€{int(float(parts[4])):,}"
                        except (ValueError, OverflowError):
                            # Non-numeric or infinite value: fall through to
                            # the visible price instead of losing the card.
                            pass
                if not price:
                    price_el = card.find(class_=re.compile(r'listingPrice', re.I))
                    if price_el:
                        raw_p = price_el.get_text(" ", strip=True)
                        pm = re.search(r'[\$€£]\s*[\d,]+', raw_p)
                        price = pm.group(0) if pm else ""

                # Location — taken verbatim from the listingBody element
                loc_el = card.find(class_=re.compile(r'listingBody', re.I))
                location = loc_el.get_text(" ", strip=True) if loc_el else ""

                # Image — first absolute, non-SVG candidate
                img = ""
                for im in card.find_all("img"):
                    raw = (_extract_best_src(im) or im.get("src", "")
                           or im.get("data-src", ""))
                    if raw and raw.startswith("http") and not raw.endswith(".svg"):
                        img = raw
                        break

                results.append({
                    "url":         full_url,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type",     "broker"),
                    "category":    src.get("category", "Venta Especializada"),
                })
            except Exception:
                # Best effort: a malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_boat24(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Boat24 scraper — European marketplace, rendered with Playwright.

    Card: div whose class matches "blurb" and that has a data-link attribute
    Link: data-link attr (base64 → ROT13 → URL), else first <a> with /en/ href
    Title: h3.blurb__title or h2.blurb__title
    Price: p.blurb__price
    Location: p.blurb__location
    Image: first img with an absolute data-src / data-lazy / srcset / src
           that is not the alpha.gif placeholder

    Args:
        src: Source config; reads "search_url", "name", "type", "category".
            The query is only applied if "search_url" contains "{query}";
            the built-in default URL has no such placeholder.
        query: Free-text search, URL-quoted into the search URL.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing URL. Failures are logged and the
        listings collected so far (possibly none) are returned.
    """
    import base64

    # Resolve once so the log lines cannot raise KeyError on a nameless source.
    source_name = src.get("name", "Boat24")
    results = []
    seen    = set()
    BASE    = "https://www.boat24.com"

    raw_url = (src.get("search_url", "")
               or "https://www.boat24.com/en/usedboats/")
    url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

    _rot13 = str.maketrans(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
        "NOPQRSTUVWXYZABCDEFGHIJKLMnopqrstuvwxyzabcdefghijklm")

    def _decode_link(encoded: str) -> str:
        """Undo the base64 + ROT13 obfuscation; return "" if it cannot be decoded."""
        try:
            rot = base64.b64decode(encoded).decode("utf-8", errors="ignore")
            return rot.translate(_rot13)
        except Exception:
            return ""

    try:
        from playwright.sync_api import sync_playwright

        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"])
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US", ignore_https_errors=True)
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});")
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(4000, 6000))
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] nav error: {e}")
                    html = ""
            finally:
                # Closing the browser also closes its contexts and pages.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        cards = soup.find_all("div", class_=re.compile(r'\bblurb\b'))
        for card in cards:
            try:
                encoded = card.get("data-link", "")
                if not encoded:
                    continue
                listing_url = _decode_link(encoded)
                if not listing_url or not listing_url.startswith("http"):
                    # Decoding failed — fall back to a regular link in the card
                    a = card.find("a", href=re.compile(r'/en/'))
                    if a:
                        listing_url = (BASE + a["href"] if a["href"].startswith("/")
                                       else a["href"])
                    else:
                        continue
                if listing_url in seen:
                    continue
                seen.add(listing_url)

                title_el = card.select_one("h3.blurb__title, h2.blurb__title")
                title = title_el.get_text(strip=True) if title_el else ""
                if not title:
                    continue

                price_el = card.select_one("p.blurb__price")
                price = price_el.get_text(strip=True) if price_el else ""

                loc_el = card.select_one("p.blurb__location")
                location = ""
                if loc_el:
                    location = re.sub(r'\s+', ' ',
                                      loc_el.get_text(" ", strip=True)).strip()

                # Image — first usable candidate among the card's <img> tags
                img = ""
                for im in card.find_all("img"):
                    # An absent/empty srcset splits to []; indexing it blindly
                    # would raise and make the whole listing be skipped.
                    srcset_parts = im.get("srcset", "").split()
                    raw = (im.get("data-src") or im.get("data-lazy")
                           or (srcset_parts[0] if srcset_parts else "")
                           or im.get("src", ""))
                    if raw and raw.startswith("http") and "/alpha.gif" not in raw:
                        img = raw
                        break

                results.append({
                    "url":         listing_url,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type",     "broker"),
                    "category":    src.get("category", "Venta Especializada"),
                })
            except Exception:
                # Best effort: a malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_facebook_marketplace(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Facebook Marketplace scraper.

    Requires a saved session file: fb_session.json (cookies from a logged-in
    session) next to this module. If it is not found, a single instructional
    result with source_type "setup_required" is returned instead.

    Setup: POST /api/fb-setup  →  launches a visible browser for the user to log in.
    Session file is saved automatically after login.

    Args:
        src: Source config; only "category" is read.
        query: Free-text search, URL-quoted into the Marketplace search URL.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing (keyed by the canonical
        ".../marketplace/item/<id>/" URL), or the setup notice described
        above. Failures are logged and the listings collected so far
        (possibly none) are returned.
    """
    import json as _json
    results = []
    seen    = set()

    SESSION_FILE = os.path.join(os.path.dirname(__file__), "fb_session.json")

    # Check for the session first: without it nothing else is needed, and the
    # notice must be returned regardless of what the query looks like.
    if not os.path.exists(SESSION_FILE):
        return [{
            "url":         "https://www.facebook.com/marketplace/",
            "title":       "⚠ Facebook Marketplace — Configuración requerida",
            "snippet":     ("Para habilitar Facebook Marketplace, ve a Fuentes y "
                            "haz clic en 'Configurar FB'. Solo se necesita una vez."),
            "price_text":  "",
            "img_url":     "",
            "location":    "",
            "source":      "Facebook Marketplace",
            "source_type": "setup_required",
            "category":    src.get("category", "Clasificados Generales"),
        }]

    quoted_query = requests.utils.quote((query or "").strip())
    SEARCH_URL   = ("https://www.facebook.com/marketplace/search/"
                    f"?query={quoted_query}"
                    "&deliveryMethod=local_pick_up")

    try:
        from playwright.sync_api import sync_playwright
        with open(SESSION_FILE, encoding="utf-8") as f:
            cookies = _json.load(f)

        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--disable-blink-features=AutomationControlled",
                      "--no-sandbox", "--disable-dev-shm-usage"])
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US", ignore_https_errors=True)
                context.add_cookies(cookies)
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};")
                page = context.new_page()
                try:
                    page.goto(SEARCH_URL, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(5000, 7000))
                    page.evaluate("window.scrollBy(0,800)")
                    page.wait_for_timeout(2000)
                    html = page.content()
                except Exception as e:
                    print(f"[Facebook Marketplace] nav error: {e}")
                    html = ""
            finally:
                # Closing the browser also closes its contexts and pages.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")

        # Each listing is located through its <a href="/marketplace/item/ID...">
        listing_links = soup.find_all(
            "a", href=re.compile(r'/marketplace/item/\d+'))

        for a in listing_links:
            try:
                href = a.get("href", "")
                full_url = ("https://www.facebook.com" + href
                            if href.startswith("/") else href)
                # Canonicalise to ".../item/<id>/" whether or not the id is
                # followed by a slash, so tracking params never defeat dedup.
                full_url = re.sub(r'(/marketplace/item/\d+).*', r'\1/', full_url)
                if full_url in seen:
                    continue
                seen.add(full_url)

                # Title — span or div with listing title
                title_el = (a.find("span", style=re.compile(r'line-clamp'))
                             or a.find("span", class_=re.compile(r'x1lliihq|xt0psk2'))
                             or a.find("div", class_=re.compile(r'x1lliihq')))
                title = title_el.get_text(strip=True) if title_el else ""
                if not title:
                    # Try aria-label on the card
                    title = a.get("aria-label", "")
                if not title:
                    continue

                # Non-empty span texts, in document order (extracted once)
                span_texts = [t for t in (s.get_text(strip=True)
                                          for s in a.find_all("span")) if t]

                # Price — first span that starts with a currency amount
                price = ""
                for t in span_texts:
                    if re.match(r'[\$£€][\d,]+', t):
                        price = t
                        break

                # Image
                img = ""
                im = a.find("img")
                if im:
                    img = im.get("src", "") or im.get("data-src", "")

                # Location — heuristic: first span that looks like
                # "City, ST", or any other non-price, non-title text
                location = ""
                for s in span_texts:
                    if s == title:
                        continue
                    if re.search(r'[A-Z][a-z]+,\s+[A-Z]{2}', s) or (
                            not re.match(r'[\$£€\d]', s) and len(s) > 3 and s != price):
                        location = s
                        break

                results.append({
                    "url":         full_url,
                    "title":       title[:120],
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      "Facebook Marketplace",
                    "source_type": "classifieds",
                    "category":    src.get("category", "Clasificados Generales"),
                })
            except Exception:
                # Best effort: a malformed card must not abort the whole page.
                continue

        print(f"[Facebook Marketplace] {len(results)} listings")

    except Exception as e:
        print(f"[Facebook Marketplace] Error: {e}")

    return results


def scrape_hmy(src: dict, query: str, filters: dict) -> list[dict]:
    """
    HMY Yachts — queries Algolia directly (app ECN3QX1VBL).
    Fast, no Playwright needed.

    Args:
        src: Source config; reads "name", "type" and "category".
        query: Free-text search passed as the Algolia "query" parameter.
        filters: Unused by this scraper.

    Returns:
        One dict per unique hit URL (at most 40, the requested page size).
        Hits without a URL or without any title information are skipped.
        Failures are logged and the listings collected so far (possibly
        none) are returned.
    """
    import urllib.parse

    results = []
    seen    = set()

    ALGOLIA_URL = "https://ecn3qx1vbl-dsn.algolia.net/1/indexes/*/queries"
    # NOTE(review): presumably the site's public search-only key — confirm
    # before treating it as non-sensitive.
    ALGOLIA_HEADERS = {
        "x-algolia-application-id": "ECN3QX1VBL",
        "x-algolia-api-key":        "d86ccdd9ac0292ba76ee4755693d0c10",
        "content-type":             "application/json",
        "referer":                  "https://www.hmy.com/",
        "user-agent":               random.choice(USER_AGENTS),
    }

    params_str = urllib.parse.urlencode({
        "filters":       "SalesStatus:Active",
        "facetFilters":  '[["SaleClassCode:used"]]',
        "query":         query,
        "hitsPerPage":   40,
        "page":          0,
    })

    payload = {
        "requests": [{
            "indexName": "production_oceanelite_yachts",
            "params":    params_str,
        }]
    }

    try:
        resp = requests.post(ALGOLIA_URL, json=payload, headers=ALGOLIA_HEADERS, timeout=15)
        resp.raise_for_status()
        data = resp.json()
        # A missing or empty "results" list simply means there are no hits.
        result_sets = data.get("results") or [{}]
        hits = result_sets[0].get("hits", [])

        for h in hits:
            try:
                slug    = h.get("Slug", "")
                url     = h.get("URL") or (f"https://www.hmy.com/yachts-for-sale/{slug}" if slug else "")
                if not url or url in seen:
                    continue
                seen.add(url)

                name    = h.get("BoatName", "")
                # Join only the fields that are present so a null/missing
                # value never shows up as "None" or as a double space.
                title   = " ".join(
                    str(part)
                    for part in (h.get("ModelYear"), h.get("MakeStringExact"),
                                 h.get("ModelExact"))
                    if part
                )
                if name:
                    title = f'{title} "{name}"'.strip()
                if not title:
                    continue

                price_raw = h.get("NormPrice", 0)
                try:
                    price_text = f"${int(price_raw):,}" if price_raw else ""
                except (TypeError, ValueError, OverflowError):
                    # Keep the listing even when the price cannot be parsed.
                    price_text = ""

                length  = h.get("NominalLengthNormalized", "")
                country = h.get("country") or "USA"
                location = f"{length}ft · {country}" if length else country

                img = h.get("mainImage", "")

                results.append({
                    "url":         url,
                    "title":       title[:120],
                    "snippet":     f"{price_text} · {location}".strip(" ·"),
                    "price_text":  price_text,
                    "img_url":     img,
                    "location":    country,
                    "source":      src.get("name", "HMY Yachts"),
                    "source_type": src.get("type", "broker"),
                    "category":    src.get("category", "Venta Especializada"),
                })
            except Exception:
                # Best effort: a malformed hit must not abort the whole batch.
                continue

        print(f"[{src.get('name','HMY')}] {len(results)} listings")

    except Exception as e:
        print(f"[{src.get('name','HMY')}] Error: {e}")

    return results


def scrape_boatcrazy(src: dict, query: str, filters: dict) -> list[dict]:
    """
    BoatCrazy — US aggregator with 105+ listings per page.

    Card: div.boat-list-item (fallback: nearest boat/list/item/card ancestor
          of each listing link)
    Link: a[href*="/boat-for-sale/"]
    Title: h3, else first aria-label in the card, else the URL slug
    Price: first "$1,234"-style amount in a price element, else in the card
    Location: .location element, else first "City, ST" pattern in the card
    Image: div.item-img img or div.list-itemimg img, else first img
    URL pattern: /boat-for-sale/YEAR-MAKE-LOCATION-id

    Args:
        src: Source config; reads "search_url" (may contain "{query}"),
            "name", "type" and "category".
        query: Free-text search, URL-quoted into the search URL.
        filters: Unused by this scraper.

    Returns:
        One dict per unique listing URL. Failures are logged and the
        listings collected so far (possibly none) are returned.
    """
    from urllib.parse import urljoin

    # Resolve once so the log lines cannot raise KeyError on a nameless source.
    source_name = src.get("name", "BoatCrazy")
    results = []
    seen    = set()

    raw_url = src.get("search_url", "") or "https://boatcrazy.com/boats?q={query}"
    url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

    try:
        from playwright.sync_api import sync_playwright

        html = ""
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True,
                args=["--disable-blink-features=AutomationControlled","--no-sandbox"])
            try:
                context = browser.new_context(
                    viewport={"width": 1280, "height": 900},
                    user_agent=random.choice(USER_AGENTS),
                    locale="en-US", ignore_https_errors=True)
                context.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};")
                page = context.new_page()
                try:
                    page.goto(url, timeout=35000, wait_until="domcontentloaded")
                    page.wait_for_timeout(random.randint(4000, 6000))
                    html = page.content()
                except Exception as e:
                    print(f"[{source_name}] nav error: {e}")
                    html = ""
            finally:
                # Closing the browser also closes its contexts and pages.
                browser.close()

        if not html:
            return []

        soup = BeautifulSoup(html, "html.parser")
        cards = soup.find_all(class_="boat-list-item")
        if not cards:
            # Fallback: derive cards from the listing links themselves.
            # Track element identity so each container is collected once
            # without comparing whole subtrees against each other.
            cards = []
            collected = set()
            for a in soup.find_all("a", href=re.compile(r'/boat-for-sale/')):
                parent = a.find_parent(class_=re.compile(r'boat|list|item|card'))
                if parent is not None and id(parent) not in collected:
                    collected.add(id(parent))
                    cards.append(parent)

        for card in cards:
            try:
                a = card.find("a", href=re.compile(r'/boat-for-sale/'))
                if not a:
                    continue
                href = a["href"]
                # urljoin leaves absolute URLs untouched and also copes with
                # relative hrefs that lack a leading slash.
                full_url = urljoin("https://boatcrazy.com", href)
                if full_url in seen:
                    continue
                seen.add(full_url)

                # Title — prefer h3, then aria-label, then slug
                title = ""
                h3 = card.find("h3")
                if h3:
                    title = h3.get_text(strip=True)[:80]
                if not title:
                    al = card.find(attrs={"aria-label": True})
                    if al:
                        title = al["aria-label"][:80]
                if not title:
                    slug = href.rstrip("/").split("/")[-1]
                    slug_clean = re.sub(r'-id[-\w]*$', '', slug).replace("-", " ")
                    title = slug_clean.title()[:80]
                if not title:
                    continue

                card_text = card.get_text(" ", strip=True)

                # Price
                price = ""
                price_el = card.find(class_=re.compile(r'\bprice\b'))
                if price_el:
                    pm = re.search(r'\$[\d,]+', price_el.get_text())
                    if pm:
                        price = pm.group(0)
                if not price:
                    pm = re.search(r'\$[\d,]+', card_text)
                    if pm:
                        price = pm.group(0)

                # Location
                location = ""
                loc_el = card.find(class_="location")
                if loc_el:
                    location = loc_el.get_text(strip=True)[:60]
                if not location:
                    lm = re.search(r'([A-Z][a-z]+(?:\s[A-Z][a-z]+)?,\s*[A-Z]{2})', card_text)
                    if lm:
                        location = lm.group(1)

                # Image
                img = ""
                img_div = card.find(class_=re.compile(r'item.?img|list.?item.?img'))
                if img_div:
                    im = img_div.find("img")
                    if im:
                        img = (_extract_best_src(im) or im.get("src", "")
                               or im.get("data-src", ""))
                if not img:
                    im = card.find("img")
                    if im:
                        img = im.get("src", "") or im.get("data-src", "")

                results.append({
                    "url":         full_url,
                    "title":       title,
                    "snippet":     f"{price} {location}".strip(),
                    "price_text":  price,
                    "img_url":     img,
                    "location":    location,
                    "source":      source_name,
                    "source_type": src.get("type", "classifieds"),
                    "category":    src.get("category", "Clasificados Generales"),
                })
            except Exception:
                # Best effort: a malformed card must not abort the whole page.
                continue

        print(f"[{source_name}] {len(results)} listings")

    except Exception as e:
        print(f"[{source_name}] Error: {e}")

    return results


def scrape_denison(src: dict, query: str, filters: dict) -> list:
    """
    Scrape the Denison Yachting search-results page for *query*.

    A single GET is issued to ``/yachts-for-sale/?search=<query>`` and every
    ``.boat-item`` card is parsed:

    URL:      first anchor whose href matches ``/yachts-for-sale/<slug>``
    Title:    ``.boat_length`` text + remaining ``<h2>`` text + quoted
              ``<span>`` text; falls back to the anchor's ``title`` attribute
    Price:    ``.boat_price[data-price]`` with ``[data-default_currency]``
    Location: ``<h3>`` text
    Image:    ``.news_pic img``

    ``filters`` is accepted but not used by this scraper.  Request and
    parsing errors are printed; whatever was collected so far is returned.
    """
    site_root = "https://www.denisonyachtsales.com"
    search_url = f"{site_root}/yachts-for-sale/?search={requests.utils.quote(query.strip())}"

    listing_href = re.compile(r'/yachts-for-sale/[a-z][a-z0-9-]{4,}$', re.I)
    symbols = {"USD": "$", "EUR": "€", "GBP": "£", "AUD": "A$"}

    listings = []
    visited = set()

    try:
        response = requests.get(
            search_url,
            headers={"User-Agent": random.choice(USER_AGENTS)},
            timeout=20,
            verify=False,
        )
        response.raise_for_status()
        document = BeautifulSoup(response.text, "html.parser")

        for card in document.find_all(class_="boat-item"):
            try:
                link = card.find("a", href=listing_href)
                if not link:
                    continue

                href = link["href"]
                if href.startswith("http"):
                    listing_url = href
                else:
                    listing_url = site_root + href
                if listing_url in visited:
                    continue
                visited.add(listing_url)

                # Title: the length and name nodes are detached from the <h2>
                # so that the leftover text is just make/model/year.
                heading = card.find("h2")
                if heading:
                    length_txt = ""
                    length_tag = heading.find(class_="boat_length")
                    if length_tag:
                        length_txt = length_tag.get_text(strip=True)
                        length_tag.extract()

                    name_txt = ""
                    name_tag = heading.find("span")
                    if name_tag:
                        name_txt = name_tag.get_text(strip=True)
                        name_tag.extract()

                    remainder = " ".join(heading.get_text(" ", strip=True).split())
                    pieces = [length_txt, remainder]
                    if name_txt:
                        pieces.append(f'"{name_txt}"')
                    title = " ".join(filter(None, pieces))[:100]
                else:
                    title = (link.get("title", "") or "")[:100]
                if not title:
                    continue

                # Price: numeric attribute preferred, visible text as fallback.
                price_text = ""
                price_tag = card.find(class_="boat_price")
                if price_tag:
                    raw_price = price_tag.get("data-price", "")
                    currency = price_tag.get("data-default_currency", "USD")
                    if raw_price:
                        prefix = symbols.get(currency, currency + " ")
                        try:
                            amount = int(raw_price)
                        except ValueError:
                            price_text = price_tag.get_text(strip=True)[:30]
                        else:
                            price_text = f"{prefix}{amount:,}"

                # Location
                where_tag = card.find("h3")
                location = where_tag.get_text(strip=True)[:80] if where_tag else ""

                # Image
                img = ""
                picture_box = card.find(class_="news_pic")
                picture = picture_box.find("img") if picture_box else None
                if picture:
                    img = picture.get("src", "") or picture.get("data-src", "")

                listings.append({
                    "url":         listing_url,
                    "title":       title,
                    "snippet":     f"{price_text} · {location}".strip(" ·"),
                    "price_text":  price_text,
                    "img_url":     img,
                    "location":    location,
                    "source":      src.get("name", "Denison Yachting"),
                    "source_type": src.get("type", "broker"),
                    "category":    src.get("category", "Brokers USA"),
                })
            except Exception:
                # Best-effort: a malformed card must not abort the page.
                continue

        print(f"[{src.get('name','Denison')}] {len(listings)} listings")

    except Exception as e:
        print(f"[{src.get('name','Denison')}] Error: {e}")

    return listings


# =============================================================================
# SCRAPER: GovPlanet + IronPlanet  (Ritchie Bros family — same HTML .sr_lot)
# =============================================================================
def scrape_govplanet(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape a GovPlanet / IronPlanet category page (Ritchie Bros family).

    Fetches ``src["search_url"]`` as-is and parses listing cards matched by
    ``.sr_lot`` (plus a few fallback selectors).  Example category pages:
      GovPlanet:  https://www.govplanet.com/Recreational+Marine
      IronPlanet: https://www.ironplanet.com/Commercial+Marine+Vessels

    Args:
        src:     Source config; ``search_url`` is required, ``name``,
                 ``type`` and ``category`` are copied into each result.
        query:   Unused — the category URL is fetched without a search term.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts (title, url, snippet, price_text, location,
        img_url, source, source_type, category).  Empty on a non-200/206
        response; request errors are printed rather than raised.
    """
    from urllib.parse import urljoin

    name = src.get("name", "GovPlanet")
    results = []
    try:
        url = src["search_url"]
        base = "https://" + url.split("/")[2]
        headers = get_headers(referer=base + "/")
        time.sleep(1.0)  # small fixed delay before hitting the site
        r = requests.get(url, headers=headers, timeout=25, verify=False)
        if r.status_code not in (200, 206):
            print(f"[{name}] HTTP {r.status_code}")
            return []
        soup = BeautifulSoup(r.text, "html.parser")
        seen = set()
        for card in soup.select(".sr_lot, .lot-tile, article.lot, [class*=srItem]"):
            try:
                a = card.find("a", href=True)
                if not a:
                    continue
                raw_href = a["href"].strip()
                if not raw_href or raw_href.startswith("#"):
                    continue
                # urljoin resolves "/path", "path" and protocol-relative
                # "//host/path" correctly; plain concatenation does not.
                href = urljoin(url, raw_href)
                if not href.startswith(("http://", "https://")):
                    continue  # javascript:, mailto:, tel: …
                if href in seen:
                    continue
                seen.add(href)
                title = a.get_text(strip=True)[:100] or card.get_text(" ", strip=True)[:80]
                price_el = card.select_one(".price, .lot-price, span[class*=price]")
                price_txt = price_el.get_text(strip=True) if price_el else ""
                img_el = card.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if img:
                    img = urljoin(url, img)
                if title and len(title) > 4:
                    results.append({
                        "title": title,
                        "url": href,
                        "snippet": card.get_text(" ", strip=True)[:200],
                        "price_text": price_txt,
                        "location": "",
                        "img_url": img,
                        "source": name,
                        "source_type": src.get("type", "auction"),
                        "category": src.get("category", ""),
                    })
            except Exception:
                # Best-effort: skip a card that fails to parse.
                continue
        print(f"[{name}] {len(results)} listings")
    except Exception as e:
        print(f"[{name}] Error: {e}")
    return results


# =============================================================================
# SCRAPER: HiBid  (React SPA — Playwright required)
# =============================================================================
def scrape_hibid(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape HiBid lot search results with a headless Chromium (Playwright).

    URL: https://www.hibid.com/lots?q={query}+boat
    Cards: .lot-tile  Title: h3/.lot-title  Price: .high-bid/.lot-price

    Args:
        src:     Source config; ``name`` is required, ``type`` and
                 ``category`` are optional.
        query:   Free-text search; " boat" is appended before URL-quoting.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts.  Any failure (Playwright missing, navigation
        timeout, …) is printed and whatever was collected is returned.
    """
    from urllib.parse import urljoin

    results = []
    try:
        q = requests.utils.quote((query.strip() + " boat"))
        url = f"https://www.hibid.com/lots?q={q}"
        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    viewport={"width": 1280, "height": 900},
                    locale="en-US",
                    ignore_https_errors=True,
                )
                # Hide the webdriver flag that automation detection looks for.
                ctx.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                )
                page = ctx.new_page()
                page.goto(url, timeout=30000, wait_until="domcontentloaded")
                page.wait_for_timeout(4000)  # give the SPA time to render lots
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails;
                # closing it also closes its pages.
                try:
                    browser.close()
                except Exception:
                    pass

        soup = BeautifulSoup(html, "html.parser")
        seen = set()
        for card in soup.select(".lot-tile, [class*=lot-item], [class*=LotTile], [class*=lotCard]"):
            try:
                a = card.find("a", href=True)
                if not a:
                    continue
                raw_href = a["href"].strip()
                if not raw_href or raw_href.startswith("#"):
                    continue
                # Handles "/x", "x" and protocol-relative "//host/x" alike.
                href = urljoin(url, raw_href)
                if not href.startswith(("http://", "https://")):
                    continue
                if href in seen:
                    continue
                seen.add(href)
                title_el = card.select_one("h3, .lot-title, [class*=lot-title], [class*=lotTitle]")
                title = (title_el.get_text(strip=True) if title_el else a.get_text(strip=True))[:100]
                price_el = card.select_one(".high-bid, .lot-price, [class*=bid], [class*=price]")
                price_txt = price_el.get_text(strip=True) if price_el else ""
                img_el = card.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if title and len(title) > 4:
                    results.append({
                        "title": title,
                        "url": href,
                        "snippet": card.get_text(" ", strip=True)[:200],
                        "price_text": price_txt,
                        "location": "",
                        "img_url": img,
                        "source": src["name"],
                        "source_type": src.get("type", "auction"),
                        "category": src.get("category", ""),
                    })
            except Exception:
                # Best-effort: skip a card that fails to parse.
                continue
        print(f"[{src['name']}] {len(results)} lots")
    except Exception as e:
        print(f"[{src['name']}] Error: {e}")
    return results


# =============================================================================
# SCRAPER: Copart salvage boats  (heavy JS SPA — Playwright)
# =============================================================================
def scrape_copart(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape Copart watercraft lots with a headless Chromium (Playwright).

    URL: https://www.copart.com/vehicleFinderSection/?searchStr={query}&vehicleType=BOAT
    The page is loaded, given time to render, and rows containing a
    ``/lot/`` link are extracted from the resulting HTML.

    Args:
        src:     Source config; ``name`` is required, ``category`` optional.
        query:   Free-text search, URL-quoted into ``searchStr``.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts with ``source_type`` fixed to ``"salvage"``.
        Failures are printed and whatever was collected is returned.
    """
    from urllib.parse import urljoin

    results = []
    try:
        q = requests.utils.quote(query.strip())
        url = f"https://www.copart.com/vehicleFinderSection/?searchStr={q}&vehicleType=BOAT"
        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-blink-features=AutomationControlled"]
            )
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    viewport={"width": 1280, "height": 900},
                    locale="en-US",
                    ignore_https_errors=True,
                )
                # Mask the most common headless-automation fingerprints.
                ctx.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};"
                )
                page = ctx.new_page()
                page.goto(url, timeout=35000, wait_until="domcontentloaded")
                page.wait_for_timeout(5000)
                try:
                    page.wait_for_selector(
                        ".lot-row, tr[data-lot], .lot-details, [class*=lottile], [class*=lot-card]",
                        timeout=8000
                    )
                except Exception:
                    # No lot rows appeared in time — parse whatever rendered.
                    pass
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails;
                # closing it also closes its pages.
                try:
                    browser.close()
                except Exception:
                    pass

        soup = BeautifulSoup(html, "html.parser")
        seen = set()
        for row in soup.select(
            "tr[data-lot], .lot-row, [class*=lot-card], [class*=lottile], [class*=lot-item]"
        ):
            try:
                a = row.find("a", href=re.compile(r"/lot/"))
                if not a:
                    continue
                # Handles "/x", "x" and protocol-relative "//host/x" alike.
                href = urljoin(url, a["href"].strip())
                if not href.startswith(("http://", "https://")):
                    continue
                if href in seen:
                    continue
                seen.add(href)
                title_el = row.select_one("[class*=title], [class*=desc], td.des")
                title = (title_el.get_text(strip=True) if title_el else a.get_text(strip=True))[:100]
                price_el = row.select_one("[class*=bid], [class*=price], td.bid")
                price_txt = price_el.get_text(strip=True) if price_el else ""
                img_el = row.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if title and len(title) > 4:
                    results.append({
                        "title": title,
                        "url": href,
                        "snippet": row.get_text(" ", strip=True)[:200],
                        "price_text": price_txt,
                        "location": "",
                        "img_url": img,
                        "source": src["name"],
                        "source_type": "salvage",
                        "category": src.get("category", ""),
                    })
            except Exception:
                # Best-effort: skip a row that fails to parse.
                continue
        print(f"[{src['name']}] {len(results)} lots")
    except Exception as e:
        print(f"[{src['name']}] Error: {e}")
    return results


# =============================================================================
# SCRAPER: Trade a Boat AU  (server-rendered Material-UI)
# =============================================================================
def scrape_tradeaboat(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape TradeABoat Australia search results (plain HTTP request).

    URL: https://www.tradeaboat.com.au/search/Boats?category=Sail&keywords={query}
    The page's CSS class names are generated (jss77, jss78 …), so cards are
    located through their ``/details/`` links instead of by class.

    Args:
        src:     Source config; only ``category`` is read.
        query:   Free-text search, URL-quoted into ``keywords``.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts with ``location`` fixed to ``"Australia"``.
        Empty on a non-200/206 response; errors are printed, not raised.
    """
    from urllib.parse import urljoin

    results = []
    try:
        q = requests.utils.quote(query.strip())
        url = f"https://www.tradeaboat.com.au/search/Boats?category=Sail&keywords={q}"
        headers = get_headers(referer="https://www.tradeaboat.com.au/")
        time.sleep(1.0)  # small fixed delay before hitting the site
        r = requests.get(url, headers=headers, timeout=25, verify=False)
        if r.status_code not in (200, 206):
            print(f"[Trade a Boat AU] HTTP {r.status_code}")
            return []
        soup = BeautifulSoup(r.text, "html.parser")
        seen = set()
        # MUI class names are dynamic (jss77, jss78 …) — find cards via /details/ links
        detail_links = soup.find_all("a", href=re.compile(r"/details/"))
        visited_parents = set()
        for a in detail_links:
            try:
                # Handles "/x", "x" and protocol-relative "//host/x" alike.
                href = urljoin(url, a["href"].strip())
                if not href.startswith(("http://", "https://")):
                    continue
                if href in seen:
                    continue
                seen.add(href)
                # Nearest enclosing <div> is treated as the card container;
                # each container contributes at most one listing.
                card = a.find_parent("div") or a
                card_id = id(card)
                if card_id in visited_parents:
                    continue
                visited_parents.add(card_id)
                title_el = card.select_one("h2, h3, [class*=title]")
                title = (title_el.get_text(strip=True) if title_el else a.get_text(strip=True))[:100]
                price_el = card.select_one("[class*=price], [class*=Price]")
                price_txt = price_el.get_text(strip=True) if price_el else ""
                img_el = card.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if img:
                    img = urljoin(url, img)
                if title and len(title) > 4:
                    results.append({
                        "title": title,
                        "url": href,
                        "snippet": card.get_text(" ", strip=True)[:200],
                        "price_text": price_txt,
                        "location": "Australia",
                        "img_url": img,
                        "source": "Trade a Boat AU",
                        "source_type": "broker",
                        "category": src.get("category", ""),
                    })
            except Exception:
                # Best-effort: skip a link that fails to parse.
                continue
        print(f"[Trade a Boat AU] {len(results)} listings")
    except Exception as e:
        print(f"[Trade a Boat AU] Error: {e}")
    return results


# =============================================================================
# SCRAPER: Galati Yachts  (requests, WordPress / YSP plugin)
# =============================================================================
def scrape_galati(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape Galati Yachts search results (plain HTTP request).

    URL: https://www.galatiyachts.com/yachts-for-sale/?keywords={query}

    Listing cards are looked up by a handful of class selectors.  When none
    match, every on-site ``/yachts/`` link deep enough to be a detail page
    is used instead, with its nearest enclosing ``<div>`` as the card.

    Args:
        src:     Source config; only ``category`` is read.
        query:   Free-text search, URL-quoted into ``keywords``.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts with ``location`` fixed to ``"USA"``.
        Empty on a non-200/206 response; errors are printed, not raised.
    """
    from urllib.parse import urljoin

    results = []
    try:
        q = requests.utils.quote(query.strip())
        url = f"https://www.galatiyachts.com/yachts-for-sale/?keywords={q}"
        headers = get_headers(referer="https://www.galatiyachts.com/")
        time.sleep(1.0)  # small fixed delay before hitting the site
        r = requests.get(url, headers=headers, timeout=25, verify=False)
        if r.status_code not in (200, 206):
            print(f"[Galati Yachts] HTTP {r.status_code}")
            return []
        soup = BeautifulSoup(r.text, "html.parser")
        category = src.get("category", "")
        seen = set()

        def _absolute(raw):
            """Resolve *raw* against the page URL; '' if not an http(s) link."""
            raw = (raw or "").strip()
            if not raw or raw.startswith("#"):
                return ""
            full = urljoin(url, raw)
            return full if full.startswith(("http://", "https://")) else ""

        def _listing(card, anchor, href):
            """Build one result dict from a card, or None if the title is too short."""
            title_el = card.select_one("h2, h3, [class*=title]")
            title = (title_el.get_text(strip=True) if title_el
                     else anchor.get_text(strip=True))[:100]
            if not title or len(title) <= 4:
                return None
            price_el = card.select_one("[class*=price], .price")
            img_el = card.find("img")
            img = _extract_best_src(img_el) if img_el else ""
            if img:
                img = urljoin(url, img)
            return {
                "title": title, "url": href,
                "snippet": card.get_text(" ", strip=True)[:200],
                "price_text": price_el.get_text(strip=True) if price_el else "",
                "location": "USA",
                "img_url": img, "source": "Galati Yachts",
                "source_type": "broker", "category": category,
            }

        # YSP listing cards — try common selectors, fallback to /yachts/ links
        cards = soup.select(".ysp-listing, .listing-card, .yacht-card, [class*=yacht-listing]")
        if cards:
            candidates = ((card, card.find("a", href=True)) for card in cards)
            fallback = False
        else:
            anchors = soup.find_all("a", href=re.compile(r"/yachts/"))
            candidates = ((None, a) for a in anchors)
            fallback = True

        for card, a in candidates:
            # Each candidate is isolated so one bad node cannot abort the rest
            # (previously only the card branch had this protection).
            try:
                if not a:
                    continue
                href = _absolute(a["href"])
                if not href or href in seen:
                    continue
                if fallback:
                    # Keep only on-site links deep enough to be a detail page.
                    if "galatiyachts.com" not in href or href.count("/") < 4:
                        continue
                    card = a.find_parent("div") or a
                seen.add(href)
                item = _listing(card, a, href)
                if item:
                    results.append(item)
            except Exception:
                continue
        print(f"[Galati Yachts] {len(results)} listings")
    except Exception as e:
        print(f"[Galati Yachts] Error: {e}")
    return results


# =============================================================================
# SCRAPER: Luxury brokers (Fraser, Burgess, Worth Ave, Merle Wood, N&J)
# Playwright — JS-heavy sites that won't render with plain requests
# =============================================================================
def scrape_luxury_broker(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Generic Playwright scraper for luxury yacht broker sites.

    Loads ``src["search_url"]`` (with ``{query}`` substituted) in a headless
    Chromium, scrolls halfway down, then collects anchors whose href has a
    listing-like path segment (/yacht/, /vessel/, /boat/, /listing/,
    /detail/, /sale/, /for-sale/).  At most 30 results are returned.

    Args:
        src:     Source config; ``search_url`` is required, ``name``,
                 ``type`` and ``category`` are optional.
        query:   Free-text search, URL-quoted into the ``{query}`` slot.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts.  Failures are printed and whatever was
        collected is returned.
    """
    from urllib.parse import urljoin

    results = []
    name = src.get("name", "Broker")
    try:
        raw_url = src["search_url"]
        url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(
                headless=True,
                args=["--no-sandbox", "--disable-blink-features=AutomationControlled"]
            )
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    viewport={"width": 1280, "height": 900},
                    locale="en-US",
                    ignore_https_errors=True,
                )
                # Mask the most common headless-automation fingerprints.
                ctx.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                    "window.chrome={runtime:{}};"
                )
                page = ctx.new_page()
                page.goto(url, timeout=35000, wait_until="domcontentloaded")
                page.wait_for_timeout(3000)
                # Scroll partway down before reading the DOM.
                page.evaluate("window.scrollTo(0, document.body.scrollHeight / 2)")
                page.wait_for_timeout(1500)
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails;
                # closing it also closes its pages.
                try:
                    browser.close()
                except Exception:
                    pass

        soup = BeautifulSoup(html, "html.parser")
        seen = set()
        LISTING_RE = re.compile(
            r'/(yacht[s]?|vessel[s]?|boat[s]?|listing[s]?|detail[s]?|sale|for-sale)/',
            re.I
        )
        for a in soup.find_all("a", href=LISTING_RE):
            try:
                # Handles "/x", "x" and protocol-relative "//host/x" alike.
                href = urljoin(url, a["href"].strip())
                if not href.startswith(("http://", "https://")):
                    continue
                if href in seen or len(href) < 25:
                    continue
                # Require some path depth so top-level section links are skipped.
                path = href.split("?")[0].rstrip("/")
                if path.count("/") < 3:
                    continue
                seen.add(href)
                parent = a.find_parent("div") or a.find_parent("li") or a
                title = a.get_text(strip=True) or parent.get_text(" ", strip=True)[:80]
                title = " ".join(title.split())[:100]
                if len(title) < 5:
                    continue
                ctx_txt = parent.get_text(" ", strip=True)[:300]
                pm = re.search(r'[\$€£]\s*[\d,\.]+(?:\s*[Mm]illion|M)?', ctx_txt)
                price_txt = pm.group() if pm else ""
                img_el = parent.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if img:
                    img = urljoin(url, img)
                results.append({
                    "title": title, "url": href,
                    "snippet": ctx_txt[:200], "price_text": price_txt,
                    "location": "", "img_url": img,
                    "source": name, "source_type": src.get("type", "broker"),
                    "category": src.get("category", ""),
                })
                if len(results) >= 30:
                    break
            except Exception:
                # Best-effort: skip a link that fails to parse.
                continue
        print(f"[{name}] {len(results)} listings")
    except Exception as e:
        print(f"[{name}] Error: {e}")
    return results


# =============================================================================
# SCRAPER: EU/International brokers blocked on requests (Playwright)
# Covers: Boat24, YachtAll, Annonces Bateau, Inautia ES, Boats&Outboards UK,
#         Boatsales AU, YachtMarket, Apollo Duck UK subdomain
# =============================================================================
def scrape_eu_broker(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Generic Playwright scraper for EU/AU/UK broker sites that block plain
    requests (403/ECONNREFUSED).

    Loads ``src["search_url"]`` (with ``{query}`` substituted) in a headless
    Chromium and treats every same-domain link with enough path depth, and
    no navigation keyword in it, as a listing.  At most 30 are returned.

    Args:
        src:     Source config; ``search_url`` is required, ``name``,
                 ``type`` and ``category`` are optional.
        query:   Free-text search, URL-quoted into the ``{query}`` slot.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts.  Failures are printed and whatever was
        collected is returned.
    """
    from urllib.parse import urljoin

    # Substrings that mark a link as site navigation rather than a listing.
    skip_words = (
        "login", "register", "contact", "about", "help", "privacy",
        "sitemap", "category", "search", "tag", "page=", "lang=",
    )

    results = []
    name = src.get("name", "EU Broker")
    try:
        raw_url = src["search_url"]
        url = raw_url.replace("{query}", requests.utils.quote(query.strip()))
        domain = url.split("/")[2]

        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    viewport={"width": 1280, "height": 900},
                    locale="en-US",
                    ignore_https_errors=True,
                )
                # Hide the webdriver flag that automation detection looks for.
                ctx.add_init_script(
                    "Object.defineProperty(navigator,'webdriver',{get:()=>undefined});"
                )
                page = ctx.new_page()
                page.goto(url, timeout=35000, wait_until="domcontentloaded")
                page.wait_for_timeout(3000)
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails;
                # closing it also closes its pages.
                try:
                    browser.close()
                except Exception:
                    pass

        soup = BeautifulSoup(html, "html.parser")
        seen = set()
        for a in soup.find_all("a", href=True):
            try:
                raw_href = a["href"].strip()
                if not raw_href or raw_href.startswith("#"):
                    continue
                # Resolve before the domain test: prepending the base to a
                # protocol-relative external link would make it look on-site.
                href = urljoin(url, raw_href)
                if not href.startswith(("http://", "https://")):
                    continue
                if domain not in href or href in seen:
                    continue
                path = href.split("?")[0].rstrip("/")
                if path.count("/") < 3:
                    continue
                lowered = href.lower()
                if any(s in lowered for s in skip_words):
                    continue
                seen.add(href)
                parent = a.find_parent("div") or a.find_parent("li") or a
                title = a.get_text(strip=True) or parent.get_text(" ", strip=True)[:80]
                title = " ".join(title.split())[:100]
                if len(title) < 5:
                    continue
                ctx_txt = parent.get_text(" ", strip=True)[:300]
                pm = re.search(r'[\$€£]\s*[\d,\.]+', ctx_txt)
                price_txt = pm.group() if pm else ""
                img_el = parent.find("img")
                img = _extract_best_src(img_el) if img_el else ""
                if img:
                    img = urljoin(url, img)
                results.append({
                    "title": title, "url": href,
                    "snippet": ctx_txt[:200], "price_text": price_txt,
                    "location": "", "img_url": img,
                    "source": name, "source_type": src.get("type", "broker"),
                    "category": src.get("category", ""),
                })
                if len(results) >= 30:
                    break
            except Exception:
                # Best-effort: skip a link that fails to parse.
                continue
        print(f"[{name}] {len(results)} listings")
    except Exception as e:
        print(f"[{name}] Error: {e}")
    return results


# =============================================================================
# SCRAPER: Forum For-Sale sections (TheHullTruth, Cruisers Forum)
# =============================================================================
def scrape_forum_fs(src: dict, query: str, filters: dict) -> list[dict]:
    """
    Scrape For-Sale classified threads from boating forums (Playwright).

    TheHullTruth: /boating-forum/search.php?do=process&query={query}&prefixid=FS
    Cruisers Forum: /forums/f152/  (Classifieds subforum)

    Loads ``src["search_url"]`` (with ``{query}`` substituted) and reads
    thread rows using selectors for vBulletin/XenForo-style markup.  The
    price is the first ``$``-amount found in the row text.

    Args:
        src:     Source config; ``search_url`` is required, ``name`` and
                 ``category`` are optional.
        query:   Free-text search, URL-quoted into the ``{query}`` slot.
        filters: Unused; accepted so all scrapers share one signature.

    Returns:
        List of listing dicts with ``source_type`` fixed to
        ``"classifieds"`` and no image.  Failures are printed and whatever
        was collected is returned.
    """
    from urllib.parse import urljoin

    results = []
    name = src.get("name", "Forum")
    try:
        raw_url = src["search_url"]
        url = raw_url.replace("{query}", requests.utils.quote(query.strip()))

        from playwright.sync_api import sync_playwright
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True, args=["--no-sandbox"])
            try:
                ctx = browser.new_context(
                    user_agent=random.choice(USER_AGENTS),
                    viewport={"width": 1280, "height": 900},
                    locale="en-US",
                    ignore_https_errors=True,
                )
                page = ctx.new_page()
                page.goto(url, timeout=30000, wait_until="domcontentloaded")
                page.wait_for_timeout(2000)
                html = page.content()
            finally:
                # Always release the browser, even when navigation fails;
                # closing it also closes its pages.
                try:
                    browser.close()
                except Exception:
                    pass

        soup = BeautifulSoup(html, "html.parser")
        seen = set()
        thread_href = re.compile(r'showthread|/thread[s]?/|/t/\d|/post', re.I)
        # vBulletin/XenForo thread rows
        for row in soup.select(
            "li.threadbit, div.threadbit, .thread-item, "
            "tr.odd, tr.even, .search-result, [class*=thread], "
            ".js-threadListItem, li[id*=thread]"
        ):
            try:
                # Prefer a thread-looking link; otherwise take the row's first link.
                a = row.find("a", href=thread_href) or row.find("a", href=True)
                if not a:
                    continue
                raw_href = a["href"].strip()
                if not raw_href or raw_href.startswith("#"):
                    continue
                # Forum links are often page-relative ("showthread.php?t=…"),
                # which plain base + href concatenation would corrupt.
                href = urljoin(url, raw_href)
                if not href.startswith(("http://", "https://")):
                    continue
                if href in seen:
                    continue
                seen.add(href)
                title = a.get_text(strip=True)[:100]
                ctx_txt = row.get_text(" ", strip=True)[:200]
                pm = re.search(r'\$\s*[\d,]{3,}', ctx_txt)
                price_txt = pm.group() if pm else ""
                if title and len(title) > 5:
                    results.append({
                        "title": title, "url": href,
                        "snippet": ctx_txt, "price_text": price_txt,
                        "location": "", "img_url": "",
                        "source": name, "source_type": "classifieds",
                        "category": src.get("category", ""),
                    })
            except Exception:
                # Best-effort: skip a row that fails to parse.
                continue
        print(f"[{name}] {len(results)} threads")
    except Exception as e:
        print(f"[{name}] Error: {e}")
    return results


def scrape_source_router(src: dict, query: str, filters: dict, page: int = 1):
    """Pick the scraper for a source entry and return whatever it returns.

    The source is identified by ``src["name"]``. Names with a dedicated
    scraper are sent there; every other name falls through to the generic
    HTML scraper ``scrape_direct_source``.

    ``page`` is accepted for interface compatibility; this function neither
    reads it nor forwards it to any scraper.
    """
    source_name = src.get("name", "")

    # YachtWorld (regular and commercial) is the only scraper called without
    # the source entry itself.
    if source_name in ("YachtWorld", "YachtWorld Commercial"):
        return scrape_yachtworld(query, filters, max_pages=1)

    # ── Dedicated scrapers ────────────────────────────────────────────────────
    if source_name.startswith("eBay"):
        # Prefix match so every eBay entry shares one scraper.
        handler = scrape_ebay
    elif source_name == "BoatTrader":
        handler = scrape_boattrader
    elif source_name in ("Apollo Duck", "Apollo Duck Workboats"):
        handler = scrape_apolloduck
    elif source_name == "Boats.com":
        handler = scrape_boatsdotcom
    elif source_name == "Craigslist":
        # The single multi-city Craigslist entry.
        handler = scrape_craigslist
    elif source_name.startswith("Craigslist "):
        # Individual city entries go through the generic scraper.
        handler = scrape_direct_source
    elif source_name in ("GovPlanet", "GovPlanet Recreational",
                         "IronPlanet", "IronPlanet Marine"):
        handler = scrape_govplanet
    elif source_name == "HiBid":
        handler = scrape_hibid
    elif source_name in ("Copart Marine", "Copart Boats", "Copart Watercraft"):
        handler = scrape_copart
    elif source_name == "Trade a Boat AU":
        handler = scrape_tradeaboat
    elif source_name == "Galati Yachts":
        handler = scrape_galati
    elif source_name in ("Fraser Yachts", "Burgess Yachts",
                         "Northrop & Johnson", "Worth Ave Yachts"):
        handler = scrape_luxury_broker
    elif source_name in ("Boat24 EU", "YachtAll", "Annonces Bateau",
                         "Annonces Bateau FR", "Inautia ES",
                         "Boats & Outboards UK", "Boats Outboards UK",
                         "Apollo Duck UK", "Boatsales AU", "YachtMarket",
                         "Boatpoint AU"):
        # The plain "Boat24" and "Inautia" names are NOT in this group;
        # they have their own scrapers further down.
        handler = scrape_eu_broker
    elif source_name in ("TheHullTruth", "Cruisers Forum"):
        handler = scrape_forum_fs
    elif source_name == "Rightboat":
        handler = scrape_rightboat
    elif source_name in ("Cooper Salvage", "Cooper Capital Salvage"):
        handler = scrape_cooperss
    elif source_name == "Inautia":
        handler = scrape_inautia
    elif source_name == "Boat24":
        handler = scrape_boat24
    elif source_name == "Facebook Marketplace":
        handler = scrape_facebook_marketplace
    elif source_name == "HMY Yachts":
        handler = scrape_hmy
    elif source_name == "BoatCrazy":
        handler = scrape_boatcrazy
    elif source_name == "Denison Yachting":
        handler = scrape_denison
    else:
        # ── Generic HTML scraper (fallback) ──────────────────────────────────
        handler = scrape_direct_source

    return handler(src, query, filters)


def extract_vessel_fast(raw: dict) -> dict | None:
    """
    Pure-regex vessel extraction — no Ollama call.
    Used for results from known boat marketplaces (broker/classifieds/auction/etc.)
    Returns a data dict compatible with save_vessel(), or None if too sparse.
    """
    title      = (raw.get("title")      or "").strip()
    snippet    = (raw.get("snippet")    or "")
    price_text = (raw.get("price_text") or "")
    location   = (raw.get("location")   or "")
    src_name   = (raw.get("source")     or "").lower()
    src_type   = (raw.get("source_type") or "")
    category   = (raw.get("category")   or "").lower()

    if not title or len(title) < 5:
        return None

    combined = f"{title} {snippet} {price_text}"

    # ── Price ────────────────────────────────────────────────────────────────
    price_usd = None
    currency_out = "USD"
    for txt in [price_text, snippet, title]:
        # USD
        m = re.search(r'\$\s*([\d,]{3,})', txt)
        if m:
            try:
                v = float(m.group(1).replace(",",""))
                if 500 < v < 50_000_000:
                    price_usd = v; currency_out = "USD"; break
            except: pass
        # GBP
        m = re.search(r'£\s*([\d,]{3,})', txt)
        if m:
            try:
                v = float(m.group(1).replace(",","")) * 1.27
                if 500 < v < 50_000_000:
                    price_usd = round(v); currency_out = "GBP"; break
            except: pass
        # EUR
        m = re.search(r'€\s*([\d,]{3,})', txt)
        if m:
            try:
                v = float(m.group(1).replace(",","")) * 1.09
                if 500 < v < 50_000_000:
                    price_usd = round(v); currency_out = "EUR"; break
            except: pass
        # plain number + currency word
        m = re.search(r'([\d,]{4,})\s*(?:USD|usd|GBP|gbp|EUR|eur)', txt)
        if m:
            try:
                v = float(m.group(1).replace(",",""))
                if 500 < v < 50_000_000:
                    price_usd = round(v); break
            except: pass

    # ── LOA ──────────────────────────────────────────────────────────────────
    loa_m = None
    for pat, in_meters in [
        (r'(?:loa|length)[:\s]+([\d.]+)\s*(?:ft|\'|feet)', False),
        (r'^(\d{2,3}(?:\.\d)?)\s*(?:\'|ft|feet)',           False),  # starts with size
        (r'\b(\d{2,3}(?:\.\d)?)\s*(?:ft|feet)\b',           False),
        (r"(\d{2,3}(?:\.\d)?)'",                             False),
        (r'(?:loa|length)[:\s]+([\d.]+)\s*m\b',             True),
    ]:
        m = re.search(pat, combined, re.IGNORECASE)
        if m:
            try:
                v = float(m.group(1))
                if in_meters:
                    if 5 < v < 200: loa_m = round(v, 1); break
                else:
                    if 10 < v < 500: loa_m = round(v * 0.3048, 1); break
            except: pass

    # ── Year ─────────────────────────────────────────────────────────────────
    year = None
    ym = re.search(r'\b(19[5-9]\d|20[0-2]\d)\b', title)
    if ym: year = int(ym.group(1))

    # ── Vessel type ──────────────────────────────────────────────────────────
    cl = combined.lower()
    if any(k in src_name for k in ["sailboat","sail"]) or "veleros" in category:
        vtype = "Sailboat"
    elif any(k in src_name for k in ["workboat","commercial","osv","offshore"]):
        vtype = "Offshore"
    elif "tug" in src_name:  vtype = "Tug"
    elif "barge" in src_name: vtype = "Barge"
    elif any(k in cl for k in ["sailboat","sailing","velero","ketch","sloop","schooner",
                                "yawl","cutter","catamaran","trimaran","voilier"]):
        vtype = "Sailboat"
    elif any(k in cl for k in ["tugboat","tug boat","remolcador"]): vtype = "Tug"
    elif "barge" in cl or "barcaza" in cl:  vtype = "Barge"
    elif any(k in cl for k in ["offshore","osv","supply vessel","crew boat"]): vtype = "Offshore"
    elif any(k in cl for k in ["fishing","trawler","seiner","pesquero"]): vtype = "Fishing"
    elif any(k in cl for k in ["yacht","motor yacht","motoryacht"]): vtype = "Yacht"
    else: vtype = "Motor"

    status = ("auction" if src_type == "auction" else
              "salvage" if src_type == "salvage" else "active")

    # Infer location from source name when missing (e.g. "Craigslist Houston" → "Houston")
    if not location and raw.get("source"):
        src_full = raw["source"]
        if re.search(r'[Cc]raigslist', src_full):
            city = re.sub(r'[Cc]raigslist\s*', '', src_full).strip()
            if city: location = city
        elif "Kijiji"  in src_full: location = "Canada"
        elif "Gumtree" in src_full: location = "Australia"
        elif "LeBonCoin" in src_full: location = "France"
        elif "Subito"  in src_full: location = "Italy"

    # For trusted marketplace sources keep the result even with partial data.
    # For web-search results require at least one data point to avoid garbage.
    is_trusted = src_type in ("broker", "classifieds", "salvage", "commercial", "auction")
    if not is_trusted and not (price_usd or loa_m or year or location):
        return None

    score = 50
    if loa_m:
        score += min(10, int(loa_m - 10))
    if year and year > 1990:
        score += min(10, (year - 1990) // 3)
    if price_usd and loa_m:
        pft = price_usd / max(loa_m / 0.3048, 1)
        if pft < 600:   score += 15
        elif pft < 1200: score += 8
    score = min(100, max(0, score))

    return {
        "_fast":       True,          # flag: skip unit-conversion block downstream
        "skip":        False,
        "name":        title[:100],
        "vessel_type": vtype,
        "loa_m":       loa_m,
        "beam_m":      None,
        "draft_m":     None,
        "year_built":  year,
        "hull":        "Unknown",
        "propulsion":  "Sail" if vtype == "Sailboat" else "Diesel",
        "status":      status,
        "price_usd":   price_usd,
        "currency":    currency_out,
        "location":    location,
        "country":     None,
        "description": f"{title[:140]}",
        "flags":       [],
        "score":       score,
    }


def search_with_ai(query: str, filters: dict) -> list:
    """
    Hybrid search: direct scraping of open sources + web search to reach
    blocked sites (YachtWorld, Boats.com, Apollo Duck, etc.)

    Pipeline:
      1. Build the source list (built-in DIRECT_SOURCES plus active rows of
         the custom_sources table), then narrow/reorder it by the ``region``,
         ``status`` and ``type`` filters.
      2. Scrape in rounds (round N = page N of every source still active),
         running the web-search queries alongside round 1.
      3. De-duplicate raw results by URL and turn each one into a vessel dict,
         via extract_vessel_fast() for marketplace results or a page fetch
         plus an Ollama call otherwise, then persist it with save_vessel().

    Progress is reported by mutating the module-level ``search_state`` dict
    (``total_sources``, ``sources_done``, ``found``, ``log``); the search stops
    early when ``search_state["cancelled"]`` is set.

    Returns the list of vessel dicts for which save_vessel() returned a
    positive id during this call.
    """
    # Local import: on Python < 3.11 this class is distinct from the builtin
    # TimeoutError, and the module top only imports the executor helpers.
    from concurrent.futures import TimeoutError as FuturesTimeout

    vessel_type = filters.get("type", "")
    region = filters.get("region", "").lower()

    base = query
    if vessel_type and vessel_type.lower() not in query.lower():
        base = f"{vessel_type} {base}"

    # Filter sources by region if specified
    # Load custom sources from DB and merge with built-in
    try:
        conn = get_db()
        custom = [dict(r) for r in conn.execute(
            "SELECT * FROM custom_sources WHERE active=1").fetchall()]
        conn.close()
        all_sources = DIRECT_SOURCES + [{
            "name": c["name"],
            "category": c["category"],
            "search_url": c["search_url"],
            "result_sel": "a[href]",
            "price_sel": "",
            "img_sel": "img",
            "loc_sel": "",
            "type": c["source_type"],
        } for c in custom]
    except Exception:
        # Best effort: without the custom table, fall back to built-ins only.
        all_sources = DIRECT_SOURCES

    sources_to_use = all_sources
    if region and region not in ["global", "todo", "all", ""]:
        region_map = {
            "usa": ["USA", "Clasificados USA", "Subastas Gobierno USA", "Subastas USA", "Subastas Gobierno", "Comercial Offshore"],
            "europa": ["Europa", "Brokers Europa", "Francia", "Italia", "Reino Unido", "España", "España / Global"],
            "caribe": ["Latinoamérica", "Latinoamérica / España", "España / Global"],
            "latin": ["Latinoamérica", "Latinoamérica / España", "España", "España / Global"],
            "asia": ["Australia / Pacífico"],
            "australia": ["Australia / Pacífico"],
        }
        # First region key contained in the requested region string wins.
        allowed_cats = None
        for key, cats in region_map.items():
            if key in region:
                allowed_cats = cats
                break
        if allowed_cats:
            sources_to_use = [s for s in all_sources if any(c in s["category"] for c in allowed_cats)]
        if not sources_to_use:
            sources_to_use = all_sources

    # Filter by status. Each filter falls back to the unfiltered list when it
    # would otherwise leave nothing to query.
    status = filters.get("status", "")
    if status == "auction":
        sources_to_use = [s for s in sources_to_use if s["type"] in ["auction", "salvage"]] or sources_to_use
    elif status == "salvage":
        sources_to_use = [s for s in sources_to_use if s["type"] == "salvage"] or sources_to_use
    else:
        # Exclude salvage-only sources unless explicitly searching for salvage
        sources_to_use = [s for s in sources_to_use if s["type"] != "salvage"] or sources_to_use

    # Vessel-type-aware source prioritization
    OFFSHORE_TYPES  = {"offshore", "tug", "barge", "ferry", "fishing", "commercial", "salvage"}
    SAILBOAT_TYPES  = {"sailboat", "sail", "velero", "ketch", "sloop", "cutter", "schooner"}
    COMMERCIAL_ONLY_SOURCES = {
        "Seaboats Tug", "Seaboats Barge", "Seaboats Offshore", "Seaboats Fishing",
        "OSV Broker", "OSVBroker", "WorkBoat Classifieds", "VT Halter Marine",
        "Maritime Connector", "ShipXchange", "Commercial Vessel",
    }
    SAILBOAT_ONLY_SOURCES = {"SailboatListings", "SailboatListings View", "Cruisers Forum", "Sailboat Listing"}
    vessel_type_lower = vessel_type.lower() if vessel_type else ""

    if vessel_type_lower in OFFSHORE_TYPES:
        # Skip sailboat-only sources, float commercial ones to front
        sources_to_use = [s for s in sources_to_use if s["name"] not in SAILBOAT_ONLY_SOURCES]
        commercial = [s for s in sources_to_use if s["type"] in ("commercial", "salvage", "auction")]
        rest = [s for s in sources_to_use if s["type"] not in ("commercial", "salvage", "auction")]
        sources_to_use = commercial + rest
    elif vessel_type_lower in SAILBOAT_TYPES or "sail" in base.lower() or "velero" in base.lower():
        # Skip commercial-only offshore sources for sailboat searches
        sources_to_use = [s for s in sources_to_use if s["name"] not in COMMERCIAL_ONLY_SOURCES]
    elif not vessel_type_lower:
        # Generic search: keep all but put commercial sources after general ones
        commercial = [s for s in sources_to_use if s["name"] in COMMERCIAL_ONLY_SOURCES]
        rest = [s for s in sources_to_use if s["name"] not in COMMERCIAL_ONLY_SOURCES]
        sources_to_use = rest + commercial

    print(f"[Search] Querying {len(sources_to_use)} sources for: {base}")
    search_state['total_sources'] = len(sources_to_use)
    search_state['log'].append(f"Consultando {len(sources_to_use)} fuentes...")

    def get_query_for_source(src):
        """Match query language to source region."""
        cat = src.get("category","").lower()
        # French / Spanish-language sources get the bare query; everything
        # else gets an English "for sale" suffix.
        if any(x in cat for x in ["france","franc","veleros franc"]):
            return base
        elif any(x in cat for x in ["spain","españa","espana","mexico","colombia","latin"]):
            return base
        else:
            return f"{base} for sale" if "for sale" not in base.lower() else base

    # Build web search queries targeting specific sites
    web_queries = build_web_queries(base, filters)

    total = len(sources_to_use) + len(web_queries)
    search_state['total_sources'] = total
    search_state['log'].append(f"Consultando {len(sources_to_use)} sitios directos + {len(web_queries)} búsquedas web...")
    print(f"[Search] {len(sources_to_use)} direct + {len(web_queries)} web searches for: {base}")

    # Run BOTH direct scraping AND web searches in parallel
    all_raw = []

    # ── SailboatListings: dedicated parallel thread (handles its own AI extraction) ──
    # Only for sailboat/velero or generic searches, not for offshore/tug/barge/etc.
    # NOTE(review): the thread is a daemon and is never joined here, so this
    # function can return while it is still running — confirm that is intended.
    sbl_thread = None
    if vessel_type_lower not in OFFSHORE_TYPES and vessel_type_lower not in {"motor", "motorboat"}:
        sbl_thread = threading.Thread(
            target=scrape_and_extract_sailboatlistings,
            args=(query, filters, search_state.get('search_id', ''), 8),
            daemon=True,
        )
        sbl_thread.start()
        search_state['log'].append("SailboatListings: iniciado en paralelo (hilo dedicado)...")
        print("[Search] SailboatListings dedicated thread started")

    # ── Breadth-First Search across all sources ──────────────────────────────
    # Round 1: page 1 of all sources simultaneously
    # Round 2: page 2 of sources that had results
    # Round 3: page 3, etc.
    # Between rounds, a natural pause occurs as we process results
    # This avoids hammering any single source with consecutive requests

    MAX_ROUNDS   = 6    # max pages per source
    active_srcs  = {src["name"]: {"src": src, "page": 1, "has_more": True}
                    for src in sources_to_use}

    # Web searches only run once (no pagination)
    web_done = False

    for round_num in range(1, MAX_ROUNDS + 1):
        if search_state.get("cancelled"):
            break

        round_sources = {name: info for name, info in active_srcs.items()
                        if info["has_more"]}
        if not round_sources:
            break

        search_state['log'].append(f"Ronda {round_num}: consultando {len(round_sources)} fuentes...")
        print(f"[Search] Round {round_num}: {len(round_sources)} active sources")

        round_raw = []
        with ThreadPoolExecutor(max_workers=12) as executor:
            futures = {}

            # Submit page N of all active sources
            for name, info in round_sources.items():
                src = info["src"]
                q   = get_query_for_source(src)
                # Add page parameter to URL if supported and page > 1
                src_with_page = dict(src)
                if round_num > 1:
                    url = src["search_url"]
                    # Common pagination patterns
                    if "craigslist.org" in url:
                        src_with_page["search_url"] = url + f"&s={round_num * 25 - 25}"
                    elif "ebay.com" in url:
                        src_with_page["search_url"] = url + f"&_pgn={round_num}"
                    elif "seaboats.net" in url:
                        src_with_page["search_url"] = url + f"&page={round_num}"
                    elif "kijiji.ca" in url:
                        src_with_page["search_url"] = url.rstrip('/') + f"/page-{round_num}/"
                    else:
                        # Most sites don't support pagination via URL params we know
                        # Mark as done after page 1
                        active_srcs[name]["has_more"] = False
                        continue
                futures[executor.submit(scrape_source_router, src_with_page, q, filters, round_num)] = name

            # Web searches on round 1 only
            if round_num == 1 and not web_done:
                for wq in web_queries:
                    futures[executor.submit(web_search, wq, 6)] = f"Web:{wq[:20]}"
                web_done = True

            # Collect results for this round
            try:
                for future in as_completed(futures, timeout=90):
                    name = futures[future]
                    try:
                        results = future.result()
                        count = len(results)
                        round_raw.extend(results)
                        search_state['sources_done'] += 1

                        if name.startswith("Web:"):
                            if count:
                                search_state['log'].append(f"🌐 Web: {count} resultados")
                        else:
                            if count:
                                search_state['log'].append(f"✓ {name} p{round_num}: {count}")
                                print(f"[Round {round_num}] {name}: {count} listings")
                            else:
                                # No results this round — remove from future rounds
                                if name in active_srcs:
                                    active_srcs[name]["has_more"] = False
                    except Exception:
                        # A failing source is counted as done and dropped
                        # from later rounds.
                        search_state['sources_done'] += 1
                        if name in active_srcs:
                            active_srcs[name]["has_more"] = False
            except FuturesTimeout:
                # Previously this escaped and aborted the whole search,
                # discarding everything already collected. Treat stragglers
                # like failed sources instead and keep going.
                stragglers = 0
                for pending, pending_name in futures.items():
                    if pending.done():
                        continue
                    pending.cancel()  # only affects tasks that have not started
                    stragglers += 1
                    search_state['sources_done'] += 1
                    if pending_name in active_srcs:
                        active_srcs[pending_name]["has_more"] = False
                print(f"[Search] Round {round_num}: timed out waiting for {stragglers} sources")

        all_raw.extend(round_raw)
        print(f"[Search] Round {round_num} complete: {len(round_raw)} new results (total: {len(all_raw)})")

        # Small pause between rounds — natural break
        if round_num < MAX_ROUNDS and not search_state.get("cancelled"):
            polite_pause("BFS-round")

    print(f"[Search] Got {len(all_raw)} raw results, extracting vessel data...")

    if not all_raw:
        return []

    # Extract vessel data — parallel with dedup and real-time save
    vessels = []
    lock = threading.Lock()
    max_price = float(filters.get("max_price") or 0)
    min_loa   = float(filters.get("min_loa") or 0)
    query_words = [w.lower() for w in query.split() if len(w) > 2]

    # Deduplicate raw results by URL
    seen_urls = set()
    unique_raw = []
    for r in all_raw:
        if r["url"] not in seen_urls:
            seen_urls.add(r["url"])
            unique_raw.append(r)

    print(f"[Extract] Processing {len(unique_raw)} unique URLs...")

    SYNONYMS = {
        "sailboat":["sail","velero","vela","ketch","sloop","schooner","yawl","voilier"],
        "velero":  ["sail","sailboat","vela","ketch","sloop"],
        "tug":     ["tugboat","remolcador","tug boat","schlepper"],
        "barge":   ["barcaza","chaland","ponton","landing craft","lct"],
        "fishing": ["pesquero","trawler","seiner","longliner","fisher"],
        "offshore":["osv","supply vessel","supply boat","platform"],
        "yacht":   ["yate","motoryacht","m/y"],
        "motor":   ["motorboat","lancha","speedboat","cruiser"],
    }
    NON_VESSELS = ["outboard motor","engine only","motor only","parts only",
                   "trailer only","propeller","honda bf","yamaha f","suzuki df",
                   "life jacket","anchor","marine insurance","boat storage",
                   # Land vehicles — never boats
                   "ford expedition","ford explorer","ford f-1","ford ranger",
                   "ford bronco","ford mustang","ford escape","ford transit",
                   "chevy silverado","chevy tahoe","chevy suburban","chevy colorado",
                   "chevrolet silverado","chevrolet tahoe","chevrolet suburban",
                   "gmc sierra","gmc yukon","gmc terrain","gmc canyon",
                   "dodge ram","ram 1500","ram 2500","ram 3500",
                   "jeep wrangler","jeep cherokee","jeep grand","jeep gladiator",
                   "toyota camry","toyota tacoma","toyota tundra","toyota 4runner",
                   "toyota highlander","toyota rav4","toyota sienna",
                   "subaru outback","subaru forester","subaru crosstrek",
                   "honda cr-v","honda pilot","honda accord","honda civic","honda odyssey",
                   "tesla model","bmw x","mercedes benz","audi q","volkswagen jetta",
                   "cadillac escalade","cadillac xt","buick enclave","buick encore",
                   # Non-vessel services
                   "sailing lesson","sailing partner","sailing school","sailing class",
                   "sailing instruction","boating lesson","boat lesson","boating class",
                   "sailing instructor","boat rental","kayak rental","canoe rental",
                   ]

    def expand_query(words):
        """Return the query words plus every synonym group any of them hits."""
        expanded = set(words)
        for w in words:
            for key, syns in SYNONYMS.items():
                if w == key or w in syns:
                    expanded.add(key)
                    expanded.update(syns)
        return expanded

    expanded_query = expand_query(query_words)

    GENERIC_NAMES = {
        "sailboat","velero","barco","yacht","boat","vessel","embarcación",
        "sailboat for sale","velero en venta","boat for sale","barco en venta",
        "motor boat","motorboat","fishing boat","tug boat","tugboat",
        "within25 mi","within 25 mi","results","listing","listings",
    }

    def process_one(raw):
        """Extract, validate, filter and save a single raw result.

        Never raises: any error is printed and the result is dropped.
        """
        try:
            if search_state.get("cancelled"):
                return

            # Quick title pre-check
            title_lower = raw["title"].lower()
            if any(kw in title_lower for kw in NON_VESSELS):
                return

            src_type  = raw.get("source_type", "")
            all_images = []
            data = None

            # ── FAST PATH: known boat marketplace → pure regex, no AI ────────
            if src_type in ("broker","classifieds","auction","salvage","commercial"):
                data = extract_vessel_fast(raw)
                if data:
                    img = raw.get("img_url","")
                    if img:
                        all_images = [img]
                    else:
                        # Derive thumbnail from URL (no page fetch needed)
                        listing_url = raw.get("url","")
                        ebay_m = re.search(r'ebay\.com/itm/(\d+)', listing_url)
                        if ebay_m:
                            all_images = [f"https://i.ebayimg.com/images/g/{ebay_m.group(1)}/s-l500.jpg"]
                        cl_m = re.search(r'craigslist\.org/.+/(\d{10})\.html', listing_url)
                        if cl_m:
                            all_images = [f"https://images.craigslist.org/{cl_m.group(1)}_600x450.jpg"]

            # ── Fast path: validate the listing is actually a boat ──────────────
            if data and data.get("_fast"):
                combined_text = (raw.get("title","") + " " + raw.get("snippet","")).lower()
                url_l = raw.get("url","").lower()

                # URLs that are guaranteed to be boat listings (trusted sections)
                BOAT_URLS = ("/boa","/boat","/sail","sailboatlistings","yachtworld",
                             "boattrader","seaboats","apolloduck","rightboat","boat24",
                             "annonces-bateau","barcos.net","tradeaboat","marinetraffic")
                is_boat_url = any(k in url_l for k in BOAT_URLS)

                # General auction sites (sell everything) need a boat keyword in the text
                BOAT_WORDS = ["boat","sail","yacht","vessel","ketch","sloop","catamaran",
                              "trimaran","mast","hull","marina","keel","watercraft","cruiser",
                              "trawler","dinghy","skiff","pontoon","motorboat","powerboat",
                              "sailboat","barge","tugboat","outboard","inboard","nautical",
                              "marine","stern","bow","aft","draft","beam","knot","starboard"]
                has_boat_word = any(k in combined_text for k in BOAT_WORDS)

                if not is_boat_url and not has_boat_word:
                    return  # Cars, furniture, etc. from general auction sites — skip

            # ── SLOW PATH: web-search results → fetch page + AI ──────────────
            if not data:
                page_text, page_images = "", []
                # One-shot worker so the fetch can be abandoned after 12 s.
                # Shut it down without waiting so the pool is always released,
                # even when the fetch is abandoned.
                fetch_pool = ThreadPoolExecutor(max_workers=1)
                try:
                    fut = fetch_pool.submit(fetch_page_with_images, raw["url"])
                    page_text, page_images = fut.result(timeout=12)
                except Exception:
                    # Fetch failed or timed out: fall back to scraped metadata.
                    page_text = (f"Title: {raw['title']} "
                                 f"| Location: {raw.get('location','')} | {raw.get('snippet','')}")
                finally:
                    fetch_pool.shutdown(wait=False)

                if not page_images and raw.get("img_url"):
                    page_images = [raw["img_url"]]
                if not page_images:
                    listing_url = raw.get("url", "")
                    ebay_m = re.search(r'ebay\.com/itm/(\d+)', listing_url)
                    if ebay_m:
                        page_images = [f"https://i.ebayimg.com/images/g/{ebay_m.group(1)}/s-l500.jpg"]
                    cl_m = re.search(r'craigslist\.org/.+/(\d{10})\.html', listing_url)
                    if cl_m:
                        page_images = [f"https://images.craigslist.org/{cl_m.group(1)}_600x450.jpg"]
                all_images = page_images

                status = ("auction" if src_type == "auction"
                          else "salvage" if src_type == "salvage"
                          else "active")

                context = ("URL: " + raw["url"] + "\nTitle: " + raw["title"] +
                           "\nPrice: " + raw.get("price_text","") + "\n" + page_text[:1500])

                prompt = (
                    "Analyze this boat listing from " + str(raw.get('source','')) +
                    ". Search was: " + query + "\n"
                    "TEXT: " + context + "\n\n"
                    "If NOT a boat for sale respond {skip:true}. "
                    "If IS a boat respond JSON with: skip=false, name, vessel_type "
                    "(Yacht|Motor|Sailboat|Fishing|Tug|Barge|Offshore|Ferry|Other), "
                    "loa_m, beam_m, draft_m (ALWAYS in METERS — detect unit from text; "
                    "if feet multiply by 0.3048, e.g. 45ft=13.7m, 60ft=18.3m, 100ft=30.5m), "
                    "year_built, hull, propulsion, "
                    "status=" + status + ", price_usd, currency, location, country, "
                    "description (Spanish max 150 chars), flags=[], score 0-100."
                )

                response = ollama_generate(prompt, model=MODELS['classify'], json_mode=True)
                # Take the outermost {...} span of the reply as the JSON payload.
                m = re.search(r'\{.*\}', response or '', re.DOTALL)
                if not m:
                    return
                data = json.loads(m.group())
                if data.get("skip") or not data.get("name"):
                    return

                # Override AI loa_m with regex (AI misses feet→m conversion)
                loa_from_ctx = None
                for pat in [
                    r'(?:length|loa|eslora)[:\s]+([\d.]+)\s*(?:ft|\'|feet)',
                    r'\b(\d{2,3}(?:\.\d)?)\s*(?:ft|feet|\')',
                    r'^(\d{2,3}(?:\.\d)?)\s*\'',
                ]:
                    lm = re.search(pat, context, re.IGNORECASE)
                    if not lm:
                        lm = re.search(pat, raw.get("title",""), re.IGNORECASE)
                    if lm:
                        try:
                            ft = float(lm.group(1))
                            if 10 < ft < 500:
                                loa_from_ctx = round(ft * 0.3048, 1)
                                break
                        except ValueError:
                            pass
                if loa_from_ctx and not data.get("loa_m"):
                    data["loa_m"] = loa_from_ctx
                elif loa_from_ctx and data.get("loa_m") and data["loa_m"] > 25:
                    # A feet figure exists in the text and the AI value is
                    # large: treat the AI value as feet.
                    # NOTE(review): a value converted here can still exceed 25
                    # and be converted a second time by the guard below.
                    data["loa_m"] = round(data["loa_m"] * 0.3048, 1)

                # AI unit conversion guard (only needed for AI output)
                ctx_lower = (page_text + " " + raw.get("title","")).lower()
                has_feet  = bool(re.search(r"\d+\s*(?:ft|feet|')\b|loa[:\s]+\d+\s*(?:ft|')", ctx_lower))
                vtype_lower = data.get("vessel_type","").lower()
                MAX_M = {"sailboat":25,"yacht":35,"motor":30,"fishing":30,
                         "tug":60,"barge":120,"offshore":90,"ferry":100,"other":50}
                max_reasonable = MAX_M.get(vtype_lower, 50)
                for dim in ["loa_m","beam_m","draft_m"]:
                    val = data.get(dim)
                    if not val or not isinstance(val,(int,float)):
                        continue
                    convert = False
                    if   dim == "loa_m"  and (val > 100 or val > max_reasonable or (val > 25 and has_feet)): convert = True
                    elif dim == "beam_m" and (val > 30  or (val > 8 and has_feet)):  convert = True
                    elif dim == "draft_m"and (val > 15  or (val > 5 and has_feet)):  convert = True
                    if convert:
                        data[dim] = round(val * 0.3048, 1)

            # ── Shared post-processing (fast path + AI path) ──────────────────
            if not data or not data.get("name"):
                return

            # Query match check
            combined = (data.get("name","") + " " + data.get("description","") +
                        " " + data.get("vessel_type","") + " " +
                        raw.get("title","") + " " + raw.get("url","")).lower()
            if query_words:
                if not any(qw in combined for qw in expanded_query):
                    # Skip query-match filter for results from direct scrapers (not web search).
                    # Web search results have category="Web Search" and may return off-topic pages.
                    # Direct scraper results already passed through a relevant search query.
                    is_web_search = raw.get("category","").lower() == "web search"
                    if is_web_search:
                        source_lower = raw.get("source","").lower()
                        if not any(kw in source_lower for kw in
                                   ["sailboat","yacht","workboat","offshore","tug","commercial",
                                    "boats","boattrader","apolloduck","rightboat","seaboats",
                                    "yachtworld","govplanet","govdeals","hibid","copart","ebay",
                                    "salvex","kijiji","craigslist","denison","galati","hmy"]):
                            return

            # Non-vessel + generic name check
            if any(kw in data.get("name","").lower() for kw in NON_VESSELS):
                return
            if data.get("name","").lower().strip() in GENERIC_NAMES:
                return

            # Filters (price + LOA), with a small tolerance on each bound
            if max_price and data.get("price_usd") and data["price_usd"] > max_price * 1.01:
                return
            if min_loa and data.get("loa_m") and data["loa_m"] < (min_loa - 0.15):
                return

            data["images"]      = all_images[:8]
            data["source_url"]  = raw["url"]
            data["source_name"] = raw["source"]

            vid = save_vessel(data)
            if vid > 0:
                # Shared counters/lists are updated under the lock because
                # process_one runs on many worker threads.
                with lock:
                    search_state["found"] += 1
                    vessels.append(data)
                    tag = "[Fast]" if data.get("_fast") else "[AI]"
                    msg = f"✓ {data.get('name','?')} — {raw['source']}"
                    print(f"{tag} {msg}")
                    search_state["log"].append(msg)
        except Exception as e:
            print(f"[Extract] Error: {e}")

    # Fast path: more workers + more URLs since most results skip AI now
    with ThreadPoolExecutor(max_workers=16) as ex:
        futs = [ex.submit(process_one, r) for r in unique_raw[:300]]
        try:
            for f in as_completed(futs, timeout=180):
                if search_state.get("cancelled"):
                    break
                try:
                    f.result()
                except Exception:
                    pass
        except FuturesTimeout:
            # Keep what has been extracted so far instead of losing the
            # whole search; drop work that has not started yet.
            for f in futs:
                f.cancel()
            print("[Extract] Timed out waiting for extraction workers")

    print(f"[Search] Done — {len(vessels)} vessels found")
    return vessels


# ── Fingerprint ───────────────────────────────────────────────────────────────
def fingerprint(v: dict) -> str:
    """Return a short, stable de-duplication key for a vessel record.

    The key is the first 16 hex characters of the SHA-256 digest of
    ``name|rounded loa_m|year_built|vessel_type``. The name is lower-cased and
    stripped so cosmetic differences do not create distinct fingerprints.

    Args:
        v: Vessel dict; every field is optional.

    Returns:
        A 16-character hexadecimal string.
    """
    # `or ''` covers an explicit None (dict.get's default only applies to a
    # missing key); str() keeps a non-string name from raising on .lower().
    name = str(v.get('name') or '').lower().strip()
    loa = round(v.get('loa_m') or 0)
    raw = f"{name}|{loa}|{v.get('year_built',0)}|{v.get('vessel_type','')}"
    return hashlib.sha256(raw.encode()).hexdigest()[:16]

def save_vessel(v: dict) -> int:
    """Insert a vessel into the ``vessels`` table unless it already exists.

    A record is rejected when it has no usable string name (empty, blank or
    "Unknown") or when none of ``price_usd``, ``loa_m``, ``year_built`` and
    ``location`` carries a value.

    Args:
        v: Vessel dict as produced by the extractors or the API.

    Returns:
        The new row id, the id of an existing row with the same fingerprint,
        or ``-1`` when the record is rejected or the database operation fails.
    """
    # Reject pure shells — need at least name + 1 real data field.
    name = v.get("name")
    if not isinstance(name, str) or name.strip() in ("", "Unknown"):
        return -1
    if not any(v.get(f) for f in ('price_usd', 'loa_m', 'year_built', 'location')):
        return -1

    fp = fingerprint(v)
    conn = get_db()
    try:
        c = conn.cursor()
        existing = c.execute("SELECT id FROM vessels WHERE fingerprint=?", (fp,)).fetchone()
        if existing:
            return existing['id']
        c.execute("""INSERT INTO vessels
            (name,vessel_type,loa_m,beam_m,draft_m,year_built,hull,propulsion,
             status,price_usd,currency,location,country,source_name,source_url,
             description,images,flags,score,fingerprint,raw_data)
            VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)""",
            (v.get('name'), v.get('vessel_type'), v.get('loa_m'),
             v.get('beam_m'), v.get('draft_m'), v.get('year_built'),
             v.get('hull'), v.get('propulsion'), v.get('status','active'),
             v.get('price_usd'), v.get('currency','USD'),
             v.get('location'), v.get('country'),
             v.get('source_name'), v.get('source_url'),
             v.get('description'), json.dumps(v.get('images',[])),
             json.dumps(v.get('flags',[])), v.get('score',50),
             fp, json.dumps(v)))
        vid = c.lastrowid
        conn.commit()
        return vid
    except Exception as e:
        # Best effort: a failed lookup or insert is logged and reported as -1.
        print(f"[DB] Error: {e}")
        return -1
    finally:
        # Runs on every path, including the early "already exists" return.
        conn.close()

# ── API Routes ────────────────────────────────────────────────────────────────

def hash_pw(pw):
    """Return the hexadecimal SHA-256 digest of the password string *pw*."""
    # NOTE(review): this is a plain, unsalted SHA-256. Moving to a salted KDF
    # would also require changing how the login query compares passwords.
    hasher = _hashlib.sha256()
    hasher.update(pw.encode())
    return hasher.hexdigest()

def seed_admin():
    """Create the default ``admin`` account when no user named 'admin' exists.

    NOTE(review): the initial password is hard-coded and printed to the
    console; it should be changed right after the first login.
    """
    conn = get_db()
    try:
        existing = conn.execute("SELECT id FROM users WHERE username='admin'").fetchone()
        if not existing:
            conn.execute("INSERT INTO users (username,password,role) VALUES (?,?,?)",
                         ('admin', hash_pw('admin123'), 'admin'))
            conn.commit()
            print("[Auth] Default user created: admin / admin123")
    finally:
        # Close the connection even if the lookup or insert raises.
        conn.close()

@app.route('/api/login', methods=['POST'])
def login():
    """Authenticate a user and start a session.

    Expects a JSON body with ``username`` and ``password``. On success the
    session receives ``user_id``, ``username`` and ``role`` and the response is
    ``{'ok': True, 'username': ..., 'role': ...}``. Any other outcome returns
    a 401 with an error message.
    """
    body = request.json or {}
    if not isinstance(body, dict):
        body = {}
    username = body.get('username', '')
    password = body.get('password', '')
    # Non-string credentials (null, numbers, objects) can never match a stored
    # user, so answer 401 instead of crashing on .strip() / .encode().
    if not isinstance(username, str) or not isinstance(password, str):
        return jsonify({'ok': False, 'error': 'Usuario o contraseña incorrectos'}), 401

    conn = get_db()
    try:
        user = conn.execute("SELECT * FROM users WHERE username=? AND password=?",
                            (username.strip(), hash_pw(password))).fetchone()
    finally:
        conn.close()

    if user:
        # Start from a clean session so keys from an earlier login do not leak
        # into the new one.
        session.clear()
        session['user_id'] = user['id']
        session['username'] = user['username']
        session['role'] = user['role']
        return jsonify({'ok': True, 'username': user['username'], 'role': user['role']})
    return jsonify({'ok': False, 'error': 'Usuario o contraseña incorrectos'}), 401

@app.route('/api/logout', methods=['POST'])
def logout():
    """Remove every key from the current session and acknowledge with ``ok``."""
    session.clear()
    ack = {'ok': True}
    return jsonify(ack)

@app.route('/api/me')
def me():
    """Report whether the session is logged in, with its username and role."""
    if 'user_id' in session:
        identity = {
            'logged_in': True,
            'username': session.get('username'),
            'role': session.get('role'),
        }
        return jsonify(identity)
    # No user_id in the session: answer 401 so clients can show the login form.
    return jsonify({'logged_in': False}), 401

@app.route('/api/users', methods=['GET'])
def list_users():
    """Return id, username, role and creation time of all users (admin only)."""
    is_admin = session.get('role') == 'admin'
    if not is_admin:
        return jsonify({'error': 'forbidden'}), 403

    conn = get_db()
    cursor = conn.execute("SELECT id,username,role,created_at FROM users")
    users = []
    for record in cursor.fetchall():
        users.append(dict(record))
    conn.close()
    return jsonify({'users': users})

@app.route('/api/users', methods=['POST'])
def create_user():
    """Create a user account (admin only).

    Expects a JSON body with ``username``, ``password`` and an optional
    ``role`` (default ``'user'``).

    Returns:
        ``{'ok': True}`` on success; 403 for non-admin sessions; 400 when the
        credentials are missing or not strings, or when the insert violates a
        constraint; 500 for any other database error.
    """
    if session.get('role') != 'admin':
        return jsonify({'error': 'forbidden'}), 403
    body = request.json or {}
    if not isinstance(body, dict):
        body = {}
    username = body.get('username', '')
    password = body.get('password', '')
    role = body.get('role', 'user')
    # Reject non-string values up front; they would crash .strip()/.encode().
    if not isinstance(username, str) or not isinstance(password, str):
        return jsonify({'error': 'username and password required'}), 400
    username = username.strip()
    if not username or not password:
        return jsonify({'error': 'username and password required'}), 400

    conn = get_db()
    try:
        conn.execute("INSERT INTO users (username,password,role) VALUES (?,?,?)",
                     (username, hash_pw(password), role))
        conn.commit()
    except sqlite3.IntegrityError:
        # Presumably the UNIQUE constraint on username; the schema is not
        # visible here, so confirm against the table definition.
        return jsonify({'error': 'username already exists'}), 400
    except sqlite3.Error as e:
        # Any other database failure is not a duplicate; report it as such.
        print(f"[Auth] create_user failed: {e}")
        return jsonify({'error': 'database error'}), 500
    finally:
        conn.close()
    return jsonify({'ok': True})

@app.route('/api/change_password', methods=['POST'])
def change_password():
    """Change the password of the logged-in user.

    Expects a JSON body with ``old_password`` and ``new_password``.

    Returns:
        ``{'ok': True}`` on success; 401 when not logged in; 400 when the new
        password is missing or not a string, or the current password does not
        match.
    """
    if 'user_id' not in session:
        return jsonify({'error': 'not logged in'}), 401
    body = request.json or {}
    if not isinstance(body, dict):
        body = {}
    old_pw = body.get('old_password', '')
    new_pw = body.get('new_password', '')
    # An empty new password would otherwise be stored as the hash of "".
    if not isinstance(new_pw, str) or not new_pw:
        return jsonify({'error': 'new_password required'}), 400
    if not isinstance(old_pw, str):
        return jsonify({'error': 'Contraseña actual incorrecta'}), 400

    conn = get_db()
    try:
        user = conn.execute("SELECT * FROM users WHERE id=? AND password=?",
                            (session['user_id'], hash_pw(old_pw))).fetchone()
        if not user:
            return jsonify({'error': 'Contraseña actual incorrecta'}), 400
        conn.execute("UPDATE users SET password=? WHERE id=?",
                     (hash_pw(new_pw), session['user_id']))
        conn.commit()
    finally:
        conn.close()
    return jsonify({'ok': True})

@app.route('/')
def index():
    """Serve ``index.html`` from the ``static`` directory."""
    page = 'index.html'
    return send_from_directory('static', page)

@app.route('/api/status')
def status():
    """Return the Ollama model list, table counts and source summary."""
    models = ollama_models()

    count_queries = (
        ('vessels', "SELECT COUNT(*) FROM vessels"),
        ('saved',   "SELECT COUNT(*) FROM saved_vessels"),
        ('alerts',  "SELECT COUNT(*) FROM alerts WHERE active=1"),
    )
    conn = get_db()
    counts = {}
    for label, sql in count_queries:
        counts[label] = conn.execute(sql).fetchone()[0]
    conn.close()

    # Distinct categories across the built-in sources (order is not defined).
    categories = list({src['category'] for src in DIRECT_SOURCES})
    payload = {
        'ok': True,
        'ollama_models': models,
        'active_model': MODELS['extract'],
        'db_counts': counts,
        'sources_count': len(DIRECT_SOURCES),
        'categories': categories,
    }
    return jsonify(payload)

def _vessel_filter_number(raw, cast):
    """Return ``cast(raw)``, or ``None`` when *raw* is missing or not numeric."""
    try:
        return cast(raw)
    except (TypeError, ValueError):
        return None


@app.route('/api/vessels')
def list_vessels():
    """List vessels, filtered and sorted by query-string arguments.

    Supported arguments: ``type``, ``status``, ``hull`` (exact match),
    ``max_price``, ``min_loa`` (a 0.15 m tolerance is subtracted),
    ``year_min``, ``year_max``, ``sort`` (a key of the ``sorts`` table below)
    and ``limit`` (default 200, capped at 500). Numeric arguments that do not
    parse are ignored rather than failing the request.

    Returns:
        JSON ``{'vessels': [...], 'count': n}`` with ``flags`` and ``images``
        decoded from their stored JSON text.
    """
    args = request.args
    q = "SELECT * FROM vessels WHERE 1=1"
    params = []

    for column, arg in (('vessel_type', 'type'), ('status', 'status'), ('hull', 'hull')):
        if value := args.get(arg):
            q += f" AND {column}=?"
            params.append(value)

    # Parse first and only then extend the SQL, so the placeholders and the
    # bound parameters always stay in step.
    if (max_price := _vessel_filter_number(args.get('max_price'), float)) is not None:
        q += " AND price_usd <= ?"
        params.append(max_price)
    if (min_loa := _vessel_filter_number(args.get('min_loa'), float)) is not None:
        q += " AND loa_m IS NOT NULL AND loa_m >= ?"
        params.append(round(min_loa - 0.15, 2))
    if (year_min := _vessel_filter_number(args.get('year_min'), int)) is not None:
        q += " AND year_built >= ?"
        params.append(year_min)
    if (year_max := _vessel_filter_number(args.get('year_max'), int)) is not None:
        q += " AND year_built <= ?"
        params.append(year_max)

    # ORDER BY cannot be parameterized; only whitelisted fragments are used.
    sorts = {
        'score':'score DESC', 'price_asc':'price_usd ASC',
        'price_desc':'price_usd DESC', 'loa':'loa_m DESC',
        'year':'year_built DESC', 'newest':'created_at DESC'
    }
    q += f" ORDER BY {sorts.get(args.get('sort', 'score'), 'score DESC')}"

    # SQLite treats a negative LIMIT as "no limit", which would bypass the cap.
    limit = _vessel_filter_number(args.get('limit', 200), int)
    if limit is None or limit < 0:
        limit = 200
    q += " LIMIT ?"
    params.append(min(limit, 500))

    conn = get_db()
    try:
        rows = [dict(r) for r in conn.execute(q, params).fetchall()]
    finally:
        conn.close()
    for r in rows:
        r['flags']  = json.loads(r.get('flags')  or '[]')
        r['images'] = json.loads(r.get('images') or '[]')
    return jsonify({'vessels': rows, 'count': len(rows)})

# Registrable domains the image proxy may fetch from (subdomains included).
_PROXY_ALLOWED = [
    'sailboatlistings.com', 'yachtworld.com', 'boattrader.com',
    'apolloduck.com', 'rightboat.com', 'boat24.com', 'seaboats.net',
    'boats.com', 'iboats.com', 'yachtworld.co.uk',
]

@app.route('/api/img_proxy')
def img_proxy():
    """Fetch a remote listing image and relay it to the browser.

    The ``url`` query argument must be an http(s) URL whose host is one of
    ``_PROXY_ALLOWED`` or a subdomain of one.

    Returns:
        The upstream body with its content type and a one-day cache header on
        HTTP 200; an empty body with 404 (no url), 403 (URL not allowed), the
        upstream status (non-200), or 502 (fetch failed).
    """
    from urllib.parse import urlparse
    # Response is not among the flask names imported at the top of the file;
    # import it here so the success path cannot fail with a NameError.
    from flask import Response

    url = request.args.get('url', '')
    if not url:
        return '', 404
    try:
        parsed = urlparse(url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. a broken IPv6 literal).
        return '', 403
    if parsed.scheme not in ('http', 'https'):
        return '', 403
    # Match the exact domain or a true subdomain; a substring test would also
    # accept hosts such as "boats.com.attacker.example".
    if not any(host == d or host.endswith('.' + d) for d in _PROXY_ALLOWED):
        return '', 403
    try:
        # NOTE(review): redirects are followed, so an allowed host could still
        # redirect the fetch elsewhere — consider validating the final URL.
        resp = requests.get(url, timeout=10, headers={
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
            'Referer': f'https://{host}/',
            'Accept': 'image/webp,image/apng,image/*,*/*;q=0.8',
        })
        if resp.status_code == 200:
            ct = resp.headers.get('content-type', 'image/jpeg')
            return Response(resp.content, content_type=ct,
                            headers={'Cache-Control': 'public, max-age=86400'})
        return '', resp.status_code
    except Exception as e:
        app.logger.debug(f"img_proxy error: {e}")
        return '', 502


# Module-level progress record for the background search. The search routes
# mutate it and /api/search/status returns it as JSON.
search_state = dict(
    running=False,
    cancelled=False,
    query='',
    found=0,
    total_sources=0,
    sources_done=0,
    log=[],
)

@app.route('/api/search', methods=['POST'])
def search():
    """Start a background search for the JSON ``query`` (with optional ``filters``).

    Clears the ``vessels`` and ``saved_vessels`` tables, records the query in
    ``search_history``, resets ``search_state`` and runs ``search_with_ai`` in
    a daemon thread. Responds immediately; progress is read from
    ``search_state``.
    """
    import uuid

    payload = request.json or {}
    query   = payload.get('query', '')
    filters = payload.get('filters', {})
    if not query:
        return jsonify({'error': 'query requerido'}), 400

    # Clear previous results immediately and log the new query.
    db = get_db()
    for statement in ("DELETE FROM vessels", "DELETE FROM saved_vessels"):
        db.execute(statement)
    db.execute("INSERT INTO search_history (query,filters) VALUES (?,?)",
               (query, json.dumps(filters)))
    db.commit()
    db.close()

    # Reset the shared state. The unique id lets a finishing worker tell
    # whether it still belongs to the current search.
    search_id = str(uuid.uuid4())
    search_state.update({
        'running':       True,
        'cancelled':     False,
        'query':         query,
        'found':         0,
        'sources_done':  0,
        'total_sources': len(DIRECT_SOURCES),
        'log':           [f"Iniciando búsqueda: {query}"],
        'search_id':     search_id,
    })

    def run_bg(sid):
        """Run the search; finalize the state only if *sid* is still current."""
        try:
            search_with_ai(query, filters)
        except Exception as exc:
            search_state['log'].append(f"Error: {exc}")
            print(f"[BG] Error: {exc}")
        finally:
            if search_state.get('search_id') == sid:
                search_state['running'] = False
                found = search_state['found']
                summary = f"✓ Búsqueda completa — {found} embarcaciones encontradas"
                search_state['log'].append(summary)
                print(f"[BG] {summary}")

    worker = threading.Thread(target=run_bg, args=(search_id,), daemon=True)
    worker.start()

    return jsonify({'ok': True, 'message': 'Búsqueda iniciada en background'})

@app.route('/api/search/status')
def search_status():
    """Return the shared ``search_state`` dict as JSON."""
    state = search_state
    return jsonify(state)

@app.route('/api/search/cancel', methods=['POST'])
def cancel_search():
    """Flag the current search as cancelled and replace its search id."""
    import uuid
    # A fresh id invalidates any running thread: its completion step only
    # fires while the id it was started with is still current.
    search_state.update(
        cancelled=True,
        running=False,
        search_id=str(uuid.uuid4()),
    )
    search_state['log'].append('⏹ Búsqueda cancelada por el usuario')
    return jsonify({'ok': True})

@app.route('/api/fb-status')
def fb_status():
    """Report whether ``fb_session.json`` exists next to this module."""
    module_dir = os.path.dirname(__file__)
    session_path = os.path.join(module_dir, "fb_session.json")
    has_session = os.path.exists(session_path)
    return jsonify({"active": has_session})


@app.route('/api/fb-setup', methods=['POST'])
def fb_setup():
    """
    Launch a visible Chromium window so the user can log in to Facebook.

    Opens the Facebook login page and waits up to 3 minutes for the page URL
    to match facebook.com/(marketplace|home|feed). When it does, the browser
    context's cookies are written to fb_session.json next to this module.

    Returns a JSON object {"ok": bool, "msg": str}; failures raised outside
    the login wait (e.g. playwright import or browser launch) are answered
    with HTTP 500.
    """
    SESSION_FILE = os.path.join(os.path.dirname(__file__), "fb_session.json")
    import json as _json
    try:
        # Imported here, inside the try, so an import failure is reported as a
        # 500 JSON response by the outer except.
        from playwright.sync_api import sync_playwright
        result = {"ok": False, "msg": ""}
        with sync_playwright() as p:
            # headless=False: the user has to type credentials in the window.
            browser = p.chromium.launch(
                headless=False,
                args=["--disable-blink-features=AutomationControlled"])
            context = browser.new_context(
                viewport={"width": 1100, "height": 800},
                user_agent=("Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                            "AppleWebKit/537.36 (KHTML, like Gecko) "
                            "Chrome/122.0.0.0 Safari/537.36"))
            page = context.new_page()
            page.goto("https://www.facebook.com/login", timeout=30000,
                      wait_until="domcontentloaded")
            # Wait up to 3 minutes for user to log in and reach marketplace
            try:
                page.wait_for_url(
                    re.compile(r'facebook\.com/(marketplace|home|feed)'),
                    timeout=180000)
                # Give extra time to fully load
                page.wait_for_timeout(3000)
                # Persist the logged-in cookies for later use.
                cookies = context.cookies()
                with open(SESSION_FILE, "w") as f:
                    _json.dump(cookies, f)
                result = {"ok": True,
                          "msg": f"Sesión guardada ({len(cookies)} cookies). "
                                  "Facebook Marketplace activado."}
            except Exception as e:
                # Covers the wait timing out as well as cookie/file errors.
                result = {"ok": False, "msg": f"Tiempo agotado o error: {e}"}
            finally:
                # Best-effort close; the page may already be gone.
                try: page.close()
                except: pass
            browser.close()
        return jsonify(result)
    except Exception as e:
        return jsonify({"ok": False, "msg": str(e)}), 500


@app.route('/api/vessels/<int:vid>', methods=['GET'])
def get_vessel(vid):
    """Return one vessel by id with ``flags``/``images`` decoded, or a 404."""
    conn = get_db()
    cursor = conn.execute("SELECT * FROM vessels WHERE id=?", (vid,))
    row = cursor.fetchone()
    conn.close()
    if row:
        vessel = dict(row)
        # Both columns hold JSON text; NULL or empty is treated as an empty list.
        for column in ('flags', 'images'):
            vessel[column] = json.loads(vessel.get(column) or '[]')
        return jsonify(vessel)
    return jsonify({'error': 'not found'}), 404

@app.route('/api/vessels', methods=['POST'])
def add_vessel():
    """Store a vessel submitted as a JSON object.

    ``source_name`` defaults to ``'Manual'`` when the body does not carry it.

    Returns:
        ``{'id': <row id>, 'ok': True}`` when the vessel was stored or already
        existed; a 400 with ``ok: False`` when the body is not an object or
        ``save_vessel`` did not store it.
    """
    v = request.json or {}
    if not isinstance(v, dict):
        return jsonify({'id': -1, 'ok': False, 'error': 'JSON object required'}), 400
    v.setdefault('source_name', 'Manual')
    vid = save_vessel(v)
    # save_vessel signals a rejected or failed save with -1; do not report
    # that as a success.
    if vid is None or vid < 0:
        return jsonify({'id': vid, 'ok': False,
                        'error': 'vessel could not be saved'}), 400
    return jsonify({'id': vid, 'ok': True})

@app.route('/api/vessels/<int:vid>', methods=['PUT'])
def update_vessel(vid):
    """Update the whitelisted columns of a vessel from the JSON body."""
    body = request.json or {}
    conn = get_db()
    editable = ('name','vessel_type','loa_m','beam_m','draft_m','year_built',
                'hull','propulsion','status','price_usd','location','description','score')
    # Column names come only from the whitelist above, never from the request.
    changes = [(column, body[column]) for column in editable if column in body]
    if changes:
        assignments = ', '.join(f"{column}=?" for column, _ in changes)
        values = [value for _, value in changes]
        values.append(vid)
        conn.execute(f"UPDATE vessels SET {assignments}, updated_at=datetime('now') WHERE id=?",
                     values)
        conn.commit()
    conn.close()
    return jsonify({'ok': True})

@app.route('/api/vessels/<int:vid>', methods=['DELETE'])
def delete_vessel(vid):
    """Delete a vessel together with every row that references it."""
    conn = get_db()
    try:
        conn.execute("DELETE FROM vessels WHERE id=?", (vid,))
        conn.execute("DELETE FROM saved_vessels WHERE vessel_id=?", (vid,))
        # Also drop collection memberships; otherwise the per-collection
        # vessel counts keep counting a vessel that no longer exists.
        conn.execute("DELETE FROM collection_vessels WHERE vessel_id=?", (vid,))
        conn.commit()
    finally:
        conn.close()
    return jsonify({'ok': True})

@app.route('/api/saved', methods=['GET'])
def list_saved():
    """Return saved vessels with their notes, most recently saved first."""
    def decode(row):
        # flags/images are stored as JSON text; NULL or empty means no entries.
        vessel = dict(row)
        vessel['flags']  = json.loads(vessel.get('flags')  or '[]')
        vessel['images'] = json.loads(vessel.get('images') or '[]')
        return vessel

    conn = get_db()
    rows = conn.execute("""
        SELECT v.*, s.notes, s.saved_at
        FROM vessels v JOIN saved_vessels s ON v.id=s.vessel_id
        ORDER BY s.saved_at DESC
    """).fetchall()
    vessels = [decode(row) for row in rows]
    conn.close()
    return jsonify({'vessels': vessels, 'count': len(vessels)})

@app.route('/api/saved/<int:vid>', methods=['POST'])
def save_vessel_fav(vid):
    """Mark vessel *vid* as saved, with optional ``notes``; no-op if already saved."""
    payload = request.json or {}
    notes = payload.get('notes', '')
    conn = get_db()
    already_saved = conn.execute(
        "SELECT id FROM saved_vessels WHERE vessel_id=?", (vid,)).fetchone()
    if already_saved:
        conn.close()
        return jsonify({'ok': True})
    conn.execute("INSERT INTO saved_vessels (vessel_id, notes) VALUES (?,?)", (vid, notes))
    conn.commit()
    conn.close()
    return jsonify({'ok': True})

@app.route('/api/saved/<int:vid>', methods=['DELETE'])
def unsave_vessel(vid):
    """Remove vessel *vid* from the saved list."""
    db = get_db()
    db.execute("DELETE FROM saved_vessels WHERE vessel_id=?", (vid,))
    db.commit()
    db.close()
    ack = {'ok': True}
    return jsonify(ack)

@app.route('/api/alerts', methods=['GET'])
def list_alerts():
    """Return every alert whose ``active`` flag is 1."""
    conn = get_db()
    cursor = conn.execute("SELECT * FROM alerts WHERE active=1")
    alerts = []
    for record in cursor.fetchall():
        alerts.append(dict(record))
    conn.close()
    return jsonify({'alerts': alerts})

@app.route('/api/alerts', methods=['POST'])
def create_alert():
    """Store a new alert; ``name`` defaults to 'Alerta', ``filters`` to ``{}``."""
    payload = request.json or {}
    alert_name = payload.get('name','Alerta')
    filters_json = json.dumps(payload.get('filters',{}))
    db = get_db()
    db.execute("INSERT INTO alerts (name, filters) VALUES (?,?)",
               (alert_name, filters_json))
    db.commit()
    db.close()
    return jsonify({'ok': True})

@app.route('/api/alerts/<int:aid>', methods=['DELETE'])
def delete_alert(aid):
    """Deactivate alert *aid*; the row is kept with ``active=0``."""
    db = get_db()
    db.execute("UPDATE alerts SET active=0 WHERE id=?", (aid,))
    db.commit()
    db.close()
    ack = {'ok': True}
    return jsonify(ack)

@app.route('/api/sources')
def list_sources():
    """Return built-in and custom sources grouped by category.

    Built-in entries come from ``DIRECT_SOURCES``; custom ones from the
    ``custom_sources`` table. Loading the custom sources is best effort: on
    failure the error is logged and the sources gathered so far are returned.

    Returns:
        JSON ``{'sources': {category: [entries]}, 'total': n}``.
    """
    by_cat = {}
    for s in DIRECT_SOURCES:
        by_cat.setdefault(s['category'], []).append(
            {'name': s['name'], 'url': s['search_url'].split('?')[0],
             'type': s['type'], 'builtin': True})

    # Add custom sources.
    conn = None
    try:
        conn = get_db()
        custom = [dict(r) for r in conn.execute(
            "SELECT * FROM custom_sources ORDER BY category").fetchall()]
        for c in custom:
            by_cat.setdefault(c['category'] or 'Custom', []).append({
                'name': c['name'], 'url': c['search_url'].split('?')[0],
                'type': c['source_type'], 'builtin': False,
                'id': c['id'], 'active': bool(c['active'])
            })
    except Exception as e:
        # Keep the endpoint usable without custom sources, but leave a trace
        # instead of swallowing the error silently.
        print(f"[Sources] Could not load custom sources: {e}")
    finally:
        if conn is not None:
            conn.close()
    return jsonify({'sources': by_cat, 'total': sum(len(v) for v in by_cat.values())})

@app.route('/api/history')
def search_history():
    """Return the 50 most recent ``search_history`` rows, newest first."""
    conn = get_db()
    cursor = conn.execute(
        "SELECT * FROM search_history ORDER BY searched_at DESC LIMIT 50")
    history = []
    for record in cursor.fetchall():
        history.append(dict(record))
    conn.close()
    return jsonify({'history': history})

@app.route('/api/analyze', methods=['POST'])
def analyze_text():
    """Extract a vessel from the JSON ``text`` and store it when one is found.

    The response is whatever ``extract_vessel_from_text`` returned. A truthy
    result is saved via ``save_vessel`` and gets the returned id under ``id``.
    """
    payload = request.json or {}
    text = payload.get('text', '')
    source = payload.get('source', 'Manual')
    if not text:
        return jsonify({'error': 'text requerido'}), 400

    result = extract_vessel_from_text(text, source)
    if not result:
        # Nothing extracted: pass the falsy result through unchanged.
        return jsonify(result)
    result['id'] = save_vessel({**result, 'source_name': source})
    return jsonify(result)

@app.route('/api/collections', methods=['GET'])
def list_collections():
    """Return all collections with their vessel counts, newest first."""
    conn = get_db()
    cursor = conn.execute(
        "SELECT c.*, COUNT(cv.vessel_id) as vessel_count FROM collections c "
        "LEFT JOIN collection_vessels cv ON c.id=cv.collection_id "
        "GROUP BY c.id ORDER BY c.created_at DESC")
    collections = []
    for record in cursor.fetchall():
        collections.append(dict(record))
    conn.close()
    return jsonify({'collections': collections})

@app.route('/api/collections', methods=['POST'])
def create_collection():
    """Create a collection from the JSON body and return its new id."""
    payload = request.json or {}
    name = payload.get('name','').strip()
    if not name:
        return jsonify({'error': 'name required'}), 400

    row = (
        name,
        payload.get('description',''),
        payload.get('color','#00b4ff'),
        payload.get('icon','📁'),
    )
    conn = get_db()
    conn.execute("INSERT INTO collections (name,description,color,icon) VALUES (?,?,?,?)", row)
    conn.commit()
    # Ask SQLite for the id generated by the insert on this connection.
    new_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
    conn.close()
    return jsonify({'ok': True, 'id': new_id})

@app.route('/api/collections/<int:cid>', methods=['DELETE'])
def delete_collection(cid):
    """Delete collection *cid* and its vessel memberships."""
    db = get_db()
    # Memberships first, then the collection row itself.
    for statement in ("DELETE FROM collection_vessels WHERE collection_id=?",
                      "DELETE FROM collections WHERE id=?"):
        db.execute(statement, (cid,))
    db.commit()
    db.close()
    return jsonify({'ok': True})

@app.route('/api/collections/<int:cid>/vessels', methods=['GET'])
def collection_vessels(cid):
    """Return the vessels of collection *cid*, most recently added first."""
    def decode(row):
        # flags/images are stored as JSON text; NULL or empty means no entries.
        vessel = dict(row)
        vessel['flags']  = json.loads(vessel.get('flags')  or '[]')
        vessel['images'] = json.loads(vessel.get('images') or '[]')
        return vessel

    conn = get_db()
    rows = conn.execute("""
        SELECT v.*, cv.notes, cv.added_at FROM vessels v
        JOIN collection_vessels cv ON v.id=cv.vessel_id
        WHERE cv.collection_id=? ORDER BY cv.added_at DESC""", (cid,)).fetchall()
    vessels = [decode(row) for row in rows]
    conn.close()
    return jsonify({'vessels': vessels, 'count': len(vessels)})

@app.route('/api/collections/<int:cid>/vessels', methods=['POST'])
def add_to_collection(cid):
    """Add vessels to collection *cid*.

    Expects a JSON body with ``vessel_ids`` (a list) and optional ``notes``.

    Returns:
        ``{'ok': True, 'added': n}`` where ``n`` counts only the rows that
        were actually inserted (vessels already in the collection are not
        counted); 400 when ``vessel_ids`` is not a list.
    """
    body = request.json or {}
    vessel_ids = body.get('vessel_ids', [])
    notes = body.get('notes', '')
    if not isinstance(vessel_ids, list):
        return jsonify({'error': 'vessel_ids must be a list'}), 400

    conn = get_db()
    added = 0
    try:
        for vid in vessel_ids:
            try:
                cur = conn.execute(
                    "INSERT OR IGNORE INTO collection_vessels (collection_id,vessel_id,notes) VALUES (?,?,?)",
                    (cid, vid, notes))
            except sqlite3.Error as e:
                # Skip a bad id (e.g. an unsupported value type) and keep going.
                print(f"[Collections] Could not add vessel {vid!r}: {e}")
                continue
            # rowcount is 0 when OR IGNORE skipped the insert.
            if cur.rowcount > 0:
                added += 1
        conn.commit()
    finally:
        conn.close()
    return jsonify({'ok': True, 'added': added})

@app.route('/api/collections/<int:cid>/vessels/<int:vid>', methods=['DELETE'])
def remove_from_collection(cid, vid):
    """Remove vessel *vid* from collection *cid*."""
    membership = (cid, vid)
    db = get_db()
    db.execute("DELETE FROM collection_vessels WHERE collection_id=? AND vessel_id=?", membership)
    db.commit()
    db.close()
    return jsonify({'ok': True})

@app.route('/api/custom_sources', methods=['GET'])
def get_custom_sources():
    """Return every custom source row, newest first."""
    conn = get_db()
    cursor = conn.execute(
        "SELECT * FROM custom_sources ORDER BY created_at DESC")
    sources = []
    for record in cursor.fetchall():
        sources.append(dict(record))
    conn.close()
    return jsonify({'sources': sources})

@app.route('/api/custom_sources', methods=['POST'])
def add_custom_source():
    """Register a user-defined search source.

    Expects a JSON body with ``name`` and ``search_url``; ``category``
    (default 'Custom') and ``source_type`` (default 'broker') are optional.
    A ``q={query}`` parameter is appended when the URL has no ``{query}``
    placeholder.

    Returns:
        ``{'ok': True, 'id': <new id>}``, or a 400 when ``name`` or
        ``search_url`` is missing or not a string.
    """
    body = request.json or {}
    name = body.get('name', '')
    url = body.get('search_url', '')
    if not isinstance(name, str) or not isinstance(url, str):
        return jsonify({'error': 'name and search_url required'}), 400
    name = name.strip()
    url = url.strip()
    if not name or not url:
        return jsonify({'error': 'name and search_url required'}), 400

    # Ensure URL has {query} placeholder.
    if '{query}' not in url:
        if '?' in url:
            # The URL already has a query string: extend it with '&' instead
            # of starting a second '?' section.
            separator = '' if url.endswith(('?', '&')) else '&'
            url = f"{url}{separator}q={{query}}"
        else:
            url = url.rstrip('/') + '?q={query}'

    conn = get_db()
    try:
        conn.execute("""INSERT INTO custom_sources (name,category,search_url,source_type,added_by)
                        VALUES (?,?,?,?,?)""",
                     (name, body.get('category','Custom'),
                      url, body.get('source_type','broker'),
                      session.get('username','admin')))
        conn.commit()
        sid = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
    finally:
        conn.close()
    return jsonify({'ok': True, 'id': sid})

@app.route('/api/custom_sources/<int:sid>', methods=['PUT'])
def update_custom_source(sid):
    """Update the whitelisted columns of custom source *sid* from the JSON body."""
    body = request.json or {}
    conn = get_db()
    editable = ('name','category','search_url','source_type','active')
    # Column names come only from the whitelist above, never from the request.
    changes = [(column, body[column]) for column in editable if column in body]
    if changes:
        assignments = ', '.join(f"{column}=?" for column, _ in changes)
        values = [value for _, value in changes]
        values.append(sid)
        conn.execute(f"UPDATE custom_sources SET {assignments} WHERE id=?", values)
        conn.commit()
    conn.close()
    return jsonify({'ok': True})

@app.route('/api/custom_sources/<int:sid>', methods=['DELETE'])
def delete_custom_source(sid):
    """Delete custom source *sid*."""
    db = get_db()
    db.execute("DELETE FROM custom_sources WHERE id=?", (sid,))
    db.commit()
    db.close()
    ack = {'ok': True}
    return jsonify(ack)

@app.route('/api/stats')
def stats():
    """Return aggregate figures about the stored vessels."""
    conn = get_db()
    cur = conn.cursor()

    def scalar(sql):
        # First column of the first row of a single-value query.
        return cur.execute(sql).fetchone()[0]

    def all_rows(sql):
        return cur.execute(sql).fetchall()

    total = scalar("SELECT COUNT(*) FROM vessels")
    saved = scalar("SELECT COUNT(*) FROM saved_vessels")
    by_type = dict(all_rows("SELECT vessel_type, COUNT(*) FROM vessels GROUP BY vessel_type"))
    by_status = dict(all_rows("SELECT status, COUNT(*) FROM vessels GROUP BY status"))
    by_country = {
        (country or 'Unknown'): amount
        for country, amount in all_rows("SELECT country, COUNT(*) FROM vessels WHERE country IS NOT NULL GROUP BY country ORDER BY COUNT(*) DESC LIMIT 10")
    }
    # AVG() yields NULL on an empty set; report 0 in that case.
    avg_score = scalar("SELECT AVG(score) FROM vessels") or 0
    avg_price = scalar("SELECT AVG(price_usd) FROM vessels WHERE price_usd > 0") or 0
    top = [dict(row) for row in all_rows(
        "SELECT id,name,vessel_type,price_usd,score,location FROM vessels ORDER BY score DESC LIMIT 5")]
    conn.close()

    return jsonify({
        'total':     total,
        'saved':     saved,
        'by_type':   by_type,
        'by_status': by_status,
        'by_country': by_country,
        'avg_score': avg_score,
        'avg_price': avg_price,
        'top_opportunities': top,
    })

# ── Seed sample data ──────────────────────────────────────────────────────────
def seed_sample_data():
    """Insert six hard-coded demo vessels through ``save_vessel``.

    Each sample carries a name plus price, length, year and location, so it
    passes the minimum-data check in ``save_vessel``.
    """
    # Demo records; the descriptions are user-facing text kept in Spanish.
    samples = [
        {"name":"M/Y Stella Maris","vessel_type":"Yacht","loa_m":28.4,"beam_m":6.8,"draft_m":1.9,"year_built":2008,"hull":"Fiberglass","propulsion":"Diesel","status":"active","price_usd":189000,"location":"Fort Lauderdale, FL","country":"US","source_name":"YachtWorld","source_url":"https://yachtworld.com","description":"Yate motor bien mantenido, twin Volvo IPS, refit 2022.","flags":["below_market","motivated_seller"],"score":87},
        {"name":"F/V Cape Hatteras","vessel_type":"Fishing","loa_m":19.2,"beam_m":5.1,"draft_m":1.4,"year_built":1997,"hull":"Steel","propulsion":"Diesel","status":"salvage","price_usd":22000,"location":"Gloucester, MA","country":"US","source_name":"GovDeals","source_url":"https://govdeals.com","description":"Ex buque NOAA, motor operativo, casco requiere trabajo.","flags":["rare","salvage_value","below_market"],"score":94},
        {"name":"TUG Bravo Eagle","vessel_type":"Tug","loa_m":32.0,"beam_m":9.4,"draft_m":3.8,"year_built":1989,"hull":"Steel","propulsion":"Diesel","status":"auction","price_usd":310000,"location":"New Orleans, LA","country":"US","source_name":"AuctionTime","source_url":"https://auctiontime.com","description":"Remolcador 2400HP, clase ABS, listo para operación comercial.","flags":["rare","auction","motivated_seller"],"score":91},
        {"name":"OSV Pacific Ranger","vessel_type":"Offshore","loa_m":52.0,"beam_m":13.2,"draft_m":4.1,"year_built":2005,"hull":"Steel","propulsion":"Diesel","status":"auction","price_usd":890000,"location":"Port Fourchon, LA","country":"US","source_name":"GovPlanet","source_url":"https://govplanet.com","description":"Buque apoyo offshore DP1, 400T carga, documentación completa.","flags":["rare","auction","government_surplus"],"score":79},
        {"name":"Barge RJ-440","vessel_type":"Barge","loa_m":44.0,"beam_m":12.0,"draft_m":1.8,"year_built":1978,"hull":"Steel","propulsion":"None","status":"active","price_usd":55000,"location":"Houston, TX","country":"US","source_name":"WorkBoat Classifieds","source_url":"https://workboat.com","description":"Barcaza cubierta, capacidad 800T, buen estado estructural.","flags":["below_market","rare"],"score":73},
        {"name":"LCT Endeavour","vessel_type":"Barge","loa_m":61.0,"beam_m":14.6,"draft_m":1.5,"year_built":1968,"hull":"Steel","propulsion":"Diesel","status":"salvage","price_usd":38000,"location":"Manila, Filipinas","country":"PH","source_name":"Salvex","source_url":"https://salvex.com","description":"Landing craft, estructura sólida, motores requieren overhaul.","flags":["salvage_value","rare","below_market"],"score":82},
    ]
    # save_vessel returns the existing id for a matching fingerprint, so
    # running this again does not insert duplicates.
    for s in samples:
        save_vessel(s)

# ── Main ──────────────────────────────────────────────────────────────────────
if __name__ == '__main__':
    # Script entry point: single-instance guard via a PID file, port
    # selection, database initialization, then the Flask server.
    import atexit
    import signal
    import socket
    import sys

    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    PID_FILE = os.path.join(BASE_DIR, ".server.pid")
    IS_WINDOWS = os.name == 'nt'

    # ── Handle existing instance ───────────────────────────────────────────────
    def kill_pid(pid):
        """Forcefully terminate *pid*; return True only if the kill call succeeded."""
        if IS_WINDOWS:
            import ctypes
            kernel32 = ctypes.windll.kernel32
            handle = kernel32.OpenProcess(1, False, pid)  # 1 = PROCESS_TERMINATE
            if not handle:
                # The process is gone or cannot be opened: nothing was killed.
                return False
            try:
                return bool(kernel32.TerminateProcess(handle, -1))
            finally:
                kernel32.CloseHandle(handle)
        try:
            os.kill(pid, 9)  # 9 = SIGKILL
            return True
        except OSError:
            return False

    def pid_running(pid):
        """Return True when a process with id *pid* appears to be alive."""
        if IS_WINDOWS:
            # Do NOT probe with os.kill(pid, 0) here: on Windows any signal
            # other than the CTRL_* events terminates the target process.
            import ctypes
            kernel32 = ctypes.windll.kernel32
            handle = kernel32.OpenProcess(0x1000, False, pid)  # PROCESS_QUERY_LIMITED_INFORMATION
            if not handle:
                return False
            try:
                exit_code = ctypes.c_ulong()
                ok = kernel32.GetExitCodeProcess(handle, ctypes.byref(exit_code))
                return bool(ok) and exit_code.value == 259  # 259 = STILL_ACTIVE
            finally:
                kernel32.CloseHandle(handle)
        try:
            os.kill(pid, 0)  # signal 0: existence check only, nothing is sent
            return True
        except OSError:
            return False

    if os.path.exists(PID_FILE):
        try:
            with open(PID_FILE) as pid_fh:
                old_pid = int(pid_fh.read().strip())
            if pid_running(old_pid):
                print(f"\n  ⚠️  Ya hay una instancia corriendo (PID {old_pid})")
                resp = input("  ¿Cerrar la instancia anterior y continuar? [S/n]: ").strip().lower()
                if resp in ("", "s", "si", "sí", "y", "yes"):
                    if kill_pid(old_pid):
                        print(f"  ✓ Instancia anterior (PID {old_pid}) cerrada.")
                        # Give the old process a moment to release its port.
                        time.sleep(1)
                    else:
                        print(f"  ✗ No se pudo cerrar. Ciérrala manualmente y vuelve a intentar.")
                        sys.exit(1)
                else:
                    print("  Saliendo sin cambios.")
                    sys.exit(0)
        except (ValueError, OSError):
            pass  # PID file corrupted or unreadable — ignore

    # ── Write PID file ─────────────────────────────────────────────────────────
    with open(PID_FILE, "w") as f:
        f.write(str(os.getpid()))

    def cleanup_pid():
        """Remove the PID file; a missing file is not an error."""
        try:
            os.remove(PID_FILE)
        except OSError:
            pass

    atexit.register(cleanup_pid)

    def handle_signal(sig, frame):
        """Exit cleanly on SIGINT/SIGTERM."""
        print("\n\n  👋  Cerrando Boat&Ship-Finder...")
        cleanup_pid()
        sys.exit(0)

    signal.signal(signal.SIGINT,  handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    # ── Port selection ─────────────────────────────────────────────────────────
    def port_free(p):
        """Return True when TCP port *p* can be bound on all interfaces."""
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            try:
                s.bind(("0.0.0.0", p))
                return True
            except OSError:
                return False

    desired = int(os.environ.get('MARINE_PORT', 8765))
    port = desired
    if not port_free(desired):
        # Try the next 19 ports; stop instead of announcing a busy port.
        port = next((p for p in range(desired + 1, desired + 20) if port_free(p)), None)
        if port is None:
            print(f"\n  ✗ Puerto {desired} ocupado y no hay puertos libres hasta {desired + 19}.")
            sys.exit(1)
        print(f"\n  ⚠️  Puerto {desired} ocupado — usando {port}")

    # ── DB init ────────────────────────────────────────────────────────────────
    print("\n" + "="*55)
    print("  Boat&Ship-Finder — Iniciando...")
    print("="*55)
    init_db()
    seed_admin()
    conn  = get_db()
    count = conn.execute("SELECT COUNT(*) FROM vessels").fetchone()[0]
    conn.close()
    if count == 0:
        print("[DB] Base de datos vacía — lista para búsquedas reales")
    else:
        print(f"[DB] {count} embarcaciones en caché de sesión anterior")

    print(f"\n  Local:    http://localhost:{port}")
    print(f"  Tailscale: http://<tu-ip-tailscale>:{port}")
    print(f"  Fuentes directas: {len(DIRECT_SOURCES)}")
    print(f"  Modelos Ollama: {list(MODELS.values())}")
    print(f"  PID: {os.getpid()} (guardado en .server.pid)")
    print("\n  [Ctrl+C para detener]\n")
    app.run(host='0.0.0.0', port=port, debug=False)