Upload files to "app"

This commit is contained in:
2025-09-07 00:04:26 +00:00
Commit 960e46bd6d
5 changed files with 2070 additions and 0 deletions

app/Dockerfile Normal file

@@ -0,0 +1,123 @@
# Official Playwright image (includes browsers)
FROM mcr.microsoft.com/playwright/python:v1.44.0
# Build-time arg: set to 1 to fail the build if any pkg_resources usages are found in site-packages
ARG FAIL_ON_PKG_RESOURCES=0
ENV FAIL_ON_PKG_RESOURCES=${FAIL_ON_PKG_RESOURCES}
# Non-interactive apt
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /app
# Copy requirements first to leverage Docker cache
COPY requirements.txt .
# Install useful system packages
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        build-essential \
        libxml2-dev \
        libxslt1-dev \
        libssl-dev \
        libffi-dev \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*
# Upgrade pip/setuptools/wheel to latest (we aim to support setuptools >= 81 after code migration)
RUN python -m pip install --upgrade pip setuptools wheel
# Install runtime Python deps from requirements
RUN pip install --no-cache-dir -r requirements.txt
# Install auxiliary packages / backports & tooling we rely on
# - packaging: requirement parsing & version handling
# - importlib_metadata / importlib_resources: backports if running on older Python
# - wafw00f: WAF detection tool used by the project
RUN pip install --no-cache-dir \
    packaging \
    importlib_metadata \
    importlib_resources \
    wafw00f
# Copy the rest of the project files
COPY . .
# Create evidence directory in tmpfs location and set permissions (used by utils.store_raw_evidence)
ENV SUPERR_EVIDENCE_DIR=/dev/shm/superrecon_evidence
RUN mkdir -p ${SUPERR_EVIDENCE_DIR} \
    && chown -R pwuser:pwuser ${SUPERR_EVIDENCE_DIR} \
    && chmod 750 ${SUPERR_EVIDENCE_DIR} || true
# Optional build-time check: look for any remaining 'import pkg_resources' usages
# If FAIL_ON_PKG_RESOURCES=1 the build will fail when any occurrences are found.
# This check scans site-packages for python files mentioning pkg_resources.
RUN python - <<'PY' || (test "$FAIL_ON_PKG_RESOURCES" = "0" && exit 0)
import os, sys, site
from pathlib import Path

def scan_paths(paths):
    hits = []
    for root in paths:
        rootp = Path(root)
        if not rootp.exists():
            continue
        for p in rootp.rglob("*.py"):
            try:
                txt = p.read_text(encoding="utf-8", errors="ignore")
            except Exception:
                continue
            if "import pkg_resources" in txt or "pkg_resources." in txt:
                hits.append(str(p))
    return hits

paths = []
try:
    sp = site.getsitepackages()
    for p in sp:
        paths.append(p)
except Exception:
    # Fallback to common locations
    paths += [
        "/usr/local/lib/python3.10/site-packages",
        "/usr/lib/python3/dist-packages",
        "/usr/local/lib/python3.9/site-packages",
    ]

hits = scan_paths(paths)
if hits:
    print("==========================================")
    print("WARNING: Detected uses of pkg_resources in installed packages (first 200 shown):")
    for h in hits[:200]:
        print(" -", h)
    print("==========================================")
    # If FAIL_ON_PKG_RESOURCES is set, fail the build
    if os.environ.get("FAIL_ON_PKG_RESOURCES", "0") == "1":
        print("FAIL_ON_PKG_RESOURCES=1 -> Failing build due to pkg_resources usages.")
        sys.exit(1)
else:
    print("No pkg_resources usages found in scanned site-packages paths.")
PY
# Ensure non-root runtime (pwuser exists in Playwright base image)
USER pwuser
# Expose application port (configurable via APP_PORT env)
ENV APP_PORT=8000
EXPOSE ${APP_PORT}
# Healthcheck
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD curl -f http://127.0.0.1:${APP_PORT}/health || exit 1
# Default environment variables (can be overridden at runtime)
ENV PYTHONUNBUFFERED=1
ENV MAX_CONCURRENT_SCANS=8
ENV SCAN_TIMEOUT=180
ENV RATE_LIMIT="15/minute"
ENV LOG_LEVEL=INFO
ENV UVICORN_WORKERS=1
# Default command: run Uvicorn (assumes app package path app.main:app); note the port is fixed at 8000 here, while APP_PORT affects only EXPOSE and the healthcheck
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "1"]

app/analyze_site.py Normal file

@@ -0,0 +1,274 @@
# analyze_site.py
# Updated to match the improved utils.py (compat_resources, run_scan_for_url, etc.)
import logging
from typing import Dict, Any, Optional
import asyncio
import sys

# Try flexible imports so this file works whether utils.py is at project root or inside `app` package.
try:
    # Preferred when utils is inside the `app` package (app/utils.py)
    from app.utils import safe_json, run_scan_for_url, generate_scan_id
except Exception:
    try:
        # Fallback to top-level utils.py
        from utils import safe_json, run_scan_for_url, generate_scan_id  # type: ignore
    except Exception as e:
        raise ImportError("Could not import required utilities (safe_json, run_scan_for_url, generate_scan_id).") from e

logger = logging.getLogger("SuperRecon")
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
    logger.setLevel("INFO")
async def run_scan(target_url: str, render_js: bool = False) -> Dict[str, Any]:
    """
    Orchestrates a full site scan for a single URL using run_scan_for_url from utils.
    Returns the raw report (dict) or a safe_json-wrapped error dict.
    """
    scan_id = generate_scan_id()
    logger.info(f"Starting scan {scan_id} for URL: {target_url} (render_js={render_js})")
    try:
        # run_scan_for_url already accepts scan_id and render_js and returns a dict
        report = await run_scan_for_url(target_url, render_js=render_js, scan_id=scan_id)
        logger.info(f"Scan {scan_id} completed successfully for {target_url}.")
        # Ensure report is a dict and include scan_id
        if not isinstance(report, dict):
            report = {"error": "invalid_report", "details": "Scanner returned non-dict result", "raw": str(report)}
        report.setdefault("scan_id", scan_id)
        report.setdefault("scanned_url", report.get("url", target_url))
        return report
    except Exception as e:
        logger.error(f"Scan {scan_id} failed with error: {e}", exc_info=True)
        return safe_json({"error": "Scan failed", "details": str(e), "scan_id": scan_id, "scanned_url": target_url})
def _fmt_confidence(conf: Optional[Any]) -> str:
    try:
        if conf is None:
            return "0%"
        # If float in [0,1], convert to percent
        if isinstance(conf, float) and 0.0 <= conf <= 1.0:
            return f"{int(round(conf * 100))}%"
        # Else try numeric
        val = int(float(conf))
        if 0 <= val <= 100:
            return f"{val}%"
        return f"{max(0, min(val, 100))}%"
    except Exception:
        try:
            return f"{int(conf)}%"
        except Exception:
            return str(conf)
def format_final_report(report_data: Dict[str, Any]) -> str:
    """
    Formats the raw scan report data into a human-readable, well-structured string (Arabic).
    Tolerant to different shapes of report_data (single report or wrapper).
    """
    if "error" in report_data:
        return f"❌ تقرير الفحص: حدث خطأ\n\n{report_data.get('details', 'لا يوجد تفاصيل')}"

    # Accept either {"full_report": [...]} or a single report dict
    full_reports = report_data.get("full_report")
    if not full_reports:
        # If the provided dict already looks like a single scan report, wrap it
        if "scanned_url" in report_data or "url" in report_data:
            full_reports = [report_data]
        else:
            # If a summary with a list of reports is provided, try extracting it
            if isinstance(report_data.get("reports"), list):
                full_reports = report_data.get("reports")
            else:
                return "⚠️ لم يتم العثور على تقارير فحص.\nقد يكون الموقع غير متاح أو لم يتم تنفيذ الفحص."

    output_str = "✨ **تقرير فحص شامل للموقع** ✨\n\n"
    output_str += "---\n\n"

    # Summary of scanned URLs (if available)
    scanned_urls_summary = report_data.get("summary", {}).get("scanned_urls", [])
    output_str += "**✅ الصفحات التي تم فحصها:**\n"
    if scanned_urls_summary:
        output_str += "\n".join([f"{url}" for url in scanned_urls_summary]) + "\n\n"
    else:
        collected = [r.get("scanned_url") or r.get("url") for r in full_reports if r.get("scanned_url") or r.get("url")]
        if collected:
            output_str += "\n".join([f"{url}" for url in collected]) + "\n\n"
        else:
            output_str += "• لم يتم توفير ملخص للروابط المفحوصة.\n\n"

    for report in full_reports:
        url = report.get("scanned_url", report.get("url", "URL غير معروف"))
        scan_id = report.get("scan_id", "")
        scanned_at = report.get("scanned_at", "غير معروف")
        output_str += "---\n\n"
        output_str += f"### **🌐 تقرير الفحص لصفحة: {url}**\n"
        if scan_id:
            output_str += f"- **معرّف الفحص:** `{scan_id}`\n"
        if scanned_at:
            output_str += f"- **وقت الفحص:** {scanned_at}\n"
        output_str += "\n"

        # Security Headers
        output_str += "**🛡️ رؤوس الأمان (Security Headers):**\n"
        sec_headers = report.get("security_headers", {})
        if sec_headers:
            for h, d in sec_headers.items():
                try:
                    # d may be a dict with status/value
                    if isinstance(d, dict):
                        status = d.get("status", "")
                        value = d.get("value", "")
                        output_str += f" - **{h}**: {status} — `{value}`\n"
                    else:
                        output_str += f" - **{h}**: {d}\n"
                except Exception:
                    output_str += f" - **{h}**: {d}\n"
        else:
            output_str += " - لم يتم العثور على رؤوس أمان أساسية.\n"
        output_str += "\n"

        # DNS Records
        output_str += "**📡 معلومات DNS:**\n"
        dns_records = report.get("dns_records", {})
        if dns_records:
            for rtype, records in dns_records.items():
                try:
                    recs_display = ", ".join(records) if isinstance(records, (list, tuple)) and records else str(records)
                except Exception:
                    recs_display = str(records)
                output_str += f" - **{rtype}**: {recs_display}\n"
        else:
            output_str += " - لا توجد سجلات DNS أو لم يتم استردادها.\n"
        output_str += "\n"

        # SSL Info
        output_str += "**🔒 شهادة SSL:**\n"
        ssl_info = report.get("ssl_info", {}) or {}
        if ssl_info.get("valid"):
            not_after = ssl_info.get("not_after", "غير معروف")
            issuer = ssl_info.get("issuer") or {}
            issuer_cn = issuer.get("CN") if isinstance(issuer, dict) else issuer
            output_str += f" - ✅ صالحة حتى: {not_after}\n"
            output_str += f" - جهة الإصدار: {issuer_cn if issuer_cn else issuer}\n"
        elif ssl_info.get("error"):
            output_str += f" - ❌ خطأ في فحص الشهادة: {ssl_info.get('error')}\n"
        else:
            output_str += " - ❌ غير مفعلة أو غير متاحة.\n"
        output_str += "\n"

        # Technologies
        output_str += "**🛠️ التقنيات المكتشفة:**\n"
        teks = report.get("technologies", []) or []
        if teks:
            # Sort by confidence desc and show all (or limit if you want)
            for t in sorted(teks, key=lambda x: x.get('confidence', 0), reverse=True):
                name = t.get("name", "غير معروف")
                confidence = _fmt_confidence(t.get("confidence", 0))
                category = t.get("categories") or t.get("category") or []
                if isinstance(category, (list, tuple)):
                    cat_display = ", ".join(category) if category else "غير محدد"
                else:
                    cat_display = str(category)
                source = t.get("source", "غير معروف")
                version = t.get("version", "") or ""
                # confidence is already formatted ("85%"); parse it defensively since
                # _fmt_confidence can fall back to a non-numeric string
                try:
                    conf_val = int(confidence.strip("%"))
                except Exception:
                    conf_val = 0
                emoji = "" if conf_val > 90 else "👍" if conf_val > 70 else "🧐"
                output_str += f" - {emoji} **{name}**"
                if version:
                    output_str += f" (الإصدار: {version})"
                output_str += "\n"
                output_str += f" - **الفئة**: {cat_display}\n"
                output_str += f" - **الثقة**: {confidence}\n"
                output_str += f" - **المصدر**: {source}\n"
        else:
            output_str += " - لم يتم العثور على تقنيات.\n"
        output_str += "\n"

        # Robots.txt
        output_str += "**🤖 ملف Robots.txt:**\n"
        robots_info = report.get("robots_info", {}) or {}
        if robots_info.get("exists"):
            output_str += f" - ✅ **موجود** في: {robots_info.get('fetched_from')}\n"
            if robots_info.get("sitemaps"):
                s = robots_info.get("sitemaps")
                output_str += f" - **Sitemaps**: {', '.join(s)}\n"
            if robots_info.get("rules"):
                output_str += " - **قواعد**: يحتوي على قواعد Allow/Disallow.\n"
        else:
            tried = robots_info.get("tried") or []
            if tried:
                output_str += f" - ❌ غير موجود بعد محاولة الوصول إلى: {', '.join(tried)}\n"
            else:
                output_str += " - ❌ غير موجود أو لم يتم فحصه.\n"
        output_str += "\n"

        # Payment Methods
        output_str += "**💳 طرق الدفع:**\n"
        payment_methods = report.get("payment_methods", []) or []
        if payment_methods:
            names = []
            for method in payment_methods:
                if isinstance(method, dict):
                    names.append(method.get("name") or str(method))
                else:
                    names.append(str(method))
            output_str += f" - تم العثور على: {', '.join(names)}\n"
        else:
            output_str += " - لم يتم العثور على طرق دفع معروفة.\n"
        output_str += "\n"

        # Trackers & Analytics
        output_str += "**📈 المتتبعات (Trackers & Analytics):**\n"
        trackers_info = report.get("trackers_and_analytics", []) or []
        if trackers_info:
            output_str += " - " + ", ".join(trackers_info) + "\n"
        else:
            output_str += " - لا توجد متتبعات معروفة.\n"
        output_str += "\n"

        # WAF & CDN
        output_str += "**🛡️ WAF و CDN (استدلالي):**\n"
        waf = report.get("waf_info") or report.get("waf") or {}
        if waf and waf.get("detected"):
            output_str += f" - WAF مكتشف: {waf.get('provider')} (ثقة: {_fmt_confidence(waf.get('confidence'))})\n"
        else:
            output_str += " - لا يوجد WAF واضح أو لم يتم اكتشافه.\n"
        cdn = report.get("cdn_info") or report.get("cdn") or {}
        if cdn and cdn.get("provider"):
            output_str += f" - CDN مفترض/مكتشف: {cdn.get('provider')} (ثقة: {_fmt_confidence(cdn.get('confidence'))})\n"
        else:
            output_str += " - لا يوجد CDN واضح.\n"
        output_str += "\n"

        # Final notes
        output_str += "**📝 ملاحظات:**\n"
        output_str += f"- مسار الأدلة الخام محفوظ في: {report.get('raw_evidence', {}).get('body', {}).get('path', 'غير متوفر')} (إن وُجد)\n"
        output_str += "\n\n"

    output_str += "---\n\n✨ تم الفحص بنجاح.\n"
    return output_str
if __name__ == "__main__":
    # CLI usage: python analyze_site.py <url> [render_js]
    if len(sys.argv) > 1:
        test_url = sys.argv[1]
        render_js_flag = False
        if len(sys.argv) > 2 and sys.argv[2].lower() in ("true", "1", "yes", "y"):
            render_js_flag = True
        try:
            res = asyncio.run(run_scan(test_url, render_js=render_js_flag))
            formatted = format_final_report({"full_report": [res], "summary": {"scanned_urls": [test_url]}})
            print(formatted)
        except Exception as e:
            print("فشل تشغيل الفحص:", e)
    else:
        print("Usage: python analyze_site.py <url_to_scan> [render_js: true|false]")

app/main.py Normal file

@@ -0,0 +1,220 @@
import os
import logging
import asyncio
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.middleware import SlowAPIMiddleware
from pydantic import BaseModel, HttpUrl
from typing import Optional, List
from dotenv import load_dotenv
from datetime import datetime, timezone
from urllib.parse import urlparse
import ipaddress

# Load environment variables from the .env file
load_dotenv()

# Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("SuperReconAPI")
logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))

# Create the FastAPI application
app = FastAPI(
    title="SuperRecon API",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# Configure CORS — trim the values and handle empty lists
raw_origins = os.getenv("CORS_ALLOW_ORIGINS", "")
if raw_origins.strip() == "":
    allow_origins: List[str] = ["*"]
else:
    allow_origins = [o.strip() for o in raw_origins.split(",") if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the rate limiter
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(429, _rate_limit_exceeded_handler)
app.add_middleware(SlowAPIMiddleware)

# Environment variables
MAX_CONCURRENT_SCANS = int(os.getenv("MAX_CONCURRENT_SCANS", "8"))
SCAN_TIMEOUT = int(os.getenv("SCAN_TIMEOUT", "180"))  # seconds
rate_limit = os.getenv("RATE_LIMIT", "15/minute")
scan_semaphore = asyncio.Semaphore(MAX_CONCURRENT_SCANS)

# Try to import run_scan flexibly (app.analyze_site or analyze_site)
try:
    from app.analyze_site import run_scan  # type: ignore
except Exception:
    try:
        from analyze_site import run_scan  # type: ignore
    except Exception as e:
        # Fail early with a clear message if the scan function cannot be found
        logger.exception("Cannot import run_scan from app.analyze_site or analyze_site.")
        raise ImportError("Could not import run_scan from app.analyze_site or analyze_site.") from e
class ReconRequest(BaseModel):
    url: HttpUrl
    render_js: Optional[bool] = True

@app.get("/", response_class=HTMLResponse)
async def index():
    html = """
    <title>SuperRecon API</title>
    <style>body { font-family: Arial, sans-serif; margin: 40px; }</style>
    ✅ SuperRecon API جاهز<br>
    استخدم الرابط التالي لفحص موقع:<br>
    /recon?url=https://example.com<br><br>
    <a href="/docs">عرض وثائق API</a>
    """
    return HTMLResponse(content=html)

@app.get("/health")
async def health():
    return {
        "status": "healthy",
        "service": "SuperRecon API",
        "version": "1.0.0",
        "timestamp": datetime.now(timezone.utc).isoformat()
    }
def _is_ip_private(ip_str: str) -> bool:
    """Returns True if ip_str is private/reserved/loopback/link-local/multicast."""
    try:
        ip_obj = ipaddress.ip_address(ip_str)
        return (
            ip_obj.is_private
            or ip_obj.is_loopback
            or ip_obj.is_link_local
            or ip_obj.is_reserved
            or ip_obj.is_multicast
            or ip_obj.is_unspecified
        )
    except Exception:
        return False

async def _ensure_not_local_target(parsed_url):
    """If the target resolves to private/loopback IPs, raise HTTPException (for safety)."""
    host = parsed_url.hostname
    if not host:
        raise HTTPException(status_code=400, detail="Invalid host in URL.")
    # If host is an IP literal
    try:
        ipaddress.ip_address(host)
        if _is_ip_private(host):
            raise HTTPException(status_code=400, detail="Scanning private/loopback addresses is not allowed.")
        return
    except ValueError:
        # Hostname — resolve asynchronously using the event loop resolver
        try:
            loop = asyncio.get_running_loop()
            # getaddrinfo returns a list of tuples; we extract sockaddr[0]
            infos = await loop.getaddrinfo(host, None)
            ips = set(sockaddr[0] for _, _, _, _, sockaddr in infos if sockaddr)
            if not ips:
                raise HTTPException(status_code=400, detail="Target hostname could not be resolved to any IP.")
            for ip in ips:
                if _is_ip_private(ip):
                    raise HTTPException(status_code=400, detail="Target resolves to private/loopback addresses; scanning is blocked.")
            return
        except HTTPException:
            raise
        except Exception as e:
            logger.debug(f"DNS resolution error for host {host}: {e}")
            raise HTTPException(status_code=400, detail="Target hostname could not be resolved.") from e
@app.get("/recon")
@limiter.limit(rate_limit)
async def recon_get(
request: Request,
url: str = Query(..., description="Target URL to analyze (e.g., https://example.com)"),
render_js: bool = Query(True, description="Render page with JavaScript before analysis")
):
# validate via pydantic model then dispatch to the POST handler
payload = ReconRequest(url=url, render_js=render_js)
return await recon_post(request, payload)
@app.post("/recon")
@limiter.limit(rate_limit)
async def recon_post(request: Request, payload: ReconRequest):
url_str = str(payload.url)
render_js = payload.render_js
if not url_str:
raise HTTPException(status_code=400, detail="Missing 'url' in payload")
# basic sanity: avoid extremely long URLs (simple DoS protection)
if len(url_str) > 4096:
raise HTTPException(status_code=400, detail="URL too long.")
parsed = urlparse(url_str)
if parsed.scheme.lower() not in ("http", "https"):
raise HTTPException(status_code=400, detail="Only http and https schemes are allowed.")
# Ensure the target is not local/private
await _ensure_not_local_target(parsed)
# get remote address (with fallback)
try:
remote_addr = get_remote_address(request)
except Exception:
try:
remote_addr = request.client.host # type: ignore
except Exception:
remote_addr = "unknown"
logger.info(f"Scan requested by {remote_addr} for {url_str} (render_js={render_js})")
async with scan_semaphore:
try:
logger.info(f"Starting scan for {url_str}")
# run_scan is expected to return a serializable dict (or safe_json already)
result = await asyncio.wait_for(
run_scan(url_str, render_js=render_js),
timeout=SCAN_TIMEOUT
)
logger.info(f"Scan completed for {url_str}")
# ensure result is JSON serializable; if not, wrap minimally
if not isinstance(result, dict):
logger.warning("run_scan returned non-dict result; coercing to dict.")
result = {"result": str(result)}
return JSONResponse(content=result)
except asyncio.TimeoutError:
logger.warning(f"Scan timed out for {url_str}")
return JSONResponse(
status_code=504,
content={
"success": False,
"error": "timeout",
"message": f"Scan timed out after {SCAN_TIMEOUT} seconds"
}
)
except HTTPException:
# re-raise HTTPException as-is (e.g., blocked target)
raise
except Exception as e:
logger.exception("Scan failed")
raise HTTPException(status_code=500, detail="Internal server error during scan.") from e
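
For completeness, a minimal client sketch against a locally running instance (base URL and timeout are assumptions; httpx is already listed in requirements.txt):

# client_sketch.py — illustrative only, not part of this commit
import httpx

def scan(url: str, render_js: bool = True) -> dict:
    # GET /recon mirrors POST /recon; both validate the URL and apply the rate limit.
    resp = httpx.get(
        "http://127.0.0.1:8000/recon",
        params={"url": url, "render_js": render_js},
        timeout=200.0,  # a bit above SCAN_TIMEOUT (180 s) so server-side 504s surface
    )
    resp.raise_for_status()
    return resp.json()

if __name__ == "__main__":
    print(scan("https://example.com"))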

app/requirements.txt Normal file

@@ -0,0 +1,30 @@
aiohttp
apify-client
beautifulsoup4
builtwith
certifi
charset-normalizer
cryptography
dnspython
fake-useragent
fastapi[all]
httpx[http2]
importlib_metadata; python_version < "3.10"
importlib_resources; python_version < "3.9"
ipwhois
js2py
lxml
packaging
playwright
pyOpenSSL
python-Wappalyzer
python-dotenv
python-socks
python-whois
redis
requests
slowapi
tenacity
tldextract
uvicorn[standard]
wafw00f
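
The two importlib_* lines above use PEP 508 environment markers, so pip installs the backports only on older interpreters. A small sketch of how such a marker evaluates, using the packaging library listed in this same file:

# marker_demo.py — illustrative only
from packaging.markers import Marker

marker = Marker('python_version < "3.10"')
# True on Python 3.9 and older, False on 3.10+, so pip skips the backport there.
print(marker.evaluate())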

app/utils.py Normal file

The diff for this file was suppressed because it is too large.