Upload files to "app"
This commit is contained in:
123  app/Dockerfile  Normal file
@@ -0,0 +1,123 @@
# Official Playwright image (includes browsers)
FROM mcr.microsoft.com/playwright/python:v1.44.0

# Build-time arg: set to 1 to fail the build if any pkg_resources usages are found in site-packages
ARG FAIL_ON_PKG_RESOURCES=0
ENV FAIL_ON_PKG_RESOURCES=${FAIL_ON_PKG_RESOURCES}

# Non-interactive apt
ENV DEBIAN_FRONTEND=noninteractive
WORKDIR /app

# Copy requirements first to leverage the Docker cache
COPY requirements.txt .

# Install useful system packages
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        git \
        build-essential \
        libxml2-dev \
        libxslt1-dev \
        libssl-dev \
        libffi-dev \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip/setuptools/wheel to latest (we aim to support setuptools >= 81 after the code migration)
RUN python -m pip install --upgrade pip setuptools wheel

# Install runtime Python deps from requirements
RUN pip install --no-cache-dir -r requirements.txt

# Install auxiliary packages / backports & tooling we rely on:
# - packaging: requirement parsing & version handling
# - importlib_metadata / importlib_resources: backports if running on older Python
# - wafw00f: WAF detection tool used by the project
RUN pip install --no-cache-dir \
    packaging \
    importlib_metadata \
    importlib_resources \
    wafw00f

# Copy the rest of the project files
COPY . .

# Create the evidence directory in a tmpfs location and set permissions (used by utils.store_raw_evidence)
ENV SUPERR_EVIDENCE_DIR=/dev/shm/superrecon_evidence
RUN mkdir -p ${SUPERR_EVIDENCE_DIR} \
    && chown -R pwuser:pwuser ${SUPERR_EVIDENCE_DIR} \
    && chmod 750 ${SUPERR_EVIDENCE_DIR} || true

# Optional build-time check: look for any remaining 'import pkg_resources' usages.
# If FAIL_ON_PKG_RESOURCES=1 the build will fail when any occurrences are found.
# This check scans site-packages for Python files mentioning pkg_resources.
RUN python - <<'PY' || (test "$FAIL_ON_PKG_RESOURCES" = "0" && exit 0)
import os, sys, site
from pathlib import Path

def scan_paths(paths):
    hits = []
    for root in paths:
        rootp = Path(root)
        if not rootp.exists():
            continue
        for p in rootp.rglob("*.py"):
            try:
                txt = p.read_text(encoding="utf-8", errors="ignore")
            except Exception:
                continue
            if "import pkg_resources" in txt or "pkg_resources." in txt:
                hits.append(str(p))
    return hits

paths = []
try:
    sp = site.getsitepackages()
    for p in sp:
        paths.append(p)
except Exception:
    # fallback to common locations
    paths += [
        "/usr/local/lib/python3.10/site-packages",
        "/usr/lib/python3/dist-packages",
        "/usr/local/lib/python3.9/site-packages",
    ]

hits = scan_paths(paths)
if hits:
    print("==========================================")
    print("WARNING: Detected uses of pkg_resources in installed packages (first 200 shown):")
    for h in hits[:200]:
        print(" -", h)
    print("==========================================")
    # If FAIL_ON_PKG_RESOURCES is set, fail the build
    if os.environ.get("FAIL_ON_PKG_RESOURCES", "0") == "1":
        print("FAIL_ON_PKG_RESOURCES=1 -> Failing build due to pkg_resources usages.")
        sys.exit(1)
else:
    print("No pkg_resources usages found in scanned site-packages paths.")
PY

# Ensure non-root runtime (pwuser exists in the Playwright base image)
USER pwuser

# Expose the application port (configurable via the APP_PORT env var)
ENV APP_PORT=8000
EXPOSE ${APP_PORT}

# Healthcheck
HEALTHCHECK --interval=30s --timeout=5s --start-period=5s --retries=3 \
    CMD curl -f http://127.0.0.1:${APP_PORT}/health || exit 1

# Default environment variables (can be overridden at runtime)
ENV PYTHONUNBUFFERED=1
ENV MAX_CONCURRENT_SCANS=8
ENV SCAN_TIMEOUT=180
ENV RATE_LIMIT="15/minute"
ENV LOG_LEVEL=INFO
ENV UVICORN_WORKERS=1

# Default command: run Uvicorn (assumes the app package path app.main:app).
# Shell form so the APP_PORT and UVICORN_WORKERS env vars actually take effect at runtime.
CMD ["sh", "-c", "uvicorn app.main:app --host 0.0.0.0 --port ${APP_PORT} --workers ${UVICORN_WORKERS}"]
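To exercise the optional pkg_resources check at build time, a build along the lines of 'docker build --build-arg FAIL_ON_PKG_RESOURCES=1 -t superrecon ./app' (image tag and context path are illustrative) should abort when any usage is found; with the default of 0 the check only prints a warning. Note that the RUN python - <<'PY' ... PY heredoc requires a BuildKit-enabled builder, which recent Docker releases use by default.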
274  app/analyze_site.py  Normal file
@@ -0,0 +1,274 @@
# analyze_site.py
# Updated to match the improved utils.py (compat_resources, run_scan_for_url, etc.)
import logging
from typing import Dict, Any, Optional
import asyncio
import sys

# Try flexible imports so this file works whether utils.py is at the project root or inside the `app` package.
try:
    # Preferred when utils is inside the `app` package (app/utils.py)
    from app.utils import safe_json, run_scan_for_url, generate_scan_id
except Exception:
    try:
        # Fallback to top-level utils.py
        from utils import safe_json, run_scan_for_url, generate_scan_id  # type: ignore
    except Exception as e:
        raise ImportError("Could not import required utilities (safe_json, run_scan_for_url, generate_scan_id).") from e


logger = logging.getLogger("SuperRecon")
if not logger.handlers:
    handler = logging.StreamHandler()
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    logger.addHandler(handler)
logger.setLevel("INFO")

async def run_scan(target_url: str, render_js: bool = False) -> Dict[str, Any]:
    """
    Orchestrates a full site scan for a single URL using run_scan_for_url from utils.
    Returns the raw report (dict) or a safe_json-wrapped error dict.
    """
    scan_id = generate_scan_id()
    logger.info(f"Starting scan {scan_id} for URL: {target_url} (render_js={render_js})")

    try:
        # run_scan_for_url already accepts scan_id and render_js and returns a dict
        report = await run_scan_for_url(target_url, render_js=render_js, scan_id=scan_id)
        logger.info(f"Scan {scan_id} completed successfully for {target_url}.")
        # Ensure the report is a dict and include scan_id
        if not isinstance(report, dict):
            report = {"error": "invalid_report", "details": "Scanner returned non-dict result", "raw": str(report)}
        report.setdefault("scan_id", scan_id)
        report.setdefault("scanned_url", report.get("url", target_url))
        return report
    except Exception as e:
        logger.error(f"Scan {scan_id} failed with error: {e}", exc_info=True)
        return safe_json({"error": "Scan failed", "details": str(e), "scan_id": scan_id, "scanned_url": target_url})


def _fmt_confidence(conf: Optional[Any]) -> str:
    try:
        if conf is None:
            return "0%"
        # If a float in [0, 1], convert it to a percentage
        if isinstance(conf, float) and 0.0 <= conf <= 1.0:
            return f"{int(round(conf * 100))}%"
        # otherwise try numeric
        val = int(float(conf))
        if 0 <= val <= 100:
            return f"{val}%"
        return f"{max(0, min(val, 100))}%"
    except Exception:
        try:
            return f"{int(conf)}%"
        except Exception:
            return str(conf)

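For orientation, the helper above clamps fractional, percent-style, and out-of-range confidence values into a "NN%" string; a quick sketch of the expected outputs, derived from the branches above (illustrative, not part of the module):

# Expected behavior of _fmt_confidence, for illustration:
assert _fmt_confidence(0.87) == "87%"   # fraction in [0, 1] becomes a percentage
assert _fmt_confidence(95) == "95%"     # integer percents pass through
assert _fmt_confidence(150) == "100%"   # out-of-range values are clamped
assert _fmt_confidence(None) == "0%"    # missing confidence defaults to 0%
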
def format_final_report(report_data: Dict[str, Any]) -> str:
    """
    Formats the raw scan report data into a human-readable, well-structured string (Arabic).
    Tolerant to different shapes of report_data (a single report or a wrapper).
    """
    if "error" in report_data:
        return f"❌ تقرير الفحص: حدث خطأ\n\n{report_data.get('details', 'لا يوجد تفاصيل')}"

    # Accept either {"full_report": [...]} or a single report dict
    full_reports = report_data.get("full_report")
    if not full_reports:
        # If the provided dict already looks like a single scan report, wrap it
        if "scanned_url" in report_data or "url" in report_data:
            full_reports = [report_data]
        else:
            # If a summary with a list of reports is provided, try extracting it
            if isinstance(report_data.get("reports"), list):
                full_reports = report_data.get("reports")
            else:
                return "⚠️ لم يتم العثور على تقارير فحص.\nقد يكون الموقع غير متاح أو لم يتم تنفيذ الفحص."

    output_str = "✨ **تقرير فحص شامل للموقع** ✨\n\n"
    output_str += "---\n\n"

    # Summary of scanned URLs (if available)
    scanned_urls_summary = report_data.get("summary", {}).get("scanned_urls", [])
    output_str += "**✅ الصفحات التي تم فحصها:**\n"
    if scanned_urls_summary:
        output_str += "\n".join([f"• {url}" for url in scanned_urls_summary]) + "\n\n"
    else:
        collected = [r.get("scanned_url") or r.get("url") for r in full_reports if r.get("scanned_url") or r.get("url")]
        if collected:
            output_str += "\n".join([f"• {url}" for url in collected]) + "\n\n"
        else:
            output_str += "• لم يتم توفير ملخص للروابط المفحوصة.\n\n"

    for report in full_reports:
        url = report.get("scanned_url", report.get("url", "URL غير معروف"))
        scan_id = report.get("scan_id", "")
        scanned_at = report.get("scanned_at", "غير معروف")

        output_str += "---\n\n"
        output_str += f"### **🌐 تقرير الفحص لصفحة: {url}**\n"
        if scan_id:
            output_str += f"- **معرّف الفحص:** `{scan_id}`\n"
        if scanned_at:
            output_str += f"- **وقت الفحص:** {scanned_at}\n"
        output_str += "\n"

        # Security Headers
        output_str += "**🛡️ رؤوس الأمان (Security Headers):**\n"
        sec_headers = report.get("security_headers", {})
        if sec_headers:
            for h, d in sec_headers.items():
                try:
                    # d may be a dict with status/value
                    if isinstance(d, dict):
                        status = d.get("status", "")
                        value = d.get("value", "")
                        output_str += f" - **{h}**: {status} — `{value}`\n"
                    else:
                        output_str += f" - **{h}**: {d}\n"
                except Exception:
                    output_str += f" - **{h}**: {d}\n"
        else:
            output_str += " - لم يتم العثور على رؤوس أمان أساسية.\n"
        output_str += "\n"

        # DNS Records
        output_str += "**📡 معلومات DNS:**\n"
        dns_records = report.get("dns_records", {})
        if dns_records:
            for rtype, records in dns_records.items():
                try:
                    recs_display = ", ".join(records) if isinstance(records, (list, tuple)) and records else str(records)
                except Exception:
                    recs_display = str(records)
                output_str += f" - **{rtype}**: {recs_display}\n"
        else:
            output_str += " - لا توجد سجلات DNS أو لم يتم استردادها.\n"
        output_str += "\n"

        # SSL Info
        output_str += "**🔒 شهادة SSL:**\n"
        ssl_info = report.get("ssl_info", {}) or {}
        if ssl_info.get("valid"):
            not_after = ssl_info.get("not_after", "غير معروف")
            issuer = ssl_info.get("issuer") or {}
            issuer_cn = issuer.get("CN") if isinstance(issuer, dict) else issuer
            output_str += f" - ✅ صالحة حتى: {not_after}\n"
            output_str += f" - جهة الإصدار: {issuer_cn if issuer_cn else issuer}\n"
        elif ssl_info.get("error"):
            output_str += f" - ❌ خطأ في فحص الشهادة: {ssl_info.get('error')}\n"
        else:
            output_str += " - ❌ غير مفعلة أو غير متاحة.\n"
        output_str += "\n"

        # Technologies
        output_str += "**🛠️ التقنيات المكتشفة:**\n"
        teks = report.get("technologies", []) or []
        if teks:
            # Sort by confidence, descending, and show all (or limit if desired)
            for t in sorted(teks, key=lambda x: x.get('confidence', 0), reverse=True):
                name = t.get("name", "غير معروف")
                confidence = _fmt_confidence(t.get("confidence", 0))
                category = t.get("categories") or t.get("category") or []
                if isinstance(category, (list, tuple)):
                    cat_display = ", ".join(category) if category else "غير محدد"
                else:
                    cat_display = str(category)
                source = t.get("source", "غير معروف")
                version = t.get("version", "") or ""
                # Parse the "NN%" string back into an int; unparsable values fall back to 0
                try:
                    conf_val = int(confidence.strip("%"))
                except (ValueError, TypeError):
                    conf_val = 0
                emoji = "⭐" if conf_val > 90 else "👍" if conf_val > 70 else "🧐"
output_str += f" - {emoji} **{name}**"
|
||||
if version:
|
||||
output_str += f" (الإصدار: {version})"
|
||||
output_str += f"\n"
|
||||
output_str += f" - **الفئة**: {cat_display}\n"
|
||||
output_str += f" - **الثقة**: {confidence}\n"
|
||||
output_str += f" - **المصدر**: {source}\n"
|
||||
else:
|
||||
output_str += " - لم يتم العثور على تقنيات.\n"
|
||||
output_str += "\n"
|
||||
|
||||
# Robots.txt
|
||||
output_str += "**🤖 ملف Robots.txt:**\n"
|
||||
robots_info = report.get("robots_info", {}) or {}
|
||||
if robots_info.get("exists"):
|
||||
output_str += f" - ✅ **موجود** في: {robots_info.get('fetched_from')}\n"
|
||||
if robots_info.get("sitemaps"):
|
||||
s = robots_info.get("sitemaps")
|
||||
output_str += f" - **Sitemaps**: {', '.join(s)}\n"
|
||||
if robots_info.get("rules"):
|
||||
output_str += " - **قواعد**: يحتوي على قواعد Allow/Disallow.\n"
|
||||
else:
|
||||
tried = robots_info.get("tried") or []
|
||||
if tried:
|
||||
output_str += f" - ❌ غير موجود بعد محاولة الوصول إلى: {', '.join(tried)}\n"
|
||||
else:
|
||||
output_str += " - ❌ غير موجود أو لم يتم فحصه.\n"
|
||||
output_str += "\n"
|
||||
|
||||
# Payment Methods
|
||||
output_str += "**💳 طرق الدفع:**\n"
|
||||
payment_methods = report.get("payment_methods", []) or []
|
||||
if payment_methods:
|
||||
names = []
|
||||
for method in payment_methods:
|
||||
if isinstance(method, dict):
|
||||
names.append(method.get("name") or str(method))
|
||||
else:
|
||||
names.append(str(method))
|
||||
output_str += f" - تم العثور على: {', '.join(names)}\n"
|
||||
else:
|
||||
output_str += " - لم يتم العثور على طرق دفع معروفة.\n"
|
||||
output_str += "\n"
|
||||
|
||||
# Trackers & Analytics
|
||||
output_str += "**📈 المتتبعات (Trackers & Analytics):**\n"
|
||||
trackers_info = report.get("trackers_and_analytics", []) or []
|
||||
if trackers_info:
|
||||
output_str += " - " + ", ".join(trackers_info) + "\n"
|
||||
else:
|
||||
output_str += " - لا توجد متتبعات معروفة.\n"
|
||||
output_str += "\n"
|
||||
|
||||
# WAF & CDN
|
||||
output_str += "**🛡️ WAF و CDN (استدلالي):**\n"
|
||||
waf = report.get("waf_info") or report.get("waf") or {}
|
||||
if waf and waf.get("detected"):
|
||||
output_str += f" - WAF مكتشف: {waf.get('provider')} (ثقة: {_fmt_confidence(waf.get('confidence'))})\n"
|
||||
else:
|
||||
output_str += " - لا يوجد WAF واضح أو لم يتم اكتشافه.\n"
|
||||
cdn = report.get("cdn_info") or report.get("cdn") or {}
|
||||
if cdn and cdn.get("provider"):
|
||||
output_str += f" - CDN مفترض/مكتشف: {cdn.get('provider')} (ثقة: {_fmt_confidence(cdn.get('confidence'))})\n"
|
||||
else:
|
||||
output_str += " - لا يوجد CDN واضح.\n"
|
||||
output_str += "\n"
|
||||
|
||||
# Final notes
|
||||
output_str += f"**📝 ملاحظات:**\n"
|
||||
output_str += f"- مسار الأدلة الخام محفوظ في: {report.get('raw_evidence', {}).get('body', {}).get('path', 'غير متوفر')} (إن وُجد)\n"
|
||||
output_str += "\n\n"
|
||||
|
||||
output_str += "---\n\n✨ تم الفحص بنجاح.\n"
|
||||
return output_str
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# CLI usage: python analyze_site.py <url>
|
||||
if len(sys.argv) > 1:
|
||||
test_url = sys.argv[1]
|
||||
render_js_flag = False
|
||||
if len(sys.argv) > 2 and sys.argv[2].lower() in ("true", "1", "yes", "y"):
|
||||
render_js_flag = True
|
||||
try:
|
||||
res = asyncio.run(run_scan(test_url, render_js=render_js_flag))
|
||||
formatted = format_final_report({"full_report": [res], "summary": {"scanned_urls": [test_url]}})
|
||||
print(formatted)
|
||||
except Exception as e:
|
||||
print("فشل تشغيل الفحص:", e)
|
||||
else:
|
||||
print("Usage: python analyze_site.py <url_to_scan> [render_js: true|false]")
220  app/main.py  Normal file
@@ -0,0 +1,220 @@
import os
import logging
import asyncio
from fastapi import FastAPI, HTTPException, Query, Request
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from slowapi import Limiter, _rate_limit_exceeded_handler
from slowapi.util import get_remote_address
from slowapi.middleware import SlowAPIMiddleware
from pydantic import BaseModel, HttpUrl, ValidationError
from typing import Optional, List
from dotenv import load_dotenv
from datetime import datetime, timezone
from urllib.parse import urlparse
import ipaddress

# Load variables from the .env file
load_dotenv()

# Logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("SuperReconAPI")
logger.setLevel(os.getenv("LOG_LEVEL", "INFO"))

# Create the FastAPI application
app = FastAPI(
    title="SuperRecon API",
    version="1.0.0",
    docs_url="/docs",
    redoc_url="/redoc"
)

# CORS configuration: trim values and handle empty lists
raw_origins = os.getenv("CORS_ALLOW_ORIGINS", "")
if raw_origins.strip() == "":
    allow_origins: List[str] = ["*"]
else:
    allow_origins = [o.strip() for o in raw_origins.split(",") if o.strip()]

app.add_middleware(
    CORSMiddleware,
    allow_origins=allow_origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Initialize the rate limiter
limiter = Limiter(key_func=get_remote_address)
app.state.limiter = limiter
app.add_exception_handler(429, _rate_limit_exceeded_handler)
app.add_middleware(SlowAPIMiddleware)

# Environment variables
MAX_CONCURRENT_SCANS = int(os.getenv("MAX_CONCURRENT_SCANS", "8"))
SCAN_TIMEOUT = int(os.getenv("SCAN_TIMEOUT", "180"))  # seconds
rate_limit = os.getenv("RATE_LIMIT", "15/minute")

scan_semaphore = asyncio.Semaphore(MAX_CONCURRENT_SCANS)

# Try to import run_scan flexibly (app.analyze_site or analyze_site)
try:
    from app.analyze_site import run_scan  # type: ignore
except Exception:
    try:
        from analyze_site import run_scan  # type: ignore
    except Exception as e:
        # Fail early with a clear message if the scan function cannot be found
        logger.exception("Cannot import run_scan from app.analyze_site or analyze_site.")
        raise ImportError("Could not import run_scan from app.analyze_site or analyze_site.") from e


class ReconRequest(BaseModel):
    url: HttpUrl
    render_js: Optional[bool] = True


@app.get("/", response_class=HTMLResponse)
async def index():
    html = """
    <title>SuperRecon API</title>
    <style>body { font-family: Arial, sans-serif; margin: 40px; }</style>
    ✅ SuperRecon API جاهز<br>
    استخدم الرابط التالي لفحص موقع:<br>
    /recon?url=https://example.com<br><br>
    <a href="/docs">عرض وثائق API</a>
    """
    return HTMLResponse(content=html)


@app.get("/health")
async def health():
    return {
        "status": "healthy",
        "service": "SuperRecon API",
        "version": "1.0.0",
        "timestamp": datetime.now(timezone.utc).isoformat()
    }


def _is_ip_private(ip_str: str) -> bool:
    """Returns True if ip_str is private/reserved/loopback/link-local/multicast."""
    try:
        ip_obj = ipaddress.ip_address(ip_str)
        return (
            ip_obj.is_private
            or ip_obj.is_loopback
            or ip_obj.is_link_local
            or ip_obj.is_reserved
            or ip_obj.is_multicast
            or ip_obj.is_unspecified
        )
    except Exception:
        return False


async def _ensure_not_local_target(parsed_url):
    """If the target resolves to private/loopback IPs, raise HTTPException (for safety)."""
    host = parsed_url.hostname
    if not host:
        raise HTTPException(status_code=400, detail="Invalid host in URL.")
    # If host is an IP literal
    try:
        ipaddress.ip_address(host)
        if _is_ip_private(host):
            raise HTTPException(status_code=400, detail="Scanning private/loopback addresses is not allowed.")
        return
    except ValueError:
        # Hostname: resolve asynchronously using the event loop resolver
        try:
            loop = asyncio.get_running_loop()
            # getaddrinfo returns a list of tuples; extract sockaddr[0]
            infos = await loop.getaddrinfo(host, None)
            ips = set(sockaddr[0] for _, _, _, _, sockaddr in infos if sockaddr)
            if not ips:
                raise HTTPException(status_code=400, detail="Target hostname could not be resolved to any IP.")
            for ip in ips:
                if _is_ip_private(ip):
                    raise HTTPException(status_code=400, detail="Target resolves to private/loopback addresses; scanning is blocked.")
            return
        except HTTPException:
            raise
        except Exception as e:
            logger.debug(f"DNS resolution error for host {host}: {e}")
            raise HTTPException(status_code=400, detail="Target hostname could not be resolved.") from e


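This pair of functions is the SSRF guard that keeps /recon from being pointed at internal services. A quick sketch of what it blocks, based on the stdlib ipaddress flags used above (illustrative values):

# Illustrative checks; unparsable strings deliberately return False.
assert _is_ip_private("127.0.0.1")           # loopback
assert _is_ip_private("10.0.0.8")            # RFC 1918 private range
assert _is_ip_private("169.254.10.1")        # link-local
assert not _is_ip_private("93.184.216.34")   # public address
assert not _is_ip_private("not-an-ip")       # parse failure -> False
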
@app.get("/recon")
|
||||
@limiter.limit(rate_limit)
|
||||
async def recon_get(
|
||||
request: Request,
|
||||
url: str = Query(..., description="Target URL to analyze (e.g., https://example.com)"),
|
||||
render_js: bool = Query(True, description="Render page with JavaScript before analysis")
|
||||
):
|
||||
# validate via pydantic model then dispatch to the POST handler
|
||||
payload = ReconRequest(url=url, render_js=render_js)
|
||||
return await recon_post(request, payload)
|
||||
|
||||
|
||||
@app.post("/recon")
|
||||
@limiter.limit(rate_limit)
|
||||
async def recon_post(request: Request, payload: ReconRequest):
|
||||
url_str = str(payload.url)
|
||||
render_js = payload.render_js
|
||||
|
||||
if not url_str:
|
||||
raise HTTPException(status_code=400, detail="Missing 'url' in payload")
|
||||
|
||||
# basic sanity: avoid extremely long URLs (simple DoS protection)
|
||||
if len(url_str) > 4096:
|
||||
raise HTTPException(status_code=400, detail="URL too long.")
|
||||
|
||||
parsed = urlparse(url_str)
|
||||
if parsed.scheme.lower() not in ("http", "https"):
|
||||
raise HTTPException(status_code=400, detail="Only http and https schemes are allowed.")
|
||||
|
||||
# Ensure the target is not local/private
|
||||
await _ensure_not_local_target(parsed)
|
||||
|
||||
# get remote address (with fallback)
|
||||
try:
|
||||
remote_addr = get_remote_address(request)
|
||||
except Exception:
|
||||
try:
|
||||
remote_addr = request.client.host # type: ignore
|
||||
except Exception:
|
||||
remote_addr = "unknown"
|
||||
|
||||
logger.info(f"Scan requested by {remote_addr} for {url_str} (render_js={render_js})")
|
||||
|
||||
async with scan_semaphore:
|
||||
try:
|
||||
logger.info(f"Starting scan for {url_str}")
|
||||
# run_scan is expected to return a serializable dict (or safe_json already)
|
||||
result = await asyncio.wait_for(
|
||||
run_scan(url_str, render_js=render_js),
|
||||
timeout=SCAN_TIMEOUT
|
||||
)
|
||||
logger.info(f"Scan completed for {url_str}")
|
||||
# ensure result is JSON serializable; if not, wrap minimally
|
||||
if not isinstance(result, dict):
|
||||
logger.warning("run_scan returned non-dict result; coercing to dict.")
|
||||
result = {"result": str(result)}
|
||||
return JSONResponse(content=result)
|
||||
except asyncio.TimeoutError:
|
||||
logger.warning(f"Scan timed out for {url_str}")
|
||||
return JSONResponse(
|
||||
status_code=504,
|
||||
content={
|
||||
"success": False,
|
||||
"error": "timeout",
|
||||
"message": f"Scan timed out after {SCAN_TIMEOUT} seconds"
|
||||
}
|
||||
)
|
||||
except HTTPException:
|
||||
# re-raise HTTPException as-is (e.g., blocked target)
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.exception("Scan failed")
|
||||
raise HTTPException(status_code=500, detail="Internal server error during scan.") from e
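
For a quick manual test of the endpoints above (assuming the service is listening on 127.0.0.1:8000; requests is already in requirements.txt):

import requests

# GET form: parameters in the query string
r = requests.get("http://127.0.0.1:8000/recon",
                 params={"url": "https://example.com", "render_js": "false"},
                 timeout=200)
print(r.status_code, r.json().get("scan_id"))

# POST form: JSON body validated by the ReconRequest model
r = requests.post("http://127.0.0.1:8000/recon",
                  json={"url": "https://example.com", "render_js": False},
                  timeout=200)
print(r.status_code)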
30  app/requirements.txt  Normal file
@@ -0,0 +1,30 @@
aiohttp
apify-client
beautifulsoup4
builtwith
certifi
charset-normalizer
cryptography
dnspython
fake-useragent
fastapi[all]
httpx[http2]
importlib_metadata; python_version < "3.10"
importlib_resources; python_version < "3.9"
ipwhois
js2py
lxml
packaging
playwright
pyOpenSSL
python-Wappalyzer
python-dotenv
python-socks
python-whois
redis
requests
slowapi
tenacity
tldextract
uvicorn[standard]
wafw00f
1423  app/utils.py  Normal file
File diff suppressed because the file is too large.