Update main.py
هذا الالتزام موجود في:
528
main.py
528
main.py
@@ -1,14 +1,24 @@
|
|||||||
|
# main.py
|
||||||
import os
|
import os
|
||||||
import json
|
import re
|
||||||
import redis
|
import psycopg2
|
||||||
from fastapi import FastAPI, HTTPException, status
|
import psycopg2.extras
|
||||||
from pydantic import BaseModel, Field, UrlConstraints
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from fastapi import FastAPI, HTTPException, status, BackgroundTasks
|
||||||
|
from pydantic import BaseModel, Field, HttpUrl
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import hashlib
|
||||||
|
from typing import List, Optional
|
||||||
|
from datetime import datetime
|
||||||
|
import logging
|
||||||
|
|
||||||
# --- Configuration ---
|
# --- Configuration ---
|
||||||
# We connect to Redis. In Docker, the hostname is usually the service name 'redis'.
|
DATABASE_URL = os.getenv("DATABASE_URL", "")
|
||||||
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
|
|
||||||
REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
|
# --- Logging Setup ---
|
||||||
QUEUE_NAME = "arabic_crawling_queue"
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# --- Models ---
|
# --- Models ---
|
||||||
class SearchPayload(BaseModel):
    """Request body for /api/ingest: one keyword plus the URLs it produced."""

    # The search keyword that generated these results.
    keyword: str = Field(..., description="The search keyword used")
    # URLs to queue for scraping; pydantic validates them as HTTP(S) URLs.
    results: List[HttpUrl] = Field(..., description="List of URLs to process")
||||||
|
|
||||||
# --- App & Redis Setup ---
|
class ArabicContentResponse(BaseModel):
    """Shape of a single scrape/detection result returned to clients."""

    # The URL that was scraped.
    url: str
    # True when Arabic characters were found in title, meta, or body.
    has_arabic: bool
    # Page <title> text, when present.
    title: Optional[str] = None
    # <meta name="description"> content, when present.
    meta_description: Optional[str] = None
    # Truncated preview of the Arabic runs extracted from the page body.
    arabic_content_preview: Optional[str] = None
||||||
|
|
||||||
|
# --- App Setup ---
app = FastAPI(title="Arabic Search Ingestion API")
||||||
|
|
||||||
# Initialize Redis connection
|
# --- Database Setup ---
def get_db_connection():
    """Open a fresh PostgreSQL connection, or return None on failure.

    Callers are responsible for closing the returned connection and for
    handling the None case (no raise here, so the API can degrade to 503s).
    """
    try:
        return psycopg2.connect(DATABASE_URL)
    except Exception as e:
        logger.error(f"Database connection failed: {e}")
        return None
||||||
|
|
||||||
|
def init_database():
    """Initialize database tables if they don't exist.

    Creates the three tables used by the service (search_ingest,
    arabic_index, processing_queue) plus supporting indexes. Idempotent:
    safe to call on every startup thanks to IF NOT EXISTS / UNIQUE.
    """
    conn = get_db_connection()
    if not conn:
        logger.error("Cannot initialize database - no connection")
        return

    try:
        with conn.cursor() as cur:
            # Raw search results exactly as delivered by /api/ingest.
            cur.execute("""
                CREATE TABLE IF NOT EXISTS search_ingest (
                    id SERIAL PRIMARY KEY,
                    keyword VARCHAR(500) NOT NULL,
                    url TEXT NOT NULL,
                    url_hash VARCHAR(64) NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    processed BOOLEAN DEFAULT FALSE,
                    UNIQUE(url_hash)
                )
            """)

            # Create index for faster lookups
            cur.execute("""
                CREATE INDEX IF NOT EXISTS idx_search_ingest_url_hash
                ON search_ingest(url_hash)
            """)

            cur.execute("""
                CREATE INDEX IF NOT EXISTS idx_search_ingest_processed
                ON search_ingest(processed)
            """)

            # Pages confirmed to contain Arabic content.
            # BUG FIX: the original DDL placed a MySQL-style "INDEX(url_hash)"
            # clause inside CREATE TABLE, which is a syntax error in
            # PostgreSQL and made this statement fail on every startup.
            # The UNIQUE constraint on url_hash already creates an index,
            # so the clause is simply dropped.
            cur.execute("""
                CREATE TABLE IF NOT EXISTS arabic_index (
                    id SERIAL PRIMARY KEY,
                    url TEXT NOT NULL UNIQUE,
                    url_hash VARCHAR(64) NOT NULL UNIQUE,
                    title TEXT,
                    meta_description TEXT,
                    arabic_content TEXT,
                    detection_score FLOAT,
                    scraped_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    last_updated TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    source_keyword VARCHAR(500),
                    http_status INTEGER
                )
            """)

            # Work queue driving the background scraper.
            cur.execute("""
                CREATE TABLE IF NOT EXISTS processing_queue (
                    id SERIAL PRIMARY KEY,
                    search_ingest_id INTEGER REFERENCES search_ingest(id),
                    url TEXT NOT NULL,
                    url_hash VARCHAR(64) NOT NULL,
                    status VARCHAR(50) DEFAULT 'pending',
                    attempts INTEGER DEFAULT 0,
                    last_attempt TIMESTAMP,
                    error_message TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    UNIQUE(url_hash)
                )
            """)

        conn.commit()
        logger.info("Database initialized successfully")
    except Exception as e:
        logger.error(f"Database initialization failed: {e}")
        conn.rollback()
    finally:
        conn.close()
||||||
|
|
||||||
|
# Initialize database on startup
@app.on_event("startup")
def startup_event():
    """Ensure all tables exist before the API starts serving requests."""
    init_database()
||||||
|
|
||||||
|
# --- Arabic Language Detection ---
def contains_arabic(text: str) -> bool:
    """Return True if *text* contains at least one Arabic character.

    Covers the core Arabic block plus supplements and presentation forms:
    U+0600-U+06FF, U+0750-U+077F, U+08A0-U+08FF, U+FB50-U+FDFF,
    U+FE70-U+FEFF. Empty/None input yields False.
    """
    if not text:
        return False
    pattern = re.compile(
        r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]'
    )
    return pattern.search(text) is not None
|
|
||||||
|
def extract_arabic_content(text: str, max_length: int = 500) -> str:
    """Return a preview of the Arabic runs found in *text*.

    Contiguous Arabic segments are joined with single spaces; when the
    joined text exceeds *max_length* characters it is truncated and
    suffixed with "...". Returns "" for empty input or no Arabic.
    """
    if not text:
        return ""

    runs = re.findall(
        r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]+',
        text,
    )
    if not runs:
        return ""

    joined = " ".join(runs)
    if len(joined) > max_length:
        return joined[:max_length] + "..."
    return joined
||||||
|
|
||||||
|
def calculate_arabic_score(text: str) -> float:
    """Return the percentage (0.0-100.0) of Arabic characters in *text*.

    Every character counts toward the denominator, including whitespace
    and punctuation. Empty input scores 0.0.
    """
    if not text:
        return 0.0

    arabic_chars = re.findall(
        r'[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF\uFB50-\uFDFF\uFE70-\uFEFF]',
        text,
    )
    return len(arabic_chars) / len(text) * 100
||||||
|
|
||||||
|
def get_url_hash(url: str) -> str:
    """Return the SHA-256 hex digest of *url* — a stable 64-char dedup key."""
    digest = hashlib.sha256()
    digest.update(url.encode('utf-8'))
    return digest.hexdigest()
||||||
|
|
||||||
|
# --- Scraping Function ---
def scrape_and_check_arabic(url: str, keyword: str = None) -> dict:
    """Fetch *url*, detect Arabic content, and persist hits.

    Returns a result dict (url, has_arabic, title, meta_description,
    arabic_content_preview, arabic_score, http_status, error). When
    Arabic is detected the result is also upserted into arabic_index
    via save_to_arabic_index(). Errors never raise; they are recorded
    in the "error" field instead.
    """
    result = {
        "url": url,
        "has_arabic": False,
        "title": None,
        "meta_description": None,
        "arabic_content_preview": None,
        "arabic_score": 0.0,
        "http_status": None,
        "error": None,
    }

    try:
        # Identify ourselves so well-behaved sites don't block us outright.
        headers = {
            'User-Agent': 'Mozilla/5.0 (compatible; ArabicIndexBot/1.0; +http://example.com/bot)'
        }

        response = requests.get(url, headers=headers, timeout=10, allow_redirects=True)
        result["http_status"] = response.status_code

        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')

            # <title> text, if any.
            title_tag = soup.find('title')
            if title_tag:
                result["title"] = title_tag.get_text().strip()

            # <meta name="description"> content, if any.
            meta_desc = soup.find('meta', attrs={'name': 'description'})
            if meta_desc and meta_desc.get('content'):
                result["meta_description"] = meta_desc['content'].strip()

            # Strip non-content elements before extracting visible text.
            for element in soup(["script", "style", "nav", "footer", "header"]):
                element.decompose()
            text_content = soup.get_text(separator=' ', strip=True)

            # Arabic anywhere (title, meta, or body) counts as a hit.
            result["has_arabic"] = (
                contains_arabic(result["title"])
                or contains_arabic(result["meta_description"])
                or contains_arabic(text_content)
            )

            if result["has_arabic"]:
                result["arabic_score"] = calculate_arabic_score(text_content)
                result["arabic_content_preview"] = extract_arabic_content(text_content)
                # Persist the hit immediately.
                save_to_arabic_index(result, keyword)

    except requests.Timeout:
        result["error"] = "Request timeout"
    except requests.RequestException as e:
        result["error"] = f"Request failed: {str(e)}"
    except Exception as e:
        result["error"] = f"Scraping failed: {str(e)}"

    return result
|
|
||||||
|
def save_to_arabic_index(data: dict, keyword: str = None):
    """Upsert one scrape result into arabic_index, keyed by url_hash.

    Best-effort: logs and rolls back on any failure rather than raising,
    so a storage hiccup never aborts the surrounding scrape loop.
    """
    conn = get_db_connection()
    if not conn:
        logger.error("Cannot save to arabic_index - no database connection")
        return

    try:
        url_hash = get_url_hash(data["url"])

        with conn.cursor() as cur:
            # Use INSERT ... ON CONFLICT to handle duplicates
            cur.execute("""
                INSERT INTO arabic_index
                (url, url_hash, title, meta_description, arabic_content, detection_score, source_keyword, http_status, last_updated)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, CURRENT_TIMESTAMP)
                ON CONFLICT (url_hash)
                DO UPDATE SET
                    last_updated = CURRENT_TIMESTAMP,
                    title = EXCLUDED.title,
                    meta_description = EXCLUDED.meta_description,
                    arabic_content = EXCLUDED.arabic_content,
                    detection_score = EXCLUDED.detection_score,
                    http_status = EXCLUDED.http_status,
                    source_keyword = EXCLUDED.source_keyword
            """, (
                data["url"],
                url_hash,
                data["title"],
                data["meta_description"],
                data["arabic_content_preview"],
                data["arabic_score"],
                keyword,
                data["http_status"],
            ))
        conn.commit()
        logger.info(f"Saved/Updated Arabic content for URL: {data['url']}")
    except Exception as e:
        logger.error(f"Failed to save to arabic_index: {e}")
        conn.rollback()
    finally:
        conn.close()
||||||
|
|
||||||
|
# --- Background Processing ---
def process_urls_from_queue():
    """Process pending URLs from the queue.

    Pulls up to 10 'pending' items with fewer than 3 attempts, scrapes
    each, and records the outcome on processing_queue / search_ingest.

    NOTE(review): the per-item conn.commit() releases the row locks taken
    by FOR UPDATE SKIP LOCKED after the first item, so concurrent workers
    are only partially protected — confirm whether per-item transactions
    with re-selection are needed.
    """
    conn = get_db_connection()
    if not conn:
        logger.error("Cannot process queue - no database connection")
        return

    try:
        with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
            # Get pending URLs (limit 10 per batch)
            cur.execute("""
                SELECT pq.id, pq.url, pq.url_hash, si.keyword
                FROM processing_queue pq
                JOIN search_ingest si ON pq.search_ingest_id = si.id
                WHERE pq.status = 'pending' AND pq.attempts < 3
                ORDER BY pq.created_at ASC
                LIMIT 10
                FOR UPDATE SKIP LOCKED
            """)

            pending_items = cur.fetchall()

            for item in pending_items:
                # Record the attempt before scraping so crashes still count.
                cur.execute("""
                    UPDATE processing_queue
                    SET attempts = attempts + 1, last_attempt = CURRENT_TIMESTAMP
                    WHERE id = %s
                """, (item['id'],))
                conn.commit()

                # Scrape the URL (never raises; errors land in result['error']).
                result = scrape_and_check_arabic(item['url'], item['keyword'])

                if result['has_arabic']:
                    # Successfully processed and saved
                    cur.execute("""
                        UPDATE processing_queue
                        SET status = 'completed'
                        WHERE id = %s
                    """, (item['id'],))

                    # Mark search_ingest as processed
                    cur.execute("""
                        UPDATE search_ingest
                        SET processed = TRUE
                        WHERE url_hash = %s
                    """, (item['url_hash'],))
                else:
                    # Failed or no Arabic content
                    status = 'failed' if result['error'] else 'no_arabic'
                    # BUG FIX: result always contains an 'error' key (possibly
                    # None), so the original dict.get(..., default) never used
                    # its fallback and error_message was stored as NULL for
                    # no-Arabic pages. Use `or` so None falls through.
                    error_msg = result['error'] or 'No Arabic content found'

                    cur.execute("""
                        UPDATE processing_queue
                        SET status = %s, error_message = %s
                        WHERE id = %s
                    """, (status, error_msg, item['id']))

                conn.commit()

    except Exception as e:
        logger.error(f"Queue processing failed: {e}")
        conn.rollback()
    finally:
        conn.close()
||||||
|
|
||||||
|
# --- API Endpoints ---
@app.get("/")
def health_check():
    """Report service liveness plus database reachability."""
    conn = get_db_connection()
    db_status = conn is not None
    # The probe connection is only needed for the check itself.
    if conn:
        conn.close()
    return {
        "status": "running",
        "database_connected": db_status,
        "service": "Arabic Search Indexer",
    }
||||||
|
|
||||||
|
@app.post("/api/ingest", status_code=status.HTTP_202_ACCEPTED)
|
||||||
|
def ingest_search_data(payload: SearchPayload, background_tasks: BackgroundTasks):
|
||||||
|
"""
|
||||||
|
Receives keywords + URLs, validates them,
|
||||||
|
and stores them in PostgreSQL for processing.
|
||||||
|
"""
|
||||||
|
conn = get_db_connection()
|
||||||
|
if not conn:
|
||||||
|
raise HTTPException(status_code=503, detail="Database service unavailable")
|
||||||
|
|
||||||
|
try:
|
||||||
|
inserted_count = 0
|
||||||
|
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
for url in payload.results:
|
||||||
|
url_str = str(url)
|
||||||
|
url_hash = get_url_hash(url_str)
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Insert into search_ingest table (ignore duplicates)
|
||||||
|
cur.execute("""
|
||||||
|
INSERT INTO search_ingest (keyword, url, url_hash, created_at, processed)
|
||||||
|
VALUES (%s, %s, %s, CURRENT_TIMESTAMP, FALSE)
|
||||||
|
ON CONFLICT (url_hash) DO NOTHING
|
||||||
|
RETURNING id
|
||||||
|
""", (payload.keyword, url_str, url_hash))
|
||||||
|
|
||||||
|
result = cur.fetchone()
|
||||||
|
|
||||||
|
if result:
|
||||||
|
# Also add to processing queue
|
||||||
|
cur.execute("""
|
||||||
|
INSERT INTO processing_queue (search_ingest_id, url, url_hash, status)
|
||||||
|
VALUES (%s, %s, %s, 'pending')
|
||||||
|
ON CONFLICT (url_hash) DO NOTHING
|
||||||
|
""", (result[0], url_str, url_hash))
|
||||||
|
|
||||||
|
inserted_count += 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to insert URL {url_str}: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
# Trigger background processing
|
||||||
|
if inserted_count > 0:
|
||||||
|
background_tasks.add_task(process_urls_from_queue)
|
||||||
|
|
||||||
|
return {
|
||||||
|
"message": f"Successfully queued {inserted_count} URLs for processing",
|
||||||
"keyword": payload.keyword,
|
"keyword": payload.keyword,
|
||||||
"queued_urls": len(payload.results)
|
"total_received": len(payload.results),
|
||||||
|
"new_urls": inserted_count,
|
||||||
|
"duplicates_skipped": len(payload.results) - inserted_count
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
conn.rollback()
|
||||||
|
logger.error(f"Failed to ingest data: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail=f"Failed to store data: {str(e)}")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
@app.post("/api/scrape-and-check")
|
||||||
|
def scrape_and_check_endpoint(url: HttpUrl):
|
||||||
|
"""
|
||||||
|
Endpoint to scrape a single URL and check for Arabic content
|
||||||
|
"""
|
||||||
|
url_str = str(url)
|
||||||
|
result = scrape_and_check_arabic(url_str)
|
||||||
|
|
||||||
|
if result["error"]:
|
||||||
|
raise HTTPException(status_code=400, detail=result["error"])
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
@app.get("/api/arabic-index")
|
||||||
|
def get_arabic_index(limit: int = 100, offset: int = 0):
|
||||||
|
"""
|
||||||
|
Retrieve entries from the Arabic index
|
||||||
|
"""
|
||||||
|
conn = get_db_connection()
|
||||||
|
if not conn:
|
||||||
|
raise HTTPException(status_code=503, detail="Database service unavailable")
|
||||||
|
|
||||||
|
try:
|
||||||
|
with conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT id, url, title, meta_description,
|
||||||
|
detection_score, scraped_at, source_keyword
|
||||||
|
FROM arabic_index
|
||||||
|
ORDER BY scraped_at DESC
|
||||||
|
LIMIT %s OFFSET %s
|
||||||
|
""", (limit, offset))
|
||||||
|
|
||||||
|
results = [dict(row) for row in cur.fetchall()]
|
||||||
|
|
||||||
|
# Get total count
|
||||||
|
cur.execute("SELECT COUNT(*) FROM arabic_index")
|
||||||
|
total = cur.fetchone()[0]
|
||||||
|
|
||||||
|
return {
|
||||||
|
"total": total,
|
||||||
|
"offset": offset,
|
||||||
|
"limit": limit,
|
||||||
|
"results": results
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=500, detail=f"Failed to queue job: {str(e)}")
|
logger.error(f"Failed to fetch arabic index: {e}")
|
||||||
|
raise HTTPException(status_code=500, detail="Failed to fetch data")
|
||||||
|
finally:
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
@app.post("/api/process-queue")
|
||||||
|
def trigger_queue_processing(background_tasks: BackgroundTasks):
|
||||||
|
"""
|
||||||
|
Manually trigger queue processing
|
||||||
|
"""
|
||||||
|
background_tasks.add_task(process_urls_from_queue)
|
||||||
|
return {"message": "Queue processing triggered"}
|
||||||
|
|
||||||
|
# --- Run with: uvicorn main:app --reload
|
||||||
المرجع في مشكلة جديدة
حظر مستخدم