This directory holds our services; this file is used to scrape data from the docs.

هذا الالتزام موجود في:
2025-09-30 01:21:51 +03:00
الأصل ab2eb53294
التزام 8826f34a23

15
services/scrape.py Normal file
عرض الملف

@@ -0,0 +1,15 @@
import httpx
from bs4 import BeautifulSoup
async def fetch_url(url: str, *, timeout: float = 30.0) -> str:
    """Fetch a web page and return its text content on a single line.

    The page is downloaded with ``httpx``, parsed with BeautifulSoup's
    ``html.parser`` backend, and every run of whitespace in the extracted
    text is collapsed to one space.

    Args:
        url: Address of the page to download.
        timeout: Seconds allowed for the request before giving up.

    Returns:
        The whitespace-normalized page text, or the literal string
        ``"Timeout error"`` if the request timed out.

    Note:
        The HTTP status code is not checked, so the body of an error page is
        returned like any other page. Non-timeout ``httpx`` errors propagate
        to the caller.
    """
    # httpx does not follow redirects unless asked to; without this, a URL
    # that answers 301/302 would yield the redirect response, not the page.
    async with httpx.AsyncClient(follow_redirects=True) as client:
        # Only the request itself can time out, so keep the try body minimal.
        try:
            response = await client.get(url, timeout=timeout)
        except httpx.TimeoutException:
            return "Timeout error"

        soup = BeautifulSoup(response.text, "html.parser")
        # split() with no arguments breaks on any whitespace run; re-joining
        # with one space removes the extra spaces and newlines.
        return " ".join(soup.get_text().split())