This directory contains our services; this file is used to scrape data from the docs.
هذا الالتزام موجود في:
15
services/scrape.py
Normal file
15
services/scrape.py
Normal file
@@ -0,0 +1,15 @@
|
||||
import httpx
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
# Function: fetch webpage content and extract plain text
|
||||
async def fetch_url(url: str):
    """Download a web page and return its visible text as a single line.

    The page is requested with an async ``httpx`` client using a 30-second
    timeout, parsed with BeautifulSoup's built-in ``html.parser``, and every
    run of whitespace (spaces, tabs, newlines) in the extracted text is
    collapsed to one space.

    Args:
        url: Address of the page to fetch.

    Returns:
        The whitespace-normalized text of the page, or the literal string
        ``"Timeout error"`` if the request timed out.

    Note:
        Only ``httpx.TimeoutException`` is handled here; any other exception
        raised by the request propagates to the caller. The HTTP status code
        is not inspected, so the body of an error response is parsed like any
        other page.
    """
    async with httpx.AsyncClient() as http:
        # Only the network call can time out, so it is the only guarded line.
        try:
            page = await http.get(url, timeout=30.0)
        except httpx.TimeoutException:
            return "Timeout error"

        markup = BeautifulSoup(page.text, "html.parser")
        # split() with no arguments drops all whitespace runs; re-joining
        # with single spaces yields the cleaned, one-line text.
        words = markup.get_text().split()
        return " ".join(words)
|
المرجع في مشكلة جديدة
حظر مستخدم