# services/scrape.py
# Scraping service: fetch a documentation page and extract its plain text.
import httpx
from bs4 import BeautifulSoup


async def fetch_url(url: str, *, timeout: float = 30.0) -> str:
    """Fetch a web page and return its text content as one normalized line.

    The page is downloaded with httpx, parsed with BeautifulSoup's
    ``html.parser``, and every run of whitespace (spaces, tabs, newlines)
    in the extracted text is collapsed to a single space.

    Args:
        url: Address of the page to download.
        timeout: Timeout, in seconds, handed to httpx for the request.
            Defaults to 30.0, the value that was previously hard-coded.

    Returns:
        The whitespace-normalized text of the page, or the literal string
        ``"Timeout error"`` if the request timed out.

    Raises:
        httpx.HTTPError: Any non-timeout httpx failure (for example a
            connection error) is not handled here and propagates.

    Note:
        The HTTP status code is not checked, so the body of an error page
        (404, 500, ...) is returned as text just like a successful page.
    """
    # httpx does not follow redirects unless asked to; without this a
    # redirecting URL would yield the body of the 3xx response instead of
    # the page it points to.
    async with httpx.AsyncClient(follow_redirects=True) as client:
        try:
            response = await client.get(url, timeout=timeout)
        except httpx.TimeoutException:
            # In-band sentinel kept byte-for-byte for existing callers.
            return "Timeout error"

    # The response body is fully read by ``client.get``, so it can be parsed
    # after the client has been closed.
    soup = BeautifulSoup(response.text, "html.parser")
    # split() with no arguments drops all whitespace runs; re-joining with a
    # single space removes extra spaces and newlines.
    return " ".join(soup.get_text().split())