This directory holds our services; this file is used to scrape data from the docs.

2025-09-30 01:21:51 +03:00
--- a/services/scrape.py
+++ b/services/scrape.py
@@ -0,0 +1,34 @@
+import httpx
+from bs4 import BeautifulSoup
+
+
+async def fetch_url(url: str) -> str:
+    """Fetch a web page and return its visible text as a single line.
+
+    Args:
+        url: Address of the page to download.
+
+    Returns:
+        The page text with every run of whitespace collapsed to one space,
+        or the literal string "Timeout error" if the request timed out.
+
+    Raises:
+        httpx.RequestError: For non-timeout request failures (for example
+            connection errors); these are not caught here and propagate.
+    """
+    # httpx does not follow redirects unless asked; without this a moved or
+    # http -> https page would be scraped as its (near-empty) redirect stub.
+    async with httpx.AsyncClient(follow_redirects=True) as client:
+        try:
+            response = await client.get(url, timeout=30.0)
+        except httpx.TimeoutException:
+            # Sentinel string kept so existing callers keep working.
+            return "Timeout error"
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    # <script>/<style> bodies are code, not page text; drop them first.
+    for tag in soup(["script", "style"]):
+        tag.decompose()
+    # A separator keeps text from adjacent elements from fusing together
+    # ("<p>a</p><p>b</p>" -> "a b"); split/join then normalizes whitespace.
+    return " ".join(soup.get_text(separator=" ").split())