16 أسطر
521 B
Python
16 أسطر
521 B
Python
import httpx
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
# Function: fetch webpage content and extract plain text
|
|
async def fetch_url(url: str, *, timeout: float = 30.0) -> str:
    """Fetch a web page and return its text content on a single line.

    The page is downloaded with an ``httpx.AsyncClient``, parsed with
    BeautifulSoup using the ``"html.parser"`` backend, and every run of
    whitespace (spaces, tabs, newlines) in the extracted text is collapsed
    into one space.

    Args:
        url: Address of the page to download.
        timeout: Timeout in seconds passed to the GET request. Defaults to
            30.0, the value this function previously hard-coded.

    Returns:
        The whitespace-normalized page text, or the literal string
        ``"Timeout error"`` if the request times out.

    Note:
        Only ``httpx.TimeoutException`` is handled here; any other exception
        raised by the request propagates to the caller. The response status
        code is not checked, so the body of an error response is parsed like
        any other page.
    """
    async with httpx.AsyncClient() as client:
        # Keep the try body to the single call that can time out.
        try:
            response = await client.get(url, timeout=timeout)
        except httpx.TimeoutException:
            # Callers get a sentinel string instead of an exception.
            return "Timeout error"

        soup = BeautifulSoup(response.text, "html.parser")
        # str.split() with no arguments splits on any whitespace run, so
        # re-joining with single spaces removes extra spaces and newlines.
        return " ".join(soup.get_text().split())
|