# MCP_SERCVER_SEARCH/services/scrape.py

import httpx
from bs4 import BeautifulSoup


# Function: fetch webpage content and extract plain text
async def fetch_url(url: str) -> str:
    """Download a web page and return its text content on a single line.

    Args:
        url: Address of the page to fetch.

    Returns:
        The text extracted from the response body by BeautifulSoup, with
        every run of whitespace (spaces, tabs, newlines) collapsed to one
        space. If the request times out, the literal string
        ``"Timeout error"`` is returned instead.

    Note:
        Only timeouts are handled here; any other ``httpx`` error
        propagates to the caller. The HTTP status code is not checked, so
        the body of an error page is returned like any other page.
    """
    async with httpx.AsyncClient() as client:
        try:
            # httpx does not follow redirects unless asked to. Without this,
            # a URL answering 301/302 (http -> https, bare domain -> www, ...)
            # yields the redirect response's body instead of the real page.
            response = await client.get(
                url, timeout=30.0, follow_redirects=True
            )
        except httpx.TimeoutException:
            return "Timeout error"

    # The body was fully read by client.get(), so it is safe to parse after
    # the client has been closed.
    soup = BeautifulSoup(response.text, "html.parser")
    # split() with no arguments drops all whitespace runs; join re-spaces them.
    return " ".join(soup.get_text().split())