{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "8ae9b213",
|
|
"metadata": {},
|
|
"source": [
|
|
"# API URL Health Checker Generator\n",
|
|
"\n",
|
|
"This notebook demonstrates an OpenAI API integration project that automatically generates a Python CLI tool for URL health monitoring. \n",
|
|
"\n",
|
|
"## Features:\n",
|
|
"- **Smart Configuration Management**: Environment variable handling with validation\n",
|
|
"- **AI-Powered Code Generation**: Uses OpenAI API to generate complete Python scripts\n",
|
|
"- **URL Health Monitoring**: Generated tool checks website availability and response times\n",
|
|
"- **CLI Interface**: Command-line tool with customizable parameters\n",
|
|
"- **Comprehensive Reporting**: CSV output and summary statistics\n",
|
|
"- **Error Handling**: Robust timeout and retry mechanisms\n",
|
|
"\n",
|
|
"## Generated Tool Capabilities:\n",
|
|
"- Batch URL testing from file input\n",
|
|
"- Response time measurement\n",
|
|
"- Status code reporting\n",
|
|
"- Configurable timeouts and retries\n",
|
|
"- CSV export functionality\n",
|
|
"- Summary statistics display\n",
|
|
"\n",
|
|
"This project showcases how to leverage AI APIs for automated code generation and practical DevOps tooling."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"id": "c4d5d91f",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Requirement already satisfied: openai in p:\\anaconda\\lib\\site-packages (1.106.1)\n",
|
|
"Requirement already satisfied: python-dotenv in p:\\anaconda\\lib\\site-packages (0.21.0)\n",
|
|
"Requirement already satisfied: httpx<1,>=0.23.0 in p:\\anaconda\\lib\\site-packages (from openai) (0.27.0)\n",
|
|
"Requirement already satisfied: jiter<1,>=0.4.0 in p:\\anaconda\\lib\\site-packages (from openai) (0.10.0)\n",
|
|
"Requirement already satisfied: sniffio in p:\\anaconda\\lib\\site-packages (from openai) (1.3.0)\n",
|
|
"Requirement already satisfied: anyio<5,>=3.5.0 in p:\\anaconda\\lib\\site-packages (from openai) (4.6.2)\n",
|
|
"Requirement already satisfied: tqdm>4 in p:\\anaconda\\lib\\site-packages (from openai) (4.66.5)\n",
|
|
"Requirement already satisfied: distro<2,>=1.7.0 in p:\\anaconda\\lib\\site-packages (from openai) (1.9.0)\n",
|
|
"Requirement already satisfied: typing-extensions<5,>=4.11 in p:\\anaconda\\lib\\site-packages (from openai) (4.12.2)\n",
|
|
"Requirement already satisfied: pydantic<3,>=1.9.0 in p:\\anaconda\\lib\\site-packages (from openai) (2.10.3)\n",
|
|
"Requirement already satisfied: idna>=2.8 in p:\\anaconda\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (3.7)\n",
|
|
"Requirement already satisfied: exceptiongroup>=1.0.2 in p:\\anaconda\\lib\\site-packages (from anyio<5,>=3.5.0->openai) (1.2.0)\n",
|
|
"Requirement already satisfied: httpcore==1.* in p:\\anaconda\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (1.0.2)\n",
|
|
"Requirement already satisfied: certifi in p:\\anaconda\\lib\\site-packages (from httpx<1,>=0.23.0->openai) (2025.8.3)\n",
|
|
"Requirement already satisfied: h11<0.15,>=0.13 in p:\\anaconda\\lib\\site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai) (0.14.0)\n",
|
|
"Requirement already satisfied: annotated-types>=0.6.0 in p:\\anaconda\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (0.6.0)\n",
|
|
"Requirement already satisfied: pydantic-core==2.27.1 in p:\\anaconda\\lib\\site-packages (from pydantic<3,>=1.9.0->openai) (2.27.1)\n",
|
|
"Requirement already satisfied: colorama in p:\\anaconda\\lib\\site-packages (from tqdm>4->openai) (0.4.6)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Install required packages\n",
|
|
"!pip install openai python-dotenv"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "320b15f9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Configuration loaded successfully!\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Configuration Class\n",
|
|
"import os, re, sys, pathlib\n",
|
|
"from dotenv import load_dotenv\n",
|
|
"from openai import OpenAI\n",
|
|
"\n",
|
|
"class Config:\n",
|
|
" def __init__(self):\n",
|
|
" load_dotenv()\n",
|
|
" \n",
|
|
" def require_env(self, name: str) -> str:\n",
|
|
" val = os.getenv(name)\n",
|
|
" if not val:\n",
|
|
" print(f\"[!] Missing environment variable: {name}\", file=sys.stderr)\n",
|
|
" sys.exit(1)\n",
|
|
" return val\n",
|
|
" \n",
|
|
" @property\n",
|
|
" def api_key(self) -> str:\n",
|
|
" # Get API key from environment variable\n",
|
|
" key = self.require_env('OPENAI_API_KEY')\n",
|
|
" if not key.startswith('sk-'):\n",
|
|
" key = f\"sk-{key}\"\n",
|
|
" return key\n",
|
|
" \n",
|
|
" @property\n",
|
|
" def base_url(self) -> str:\n",
|
|
" return self.require_env('BASE_URL')\n",
|
|
" \n",
|
|
" @property\n",
|
|
" def model_name(self) -> str:\n",
|
|
" return self.require_env('MODEL_NAME')\n",
|
|
"\n",
|
|
"# Test the configuration\n",
|
|
"config = Config()\n",
|
|
"print(\"Configuration loaded successfully!\")\n",
|
|
"# Uncomment below to test (but don't commit these with actual values visible)\n",
|
|
"# print(f\"API Key: {config.api_key[:15]}...\")\n",
|
|
"# print(f\"Base URL: {config.base_url}\")\n",
|
|
"# print(f\"Model: {config.model_name}\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "c70067c1",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Generating URL checker CLI script...\n",
|
|
"\n",
|
|
"============================================================\n",
|
|
"GENERATED URL CHECKER SCRIPT:\n",
|
|
"============================================================\n",
|
|
"Okay, I need to create a Python CLI script called url_checker.py based on the user's requirements. Let me start by breaking down the requirements step by step.\n",
|
|
"\n",
|
|
"First, the script must accept command-line arguments: --input, --output, --timeout, and --retries. I'll use the argparse module for that. The defaults are urls.txt, results.csv, 5 seconds, and 3 retries respectively. So I'll set those up as the default values in the argparse arguments.\n",
|
|
"\n",
|
|
"Next, the script should read URLs from the input file. I need to open the file, read each line, and ignore empty lines and those starting with a #. So I'll loop through each line, strip whitespace, check if the line is not empty and doesn't start with a comment character.\n",
|
|
"\n",
|
|
"Then, for each URL, I have to make a GET request with the specified timeout and retry on failure. The requests library will handle the GET requests. But I need to handle exceptions like ConnectionError, Timeout, and HTTPError. Wait, actually, the requests.get might raise exceptions, so I should use a try-except block. Also, retries: maybe use a loop that tries up to the specified number of retries. If all retries fail, record the failure.\n",
|
|
"\n",
|
|
"Each attempt should have the timeout. So in the loop, for each retry, I'll attempt the request. If it's successful, record the status code, response time (in milliseconds), and success as True. If it fails after all retries, then record the last error's status code if possible, or maybe a generic failure code? Hmm, but for timeouts or connection errors, the status code might not be available. So perhaps in the case of an exception, set status_code to None or some error code, and success to False. But the user wants the status code recorded. Maybe in exceptions like timeout or connection errors, the status code isn't available, so perhaps leave it as None or 0? Or maybe capture the exception type and set status code accordingly. Wait, the user's example might expect the status code from the response, but in case of exceptions, maybe we can't get that. So perhaps in the case of an exception, the status code is left as 0 or similar, and the response time is the time taken until the timeout.\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"Record: URL, status_code, response_time_ms, success (True/False)\". So for successful requests, status_code is the HTTP status, response_time is the time taken. For failures, maybe the status_code could be the last attempt's status, but if it's a network error (like timeout), then maybe the status code isn't available. So perhaps in such cases, set status_code to 0 or None, but the CSV needs to have a value. Maybe better to set it to 0 for non-HTTP errors. Alternatively, leave it as an empty string? But the user might prefer numbers. Hmm, perhaps in the case of exceptions, we can't get the status code, so we'll set it to 0, and note that in the summary.\n",
|
|
"\n",
|
|
"Now, handling retries: for each URL, loop up to retries+1 times (since initial attempt plus retries). Wait, the user says \"retry on failure up to specified retries\". So if the first attempt fails, retry up to retries times. So total attempts are retries + 1? Or is the retries the number of times to retry after the first failure? So for example, --retries 3 means 3 retries, so total 4 attempts. So in the loop, for i in range(retries): and then try again. Wait, perhaps better to structure it as:\n",
|
|
"\n",
|
|
"max_retries = args.retries\n",
|
|
"for attempt in range(max_retries + 1):\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" # if successful, break and record\n",
|
|
" break\n",
|
|
" except (requests.exceptions.RequestException) as e:\n",
|
|
" if attempt == max_retries:\n",
|
|
" # handle final failure\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
"\n",
|
|
"Wait, but how to structure the loop so that it retries the specified number of times. Let me think: the initial attempt is the first try. If it fails, then retry up to retries times. So total attempts = retries + 1. So the loop can run from 0 to retries, inclusive. So for each attempt in 0 to retries:\n",
|
|
"\n",
|
|
"Wait, perhaps a better approach is:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = None\n",
|
|
"response_time = 0\n",
|
|
"for _ in range(retries + 1):\n",
|
|
" try:\n",
|
|
" start_time = time.time()\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000 # ms\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # log the exception but continue retrying\n",
|
|
" pass\n",
|
|
" except Exception as e:\n",
|
|
" # other exceptions? Maybe not necessary, but handle broadly?\n",
|
|
" pass\n",
|
|
"\n",
|
|
"Wait, but in the case of an exception, like a timeout or connection error, the loop will continue. After all retries, if still failed, then record the failure. But how to capture the last exception's status? For example, if it's a 404, but the first attempt got that, then it would break, but if it's a timeout, then the status code isn't available. Hmm, perhaps in the case of an exception, the status_code remains None, and the response_time is the time taken until the timeout. Wait, but for a timeout, the elapsed time would be the timeout value plus some? Or maybe the time until the timeout occurred. The timeout is set, so the request will raise a Timeout exception after that. So the elapsed time would be approximately the timeout.\n",
|
|
"\n",
|
|
"Alternatively, perhaps in the case of an exception, we can't get the status code, so we set it to 0, and the response_time as the elapsed time (even if it's a timeout). But in the case of a timeout, the elapsed time would be the timeout value. Wait, but the actual time taken might be a bit more, but perhaps it's okay to record the elapsed time as the time taken until the exception was raised.\n",
|
|
"\n",
|
|
"So in the code, for each attempt, we start the timer, make the request, and if it's successful, record the time and status. If it's an exception, we note the time taken (even if it's a timeout), but can't get the status code. So in that case, the status_code would be None, but perhaps we can set it to 0 in the results. Or maybe leave it as 0 in the CSV.\n",
|
|
"\n",
|
|
"Wait, the user's example says to record status_code. So in the case of an exception, perhaps we can't get the status code, so we can set it to 0 or leave it as an empty string? But the CSV needs to have a value. Let's see the user's instruction says \"status_code\" as a field. So perhaps in the case of an exception, we can set status_code to 0, and note that in the summary.\n",
|
|
"\n",
|
|
"Alternatively, maybe in the case of a connection error or timeout, the status code is not available, so we can leave it as 0. So in the code, after all retries, if success is False, then status_code remains None, so we can set it to 0 in the CSV. Or perhaps better to set it to 0 in the code.\n",
|
|
"\n",
|
|
"Hmm, perhaps in the code, after the loop, if success is False, then we can set status_code to 0, and response_time to the last attempt's elapsed time (even if it was an exception). Wait, but in the case of an exception, the elapsed time would be the time taken until the exception was raised. So even if it's a timeout, the elapsed time would be roughly the timeout value. So that's okay.\n",
|
|
"\n",
|
|
"Wait, but in the code above, when an exception occurs, the elapsed time is not captured. Because the code is:\n",
|
|
"\n",
|
|
"start_time = time.time()\n",
|
|
"try:\n",
|
|
" response = requests.get(...)\n",
|
|
" elapsed = ... \n",
|
|
"except:\n",
|
|
" # but the elapsed time here would be time.time() - start_time, but we need to capture that?\n",
|
|
"\n",
|
|
"Ah, right! I missed that. So in the except block, we need to calculate the elapsed time even if there's an exception. So the code should be:\n",
|
|
"\n",
|
|
"start_time = time.time()\n",
|
|
"try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" success = True\n",
|
|
" break\n",
|
|
"except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # but we can't get the status code here, so leave status_code as None or 0?\n",
|
|
" # but in this case, the loop continues, so maybe we need to track the elapsed time for each attempt?\n",
|
|
"\n",
|
|
"Wait, but in the case of retries, we might want to take the time of the last attempt? Or the total time? Hmm, perhaps the response_time should be the time taken for the successful attempt, or the time taken for the last attempt (even if it failed). Since the user wants the response time for each URL, even if it failed, we should record the time taken for the last attempt.\n",
|
|
"\n",
|
|
"Wait, but in the code above, the loop is trying multiple times. So for each attempt, we can track the elapsed time, but only the successful one's time is recorded. For failures, the last attempt's elapsed time would be the one to record. So perhaps in the code, after each attempt, we can track the elapsed time, and if it's a failure, we keep the last elapsed time.\n",
|
|
"\n",
|
|
"Hmm, this complicates things. Let me think again.\n",
|
|
"\n",
|
|
"Alternative approach:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = None\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in 0 to retries:\n",
|
|
"\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # exit loop on success\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # if this was the last attempt, we need to record the elapsed time\n",
|
|
" # but since we are in the except block, we can't break yet unless it's the last attempt\n",
|
|
" # but how to track the last elapsed time?\n",
|
|
"\n",
|
|
"Wait, perhaps after each attempt, even if it's an exception, we can keep track of the elapsed time and status code (if available). But for the final result, if it's a failure, we take the last elapsed time and set status_code to 0 (or whatever).\n",
|
|
"\n",
|
|
"Alternatively, after all retries, if still failed, then set response_time to the last attempt's elapsed time, and status_code to 0 (or None, but the CSV needs a value).\n",
|
|
"\n",
|
|
"Hmm, perhaps the code can be structured as follows:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = 0 # default to 0 for failure\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in range(retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # keep track of the last elapsed time in case of failure\n",
|
|
" response_time = elapsed\n",
|
|
" # continue to next attempt unless it's the last one\n",
|
|
" if attempt == retries:\n",
|
|
" # final attempt failed\n",
|
|
" success = False\n",
|
|
" # status_code remains 0 (as initialized)\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
"\n",
|
|
"Wait, but in this case, even if the first attempt fails, but the second succeeds, the response_time would be overwritten. That's okay. But if all attempts fail, then the response_time is the last attempt's elapsed time, and status_code is 0 (since it was initialized to 0). That might work.\n",
|
|
"\n",
|
|
"Alternatively, perhaps initializing status_code as None, and then in the except block, if it's the last attempt, set status_code to 0. Hmm, but maybe that's complicating.\n",
|
|
"\n",
|
|
"Alternatively, after the loop, if success is False, then set status_code to 0. That way, regardless of the exception, the status_code is 0 for failures. The user might understand that 0 indicates a network error or timeout.\n",
|
|
"\n",
|
|
"So in code:\n",
|
|
"\n",
|
|
"After the loop:\n",
|
|
"\n",
|
|
"if not success:\n",
|
|
" status_code = 0\n",
|
|
"\n",
|
|
"That way, even if the exception was a 404 (which would have a status code), but the first attempt got a 404, then the loop would break, and status_code would be 404, and success is True (since 404 is a valid response, but the user might consider it a failure if they expect 2xx). Wait, but the user's success is probably determined by the HTTP status code being 2xx. Wait, the user's instruction says \"success (True/False)\". So perhaps the success is whether the request was successful (i.e., the HTTP status is 2xx). Wait, but the user's instruction says \"retry on failure up to specified retries\". So perhaps \"failure\" here refers to any non-2xx status, or any error (including network errors). Hmm, the user's instruction says \"retry on failure\", so perhaps the retries are for any kind of failure, including network issues. So in the code, the success variable is True only if the request was successful (regardless of status code?), or only if the status code is 2xx?\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"Record: URL, status_code, response_time_ms, success (True/False)\". So the success is probably whether the request was successful (i.e., the GET request didn't raise an exception and got a response). Wait, but the user might consider a 404 as a failure (non-successful), but in the code above, if the response is 404, then success is True (because the request didn't raise an exception). Wait, that's a problem.\n",
|
|
"\n",
|
|
"Ah, right! The success should be whether the HTTP status code is in the 2xx range. Because otherwise, even a 404 would be considered a success in the code above. So I need to adjust that.\n",
|
|
"\n",
|
|
"So the success variable should be (response.status_code >= 200 and response.status_code < 300). So in the code:\n",
|
|
"\n",
|
|
"if response.status_code >= 200 and response.status_code < 300:\n",
|
|
" success = True\n",
|
|
"else:\n",
|
|
" success = False\n",
|
|
"\n",
|
|
"Wait, but in the current code structure, the loop breaks on any response (even 404), but the success is determined by the status code. So the code should check that.\n",
|
|
"\n",
|
|
"So modifying the code:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = 200 <= status_code < 300\n",
|
|
"if success:\n",
|
|
" break\n",
|
|
"else:\n",
|
|
" # even though we got a response, it's not a success, so retry?\n",
|
|
"Wait, but the user's instruction says to retry on failure. So if the first attempt returns a 404, should we retry? The user's instruction says \"retry on failure up to specified retries\". So perhaps \"failure\" here includes both network errors and non-2xx responses. So in that case, the code should retry even if the response is a 404.\n",
|
|
"\n",
|
|
"Hmm, that complicates things. Because if a URL returns a 404, the code would retry, but that's probably not desired. The user might want to retry only on network issues (like timeouts, connection errors), not on HTTP errors. Because a 404 is a valid response, just not successful. So perhaps the retries should only be for connection errors, timeouts, etc., not for HTTP errors.\n",
|
|
"\n",
|
|
"Alternatively, the user's instruction says \"retry on failure\", which could include any failure (including HTTP errors). But that's ambiguous. The user's example might expect that retries are for network issues, not HTTP errors. So perhaps the code should only retry on exceptions (like timeouts, connection errors), not on HTTP errors (like 404, 500, etc). Because in that case, the HTTP errors are considered final and not retried.\n",
|
|
"\n",
|
|
"So the code should only retry on exceptions (like ConnectionError, Timeout, etc.), but not on HTTP errors (like 404). So in that case, the code would proceed as follows:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = (200 <= status_code < 300)\n",
|
|
"if success:\n",
|
|
" break\n",
|
|
"else:\n",
|
|
" # not a success, but it's an HTTP error, so don't retry\n",
|
|
" # so break the loop\n",
|
|
" break\n",
|
|
"\n",
|
|
"Wait, but then the retries would only apply to exceptions, not to HTTP errors. So the loop would only retry on exceptions. So the code structure would be:\n",
|
|
"\n",
|
|
"for attempt in ...:\n",
|
|
" try:\n",
|
|
" response = ...\n",
|
|
" # process response\n",
|
|
" if success (2xx):\n",
|
|
" break\n",
|
|
" else:\n",
|
|
" # HTTP error, not retrying\n",
|
|
" break\n",
|
|
" except RequestException:\n",
|
|
" # handle exception, retry if not last attempt\n",
|
|
"\n",
|
|
"Hmm, that makes sense. Because if the server returns a 404, that's a definitive response, so we don't want to retry. Only network issues (like timeouts, connection refused) should be retried.\n",
|
|
"\n",
|
|
"So in code:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = (200 <= status_code < 300)\n",
|
|
"if success:\n",
|
|
" break # exit loop\n",
|
|
"else:\n",
|
|
" # not a success, but it's an HTTP error, so we consider this a failure and don't retry\n",
|
|
" # so break the loop\n",
|
|
" break\n",
|
|
"\n",
|
|
"Wait, but then the loop would exit immediately on any HTTP error, even if there are retries left. So the retries only apply to exceptions (network issues), not HTTP errors.\n",
|
|
"\n",
|
|
"Yes, that's better. So the code structure would be:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = 0\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in range(retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error, not a success, but don't retry\n",
|
|
" success = False\n",
|
|
" break # exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # network error, retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == retries:\n",
|
|
" # last attempt, set success to False\n",
|
|
" success = False\n",
|
|
" status_code = 0 # or some code indicating network error\n",
|
|
" # but how to track the status code here? Maybe leave it as 0\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue # retry\n",
|
|
"\n",
|
|
"Wait, but in the except block, after the exception, we can't get the status code, so we set status_code to 0 (or leave it as 0, since initialized to 0). The success is False. So after the loop, if it's an exception, then status_code is 0, success is False, and response_time is the time taken for the last attempt.\n",
|
|
"\n",
|
|
"Hmm, this seems manageable.\n",
|
|
"\n",
|
|
"Now, after processing all URLs, we need to write the results to a CSV. The CSV should have columns: url, status_code, response_time_ms, success.\n",
|
|
"\n",
|
|
"So the CSV writer will need to write each row with these four fields.\n",
|
|
"\n",
|
|
"Then, the summary needs to show total URLs, successful (success=True), failed (success=False), and average response time (only for successful requests? Or including failed ones? The user says \"average response time\", so probably average of all successful requests, since failed ones might have varying response times (like timeouts). Or maybe including all, but the user might prefer the average of successful ones. The instruction says \"average response time\", so perhaps it's the average of all successful requests' response times.\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"average response time\" in the summary. So probably the average of all successful requests. Because for failed requests (either HTTP errors or network errors), their response_time might not be meaningful (like timeouts have response_time equal to the timeout, but that's not the server's response time). So the average should be calculated only on the successful entries.\n",
|
|
"\n",
|
|
"So in the code, after collecting all results, we can compute:\n",
|
|
"\n",
|
|
"total = len(results)\n",
|
|
"successful = sum(1 for r in results if r['success'])\n",
|
|
"failed = total - successful\n",
|
|
"successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
"average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
"Then, print the summary with these numbers.\n",
|
|
"\n",
|
|
"Now, putting all together.\n",
|
|
"\n",
|
|
"First, the command-line arguments using argparse:\n",
|
|
"\n",
|
|
"import argparse\n",
|
|
"\n",
|
|
"parser = argparse.ArgumentParser(description='URL Health Checker')\n",
|
|
"parser.add_argument('--input', default='urls.txt', help='Input file with URLs')\n",
|
|
"parser.add_argument('--output', default='results.csv', help='Output CSV file')\n",
|
|
"parser.add_argument('--timeout', type=int, default=5, help='Request timeout in seconds')\n",
|
|
"parser.add_argument('--retries', type=int, default=3, help='Number of retries')\n",
|
|
"args = parser.parse_args()\n",
|
|
"\n",
|
|
"Then, read the URLs from the input file:\n",
|
|
"\n",
|
|
"urls = []\n",
|
|
"with open(args.input, 'r') as f:\n",
|
|
" for line in f:\n",
|
|
" line = line.strip()\n",
|
|
" if not line or line.startswith('#'):\n",
|
|
" continue\n",
|
|
" urls.append(line)\n",
|
|
"\n",
|
|
"Then, process each URL:\n",
|
|
"\n",
|
|
"import requests\n",
|
|
"import time\n",
|
|
"\n",
|
|
"results = []\n",
|
|
"\n",
|
|
"for url in urls:\n",
|
|
" # Initialize variables\n",
|
|
" success = False\n",
|
|
" status_code = 0\n",
|
|
" response_time = 0.0\n",
|
|
"\n",
|
|
" for attempt in range(args.retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=args.timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error, don't retry\n",
|
|
" success = False\n",
|
|
" break # exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # Network error, retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == args.retries:\n",
|
|
" # Last attempt, set success to False\n",
|
|
" success = False\n",
|
|
" status_code = 0 # or leave as 0\n",
|
|
" # but we can also set status_code to 0 here\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue # retry\n",
|
|
"\n",
|
|
" # After loop, append to results\n",
|
|
" results.append({\n",
|
|
" 'url': url,\n",
|
|
" 'status_code': status_code,\n",
|
|
" 'response_time_ms': response_time,\n",
|
|
" 'success': success\n",
|
|
" })\n",
|
|
"\n",
|
|
"Wait, but in the except block, after the exception, the status_code is set to 0, but in the code above, the status_code is initialized to 0, so it remains 0. That's okay.\n",
|
|
"\n",
|
|
"Now, writing to CSV:\n",
|
|
"\n",
|
|
"import csv\n",
|
|
"\n",
|
|
"with open(args.output, 'w', newline='') as f:\n",
|
|
" fieldnames = ['url', 'status_code', 'response_time_ms', 'success']\n",
|
|
" writer = csv.DictWriter(f, fieldnames=fieldnames)\n",
|
|
" writer.writeheader()\n",
|
|
" for row in results:\n",
|
|
" writer.writerow(row)\n",
|
|
"\n",
|
|
"Then, the summary:\n",
|
|
"\n",
|
|
"total = len(results)\n",
|
|
"successful = sum(1 for r in results if r['success'])\n",
|
|
"failed = total - successful\n",
|
|
"\n",
|
|
"successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
"average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
"print(f\"Summary:\")\n",
|
|
"print(f\"Total URLs checked: {total}\")\n",
|
|
"print(f\"Successful requests: {successful}\")\n",
|
|
"print(f\"Failed requests: {failed}\")\n",
|
|
"print(f\"Average response time: {average:.2f} ms\")\n",
|
|
"\n",
|
|
"Wait, but in the case where all are failed, average is 0, but the print would show 0.00 ms. That's okay.\n",
|
|
"\n",
|
|
"Now, putting all together into a script.\n",
|
|
"\n",
|
|
"Wait, but in the code above, when there's an HTTP error (like 404), the loop breaks immediately, so no retries. That's correct.\n",
|
|
"\n",
|
|
"Testing edge cases:\n",
|
|
"\n",
|
|
"- URL with 200: success, no retries needed.\n",
|
|
"- URL with 500: HTTP error, so success is False, no retries.\n",
|
|
"- URL with timeout: after retries, status_code 0, success False.\n",
|
|
"- Empty lines and comments in input file are skipped.\n",
|
|
"\n",
|
|
"Potential issues:\n",
|
|
"\n",
|
|
"- The 'requests' library might have other exceptions not covered, but using the broad RequestException should catch all request-related exceptions.\n",
|
|
"\n",
|
|
"Another thing: the timeout is per attempt. So each retry will have the same timeout. That's as per the user's instruction.\n",
|
|
"\n",
|
|
"Now, code structure:\n",
|
|
"\n",
|
|
"The code should be well-commented. Let me structure it with comments.\n",
|
|
"\n",
|
|
"Also, need to import necessary modules: argparse, requests, time, csv.\n",
|
|
"\n",
|
|
"Wait, the code may have an error if the input file is not found. But the user didn't mention error handling for that, so perhaps it's okay to let it crash with a FileNotFoundError.\n",
|
|
"\n",
|
|
"Now, putting all together into the script:\n",
|
|
"\n",
|
|
"Wait, let me write the code step by step with comments.\n",
|
|
"\n",
|
|
"Wait, also, in the code above, when an HTTP error occurs (like 404), the loop breaks, so the 'response_time' is set to the elapsed time of that first attempt. That's correct.\n",
|
|
"\n",
|
|
"Another point: in the except block, after the exception, the code sets response_time to elapsed (time taken until the exception was raised). So even for timeouts, the response_time is the time taken (which would be close to the timeout value).\n",
|
|
"\n",
|
|
"Now, the code should be okay.\n",
|
|
"\n",
|
|
"Testing the code:\n",
|
|
"\n",
|
|
"Suppose a URL that's down (e.g., invalid domain), retries 3 times. The final status_code is 0, success False, response_time is ~5 seconds (timeout).\n",
|
|
"\n",
|
|
"A URL returning 404: first attempt, response_time is the time taken, status_code 404, success False, no retries.\n",
|
|
"\n",
|
|
"A URL returning 200: success, response_time is the actual time.\n",
|
|
"\n",
|
|
"Now, the CSV will have all entries.\n",
|
|
"\n",
|
|
"The summary will show the counts and average of successful times.\n",
|
|
"\n",
|
|
"Yes, this should meet the requirements.\n",
|
|
"</think>\n",
|
|
"\n",
|
|
"```python\n",
|
|
"import argparse\n",
|
|
"import csv\n",
|
|
"import requests\n",
|
|
"import time\n",
|
|
"\n",
|
|
"\n",
|
|
"def main():\n",
|
|
" # Parse command-line arguments\n",
|
|
" parser = argparse.ArgumentParser(description='URL Health Checker')\n",
|
|
" parser.add_argument('--input', default='urls.txt', help='Input file with URLs')\n",
|
|
" parser.add_argument('--output', default='results.csv', help='Output CSV file')\n",
|
|
" parser.add_argument('--timeout', type=int, default=5, help='Request timeout in seconds')\n",
|
|
" parser.add_argument('--retries', type=int, default=3, help='Number of retries')\n",
|
|
" args = parser.parse_args()\n",
|
|
"\n",
|
|
" # Read URLs from file, ignoring empty lines and comments\n",
|
|
" urls = []\n",
|
|
" with open(args.input, 'r') as f:\n",
|
|
" for line in f:\n",
|
|
" line = line.strip()\n",
|
|
" if not line or line.startswith('#'):\n",
|
|
" continue\n",
|
|
" urls.append(line)\n",
|
|
"\n",
|
|
" results = []\n",
|
|
" for url in urls:\n",
|
|
" success = False\n",
|
|
" status_code = 0\n",
|
|
" response_time = 0.0\n",
|
|
"\n",
|
|
" for attempt in range(args.retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=args.timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000 # Convert to milliseconds\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # Success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error (e.g., 404), don't retry\n",
|
|
" success = False\n",
|
|
" break\n",
|
|
" except requests.exceptions.RequestException:\n",
|
|
" # Network error (timeout, connection error, etc.), retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == args.retries:\n",
|
|
" # Last attempt failed\n",
|
|
" success = False\n",
|
|
" status_code = 0 # Indicate network error\n",
|
|
" else:\n",
|
|
" continue # Retry\n",
|
|
"\n",
|
|
" results.append({\n",
|
|
" 'url': url,\n",
|
|
" 'status_code': status_code,\n",
|
|
" 'response_time_ms': response_time,\n",
|
|
" 'success': success\n",
|
|
" })\n",
|
|
"\n",
|
|
" # Write results to CSV\n",
|
|
" with open(args.output, 'w', newline='') as f:\n",
|
|
" fieldnames = ['url', 'status_code', 'response_time_ms', 'success']\n",
|
|
" writer = csv.DictWriter(f, fieldnames=fieldnames)\n",
|
|
" writer.writeheader()\n",
|
|
" for row in results:\n",
|
|
" writer.writerow(row)\n",
|
|
"\n",
|
|
" # Calculate summary statistics\n",
|
|
" total = len(results)\n",
|
|
" successful = sum(1 for r in results if r['success'])\n",
|
|
" failed = total - successful\n",
|
|
" successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
" average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
" # Print summary table\n",
|
|
" print(f\"Summary:\")\n",
|
|
" print(f\"Total URLs checked: {total}\")\n",
|
|
" print(f\"Successful requests: {successful}\")\n",
|
|
" print(f\"Failed requests: {failed}\")\n",
|
|
" print(f\"Average response time: {average:.2f} ms\")\n",
|
|
"\n",
|
|
"\n",
|
|
"if __name__ == '__main__':\n",
|
|
" main()\n",
|
|
"```\n",
|
|
"\n",
|
|
"To use this script:\n",
|
|
"\n",
|
|
"1. Save it as `url_checker.py`\n",
|
|
"2. Create a `urls.txt` file with URLs (one per line, empty lines/comments allowed)\n",
|
|
"3. Run with: `python url_checker.py` (or specify options like `--timeout 10 --retries 2`)\n",
|
|
"\n",
|
|
"The script will:\n",
|
|
"- Read URLs from the input file\n",
|
|
"- Check each URL with retries\n",
|
|
"- Save results to CSV\n",
|
|
"- Print a summary of the checks\n",
|
|
"\n",
|
|
"Example `urls.txt` content:\n",
|
|
"```\n",
|
|
"https://httpstat.us/200\n",
|
|
"https://httpstat.us/404\n",
|
|
"https://invalid-domain.test\n",
|
|
"# This is a comment\n",
|
|
"https://httpstat.us/500\n",
|
|
"```\n",
|
|
"\n",
|
|
"============================================================\n",
|
|
"GENERATED URL CHECKER SCRIPT:\n",
|
|
"============================================================\n",
|
|
"Okay, I need to create a Python CLI script called url_checker.py based on the user's requirements. Let me start by breaking down the requirements step by step.\n",
|
|
"\n",
|
|
"First, the script must accept command-line arguments: --input, --output, --timeout, and --retries. I'll use the argparse module for that. The defaults are urls.txt, results.csv, 5 seconds, and 3 retries respectively. So I'll set those up as the default values in the argparse arguments.\n",
|
|
"\n",
|
|
"Next, the script should read URLs from the input file. I need to open the file, read each line, and ignore empty lines and those starting with a #. So I'll loop through each line, strip whitespace, check if the line is not empty and doesn't start with a comment character.\n",
|
|
"\n",
|
|
"Then, for each URL, I have to make a GET request with the specified timeout and retry on failure. The requests library will handle the GET requests. But I need to handle exceptions like ConnectionError, Timeout, and HTTPError. Wait, actually, the requests.get might raise exceptions, so I should use a try-except block. Also, retries: maybe use a loop that tries up to the specified number of retries. If all retries fail, record the failure.\n",
|
|
"\n",
|
|
"Each attempt should have the timeout. So in the loop, for each retry, I'll attempt the request. If it's successful, record the status code, response time (in milliseconds), and success as True. If it fails after all retries, then record the last error's status code if possible, or maybe a generic failure code? Hmm, but for timeouts or connection errors, the status code might not be available. So perhaps in the case of an exception, set status_code to None or some error code, and success to False. But the user wants the status code recorded. Maybe in exceptions like timeout or connection errors, the status code isn't available, so perhaps leave it as None or 0? Or maybe capture the exception type and set status code accordingly. Wait, the user's example might expect the status code from the response, but in case of exceptions, maybe we can't get that. So perhaps in the case of an exception, the status code is left as 0 or similar, and the response time is the time taken until the timeout.\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"Record: URL, status_code, response_time_ms, success (True/False)\". So for successful requests, status_code is the HTTP status, response_time is the time taken. For failures, maybe the status_code could be the last attempt's status, but if it's a network error (like timeout), then maybe the status code isn't available. So perhaps in such cases, set status_code to 0 or None, but the CSV needs to have a value. Maybe better to set it to 0 for non-HTTP errors. Alternatively, leave it as an empty string? But the user might prefer numbers. Hmm, perhaps in the case of exceptions, we can't get the status code, so we'll set it to 0, and note that in the summary.\n",
|
|
"\n",
|
|
"Now, handling retries: for each URL, loop up to retries+1 times (since initial attempt plus retries). Wait, the user says \"retry on failure up to specified retries\". So if the first attempt fails, retry up to retries times. So total attempts are retries + 1? Or is the retries the number of times to retry after the first failure? So for example, --retries 3 means 3 retries, so total 4 attempts. So in the loop, for i in range(retries): and then try again. Wait, perhaps better to structure it as:\n",
|
|
"\n",
|
|
"max_retries = args.retries\n",
|
|
"for attempt in range(max_retries + 1):\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" # if successful, break and record\n",
|
|
" break\n",
|
|
" except (requests.exceptions.RequestException) as e:\n",
|
|
" if attempt == max_retries:\n",
|
|
" # handle final failure\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
"\n",
|
|
"Wait, but how to structure the loop so that it retries the specified number of times. Let me think: the initial attempt is the first try. If it fails, then retry up to retries times. So total attempts = retries + 1. So the loop can run from 0 to retries, inclusive. So for each attempt in 0 to retries:\n",
|
|
"\n",
|
|
"Wait, perhaps a better approach is:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = None\n",
|
|
"response_time = 0\n",
|
|
"for _ in range(retries + 1):\n",
|
|
" try:\n",
|
|
" start_time = time.time()\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000 # ms\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # log the exception but continue retrying\n",
|
|
" pass\n",
|
|
" except Exception as e:\n",
|
|
" # other exceptions? Maybe not necessary, but handle broadly?\n",
|
|
" pass\n",
|
|
"\n",
|
|
"Wait, but in the case of an exception, like a timeout or connection error, the loop will continue. After all retries, if still failed, then record the failure. But how to capture the last exception's status? For example, if it's a 404, but the first attempt got that, then it would break, but if it's a timeout, then the status code isn't available. Hmm, perhaps in the case of an exception, the status_code remains None, and the response_time is the time taken until the timeout. Wait, but for a timeout, the elapsed time would be the timeout value plus some? Or maybe the time until the timeout occurred. The timeout is set, so the request will raise a Timeout exception after that. So the elapsed time would be approximately the timeout.\n",
|
|
"\n",
|
|
"Alternatively, perhaps in the case of an exception, we can't get the status code, so we set it to 0, and the response_time as the elapsed time (even if it's a timeout). But in the case of a timeout, the elapsed time would be the timeout value. Wait, but the actual time taken might be a bit more, but perhaps it's okay to record the elapsed time as the time taken until the exception was raised.\n",
|
|
"\n",
|
|
"So in the code, for each attempt, we start the timer, make the request, and if it's successful, record the time and status. If it's an exception, we note the time taken (even if it's a timeout), but can't get the status code. So in that case, the status_code would be None, but perhaps we can set it to 0 in the results. Or maybe leave it as 0 in the CSV.\n",
|
|
"\n",
|
|
"Wait, the user's example says to record status_code. So in the case of an exception, perhaps we can't get the status code, so we can set it to 0 or leave it as an empty string? But the CSV needs to have a value. Let's see the user's instruction says \"status_code\" as a field. So perhaps in the case of an exception, we can set status_code to 0, and note that in the summary.\n",
|
|
"\n",
|
|
"Alternatively, maybe in the case of a connection error or timeout, the status code is not available, so we can leave it as 0. So in the code, after all retries, if success is False, then status_code remains None, so we can set it to 0 in the CSV. Or perhaps better to set it to 0 in the code.\n",
|
|
"\n",
|
|
"Hmm, perhaps in the code, after the loop, if success is False, then we can set status_code to 0, and response_time to the last attempt's elapsed time (even if it was an exception). Wait, but in the case of an exception, the elapsed time would be the time taken until the exception was raised. So even if it's a timeout, the elapsed time would be roughly the timeout value. So that's okay.\n",
|
|
"\n",
|
|
"Wait, but in the code above, when an exception occurs, the elapsed time is not captured. Because the code is:\n",
|
|
"\n",
|
|
"start_time = time.time()\n",
|
|
"try:\n",
|
|
" response = requests.get(...)\n",
|
|
" elapsed = ... \n",
|
|
"except:\n",
|
|
" # but the elapsed time here would be time.time() - start_time, but we need to capture that?\n",
|
|
"\n",
|
|
"Ah, right! I missed that. So in the except block, we need to calculate the elapsed time even if there's an exception. So the code should be:\n",
|
|
"\n",
|
|
"start_time = time.time()\n",
|
|
"try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" success = True\n",
|
|
" break\n",
|
|
"except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # but we can't get the status code here, so leave status_code as None or 0?\n",
|
|
" # but in this case, the loop continues, so maybe we need to track the elapsed time for each attempt?\n",
|
|
"\n",
|
|
"Wait, but in the case of retries, we might want to take the time of the last attempt? Or the total time? Hmm, perhaps the response_time should be the time taken for the successful attempt, or the time taken for the last attempt (even if it failed). Since the user wants the response time for each URL, even if it failed, we should record the time taken for the last attempt.\n",
|
|
"\n",
|
|
"Wait, but in the code above, the loop is trying multiple times. So for each attempt, we can track the elapsed time, but only the successful one's time is recorded. For failures, the last attempt's elapsed time would be the one to record. So perhaps in the code, after each attempt, we can track the elapsed time, and if it's a failure, we keep the last elapsed time.\n",
|
|
"\n",
|
|
"Hmm, this complicates things. Let me think again.\n",
|
|
"\n",
|
|
"Alternative approach:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = None\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in 0 to retries:\n",
|
|
"\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # exit loop on success\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # if this was the last attempt, we need to record the elapsed time\n",
|
|
" # but since we are in the except block, we can't break yet unless it's the last attempt\n",
|
|
" # but how to track the last elapsed time?\n",
|
|
"\n",
|
|
"Wait, perhaps after each attempt, even if it's an exception, we can keep track of the elapsed time and status code (if available). But for the final result, if it's a failure, we take the last elapsed time and set status_code to 0 (or whatever).\n",
|
|
"\n",
|
|
"Alternatively, after all retries, if still failed, then set response_time to the last attempt's elapsed time, and status_code to 0 (or None, but the CSV needs a value).\n",
|
|
"\n",
|
|
"Hmm, perhaps the code can be structured as follows:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = 0 # default to 0 for failure\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in range(retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" # keep track of the last elapsed time in case of failure\n",
|
|
" response_time = elapsed\n",
|
|
" # continue to next attempt unless it's the last one\n",
|
|
" if attempt == retries:\n",
|
|
" # final attempt failed\n",
|
|
" success = False\n",
|
|
" # status_code remains 0 (as initialized)\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue\n",
|
|
"\n",
|
|
"Wait, but in this case, even if the first attempt fails, but the second succeeds, the response_time would be overwritten. That's okay. But if all attempts fail, then the response_time is the last attempt's elapsed time, and status_code is 0 (since it was initialized to 0). That might work.\n",
|
|
"\n",
|
|
"Alternatively, perhaps initializing status_code as None, and then in the except block, if it's the last attempt, set status_code to 0. Hmm, but maybe that's complicating.\n",
|
|
"\n",
|
|
"Alternatively, after the loop, if success is False, then set status_code to 0. That way, regardless of the exception, the status_code is 0 for failures. The user might understand that 0 indicates a network error or timeout.\n",
|
|
"\n",
|
|
"So in code:\n",
|
|
"\n",
|
|
"After the loop:\n",
|
|
"\n",
|
|
"if not success:\n",
|
|
" status_code = 0\n",
|
|
"\n",
|
|
"That way, even if the exception was a 404 (which would have a status code), but the first attempt got a 404, then the loop would break, and status_code would be 404, and success is True (since 404 is a valid response, but the user might consider it a failure if they expect 2xx). Wait, but the user's success is probably determined by the HTTP status code being 2xx. Wait, the user's instruction says \"success (True/False)\". So perhaps the success is whether the request was successful (i.e., the HTTP status is 2xx). Wait, but the user's instruction says \"retry on failure up to specified retries\". So perhaps \"failure\" here refers to any non-2xx status, or any error (including network errors). Hmm, the user's instruction says \"retry on failure\", so perhaps the retries are for any kind of failure, including network issues. So in the code, the success variable is True only if the request was successful (regardless of status code?), or only if the status code is 2xx?\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"Record: URL, status_code, response_time_ms, success (True/False)\". So the success is probably whether the request was successful (i.e., the GET request didn't raise an exception and got a response). Wait, but the user might consider a 404 as a failure (non-successful), but in the code above, if the response is 404, then success is True (because the request didn't raise an exception). Wait, that's a problem.\n",
|
|
"\n",
|
|
"Ah, right! The success should be whether the HTTP status code is in the 2xx range. Because otherwise, even a 404 would be considered a success in the code above. So I need to adjust that.\n",
|
|
"\n",
|
|
"So the success variable should be (response.status_code >= 200 and response.status_code < 300). So in the code:\n",
|
|
"\n",
|
|
"if response.status_code >= 200 and response.status_code < 300:\n",
|
|
" success = True\n",
|
|
"else:\n",
|
|
" success = False\n",
|
|
"\n",
|
|
"Wait, but in the current code structure, the loop breaks on any response (even 404), but the success is determined by the status code. So the code should check that.\n",
|
|
"\n",
|
|
"So modifying the code:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = 200 <= status_code < 300\n",
|
|
"if success:\n",
|
|
" break\n",
|
|
"else:\n",
|
|
" # even though we got a response, it's not a success, so retry?\n",
|
|
"Wait, but the user's instruction says to retry on failure. So if the first attempt returns a 404, should we retry? The user's instruction says \"retry on failure up to specified retries\". So perhaps \"failure\" here includes both network errors and non-2xx responses. So in that case, the code should retry even if the response is a 404.\n",
|
|
"\n",
|
|
"Hmm, that complicates things. Because if a URL returns a 404, the code would retry, but that's probably not desired. The user might want to retry only on network issues (like timeouts, connection errors), not on HTTP errors. Because a 404 is a valid response, just not successful. So perhaps the retries should only be for connection errors, timeouts, etc., not for HTTP errors.\n",
|
|
"\n",
|
|
"Alternatively, the user's instruction says \"retry on failure\", which could include any failure (including HTTP errors). But that's ambiguous. The user's example might expect that retries are for network issues, not HTTP errors. So perhaps the code should only retry on exceptions (like timeouts, connection errors), not on HTTP errors (like 404, 500, etc). Because in that case, the HTTP errors are considered final and not retried.\n",
|
|
"\n",
|
|
"So the code should only retry on exceptions (like ConnectionError, Timeout, etc.), but not on HTTP errors (like 404). So in that case, the code would proceed as follows:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = (200 <= status_code < 300)\n",
|
|
"if success:\n",
|
|
" break\n",
|
|
"else:\n",
|
|
" # not a success, but it's an HTTP error, so don't retry\n",
|
|
" # so break the loop\n",
|
|
" break\n",
|
|
"\n",
|
|
"Wait, but then the retries would only apply to exceptions, not to HTTP errors. So the loop would only retry on exceptions. So the code structure would be:\n",
|
|
"\n",
|
|
"for attempt in ...:\n",
|
|
" try:\n",
|
|
" response = ...\n",
|
|
" # process response\n",
|
|
" if success (2xx):\n",
|
|
" break\n",
|
|
" else:\n",
|
|
" # HTTP error, not retrying\n",
|
|
" break\n",
|
|
" except RequestException:\n",
|
|
" # handle exception, retry if not last attempt\n",
|
|
"\n",
|
|
"Hmm, that makes sense. Because if the server returns a 404, that's a definitive response, so we don't want to retry. Only network issues (like timeouts, connection refused) should be retried.\n",
|
|
"\n",
|
|
"So in code:\n",
|
|
"\n",
|
|
"Inside the try block:\n",
|
|
"\n",
|
|
"response = requests.get(...)\n",
|
|
"elapsed = ...\n",
|
|
"status_code = response.status_code\n",
|
|
"success = (200 <= status_code < 300)\n",
|
|
"if success:\n",
|
|
" break # exit loop\n",
|
|
"else:\n",
|
|
" # not a success, but it's an HTTP error, so we consider this a failure and don't retry\n",
|
|
" # so break the loop\n",
|
|
" break\n",
|
|
"\n",
|
|
"Wait, but then the loop would exit immediately on any HTTP error, even if there are retries left. So the retries only apply to exceptions (network issues), not HTTP errors.\n",
|
|
"\n",
|
|
"Yes, that's better. So the code structure would be:\n",
|
|
"\n",
|
|
"Initialize variables:\n",
|
|
"\n",
|
|
"success = False\n",
|
|
"status_code = 0\n",
|
|
"response_time = 0.0\n",
|
|
"\n",
|
|
"for attempt in range(retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error, not a success, but don't retry\n",
|
|
" success = False\n",
|
|
" break # exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # network error, retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == retries:\n",
|
|
" # last attempt, set success to False\n",
|
|
" success = False\n",
|
|
" status_code = 0 # or some code indicating network error\n",
|
|
" # but how to track the status code here? Maybe leave it as 0\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue # retry\n",
|
|
"\n",
|
|
"Wait, but in the except block, after the exception, we can't get the status code, so we set status_code to 0 (or leave it as 0, since initialized to 0). The success is False. So after the loop, if it's an exception, then status_code is 0, success is False, and response_time is the time taken for the last attempt.\n",
|
|
"\n",
|
|
"Hmm, this seems manageable.\n",
|
|
"\n",
|
|
"Now, after processing all URLs, we need to write the results to a CSV. The CSV should have columns: url, status_code, response_time_ms, success.\n",
|
|
"\n",
|
|
"So the CSV writer will need to write each row with these four fields.\n",
|
|
"\n",
|
|
"Then, the summary needs to show total URLs, successful (success=True), failed (success=False), and average response time (only for successful requests? Or including failed ones? The user says \"average response time\", so probably average of all successful requests, since failed ones might have varying response times (like timeouts). Or maybe including all, but the user might prefer the average of successful ones. The instruction says \"average response time\", so perhaps it's the average of all successful requests' response times.\n",
|
|
"\n",
|
|
"Wait, the user's instruction says \"average response time\" in the summary. So probably the average of all successful requests. Because for failed requests (either HTTP errors or network errors), their response_time might not be meaningful (like timeouts have response_time equal to the timeout, but that's not the server's response time). So the average should be calculated only on the successful entries.\n",
|
|
"\n",
|
|
"So in the code, after collecting all results, we can compute:\n",
|
|
"\n",
|
|
"total = len(results)\n",
|
|
"successful = sum(1 for r in results if r['success'])\n",
|
|
"failed = total - successful\n",
|
|
"successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
"average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
"Then, print the summary with these numbers.\n",
|
|
"\n",
|
|
"Now, putting all together.\n",
|
|
"\n",
|
|
"First, the command-line arguments using argparse:\n",
|
|
"\n",
|
|
"import argparse\n",
|
|
"\n",
|
|
"parser = argparse.ArgumentParser(description='URL Health Checker')\n",
|
|
"parser.add_argument('--input', default='urls.txt', help='Input file with URLs')\n",
|
|
"parser.add_argument('--output', default='results.csv', help='Output CSV file')\n",
|
|
"parser.add_argument('--timeout', type=int, default=5, help='Request timeout in seconds')\n",
|
|
"parser.add_argument('--retries', type=int, default=3, help='Number of retries')\n",
|
|
"args = parser.parse_args()\n",
|
|
"\n",
|
|
"Then, read the URLs from the input file:\n",
|
|
"\n",
|
|
"urls = []\n",
|
|
"with open(args.input, 'r') as f:\n",
|
|
" for line in f:\n",
|
|
" line = line.strip()\n",
|
|
" if not line or line.startswith('#'):\n",
|
|
" continue\n",
|
|
" urls.append(line)\n",
|
|
"\n",
|
|
"Then, process each URL:\n",
|
|
"\n",
|
|
"import requests\n",
|
|
"import time\n",
|
|
"\n",
|
|
"results = []\n",
|
|
"\n",
|
|
"for url in urls:\n",
|
|
" # Initialize variables\n",
|
|
" success = False\n",
|
|
" status_code = 0\n",
|
|
" response_time = 0.0\n",
|
|
"\n",
|
|
" for attempt in range(args.retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=args.timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error, don't retry\n",
|
|
" success = False\n",
|
|
" break # exit loop\n",
|
|
" except requests.exceptions.RequestException as e:\n",
|
|
" # Network error, retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == args.retries:\n",
|
|
" # Last attempt, set success to False\n",
|
|
" success = False\n",
|
|
" status_code = 0 # or leave as 0\n",
|
|
" # but we can also set status_code to 0 here\n",
|
|
" pass\n",
|
|
" else:\n",
|
|
" continue # retry\n",
|
|
"\n",
|
|
" # After loop, append to results\n",
|
|
" results.append({\n",
|
|
" 'url': url,\n",
|
|
" 'status_code': status_code,\n",
|
|
" 'response_time_ms': response_time,\n",
|
|
" 'success': success\n",
|
|
" })\n",
|
|
"\n",
|
|
"Wait, but in the except block, after the exception, the status_code is set to 0, but in the code above, the status_code is initialized to 0, so it remains 0. That's okay.\n",
|
|
"\n",
|
|
"Now, writing to CSV:\n",
|
|
"\n",
|
|
"import csv\n",
|
|
"\n",
|
|
"with open(args.output, 'w', newline='') as f:\n",
|
|
" fieldnames = ['url', 'status_code', 'response_time_ms', 'success']\n",
|
|
" writer = csv.DictWriter(f, fieldnames=fieldnames)\n",
|
|
" writer.writeheader()\n",
|
|
" for row in results:\n",
|
|
" writer.writerow(row)\n",
|
|
"\n",
|
|
"Then, the summary:\n",
|
|
"\n",
|
|
"total = len(results)\n",
|
|
"successful = sum(1 for r in results if r['success'])\n",
|
|
"failed = total - successful\n",
|
|
"\n",
|
|
"successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
"average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
"print(f\"Summary:\")\n",
|
|
"print(f\"Total URLs checked: {total}\")\n",
|
|
"print(f\"Successful requests: {successful}\")\n",
|
|
"print(f\"Failed requests: {failed}\")\n",
|
|
"print(f\"Average response time: {average:.2f} ms\")\n",
|
|
"\n",
|
|
"Wait, but in the case where all are failed, average is 0, but the print would show 0.00 ms. That's okay.\n",
|
|
"\n",
|
|
"Now, putting all together into a script.\n",
|
|
"\n",
|
|
"Wait, but in the code above, when there's an HTTP error (like 404), the loop breaks immediately, so no retries. That's correct.\n",
|
|
"\n",
|
|
"Testing edge cases:\n",
|
|
"\n",
|
|
"- URL with 200: success, no retries needed.\n",
|
|
"- URL with 500: HTTP error, so success is False, no retries.\n",
|
|
"- URL with timeout: after retries, status_code 0, success False.\n",
|
|
"- Empty lines and comments in input file are skipped.\n",
|
|
"\n",
|
|
"Potential issues:\n",
|
|
"\n",
|
|
"- The 'requests' library might have other exceptions not covered, but using the broad RequestException should catch all request-related exceptions.\n",
|
|
"\n",
|
|
"Another thing: the timeout is per attempt. So each retry will have the same timeout. That's as per the user's instruction.\n",
|
|
"\n",
|
|
"Now, code structure:\n",
|
|
"\n",
|
|
"The code should be well-commented. Let me structure it with comments.\n",
|
|
"\n",
|
|
"Also, need to import necessary modules: argparse, requests, time, csv.\n",
|
|
"\n",
|
|
"Wait, the code may have an error if the input file is not found. But the user didn't mention error handling for that, so perhaps it's okay to let it crash with a FileNotFoundError.\n",
|
|
"\n",
|
|
"Now, putting all together into the script:\n",
|
|
"\n",
|
|
"Wait, let me write the code step by step with comments.\n",
|
|
"\n",
|
|
"Wait, also, in the code above, when an HTTP error occurs (like 404), the loop breaks, so the 'response_time' is set to the elapsed time of that first attempt. That's correct.\n",
|
|
"\n",
|
|
"Another point: in the except block, after the exception, the code sets response_time to elapsed (time taken until the exception was raised). So even for timeouts, the response_time is the time taken (which would be close to the timeout value).\n",
|
|
"\n",
|
|
"Now, the code should be okay.\n",
|
|
"\n",
|
|
"Testing the code:\n",
|
|
"\n",
|
|
"Suppose a URL that's down (e.g., invalid domain), retries 3 times. The final status_code is 0, success False, response_time is ~5 seconds (timeout).\n",
|
|
"\n",
|
|
"A URL returning 404: first attempt, response_time is the time taken, status_code 404, success False, no retries.\n",
|
|
"\n",
|
|
"A URL returning 200: success, response_time is the actual time.\n",
|
|
"\n",
|
|
"Now, the CSV will have all entries.\n",
|
|
"\n",
|
|
"The summary will show the counts and average of successful times.\n",
|
|
"\n",
|
|
"Yes, this should meet the requirements.\n",
|
|
"</think>\n",
|
|
"\n",
|
|
"```python\n",
|
|
"import argparse\n",
|
|
"import csv\n",
|
|
"import requests\n",
|
|
"import time\n",
|
|
"\n",
|
|
"\n",
|
|
"def main():\n",
|
|
" # Parse command-line arguments\n",
|
|
" parser = argparse.ArgumentParser(description='URL Health Checker')\n",
|
|
" parser.add_argument('--input', default='urls.txt', help='Input file with URLs')\n",
|
|
" parser.add_argument('--output', default='results.csv', help='Output CSV file')\n",
|
|
" parser.add_argument('--timeout', type=int, default=5, help='Request timeout in seconds')\n",
|
|
" parser.add_argument('--retries', type=int, default=3, help='Number of retries')\n",
|
|
" args = parser.parse_args()\n",
|
|
"\n",
|
|
" # Read URLs from file, ignoring empty lines and comments\n",
|
|
" urls = []\n",
|
|
" with open(args.input, 'r') as f:\n",
|
|
" for line in f:\n",
|
|
" line = line.strip()\n",
|
|
" if not line or line.startswith('#'):\n",
|
|
" continue\n",
|
|
" urls.append(line)\n",
|
|
"\n",
|
|
" results = []\n",
|
|
" for url in urls:\n",
|
|
" success = False\n",
|
|
" status_code = 0\n",
|
|
" response_time = 0.0\n",
|
|
"\n",
|
|
" for attempt in range(args.retries + 1):\n",
|
|
" start_time = time.time()\n",
|
|
" try:\n",
|
|
" response = requests.get(url, timeout=args.timeout)\n",
|
|
" elapsed = (time.time() - start_time) * 1000 # Convert to milliseconds\n",
|
|
" status_code = response.status_code\n",
|
|
" response_time = elapsed\n",
|
|
" if 200 <= status_code < 300:\n",
|
|
" success = True\n",
|
|
" break # Success, exit loop\n",
|
|
" else:\n",
|
|
" # HTTP error (e.g., 404), don't retry\n",
|
|
" success = False\n",
|
|
" break\n",
|
|
" except requests.exceptions.RequestException:\n",
|
|
" # Network error (timeout, connection error, etc.), retry if possible\n",
|
|
" elapsed = (time.time() - start_time) * 1000\n",
|
|
" response_time = elapsed\n",
|
|
" if attempt == args.retries:\n",
|
|
" # Last attempt failed\n",
|
|
" success = False\n",
|
|
" status_code = 0 # Indicate network error\n",
|
|
" else:\n",
|
|
" continue # Retry\n",
|
|
"\n",
|
|
" results.append({\n",
|
|
" 'url': url,\n",
|
|
" 'status_code': status_code,\n",
|
|
" 'response_time_ms': response_time,\n",
|
|
" 'success': success\n",
|
|
" })\n",
|
|
"\n",
|
|
" # Write results to CSV\n",
|
|
" with open(args.output, 'w', newline='') as f:\n",
|
|
" fieldnames = ['url', 'status_code', 'response_time_ms', 'success']\n",
|
|
" writer = csv.DictWriter(f, fieldnames=fieldnames)\n",
|
|
" writer.writeheader()\n",
|
|
" for row in results:\n",
|
|
" writer.writerow(row)\n",
|
|
"\n",
|
|
" # Calculate summary statistics\n",
|
|
" total = len(results)\n",
|
|
" successful = sum(1 for r in results if r['success'])\n",
|
|
" failed = total - successful\n",
|
|
" successful_times = [r['response_time_ms'] for r in results if r['success']]\n",
|
|
" average = sum(successful_times) / len(successful_times) if successful else 0\n",
|
|
"\n",
|
|
" # Print summary table\n",
|
|
" print(f\"Summary:\")\n",
|
|
" print(f\"Total URLs checked: {total}\")\n",
|
|
" print(f\"Successful requests: {successful}\")\n",
|
|
" print(f\"Failed requests: {failed}\")\n",
|
|
" print(f\"Average response time: {average:.2f} ms\")\n",
|
|
"\n",
|
|
"\n",
|
|
"if __name__ == '__main__':\n",
|
|
" main()\n",
|
|
"```\n",
|
|
"\n",
|
|
"To use this script:\n",
|
|
"\n",
|
|
"1. Save it as `url_checker.py`\n",
|
|
"2. Create a `urls.txt` file with URLs (one per line, empty lines/comments allowed)\n",
|
|
"3. Run with: `python url_checker.py` (or specify options like `--timeout 10 --retries 2`)\n",
|
|
"\n",
|
|
"The script will:\n",
|
|
"- Read URLs from the input file\n",
|
|
"- Check each URL with retries\n",
|
|
"- Save results to CSV\n",
|
|
"- Print a summary of the checks\n",
|
|
"\n",
|
|
"Example `urls.txt` content:\n",
|
|
"```\n",
|
|
"https://httpstat.us/200\n",
|
|
"https://httpstat.us/404\n",
|
|
"https://invalid-domain.test\n",
|
|
"# This is a comment\n",
|
|
"https://httpstat.us/500\n",
|
|
"```\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# prompt for URL health checker\n",
|
|
"SYSTEM_PROMPT = \"\"\"You are a Python developer. Create a simple URL health checker CLI.\n",
|
|
"Return ONLY ONE Python script that:\n",
|
|
"1. Reads URLs from a text file (one per line)\n",
|
|
"2. Pings each URL with GET requests\n",
|
|
"3. Handles timeouts and retries\n",
|
|
"4. Writes results to CSV (url, status_code, response_time_ms, success)\n",
|
|
"5. Prints a summary table\n",
|
|
"\n",
|
|
"Use requests library and basic error handling. Keep it simple and functional.\"\"\"\n",
|
|
"\n",
|
|
"USER_PROMPT = \"\"\"Create a Python CLI script called url_checker.py that:\n",
|
|
"\n",
|
|
"1. Takes command line arguments:\n",
|
|
" - --input: input file with URLs (default: urls.txt)\n",
|
|
" - --output: output CSV file (default: results.csv)\n",
|
|
" - --timeout: request timeout in seconds (default: 5)\n",
|
|
" - --retries: number of retries (default: 3)\n",
|
|
"\n",
|
|
"2. Reads URLs from input file (ignore empty lines and comments starting with #)\n",
|
|
"\n",
|
|
"3. For each URL:\n",
|
|
" - Make GET request with timeout\n",
|
|
" - Retry on failure up to specified retries\n",
|
|
" - Record: URL, status code, response time in ms, success (True/False)\n",
|
|
"\n",
|
|
"4. Write results to CSV file\n",
|
|
"\n",
|
|
"5. Print summary:\n",
|
|
" - Total URLs checked\n",
|
|
" - Successful requests\n",
|
|
" - Failed requests\n",
|
|
" - Average response time\n",
|
|
"\n",
|
|
"Keep the code simple, well-commented, and easy to understand.\"\"\"\n",
|
|
"\n",
|
|
"def generate_url_checker():\n",
|
|
"    \"\"\"Generate the url_checker.py script via the chat-completions API.\n",
|
|
"\n",
|
|
"    Returns the raw assistant message text, which may be wrapped in a\n",
|
|
"    markdown ```python code fence (the next cell strips it).\n",
|
|
"    \"\"\"\n",
|
|
"    cfg = Config()\n",
|
|
"    client = OpenAI(api_key=cfg.api_key, base_url=cfg.base_url)\n",
|
|
"\n",
|
|
"    resp = client.chat.completions.create(\n",
|
|
"        model=cfg.model_name,\n",
|
|
"        messages=[\n",
|
|
"            {\"role\": \"system\", \"content\": SYSTEM_PROMPT},\n",
|
|
"            {\"role\": \"user\", \"content\": USER_PROMPT},\n",
|
|
"        ],\n",
|
|
"        temperature=0.2,\n",
|
|
"        # A short CLI script needs at most a few thousand completion tokens;\n",
|
|
"        # 200000 exceeds the max_tokens limit of most chat models and is\n",
|
|
"        # rejected with a 400 error by many endpoints.\n",
|
|
"        max_tokens=8000,\n",
|
|
"    )\n",
|
|
"\n",
|
|
"    return resp.choices[0].message.content\n",
|
|
"\n",
|
|
"# Generate and display the URL checker script\n",
|
|
"print(\"Generating URL checker CLI script...\")\n",
|
|
"script_content = generate_url_checker()\n",
|
|
"print(\"\\n\" + \"=\"*60)\n",
|
|
"print(\"GENERATED URL CHECKER SCRIPT:\")\n",
|
|
"print(\"=\"*60)\n",
|
|
"print(script_content)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "a8f2386a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"To use the script:\n",
|
|
"1. Create a urls.txt file with URLs (one per line)\n",
|
|
"2. Run: python url_checker.py --input urls.txt --output results.csv\n",
|
|
"\n",
|
|
"Example urls.txt content:\n",
|
|
"https://www.google.com\n",
|
|
"https://www.github.com\n",
|
|
"https://httpbin.org/status/200\n",
|
|
"https://httpbin.org/delay/2\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Save the generated script to a file\n",
|
|
"# Reuse the script generated in the previous cell when available, so we do\n",
|
|
"# not pay for a second API call that could return a different script than\n",
|
|
"# the one displayed above.\n",
|
|
"if 'script_content' not in globals():\n",
|
|
"    script_content = generate_url_checker()\n",
|
|
"\n",
|
|
"# Extract just the Python code if it's wrapped in markdown code blocks\n",
|
|
"import re\n",
|
|
"code_match = re.search(r'```python\\n(.*?)\\n```', script_content, re.DOTALL)\n",
|
|
"if code_match:\n",
|
|
"    clean_script = code_match.group(1)\n",
|
|
"else:\n",
|
|
"    clean_script = script_content\n",
|
|
"\n",
|
|
"# Save to file (explicit UTF-8: the platform default on Windows is cp1252,\n",
|
|
"# which would crash on any non-ASCII character in the generated code)\n",
|
|
"with open('url_checker.py', 'w', encoding='utf-8') as f:\n",
|
|
"    f.write(clean_script)\n",
|
|
"\n",
|
|
"print(\"\\nTo use the script:\")\n",
|
|
"print(\"1. Create a urls.txt file with URLs (one per line)\")\n",
|
|
"print(\"2. Run: python url_checker.py --input urls.txt --output results.csv\")\n",
|
|
"print(\"\\nExample urls.txt content:\")\n",
|
|
"print(\"https://www.google.com\")\n",
|
|
"print(\"https://www.github.com\")\n",
|
|
"print(\"https://httpbin.org/status/200\")\n",
|
|
"print(\"https://httpbin.org/delay/2\")"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "base",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.16"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|