LLM-Powered-URL-Health-Chec…/url_checker.py
import argparse
import csv
import time

import requests


def get_parser():
    """Build the command-line argument parser."""
    parser = argparse.ArgumentParser(description='URL Health Checker')
    parser.add_argument('--input', default='urls.txt', help='Input file with URLs')
    parser.add_argument('--output', default='results.csv', help='Output CSV file')
    parser.add_argument('--timeout', type=int, default=5, help='Request timeout in seconds')
    parser.add_argument('--retries', type=int, default=3, help='Number of retries')
    return parser
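
# For reference, the parser above accepts overrides such as (values hypothetical):
#   args = get_parser().parse_args(['--timeout', '10', '--retries', '0'])
#   # args.timeout == 10; --retries 0 means a single attempt with no retries
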
def read_urls(file_path):
    """Read URLs from a file, ignoring empty lines and comments."""
    urls = []
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            urls.append(line)
    return urls
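
# Illustration of read_urls: given a hypothetical urls.txt containing
#
#   # production endpoints
#   https://example.com
#
#   https://example.org/health
#
# the comment line and the blank line are skipped, so the function returns
# ['https://example.com', 'https://example.org/health'].
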
def check_url(url, timeout, retries):
    """Check a single URL with retries and return result."""
    result = {
        'url': url,
        'status_code': None,
        'response_time_ms': None,
        'success': False
    }
    for attempt in range(retries + 1):  # +1 to include the initial attempt
        try:
            start_time = time.time()
            response = requests.get(url, timeout=timeout)
            elapsed = (time.time() - start_time) * 1000
            if 200 <= response.status_code < 300:
                result['status_code'] = response.status_code
                result['response_time_ms'] = elapsed
                result['success'] = True
                break  # Success, exit loop
            # Non-2xx response; retry if not last attempt
            if attempt < retries:
                continue
            # Last attempt, record failure
            result['status_code'] = response.status_code
            result['response_time_ms'] = elapsed
            result['success'] = False
            break
        except requests.exceptions.RequestException:
            # Network error (e.g., connection timeout, DNS issue)
            elapsed = (time.time() - start_time) * 1000
            result['status_code'] = 0  # Indicate exception occurred
            result['response_time_ms'] = elapsed
            result['success'] = False
            if attempt < retries:
                continue
            break  # No more retries, record failure
    return result
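
# Sketch of check_url's return value for a reachable URL; the URL and the
# numbers are hypothetical:
#
#   >>> check_url('https://example.com', timeout=5, retries=3)
#   {'url': 'https://example.com', 'status_code': 200,
#    'response_time_ms': 134.2, 'success': True}
#
# If every attempt raises a network error, status_code comes back as 0 and
# success as False.
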
def main():
    parser = get_parser()
    args = parser.parse_args()
    urls = read_urls(args.input)
    results = []
    for url in urls:
        result = check_url(url, args.timeout, args.retries)
        results.append(result)

    # Write results to CSV
    with open(args.output, mode='w', newline='') as file:
        fieldnames = ['url', 'status_code', 'response_time_ms', 'success']
        writer = csv.DictWriter(file, fieldnames=fieldnames)
        writer.writeheader()
        for row in results:
            writer.writerow(row)

    # Summary statistics (note: the average includes the elapsed time of
    # failed requests as well as successful ones)
    total = len(results)
    successful = sum(1 for r in results if r['success'])
    failed = total - successful
    total_response_time = sum(r['response_time_ms'] for r in results)
    average = total_response_time / total if total else 0
    print("\nSummary:")
    print(f"Total URLs checked: {total}")
    print(f"Successful requests: {successful}")
    print(f"Failed requests: {failed}")
    print(f"Average response time: {average:.2f} ms")


if __name__ == '__main__':
    main()
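
# Example invocation using the defaults defined above (flag values are
# illustrative):
#
#   python url_checker.py --input urls.txt --output results.csv --timeout 5 --retries 2
#
# results.csv then holds one row per URL, e.g. (values hypothetical):
#
#   url,status_code,response_time_ms,success
#   https://example.com,200,134.2,True
#   https://bad.invalid,0,5003.8,False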