From 65bd214a6126d9532d852622bf87978b0169f9ad Mon Sep 17 00:00:00 2001 From: Ahmed Hesham Date: Wed, 1 Oct 2025 12:49:40 +0300 Subject: [PATCH] add dockerfile separated code --- .dockerignore | 6 +++ .gitignore | 3 +- Dockerfile | 11 +++++ ai.py | 3 -- filter.py | 93 ++++++++++++++++++++++++++++++++++++++ jobs.py | 4 +- main.py | 113 +++++++---------------------------------------- requirements.txt | 35 +-------------- 8 files changed, 131 insertions(+), 137 deletions(-) create mode 100644 .dockerignore create mode 100644 Dockerfile create mode 100644 filter.py diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..bdfc479 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,6 @@ +__pycache__ +.venv +.vscode +.dockerignore +.env +jobs.csv \ No newline at end of file diff --git a/.gitignore b/.gitignore index d9c316d..fa74427 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,5 @@ jobs.csv .venv __pycache__ .env -.vscode \ No newline at end of file +.vscode +.env2 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..d592bc8 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,11 @@ +FROM python:3.12.3-slim + +WORKDIR /jobfitai + +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +CMD [ "python3", "main.py"] diff --git a/ai.py b/ai.py index a295d95..513b1c3 100644 --- a/ai.py +++ b/ai.py @@ -1,12 +1,9 @@ # To run this code you need to install the following dependencies: # pip install google-genai -import base64 -import os from google import genai from google.genai import types - def generate(description, instruction, api_key): client = genai.Client(api_key=api_key) diff --git a/filter.py b/filter.py new file mode 100644 index 0000000..90757e9 --- /dev/null +++ b/filter.py @@ -0,0 +1,93 @@ +import logging +import time +from ai import generate +import json +from google.genai.errors import ServerError, ClientError + +total_fail = 0 +total_overload = 0 +total_fail_overload = 0 +total_empty_response = 0 +total_fail_empty_response = 0 + + +def filter_jobs(jobs, cv, api_keys, good_fit_jobs): + key_number = 0 + + for i, job in jobs.iterrows(): + # print("index is :", i) # for debugging + + if (i + 1) % 10 == 0 and i != 0: + logging.warning("sleeping to avoid API rate limits") + time.sleep(60) + try_count = 3 + + while try_count > 0: + + try: + cleaned_description = "\n".join( + [line for line in job["description"].splitlines() if line.strip()] + ) + ai_response = generate(cleaned_description, cv, api_keys[key_number]) + ai_response_dict = json.loads(ai_response) + break + + except json.JSONDecodeError as e: + try_count -= 1 + total_empty_response += 1 + if try_count == 0: + total_fail += 1 + total_fail_empty_response += 1 + + logging.warning("Sleeping after JSONDecodeError") + time.sleep(6) + + except ServerError as e: + + if e.details["error"]["code"] == 503: + try_count -= 1 + total_overload += 1 + if try_count == 0: + total_fail += 1 + total_fail_overload += 1 + logging.warning("sleeping to after The model is overloaded.") + print(e.details) + time.sleep(10) + else: + logging.critical(e.details) + return 1 + + except ClientError as e: + if e.details["error"]["code"] == 429: + logging.warning("api limit hit") + key_number += 1 + if key_number > len(api_keys) - 1: + logging.critical("All api keys hit the limit") + return 1 + else: + logging.critical(e.details) + return 1 + + else: + logging.critical("All attempts failed") + continue + + if ai_response_dict["percentage"] > 50: + good_fit_jobs.append( + { + "title": job["title"], + "url": job["job_url"], + "percentage": ai_response_dict["percentage"], + "why I'm I a good fit": ai_response_dict["why I'm I a good fit"], + "what I'm I missing": ai_response_dict["what I'm I missing"], + } + ) + print_stats + return good_fit_jobs + + +def print_stats(): + stats = f"""total fail: {total_fail} +total empty responses: {total_empty_response} fail: {total_fail_empty_response} +Total overloads: {total_overload} fail: {total_fail_overload}""" + print(stats) diff --git a/jobs.py b/jobs.py index 7a06a5c..76a7328 100644 --- a/jobs.py +++ b/jobs.py @@ -1,5 +1,5 @@ from jobspy import scrape_jobs - +import logging def getJobs(jobTitle, results_wanted, hours_old): jobs = scrape_jobs( @@ -22,7 +22,7 @@ def getJobs(jobTitle, results_wanted, hours_old): linkedin_fetch_description=True, # gets more info such as description, direct job url (slower) # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"], ) - print(f"Found {len(jobs)} jobs") + logging.warning(f"Found {len(jobs)} {jobTitle} jobs") # print(jobs) return jobs # jobs.to_csv( diff --git a/main.py b/main.py index 8f381a1..9d5dce5 100644 --- a/main.py +++ b/main.py @@ -1,12 +1,10 @@ from jobs import getJobs -from ai import generate -from google.genai.errors import ServerError, ClientError from alert import send_email -import json -import time +from filter import filter_jobs import os import logging from random import shuffle +import pandas as pd logging.basicConfig( level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s" @@ -16,110 +14,31 @@ SENDER = os.getenv("smtp_email") PASSWORD = os.getenv("smtp_password") RECEIVER = os.getenv("receiver_email") api_keys = os.getenv("api_keys").split(",") - -good_fit_jobs = [] - -# stats -total_fail = 0 -total_fail_overload = 0 -total_overload = 0 -total_empty_response = 0 -total_fail_empty_response = 0 - shuffle(api_keys) +all_jobs = pd.DataFrame() +good_fit_jobs = [] + with open("instruction.txt", "r") as f: CV = f.read() -def get_jobs(job_title, cv, results_wanted, hours_old): - global total_fail, total_fail_overload, total_overload, total_empty_response, total_fail_empty_response - key_number = 0 - +def get_jobs(job_title, results_wanted, hours_old): + global all_jobs jobs = getJobs(job_title, results_wanted, hours_old) - for i, job in jobs.iterrows(): - print("index is :", i) # for debugging - - if (i + 1) % 10 == 0 and i != 0: - logging.warning("sleeping to avoid API rate limits") - time.sleep(60) - try_count = 3 - - while try_count > 0: - - try: - cleaned_description = "\n".join( - [line for line in job["description"].splitlines() if line.strip()] - ) - ai_response = generate(cleaned_description, cv, api_keys[key_number]) - ai_response_dict = json.loads(ai_response) - break - - except json.JSONDecodeError as e: - try_count -= 1 - total_empty_response += 1 - if try_count == 0: - total_fail += 1 - total_fail_empty_response += 1 - - logging.warning("Sleeping after JSONDecodeError") - time.sleep(6) - - except ServerError as e: - - if e.details["error"]["code"] == 503: - try_count -= 1 - total_overload += 1 - if try_count == 0: - total_fail += 1 - total_fail_overload += 1 - logging.warning("sleeping to after The model is overloaded.") - print(e.details) - time.sleep(10) - else: - logging.critical(e.details) - return 1 - - except ClientError as e: - if e.details["error"]["code"] == 429: - logging.warning("api limit hit") - key_number += 1 - if key_number > len(api_keys) - 1: - logging.critical("All api keys hit the limit") - return 1 - else: - logging.critical(e.details) - return 1 - - else: - logging.critical("All attempts failed") - continue - - if ai_response_dict["percentage"] > 50: - good_fit_jobs.append( - { - "title": job["title"], - "url": job["job_url"], - "percentage": ai_response_dict["percentage"], - "why I'm I a good fit": ai_response_dict["why I'm I a good fit"], - "what I'm I missing": ai_response_dict["what I'm I missing"], - } - ) - - -def print_stats(): - stats = f"""total fail: {total_fail} -total empty responses: {total_empty_response} fail: {total_fail_empty_response} -Total overloads: {total_overload} fail: {total_fail_overload}""" - print(stats) + all_jobs = pd.concat([all_jobs, jobs], ignore_index=True) if __name__ == "__main__": - get_jobs("devops", CV, results_wanted=30, hours_old=2) - get_jobs("backend", CV, results_wanted=30, hours_old=2) - get_jobs("software engineer", CV, results_wanted=30, hours_old=2) + get_jobs("devops", results_wanted=30, hours_old=2) + get_jobs("backend", results_wanted=30, hours_old=2) + get_jobs("software engineer", results_wanted=30, hours_old=2) + get_jobs("cloud", results_wanted=30, hours_old=2) + get_jobs("sre", results_wanted=30, hours_old=2) + get_jobs("intern", results_wanted=30, hours_old=2) + all_jobs.drop_duplicates(inplace=True, ignore_index=True) + filter_jobs(all_jobs, CV, api_keys, good_fit_jobs) if len(good_fit_jobs) > 0: send_email(SENDER, RECEIVER, PASSWORD, good_fit_jobs) else: print("no good fit jobs") - print_stats() diff --git a/requirements.txt b/requirements.txt index 1dd3c07..af63f03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,35 +1,2 @@ -annotated-types==0.7.0 -anyio==4.11.0 -beautifulsoup4==4.13.5 -cachetools==5.5.2 -certifi==2025.8.3 -charset-normalizer==3.4.3 -google-auth==2.40.3 -google-genai==1.38.0 -h11==0.16.0 -httpcore==1.0.9 -httpx==0.28.1 -idna==3.10 -markdownify==0.13.1 -numpy==1.26.3 -pandas==2.3.2 -pyasn1==0.6.1 -pyasn1_modules==0.4.2 -pydantic==2.11.9 -pydantic_core==2.33.2 -python-dateutil==2.9.0.post0 python-jobspy==1.1.82 -pytz==2025.2 -regex==2024.11.6 -requests==2.32.5 -rsa==4.9.1 -six==1.17.0 -sniffio==1.3.1 -soupsieve==2.8 -tenacity==9.1.2 -tls-client==1.0.1 -typing-inspection==0.4.1 -typing_extensions==4.15.0 -tzdata==2025.2 -urllib3==2.5.0 -websockets==15.0.1 +google-genai==1.38.0 \ No newline at end of file