add dockerfile
separated code
.dockerignore (new file)
@@ -0,0 +1,6 @@
+__pycache__
+.venv
+.vscode
+.dockerignore
+.env
+jobs.csv
.gitignore
@@ -3,3 +3,4 @@ jobs.csv
 __pycache__
 .env
 .vscode
+.env2
Dockerfile (new file)
@@ -0,0 +1,11 @@
+FROM python:3.12.3-slim
+
+WORKDIR /jobfitai
+
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD ["python3", "main.py"]
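
Copying requirements.txt and installing it before COPY . . lets Docker cache the pip layer, so code-only changes do not reinstall dependencies. To use the image, something like docker build -t jobfitai . followed by docker run --env-file .env jobfitai should work (the tag name and the --env-file choice are suggestions, not part of this commit): main.py reads smtp_email, smtp_password, receiver_email, and api_keys from the environment, and .env is excluded from the build context by .dockerignore, so the variables must be injected at run time.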
ai.py
@@ -1,12 +1,9 @@
 # To run this code you need to install the following dependencies:
 # pip install google-genai
 
-import base64
-import os
-
 from google import genai
 from google.genai import types
 
 
 def generate(description, instruction, api_key):
     client = genai.Client(api_key=api_key)
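
The body of generate falls outside this hunk. For orientation only, a minimal google-genai call has roughly this shape; the model name and the JSON-output config below are assumptions, not values taken from the commit (filter.py json.loads()es the return value, so the function presumably asks for a JSON response):

# Hypothetical sketch of the elided body of generate(); not from this commit.
from google import genai
from google.genai import types

def generate(description, instruction, api_key):
    client = genai.Client(api_key=api_key)
    response = client.models.generate_content(
        model="gemini-2.0-flash",  # placeholder model name
        contents=description,
        config=types.GenerateContentConfig(
            system_instruction=instruction,
            response_mime_type="application/json",
        ),
    )
    return response.text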
filter.py (new file)
@@ -0,0 +1,95 @@
+import logging
+import time
+from ai import generate
+import json
+from google.genai.errors import ServerError, ClientError
+
+total_fail = 0
+total_overload = 0
+total_fail_overload = 0
+total_empty_response = 0
+total_fail_empty_response = 0
+
+
+def filter_jobs(jobs, cv, api_keys, good_fit_jobs):
+    # The counters are module-level; without global, the += below would raise UnboundLocalError.
+    global total_fail, total_overload, total_fail_overload
+    global total_empty_response, total_fail_empty_response
+    key_number = 0
+
+    for i, job in jobs.iterrows():
+        # print("index is :", i)  # for debugging
+
+        if (i + 1) % 10 == 0 and i != 0:
+            logging.warning("sleeping to avoid API rate limits")
+            time.sleep(60)
+        try_count = 3
+
+        while try_count > 0:
+            try:
+                cleaned_description = "\n".join(
+                    [line for line in job["description"].splitlines() if line.strip()]
+                )
+                ai_response = generate(cleaned_description, cv, api_keys[key_number])
+                ai_response_dict = json.loads(ai_response)
+                break
+
+            except json.JSONDecodeError:
+                try_count -= 1
+                total_empty_response += 1
+                if try_count == 0:
+                    total_fail += 1
+                    total_fail_empty_response += 1
+
+                logging.warning("Sleeping after JSONDecodeError")
+                time.sleep(6)
+
+            except ServerError as e:
+                if e.details["error"]["code"] == 503:
+                    try_count -= 1
+                    total_overload += 1
+                    if try_count == 0:
+                        total_fail += 1
+                        total_fail_overload += 1
+                    logging.warning("sleeping after 'model overloaded' (503) error")
+                    print(e.details)
+                    time.sleep(10)
+                else:
+                    logging.critical(e.details)
+                    return 1
+
+            except ClientError as e:
+                if e.details["error"]["code"] == 429:
+                    logging.warning("api limit hit")
+                    key_number += 1
+                    if key_number > len(api_keys) - 1:
+                        logging.critical("All api keys hit the limit")
+                        return 1
+                else:
+                    logging.critical(e.details)
+                    return 1
+
+        else:
+            # while/else: runs only when try_count hits 0 without a break.
+            logging.critical("All attempts failed")
+            continue
+
+        if ai_response_dict["percentage"] > 50:
+            good_fit_jobs.append(
+                {
+                    "title": job["title"],
+                    "url": job["job_url"],
+                    "percentage": ai_response_dict["percentage"],
+                    "why I'm I a good fit": ai_response_dict["why I'm I a good fit"],
+                    "what I'm I missing": ai_response_dict["what I'm I missing"],
+                }
+            )
+    print_stats()
+    return good_fit_jobs
+
+
+def print_stats():
+    stats = f"""total fail: {total_fail}
+total empty responses: {total_empty_response} fail: {total_fail_empty_response}
+Total overloads: {total_overload} fail: {total_fail_overload}"""
+    print(stats)
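
The while/else in filter_jobs is easy to misread: the else branch runs only when the loop condition becomes false without a break, i.e. when all three attempts failed. A minimal illustration of the semantics:

# Loop else runs only if the loop was not exited by break.
for attempt in range(3):
    if attempt == 1:
        break
else:
    print("all attempts used")  # skipped here, because the loop broke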
jobs.py
@@ -1,5 +1,5 @@
 from jobspy import scrape_jobs
+import logging
 
-
 def getJobs(jobTitle, results_wanted, hours_old):
     jobs = scrape_jobs(
@@ -22,7 +22,7 @@ def getJobs(jobTitle, results_wanted, hours_old):
         linkedin_fetch_description=True,  # gets more info such as description, direct job url (slower)
         # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
     )
-    print(f"Found {len(jobs)} jobs")
+    logging.warning(f"Found {len(jobs)} {jobTitle} jobs")
     # print(jobs)
     return jobs
     # jobs.to_csv(
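
The middle of getJobs (the remaining scrape_jobs arguments) falls between the two hunks and is not shown. For orientation, a typical python-jobspy call has roughly this shape; the argument values here are placeholders, not the commit's:

# Sketch of getJobs with placeholder arguments; the real values are elided by the diff.
import logging
from jobspy import scrape_jobs

def getJobs(jobTitle, results_wanted, hours_old):
    jobs = scrape_jobs(
        site_name=["linkedin"],           # assumed; the commit's site list is not shown
        search_term=jobTitle,
        results_wanted=results_wanted,
        hours_old=hours_old,
        linkedin_fetch_description=True,  # slower, but the AI filter needs descriptions
    )
    logging.warning(f"Found {len(jobs)} {jobTitle} jobs")
    return jobs  # a pandas DataFrame with title, job_url, description, ...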
main.py
@@ -1,12 +1,10 @@
 from jobs import getJobs
-from ai import generate
-from google.genai.errors import ServerError, ClientError
 from alert import send_email
-import json
-import time
+from filter import filter_jobs
 import os
 import logging
 from random import shuffle
+import pandas as pd
 
 logging.basicConfig(
     level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -16,110 +14,31 @@ SENDER = os.getenv("smtp_email")
 PASSWORD = os.getenv("smtp_password")
 RECEIVER = os.getenv("receiver_email")
 api_keys = os.getenv("api_keys").split(",")
 
-good_fit_jobs = []
-
-# stats
-total_fail = 0
-total_fail_overload = 0
-total_overload = 0
-total_empty_response = 0
-total_fail_empty_response = 0
-
 shuffle(api_keys)
 
+all_jobs = pd.DataFrame()
+good_fit_jobs = []
 with open("instruction.txt", "r") as f:
     CV = f.read()
 
 
-def get_jobs(job_title, cv, results_wanted, hours_old):
-    global total_fail, total_fail_overload, total_overload, total_empty_response, total_fail_empty_response
-    key_number = 0
-
+def get_jobs(job_title, results_wanted, hours_old):
+    global all_jobs
     jobs = getJobs(job_title, results_wanted, hours_old)
+    all_jobs = pd.concat([all_jobs, jobs], ignore_index=True)
-    for i, job in jobs.iterrows():
-        print("index is :", i)  # for debugging
-
-        if (i + 1) % 10 == 0 and i != 0:
-            logging.warning("sleeping to avoid API rate limits")
-            time.sleep(60)
-        try_count = 3
-
-        while try_count > 0:
-
-            try:
-                cleaned_description = "\n".join(
-                    [line for line in job["description"].splitlines() if line.strip()]
-                )
-                ai_response = generate(cleaned_description, cv, api_keys[key_number])
-                ai_response_dict = json.loads(ai_response)
-                break
-
-            except json.JSONDecodeError as e:
-                try_count -= 1
-                total_empty_response += 1
-                if try_count == 0:
-                    total_fail += 1
-                    total_fail_empty_response += 1
-
-                logging.warning("Sleeping after JSONDecodeError")
-                time.sleep(6)
-
-            except ServerError as e:
-
-                if e.details["error"]["code"] == 503:
-                    try_count -= 1
-                    total_overload += 1
-                    if try_count == 0:
-                        total_fail += 1
-                        total_fail_overload += 1
-                    logging.warning("sleeping to after The model is overloaded.")
-                    print(e.details)
-                    time.sleep(10)
-                else:
-                    logging.critical(e.details)
-                    return 1
-
-            except ClientError as e:
-                if e.details["error"]["code"] == 429:
-                    logging.warning("api limit hit")
-                    key_number += 1
-                    if key_number > len(api_keys) - 1:
-                        logging.critical("All api keys hit the limit")
-                        return 1
-                else:
-                    logging.critical(e.details)
-                    return 1
-
-        else:
-            logging.critical("All attempts failed")
-            continue
-
-        if ai_response_dict["percentage"] > 50:
-            good_fit_jobs.append(
-                {
-                    "title": job["title"],
-                    "url": job["job_url"],
-                    "percentage": ai_response_dict["percentage"],
-                    "why I'm I a good fit": ai_response_dict["why I'm I a good fit"],
-                    "what I'm I missing": ai_response_dict["what I'm I missing"],
-                }
-            )
-
-
-def print_stats():
-    stats = f"""total fail: {total_fail}
-total empty responses: {total_empty_response} fail: {total_fail_empty_response}
-Total overloads: {total_overload} fail: {total_fail_overload}"""
-    print(stats)
 
 
 if __name__ == "__main__":
-    get_jobs("devops", CV, results_wanted=30, hours_old=2)
-    get_jobs("backend", CV, results_wanted=30, hours_old=2)
-    get_jobs("software engineer", CV, results_wanted=30, hours_old=2)
+    get_jobs("devops", results_wanted=30, hours_old=2)
+    get_jobs("backend", results_wanted=30, hours_old=2)
+    get_jobs("software engineer", results_wanted=30, hours_old=2)
+    get_jobs("cloud", results_wanted=30, hours_old=2)
+    get_jobs("sre", results_wanted=30, hours_old=2)
+    get_jobs("intern", results_wanted=30, hours_old=2)
+    all_jobs.drop_duplicates(inplace=True, ignore_index=True)
+    filter_jobs(all_jobs, CV, api_keys, good_fit_jobs)
     if len(good_fit_jobs) > 0:
         send_email(SENDER, RECEIVER, PASSWORD, good_fit_jobs)
     else:
         print("no good fit jobs")
-    print_stats()
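
A design note on the restructuring: main.py now only aggregates scrape results into all_jobs, while the per-job AI calls live in filter.py. Deduplicating the concatenated DataFrame before filtering means a posting that matches several search terms costs only one Gemini call. For reference, a standalone illustration (not code from the commit) of how pd.concat plus drop_duplicates(ignore_index=True) compares whole rows and renumbers the index:

import pandas as pd

a = pd.DataFrame({"title": ["devops eng", "backend dev"], "job_url": ["u1", "u2"]})
b = pd.DataFrame({"title": ["backend dev", "sre"], "job_url": ["u2", "u3"]})
merged = pd.concat([a, b], ignore_index=True)            # 4 rows, one duplicate
merged.drop_duplicates(inplace=True, ignore_index=True)  # 3 unique rows, index 0..2
print(len(merged))  # 3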
requirements.txt
@@ -1,35 +1,2 @@
-annotated-types==0.7.0
-anyio==4.11.0
-beautifulsoup4==4.13.5
-cachetools==5.5.2
-certifi==2025.8.3
-charset-normalizer==3.4.3
-google-auth==2.40.3
-google-genai==1.38.0
-h11==0.16.0
-httpcore==1.0.9
-httpx==0.28.1
-idna==3.10
-markdownify==0.13.1
-numpy==1.26.3
-pandas==2.3.2
-pyasn1==0.6.1
-pyasn1_modules==0.4.2
-pydantic==2.11.9
-pydantic_core==2.33.2
-python-dateutil==2.9.0.post0
 python-jobspy==1.1.82
-pytz==2025.2
+google-genai==1.38.0
-regex==2024.11.6
-requests==2.32.5
-rsa==4.9.1
-six==1.17.0
-sniffio==1.3.1
-soupsieve==2.8
-tenacity==9.1.2
-tls-client==1.0.1
-typing-inspection==0.4.1
-typing_extensions==4.15.0
-tzdata==2025.2
-urllib3==2.5.0
-websockets==15.0.1
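
Trimming requirements.txt to the two direct dependencies (python-jobspy for scraping, google-genai for the Gemini calls) and letting pip resolve the transitive pins makes the file easier to maintain; pandas, which main.py now imports directly, is presumably still pulled in as a python-jobspy dependency rather than pinned here.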