From 65bd214a6126d9532d852622bf87978b0169f9ad Mon Sep 17 00:00:00 2001
From: Ahmed Hesham <ahmed.hesham.farag@gmail.com>
Date: Wed, 1 Oct 2025 12:49:40 +0300
Subject: [PATCH] Add Dockerfile and move job filtering into filter.py

---
 .dockerignore    |   6 +++
 .gitignore       |   3 +-
 Dockerfile       |  11 +++++
 ai.py            |   3 --
 filter.py        |  93 ++++++++++++++++++++++++++++++++++++++
 jobs.py          |   4 +-
 main.py          | 113 +++++++----------------------------------------
 requirements.txt |  35 +--------------
 8 files changed, 131 insertions(+), 137 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 Dockerfile
 create mode 100644 filter.py

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..bdfc479
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,9 @@
+# Keep local tooling, VCS data, env files and scraped output out of the build context.
+__pycache__
+.venv
+.vscode
+.git
+.dockerignore
+.env
+.env2
+jobs.csv
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index d9c316d..fa74427 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,5 @@ jobs.csv
 .venv
 __pycache__
 .env
-.vscode
\ No newline at end of file
+.vscode
+.env2
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..d592bc8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM python:3.12.3-slim
+
+WORKDIR /jobfitai
+
+# Copy only the dependency list first so the pip layer below is rebuilt
+# only when requirements.txt changes, not on every source edit.
+COPY requirements.txt .
+
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the application source (filtered by .dockerignore).
+COPY . .
+
+CMD [ "python3", "main.py"]
diff --git a/ai.py b/ai.py
index a295d95..513b1c3 100644
--- a/ai.py
+++ b/ai.py
@@ -1,12 +1,9 @@
 # To run this code you need to install the following dependencies:
 # pip install google-genai
 
-import base64
-import os
 from google import genai
 from google.genai import types
 
-
 def generate(description, instruction, api_key):
     client = genai.Client(api_key=api_key)
 
diff --git a/filter.py b/filter.py
new file mode 100644
index 0000000..90757e9
--- /dev/null
+++ b/filter.py
@@ -0,0 +1,126 @@
+import logging
+import time
+from ai import generate
+import json
+from google.genai.errors import ServerError, ClientError
+
+total_fail = 0
+total_overload = 0
+total_fail_overload = 0
+total_empty_response = 0
+total_fail_empty_response = 0
+
+
+def filter_jobs(jobs, cv, api_keys, good_fit_jobs):
+    """Score each job against the CV and collect the ones that fit well.
+
+    Every job description is sent to ``generate`` together with ``cv``; the
+    reply is parsed as JSON and jobs whose "percentage" is above 50 are
+    appended to ``good_fit_jobs``, which is mutated in place.
+
+    Args:
+        jobs: DataFrame with "title", "job_url" and "description" columns;
+            the index is expected to count up from 0 (it paces the sleeps).
+        cv: instruction/CV text forwarded to ``generate``.
+        api_keys: list of API keys; the next key is tried after a 429.
+        good_fit_jobs: list that receives one dict per matching job.
+
+    Returns:
+        ``good_fit_jobs`` when every job was processed, or ``1`` when an
+        unrecoverable API error occurred or all API keys hit their limit.
+    """
+    # The counters are module-level so print_stats() can report them; without
+    # this declaration the ``+=`` updates below raise UnboundLocalError.
+    global total_fail, total_overload, total_fail_overload
+    global total_empty_response, total_fail_empty_response
+
+    key_number = 0
+
+    for i, job in jobs.iterrows():
+        if (i + 1) % 10 == 0 and i != 0:
+            logging.warning("sleeping to avoid API rate limits")
+            time.sleep(60)
+
+        description = job["description"]
+        if not isinstance(description, str):
+            # A missing description may arrive as None/NaN and has no
+            # splitlines(); skip the job instead of crashing the whole run.
+            logging.warning("skipping job without a description")
+            continue
+
+        # Strip blank lines once per job rather than on every retry.
+        cleaned_description = "\n".join(
+            line for line in description.splitlines() if line.strip()
+        )
+        try_count = 3
+
+        while try_count > 0:
+            try:
+                ai_response = generate(cleaned_description, cv, api_keys[key_number])
+                ai_response_dict = json.loads(ai_response)
+                break
+
+            except json.JSONDecodeError:
+                # Empty or malformed reply: retry after a short pause.
+                try_count -= 1
+                total_empty_response += 1
+                if try_count == 0:
+                    total_fail += 1
+                    total_fail_empty_response += 1
+
+                logging.warning("Sleeping after JSONDecodeError")
+                time.sleep(6)
+
+            except ServerError as e:
+                if e.details["error"]["code"] == 503:
+                    # Model overloaded: back off and retry.
+                    try_count -= 1
+                    total_overload += 1
+                    if try_count == 0:
+                        total_fail += 1
+                        total_fail_overload += 1
+                    logging.warning("sleeping to after The model is overloaded.")
+                    print(e.details)
+                    time.sleep(10)
+                else:
+                    logging.critical(e.details)
+                    return 1
+
+            except ClientError as e:
+                if e.details["error"]["code"] == 429:
+                    # Rate limited: switch to the next key without spending a retry.
+                    logging.warning("api limit hit")
+                    key_number += 1
+                    if key_number > len(api_keys) - 1:
+                        logging.critical("All api keys hit the limit")
+                        return 1
+                else:
+                    logging.critical(e.details)
+                    return 1
+
+        else:
+            # The while loop ran out of attempts without a break.
+            logging.critical("All attempts failed")
+            continue
+
+        if ai_response_dict["percentage"] > 50:
+            good_fit_jobs.append(
+                {
+                    "title": job["title"],
+                    "url": job["job_url"],
+                    "percentage": ai_response_dict["percentage"],
+                    "why I'm I a good fit": ai_response_dict["why I'm I a good fit"],
+                    "what I'm I missing": ai_response_dict["what I'm I missing"],
+                }
+            )
+
+    print_stats()
+    return good_fit_jobs
+
+
+def print_stats():
+    """Print the retry/failure counters accumulated by filter_jobs."""
+    stats = f"""total fail: {total_fail}
+total empty responses: {total_empty_response} fail: {total_fail_empty_response}
+Total overloads:       {total_overload}       fail: {total_fail_overload}"""
+    print(stats)
diff --git a/jobs.py b/jobs.py
index 7a06a5c..76a7328 100644
--- a/jobs.py
+++ b/jobs.py
@@ -1,5 +1,5 @@
 from jobspy import scrape_jobs
-
+import logging
 
 def getJobs(jobTitle, results_wanted, hours_old):
     jobs = scrape_jobs(
@@ -22,7 +22,7 @@ def getJobs(jobTitle, results_wanted, hours_old):
         linkedin_fetch_description=True,  # gets more info such as description, direct job url (slower)
         # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
     )
-    print(f"Found {len(jobs)} jobs")
+    logging.warning(f"Found {len(jobs)} {jobTitle} jobs")
     # print(jobs)
     return jobs
     # jobs.to_csv(
diff --git a/main.py b/main.py
index 8f381a1..9d5dce5 100644
--- a/main.py
+++ b/main.py
@@ -1,12 +1,10 @@
 from jobs import getJobs
-from ai import generate
-from google.genai.errors import ServerError, ClientError
 from alert import send_email
-import json
-import time
+from filter import filter_jobs
 import os
 import logging
 from random import shuffle
+import pandas as pd
 
 logging.basicConfig(
     level=logging.WARNING, format="%(asctime)s - %(levelname)s - %(message)s"
@@ -16,110 +14,42 @@ SENDER = os.getenv("smtp_email")
 PASSWORD = os.getenv("smtp_password")
 RECEIVER = os.getenv("receiver_email")
 api_keys = os.getenv("api_keys").split(",")
-
-good_fit_jobs = []
-
-# stats
-total_fail = 0
-total_fail_overload = 0
-total_overload = 0
-total_empty_response = 0
-total_fail_empty_response = 0
-
 shuffle(api_keys)
 
+# Accumulators: every scraped row, and the matches selected by filter_jobs.
+all_jobs = pd.DataFrame()
+good_fit_jobs = []
+
 with open("instruction.txt", "r") as f:
     CV = f.read()
 
 
-def get_jobs(job_title, cv, results_wanted, hours_old):
-    global total_fail, total_fail_overload, total_overload, total_empty_response, total_fail_empty_response
-    key_number = 0
-
+def get_jobs(job_title, results_wanted, hours_old):
+    """Scrape one search term and append the rows to the global ``all_jobs``.
+
+    Args:
+        job_title: search term handed to ``getJobs``.
+        results_wanted: passed through to ``getJobs`` unchanged.
+        hours_old: passed through to ``getJobs`` unchanged.
+    """
+    global all_jobs
     jobs = getJobs(job_title, results_wanted, hours_old)
-    for i, job in jobs.iterrows():
-        print("index is :", i)  # for debugging
-
-        if (i + 1) % 10 == 0 and i != 0:
-            logging.warning("sleeping to avoid API rate limits")
-            time.sleep(60)
-        try_count = 3
-
-        while try_count > 0:
-
-            try:
-                cleaned_description = "\n".join(
-                    [line for line in job["description"].splitlines() if line.strip()]
-                )
-                ai_response = generate(cleaned_description, cv, api_keys[key_number])
-                ai_response_dict = json.loads(ai_response)
-                break
-
-            except json.JSONDecodeError as e:
-                try_count -= 1
-                total_empty_response += 1
-                if try_count == 0:
-                    total_fail += 1
-                    total_fail_empty_response += 1
-
-                logging.warning("Sleeping after JSONDecodeError")
-                time.sleep(6)
-
-            except ServerError as e:
-
-                if e.details["error"]["code"] == 503:
-                    try_count -= 1
-                    total_overload += 1
-                    if try_count == 0:
-                        total_fail += 1
-                        total_fail_overload += 1
-                    logging.warning("sleeping to after The model is overloaded.")
-                    print(e.details)
-                    time.sleep(10)
-                else:
-                    logging.critical(e.details)
-                    return 1
-
-            except ClientError as e:
-                if e.details["error"]["code"] == 429:
-                    logging.warning("api limit hit")
-                    key_number += 1
-                    if key_number > len(api_keys) - 1:
-                        logging.critical("All api keys hit the limit")
-                        return 1
-                else:
-                    logging.critical(e.details)
-                    return 1
-
-        else:
-            logging.critical("All attempts failed")
-            continue
-
-        if ai_response_dict["percentage"] > 50:
-            good_fit_jobs.append(
-                {
-                    "title": job["title"],
-                    "url": job["job_url"],
-                    "percentage": ai_response_dict["percentage"],
-                    "why I'm I a good fit": ai_response_dict["why I'm I a good fit"],
-                    "what I'm I missing": ai_response_dict["what I'm I missing"],
-                }
-            )
-
-
-def print_stats():
-    stats = f"""total fail: {total_fail}
-total empty responses: {total_empty_response} fail: {total_fail_empty_response}
-Total overloads:       {total_overload}       fail: {total_fail_overload}"""
-    print(stats)
+    # ignore_index=True renumbers the combined rows from 0.
+    all_jobs = pd.concat([all_jobs, jobs], ignore_index=True)
 
 
 if __name__ == "__main__":
-    get_jobs("devops", CV, results_wanted=30, hours_old=2)
-    get_jobs("backend", CV, results_wanted=30, hours_old=2)
-    get_jobs("software engineer", CV, results_wanted=30, hours_old=2)
+    get_jobs("devops", results_wanted=30, hours_old=2)
+    get_jobs("backend", results_wanted=30, hours_old=2)
+    get_jobs("software engineer", results_wanted=30, hours_old=2)
+    get_jobs("cloud", results_wanted=30, hours_old=2)
+    get_jobs("sre", results_wanted=30, hours_old=2)
+    get_jobs("intern", results_wanted=30, hours_old=2)
+    # Remove rows that are exact duplicates across the searches.
+    all_jobs.drop_duplicates(inplace=True, ignore_index=True)
+    # filter_jobs appends its matches to good_fit_jobs in place.
+    filter_jobs(all_jobs, CV, api_keys, good_fit_jobs)
     if len(good_fit_jobs) > 0:
         send_email(SENDER, RECEIVER, PASSWORD, good_fit_jobs)
     else:
         print("no good fit jobs")
-    print_stats()
diff --git a/requirements.txt b/requirements.txt
index 1dd3c07..af63f03 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,35 +1,2 @@
-annotated-types==0.7.0
-anyio==4.11.0
-beautifulsoup4==4.13.5
-cachetools==5.5.2
-certifi==2025.8.3
-charset-normalizer==3.4.3
-google-auth==2.40.3
-google-genai==1.38.0
-h11==0.16.0
-httpcore==1.0.9
-httpx==0.28.1
-idna==3.10
-markdownify==0.13.1
-numpy==1.26.3
-pandas==2.3.2
-pyasn1==0.6.1
-pyasn1_modules==0.4.2
-pydantic==2.11.9
-pydantic_core==2.33.2
-python-dateutil==2.9.0.post0
 python-jobspy==1.1.82
-pytz==2025.2
-regex==2024.11.6
-requests==2.32.5
-rsa==4.9.1
-six==1.17.0
-sniffio==1.3.1
-soupsieve==2.8
-tenacity==9.1.2
-tls-client==1.0.1
-typing-inspection==0.4.1
-typing_extensions==4.15.0
-tzdata==2025.2
-urllib3==2.5.0
-websockets==15.0.1
+google-genai==1.38.0
\ No newline at end of file