diff --git a/jobs.py b/jobs.py
index 76a7328..cc9ad53 100644
--- a/jobs.py
+++ b/jobs.py
@@ -1,28 +1,37 @@
 from jobspy import scrape_jobs
 import logging
-def getJobs(jobTitle, results_wanted, hours_old):
+
+def getJobs(
+    jobTitle,
+    results_wanted,
+    hours_old,
+    country,
+    location,
+    is_remote=False,
+):
     jobs = scrape_jobs(
         site_name=[
             "indeed",
             "linkedin",
             # "zip_recruiter",
-            "google",
+            # "google",
             # "glassdoor",
             # "bayt",
             # "naukri",
             # "bdjobs",
         ],
         search_term=jobTitle,
-        location="Cairo",
+        location=location,
         results_wanted=results_wanted,
-        google_search_term=f"{jobTitle} jobs near Cairo since {hours_old} hours",
+        # google_search_term=f"{jobTitle} jobs near Cairo since {hours_old} hours",
         hours_old=hours_old,
-        country_indeed="Egypt",
+        country_indeed=country,
+        is_remote=is_remote,
         linkedin_fetch_description=True, # gets more info such as description, direct job url (slower)
         # proxies=["208.195.175.46:65095", "208.195.175.45:65095", "localhost"],
     )
-    logging.warning(f"Found {len(jobs)} {jobTitle} jobs")
+    logging.warning(f"Found {len(jobs)} {jobTitle} jobs in {country},{location}")
     # print(jobs)
     return jobs
 
     # jobs.to_csv(
diff --git a/main.py b/main.py
index 2de7a59..a747e50 100644
--- a/main.py
+++ b/main.py
@@ -23,20 +23,21 @@ with open("instruction.txt", "r") as f:
     CV = f.read()
 
 
-def get_jobs(job_title, results_wanted, hours_old):
+def get_jobs(job_title, results_wanted, hours_old, country, location, is_remote=False):
     global all_jobs
-    jobs = getJobs(job_title, results_wanted, hours_old)
+    jobs = getJobs(job_title, results_wanted, hours_old, country, location, is_remote=is_remote)
     all_jobs = pd.concat([all_jobs, jobs], ignore_index=True)
 
 
 if __name__ == "__main__":
-    get_jobs("devops", results_wanted=30, hours_old=2)
-    get_jobs("backend", results_wanted=30, hours_old=2)
-    get_jobs("software engineer", results_wanted=30, hours_old=2)
-    get_jobs("cloud", results_wanted=30, hours_old=2)
-    get_jobs("site reliability engineer", results_wanted=30, hours_old=2)
-    get_jobs("sre", results_wanted=30, hours_old=2)
-    get_jobs("intern", results_wanted=30, hours_old=2)
+    get_jobs("devops", results_wanted=30, hours_old=2, country="egypt", location="cairo")
+    get_jobs("backend", results_wanted=30, hours_old=2, country="egypt", location="cairo")
+    get_jobs("software engineer",results_wanted=30,hours_old=2,country="egypt",location="cairo",)
+    get_jobs("cloud", results_wanted=30, hours_old=2, country="egypt", location="cairo")
+    get_jobs("site reliability engineer",results_wanted=30,hours_old=2,country="egypt",location="cairo")
+    get_jobs("sre", results_wanted=30, hours_old=2, country="egypt", location="cairo")
+    get_jobs("intern", results_wanted=30, hours_old=2, country="egypt", location="cairo")
+    # get_jobs("devops",results_wanted=200,hours_old=2,country="worldwide",location="",is_remote=True)
    all_jobs.drop_duplicates(inplace=True, ignore_index=True)
     logging.warning(f"Total jobs no duplicates: {len(all_jobs)}")
     filter_jobs(all_jobs, CV, api_keys, good_fit_jobs)