"""Summarize a PDF document with a two-pass (summarize, then refine) LLM pipeline."""

import argparse
import os

import PyPDF2
from dotenv import load_dotenv
from openai import OpenAI

# Load variables from a .env file into the environment before the client
# below reads OPENAI_API_KEY.
load_dotenv()

# Load the API key from the .env file
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url="https://genai.ghaymah.systems",
)


def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path to the PDF file to read.

    Returns:
        The concatenated text of all pages (possibly an empty string), or
        ``None`` if the file does not exist or cannot be read. In the failure
        case an error message is printed.
    """
    print(f"Extracting text from {pdf_path}...")
    try:
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            # Extract while the file is still open. ``or ""`` is defensive:
            # a page that yields no text must not break the concatenation.
            text = "".join(page.extract_text() or "" for page in reader.pages)
    except FileNotFoundError:
        print(f"Error: The file at {pdf_path} was not found.")
        return None
    except Exception as e:
        # Boundary handler: any parser failure is reported, not propagated.
        print(f"An error occurred while reading the PDF: {e}")
        return None
    print("Text extraction complete.")
    return text


def _request_completion(model, system_prompt, user_prompt):
    """Send one system + user message pair and return the first reply's text."""
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return response.choices[0].message.content


def summarize_text(text, model="DeepSeek-V3-0324"):
    """Summarize text with the given chat model.

    Args:
        text: The text to summarize.
        model: Name of the model to request from the configured endpoint.

    Returns:
        The summary returned by the model, or ``None`` if the request failed
        (the error is printed).
    """
    print(f"Summarizing text using {model}...")
    try:
        return _request_completion(
            model,
            "You are a helpful assistant that summarizes text.",
            f"Please summarize the following text:\n\n{text}",
        )
    except Exception as e:
        print(f"An error occurred during summarization: {e}")
        return None


def refine_summary(summary, model="DeepSeek-V3-0324"):
    """Refine an existing summary with the given chat model.

    Args:
        summary: The summary to improve.
        model: Name of the model to request from the configured endpoint.

    Returns:
        The refined summary returned by the model, or ``None`` if the request
        failed (the error is printed).
    """
    print(f"Refining summary using {model}...")
    try:
        return _request_completion(
            model,
            "You are a helpful assistant that refines and improves summaries.",
            f"Please refine and improve the following summary:\n\n{summary}",
        )
    except Exception as e:
        print(f"An error occurred during summary refinement: {e}")
        return None


def main():
    """Parse the command line, then extract, summarize and refine the PDF.

    Stops early, without printing any summary, as soon as one stage fails.
    """
    parser = argparse.ArgumentParser(
        description="Summarize a PDF document using AI models."
    )
    parser.add_argument("pdf_path", help="The path to the PDF file to summarize.")
    args = parser.parse_args()

    # 1. Extract text from the PDF
    pdf_text = extract_text_from_pdf(args.pdf_path)
    if pdf_text is None:
        return
    if not pdf_text.strip():
        # Nothing to summarize; avoid sending an empty prompt to the model.
        print("Error: No extractable text was found in the PDF.")
        return

    # 2. Create an initial summary
    initial_summary = summarize_text(pdf_text)
    if initial_summary is None:
        return

    # 3. Refine the summary
    refined_summary = refine_summary(initial_summary)
    if refined_summary is None:
        return

    print("\n--- Initial Summary ---")
    print(initial_summary)
    print("\n--- Refined Summary ---")
    print(refined_summary)


if __name__ == "__main__":
    main()