diff --git a/PDFSummerizorV1/.gitignore b/PDFSummerizorV1/.gitignore
new file mode 100644
index 0000000..66bca12
--- /dev/null
+++ b/PDFSummerizorV1/.gitignore
@@ -0,0 +1,2 @@
+.env
+venv/
diff --git a/PDFSummerizorV1/README.md b/PDFSummerizorV1/README.md
new file mode 100644
index 0000000..e92e64e
--- /dev/null
+++ b/PDFSummerizorV1/README.md
@@ -0,0 +1,25 @@
+# PDF Summarizer V1
+
+This project is a command-line tool that summarizes PDF documents using a two-step process with AI models.
+
+## How it works
+
+1. **Extracts Text**: The tool first extracts the text content from the provided PDF file.
+2. **Initial Summary**: It then uses a generative AI model (`DeepSeek-V3-0324` by default; another model, e.g., `gemma-3-4b-it`, can be selected with `--summary-model`) to create an initial summary of the text.
+3. **Refined Summary**: This initial summary is then passed to the model a second time to refine and improve it (`DeepSeek-V3-0324` by default; a potentially more advanced model, e.g., `QwQ-32B`, can be selected with `--refine-model`).
+
+## How to use
+
+1. Install the required dependencies:
+   ```bash
+   pip install -r requirements.txt
+   ```
+2. Create a `.env` file next to `app.py` containing your API key:
+   ```
+   OPENAI_API_KEY=your_api_key
+   ```
+3. Place your PDF file in the `uploads` directory.
+4. Run the application from your terminal:
+   ```bash
+   python app.py uploads/your_file.pdf
+   ```
diff --git a/PDFSummerizorV1/app.py b/PDFSummerizorV1/app.py
new file mode 100644
index 0000000..5e7dbb6
--- /dev/null
+++ b/PDFSummerizorV1/app.py
@@ -0,0 +1,144 @@
+"""Summarize a PDF from the command line in two passes with a chat model.
+
+Text is extracted with PyPDF2, summarized once, and that summary is then sent
+back to the model to be refined.
+"""
+
+import argparse
+import os
+import sys
+
+import PyPDF2
+from dotenv import load_dotenv
+from openai import OpenAI
+
+# Model used for both passes unless overridden on the command line.
+DEFAULT_MODEL = "DeepSeek-V3-0324"
+
+# Load OPENAI_API_KEY from a local .env file into the environment, if present.
+load_dotenv()
+
+client = OpenAI(
+    api_key=os.environ.get("OPENAI_API_KEY"),
+    base_url="https://genai.ghaymah.systems",
+)
+
+
+def extract_text_from_pdf(pdf_path):
+    """Extract the text of every page of a PDF file.
+
+    Args:
+        pdf_path: Path to the PDF file.
+
+    Returns:
+        The page texts joined by newlines (empty if the PDF has no
+        extractable text), or None if the file is missing or unreadable.
+    """
+    print(f"Extracting text from {pdf_path}...")
+    try:
+        with open(pdf_path, "rb") as f:
+            reader = PyPDF2.PdfReader(f)
+            # Treat a page that yields no text as empty rather than letting
+            # it abort extraction of the whole document.
+            pages = [page.extract_text() or "" for page in reader.pages]
+    except FileNotFoundError:
+        print(f"Error: The file at {pdf_path} was not found.", file=sys.stderr)
+        return None
+    except Exception as e:
+        print(f"An error occurred while reading the PDF: {e}", file=sys.stderr)
+        return None
+    print("Text extraction complete.")
+    # Separate pages so words at a page boundary do not run together.
+    return "\n".join(pages)
+
+
+def _chat(system_prompt, user_prompt, model, task):
+    """Send one system/user exchange to *model* and return the reply text.
+
+    *task* names the step in the error message. Returns None after reporting
+    the error if the request fails.
+    """
+    try:
+        response = client.chat.completions.create(
+            model=model,
+            messages=[
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ],
+        )
+        return response.choices[0].message.content
+    except Exception as e:
+        print(f"An error occurred during {task}: {e}", file=sys.stderr)
+        return None
+
+
+def summarize_text(text, model=DEFAULT_MODEL):
+    """Summarize *text* with *model*; return the summary, or None on failure."""
+    print(f"Summarizing text using {model}...")
+    return _chat(
+        "You are a helpful assistant that summarizes text.",
+        f"Please summarize the following text:\n\n{text}",
+        model,
+        "summarization",
+    )
+
+
+def refine_summary(summary, model=DEFAULT_MODEL):
+    """Refine *summary* with *model*; return the result, or None on failure."""
+    print(f"Refining summary using {model}...")
+    return _chat(
+        "You are a helpful assistant that refines and improves summaries.",
+        f"Please refine and improve the following summary:\n\n{summary}",
+        model,
+        "summary refinement",
+    )
+
+
+def main():
+    """Run the extract, summarize, refine pipeline for one PDF.
+
+    Returns:
+        0 on success, 1 if any step failed (the step reports the reason).
+    """
+    parser = argparse.ArgumentParser(description="Summarize a PDF document using AI models.")
+    parser.add_argument("pdf_path", help="The path to the PDF file to summarize.")
+    parser.add_argument(
+        "--summary-model",
+        default=DEFAULT_MODEL,
+        help="Model for the initial summary (default: %(default)s).",
+    )
+    parser.add_argument(
+        "--refine-model",
+        default=DEFAULT_MODEL,
+        help="Model for the refinement pass (default: %(default)s).",
+    )
+    args = parser.parse_args()
+
+    # 1. Extract text from the PDF
+    pdf_text = extract_text_from_pdf(args.pdf_path)
+    if pdf_text is None:
+        return 1
+    if not pdf_text.strip():
+        # Nothing to summarize (e.g. an image-only PDF): skip the API calls.
+        print(f"Error: No extractable text found in {args.pdf_path}.", file=sys.stderr)
+        return 1
+
+    # 2. Create an initial summary
+    initial_summary = summarize_text(pdf_text, model=args.summary_model)
+    if initial_summary is None:
+        return 1
+
+    # 3. Refine the summary
+    refined_summary = refine_summary(initial_summary, model=args.refine_model)
+    if refined_summary is None:
+        return 1
+
+    print("\n--- Initial Summary ---")
+    print(initial_summary)
+    print("\n--- Refined Summary ---")
+    print(refined_summary)
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/PDFSummerizorV1/requirements.txt b/PDFSummerizorV1/requirements.txt
new file mode 100644
index 0000000..34cacca
--- /dev/null
+++ b/PDFSummerizorV1/requirements.txt
@@ -0,0 +1,3 @@
+openai
+PyPDF2
+python-dotenv
diff --git a/PDFSummerizorV1/uploads/test1.pdf b/PDFSummerizorV1/uploads/test1.pdf
new file mode 100644
index 0000000..351ab88
Binary files /dev/null and b/PDFSummerizorV1/uploads/test1.pdf differ