From b1cf9ca408d4b961ab707e407322484b5cbf1ac5 Mon Sep 17 00:00:00 2001
From: MemaroX <boy15maher50@gmail.com>
Date: Tue, 16 Sep 2025 16:20:12 +0300
Subject: [PATCH] Initial commit of the RAG API application

---
 .dockerignore    |  10 ++
 .gitignore       |  29 ++++
 Dockerfile       |  24 +++
 README.md        |  34 ++++
 bool             |   0
 doc_rag_app.py   | 185 +++++++++++++-------
 gyC.sh           | 426 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  11 ++
 rerank_test.html | 179 ++++++++++++++++++++
 9 files changed, 841 insertions(+), 57 deletions(-)
 create mode 100644 .dockerignore
 create mode 100644 .gitignore
 create mode 100644 Dockerfile
 create mode 100644 README.md
 create mode 100644 bool
 create mode 100644 gyC.sh
 create mode 100644 requirements.txt
 create mode 100644 rerank_test.html

diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..766faad
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+# Ignore Python cache
+__pycache__/
+*.pyc
+
+# Ignore Git directory
+.git/
+.gitignore
+
+# Ignore environment files
+.env
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0e98b28
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
+# Distribution / packaging
+.Python
+build/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environment variables
+.env
+.venv
+
+# Other
+*.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c850b3f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+# Use an official Python runtime as a parent image
+FROM python:3.11-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container at /app
+COPY requirements.txt .
+
+# Install any needed packages specified in requirements.txt
+# We use --no-cache-dir to keep the image size down
+RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
+
+# Copy the rest of the application's code into the container at /app
+COPY . .
+
+# Document that the app listens on port 8000 (EXPOSE does not publish it; map it with -p at run time)
+EXPOSE 8000
+
+# Define environment variable to ensure python prints things without buffering
+ENV PYTHONUNBUFFERED=1
+
+# Run the application
+CMD ["uvicorn", "doc_rag_app:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..acddb02
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# Ghaymah Docs RAG API
+
+This project implements a Retrieval-Augmented Generation (RAG) API using FastAPI to answer questions about Ghaymah Cloud documentation. It features a sophisticated two-stage retrieval process involving an initial vector search followed by a more precise re-ranking step to ensure high-quality answers.
+
+## Key Features
+
+- **FastAPI Backend:** A robust and fast API for serving the RAG pipeline.
+- **Two-Stage Retrieval:**
+  1.  **Initial Search:** Uses `sentence-transformers` to perform a broad vector search and retrieve an initial set of candidate documents.
+  2.  **Re-ranking:** Employs a `CrossEncoder` model to re-rank the initial candidates for greater relevance and precision.
+- **Dockerized:** Comes with a `Dockerfile` for easy, repeatable deployment on any platform that supports containers.
+- **Visualization:** Includes a `rerank_test.html` page to visually compare the results before and after the re-ranking step.
+
+## Getting Started
+
+### Prerequisites
+
+- Docker
+- A Git client
+
+### Deployment
+
+This application is designed to be deployed as a Docker container. It can be deployed via a Git-based workflow on a platform like Ghaymah Cloud.
+
+1.  **Push to Git:** Push the code to a GitHub or GitLab repository.
+2.  **Connect Platform:** Connect your cloud platform to the Git repository.
+3.  **Build and Deploy:** The platform will use the included `Dockerfile` to automatically build and deploy the application.
+
+### Configuration
+
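+The application reads the following environment variables from the deployment environment:
+
+- `GITPASHA_HOST`: The URL for the remote vector store (GitPasha). Optional; the app falls back to a built-in default host.
+- `OPENAI_API_KEY`: Your API key for the LLM provider (an OpenAI-compatible endpoint). Optional; without it, `/query/` returns the retrieved context instead of a generated answer.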
diff --git a/bool b/bool
new file mode 100644
index 0000000..e69de29
diff --git a/doc_rag_app.py b/doc_rag_app.py
index ded5e52..a4cc85c 100644
--- a/doc_rag_app.py
+++ b/doc_rag_app.py
@@ -4,7 +4,7 @@ import json
 import uvicorn
 import requests
 from dotenv import load_dotenv
-from typing import Optional
+from typing import Optional, List
 from openai import OpenAI
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
@@ -12,13 +12,14 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from sentence_transformers import CrossEncoder
 
 # Load .env
 load_dotenv()
 
-# -----------------------
+# -----------------------  
 # Configuration
-# -----------------------
+# -----------------------  
 GITPASHA_HOST = os.getenv(
     "GITPASHA_HOST",
     "https://app1-f06df021060b.hosted.ghaymah.systems"
@@ -26,9 +27,9 @@ GITPASHA_HOST = os.getenv(
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # used only for final LLM summarization if needed
 DOC_FILE = os.getenv("DOC_FILE", "full_ghaymah_docs.txt")
 
-# -----------------------
+# -----------------------  
 # FastAPI + client
-# -----------------------
+# -----------------------  
 app = FastAPI(title="Ghaymah Docs RAG API (Restarted)", version="1.0")
 
 app.add_middleware(
@@ -44,33 +45,37 @@ client = None
 if OPENAI_API_KEY:
     client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://genai.ghaymah.systems")
 
-# -----------------------
-# Embedding model (512 dims)
-# -----------------------
+# -----------------------  
+# Models (Embedding + Reranking)
+# -----------------------  
 print("Initializing local embedding model (sentence-transformers/distiluse-base-multilingual-cased)...")
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased")
 print("Embedding model loaded.")
 
-# -----------------------
+print("Initializing local CrossEncoder model (ms-marco-MiniLM-L-6-v2)...")
+cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+print("CrossEncoder model loaded.")
+
+# -----------------------  
 # Request Models
-# -----------------------
+# -----------------------  
 class QueryRequest(BaseModel):
     query: str
-    k: Optional[int] = 10   # allow overriding k
+    k: Optional[int] = 5   # final number of chunks to use
 
 class IngestRequest(BaseModel):
     # keep for future if want dynamic file name content ingestion
     filename: Optional[str] = None
 
-# -----------------------
+# -----------------------  
 # Helpers
-# -----------------------
-def _embed_texts(texts):
+# -----------------------  
+def _embed_texts(texts: List[str]) -> List[List[float]]:
     """Return list of embeddings for given texts."""
     return embeddings.embed_documents(texts)
 
-def _embed_query(text):
-    """Return single embedding for query (list)."""
+def _embed_query(text: str) -> List[float]:
+    """Return single embedding for query."""
     return embeddings.embed_query(text)
 
 def store_text_chunks_remote(text: str) -> bool:
@@ -114,7 +119,7 @@ def store_text_chunks_remote(text: str) -> bool:
         print(f"[store] Error calling remote insert: {e} / Response: {getattr(e, 'response', None)}")
         raise HTTPException(status_code=500, detail=f"Failed to insert to remote vector store: {e}")
 
-def search_remote_by_vector(vector, k=10):
+def search_remote_by_vector(vector: List[float], k: int = 10):
     """Call remote /search with given vector and return parsed JSON (raw)."""
     try:
         resp = requests.post(
@@ -129,24 +134,9 @@ def search_remote_by_vector(vector, k=10):
         print(f"[search] Error calling remote search: {e}")
         raise HTTPException(status_code=500, detail=f"Remote search failed: {e}")
 
-def build_context_from_search_results(search_results, min_score: Optional[float] = None):
-    """Given remote search results, optionally filter by min_score and return context text and metadata."""
-    if not search_results or "results" not in search_results:
-        return "", []
-
-    items = []
-    for r in search_results["results"]:
-        score = r.get("score", None)
-        payload = r.get("payload", {})
-        text_chunk = payload.get("text_chunk", "")
-        if min_score is None or (score is not None and score >= min_score):
-            items.append({"score": score, "text": text_chunk})
-    context = "\n\n".join([it["text"] for it in items])
-    return context, items
-
-# -----------------------
+# -----------------------  
 # Startup: optionally auto-ingest file on startup
-# -----------------------
+# -----------------------  
 @app.on_event("startup")
 def startup_ingest():
     """On startup, attempt to ingest DOC_FILE automatically (non-fatal)."""
@@ -164,9 +154,9 @@ def startup_ingest():
         # do not prevent server from starting
         print(f"[startup] Ingest error (non-fatal): {e}")
 
-# -----------------------
+# -----------------------  
 # Endpoints
-# -----------------------
+# -----------------------  
 @app.post("/ingest-docs/")
 async def ingest_docs(req: IngestRequest = None):
     """Read full_ghaymah_docs.txt and store it remotely. Returns success message."""
@@ -181,54 +171,135 @@ async def ingest_docs(req: IngestRequest = None):
     if ok:
         return JSONResponse(content={"message": f"Successfully ingested '{filename}' into vector store."})
     raise HTTPException(status_code=500, detail="Ingestion failed.")
+
 @app.post("/query/")
 async def query_docs(request: QueryRequest):
     query = request.query
-    k = request.k or 10
-    print(f"[query] Received query: {query} (k={k})")
+    k_final = request.k or 5  # The final number of documents to use
+    k_initial = 25  # The number of documents to retrieve initially
+    print(f"[query] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")
 
-    # Embed query
+    # 1. Embed query
     qvec = _embed_query(query)
 
-    # Remote vector search
-    search_results = search_remote_by_vector(qvec, k=k)
-    payloads = [p["text_chunk"] for p in search_results.get("payloads", [])]
+    # 2. Initial Retrieval from vector store
+    search_results = search_remote_by_vector(qvec, k=k_initial)
+    initial_chunks = [p.get("text_chunk", "") for p in search_results.get("payloads", [])]
 
-    if not payloads:
+    if not initial_chunks:
         return {"answer": "No relevant chunks found.", "search_results": search_results}
-
-    # Deduplicate chunks (keep first occurrence)
+    
+    # Deduplicate initial chunks before re-ranking
     seen = set()
-    context_chunks = []
-    for chunk in payloads:
+    unique_chunks = []
+    for chunk in initial_chunks:
         if chunk not in seen:
-            context_chunks.append(chunk)
+            unique_chunks.append(chunk)
             seen.add(chunk)
+    
+    print(f"[query] Retrieved {len(unique_chunks)} unique chunks for re-ranking.")
 
-    context = "\n\n".join(context_chunks)
+    # 3. Re-ranking with CrossEncoder
+    # Create pairs of (query, chunk) for the model
+    rerank_pairs = [(query, chunk) for chunk in unique_chunks]
+    
+    # Predict new relevance scores
+    rerank_scores = cross_encoder.predict(rerank_pairs)
+    
+    # Combine chunks with their new scores
+    reranked_results = list(zip(rerank_scores, unique_chunks))
+    
+    # Sort by the new score in descending order
+    reranked_results.sort(key=lambda x: x[0], reverse=True)
+    
+    # 4. Select top k_final results after re-ranking
+    top_k_chunks = [chunk for score, chunk in reranked_results[:k_final]]
+    top_k_scores = [float(score) for score, chunk in reranked_results[:k_final]]
 
-    # Use LLM if available
+    context = "\n\n".join(top_k_chunks)
+    print(f"[query] Built context with {len(top_k_chunks)} re-ranked chunks.")
+
+    # 5. Use LLM if available to generate a final answer
     if client:
         try:
             completion = client.chat.completions.create(
                 model="DeepSeek-V3-0324",
                 messages=[
-                    {"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."},
+                    {"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."}, 
                     {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
                 ],
                 temperature=0.0,
             )
             answer = completion.choices[0].message.content
-            return {"answer": answer, "context": context_chunks, "scores": search_results.get("scores", [])}
+            return {"answer": answer, "context": top_k_chunks, "scores": top_k_scores}
         except Exception as e:
             print(f"[query] LLM failed: {e}")
-            return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
+            # Fallback to returning the context directly
+            return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}
     else:
-        return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
+        # If no LLM, return the context as the answer
+        return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}
+
+@app.post("/test-rerank/")
+def test_rerank(request: QueryRequest):
+    """
+    Visualization endpoint: return vector-search hits and their re-ranked order.
+
+    Declared as a plain `def` (like `debug_search`) so FastAPI runs it in its
+    threadpool; the blocking remote search and CrossEncoder inference below
+    would otherwise stall the event loop for every other request.
+
+    Returns a dict with "initial_results" (deduplicated hits carrying the
+    remote store's scores) and "reranked_results" (top `k` hits carrying
+    CrossEncoder scores, best first).
+    """
+    query = request.query
+    # Clamp to >= 1: a negative k would make the final slice drop results
+    # from the tail instead of limiting how many are returned.
+    k_final = max(1, request.k or 5)
+    k_initial = 25  # candidates fetched from the vector store before re-ranking
+    print(f"[test-rerank] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")
+
+    # 1. Embed the query and fetch the initial candidates.
+    qvec = _embed_query(query)
+    search_results = search_remote_by_vector(qvec, k=k_initial)
+    initial_payloads = search_results.get("payloads") or []
+    initial_scores = search_results.get("scores") or []
+
+    # 2. Pair payloads with scores (zip stops at the shorter list) and keep
+    #    only the first occurrence of each chunk text.
+    seen_texts = set()
+    unique_initial_results = []
+    for payload, score in zip(initial_payloads, initial_scores):
+        text = payload.get("text_chunk", "")
+        if text not in seen_texts:
+            seen_texts.add(text)
+            unique_initial_results.append({"text": text, "score": score})
+
+    if not unique_initial_results:
+        return {"initial_results": [], "reranked_results": []}
+
+    # 3. Score every (query, chunk) pair with the CrossEncoder.
+    unique_chunks = [res["text"] for res in unique_initial_results]
+    rerank_scores = cross_encoder.predict([(query, chunk) for chunk in unique_chunks])
+
+    # Store the new scores as plain Python floats alongside their chunks.
+    reranked_results = [
+        {"text": chunk, "score": float(score)}
+        for chunk, score in zip(unique_chunks, rerank_scores)
+    ]
+
+    # 4. Highest CrossEncoder score first, then keep the top k_final.
+    reranked_results.sort(key=lambda res: res["score"], reverse=True)
+
+    return {
+        "initial_results": unique_initial_results,
+        "reranked_results": reranked_results[:k_final],
+    }
 
 
 @app.post("/debug-search/")
-async def debug_search(request: QueryRequest):
+def debug_search(request: QueryRequest):
     """
     Debug endpoint: returns raw search response from remote vector store for the provided query.
     Use this to inspect exact 'results' and scores returned remotely.
@@ -250,8 +321,8 @@ async def debug_search(request: QueryRequest):
 def read_root():
     return {"message": "Ghaymah Docs RAG API. Use /docs for interactive UI."}
 
-# -----------------------
+# -----------------------  
 # Run
-# -----------------------
+# -----------------------  
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/gyC.sh b/gyC.sh
new file mode 100644
index 0000000..1f01ab8
--- /dev/null
+++ b/gyC.sh
@@ -0,0 +1,426 @@
+# bash completion V2 for gy                                   -*- shell-script -*-
+
+__gy_debug()
+{
+    if [[ -n ${BASH_COMP_DEBUG_FILE-} ]]; then
+        echo "$*" >> "${BASH_COMP_DEBUG_FILE}"
+    fi
+}
+
+# Macs have bash3 for which the bash-completion package doesn't include
+# _init_completion. This is a minimal version of that function.
+__gy_init_completion()
+{
+    COMPREPLY=()
+    _get_comp_words_by_ref "$@" cur prev words cword
+}
+
+# This function calls the gy program to obtain the completion
+# results and the directive.  It fills the 'out' and 'directive' vars.
+__gy_get_completion_results() {
+    local requestComp lastParam lastChar args
+
+    # Prepare the command to request completions for the program.
+    # Calling ${words[0]} instead of directly gy allows handling aliases
+    args=("${words[@]:1}")
+    requestComp="${words[0]} __complete ${args[*]}"
+
+    lastParam=${words[$((${#words[@]}-1))]}
+    lastChar=${lastParam:$((${#lastParam}-1)):1}
+    __gy_debug "lastParam ${lastParam}, lastChar ${lastChar}"
+
+    if [[ -z ${cur} && ${lastChar} != = ]]; then
+        # If the last parameter is complete (there is a space following it)
+        # We add an extra empty parameter so we can indicate this to the go method.
+        __gy_debug "Adding extra empty parameter"
+        requestComp="${requestComp} ''"
+    fi
+
+    # When completing a flag with an = (e.g., gy -n=<TAB>)
+    # bash focuses on the part after the =, so we need to remove
+    # the flag part from $cur
+    if [[ ${cur} == -*=* ]]; then
+        cur="${cur#*=}"
+    fi
+
+    __gy_debug "Calling ${requestComp}"
+    # Use eval to handle any environment variables and such
+    out=$(eval "${requestComp}" 2>/dev/null)
+
+    # Extract the directive integer at the very end of the output following a colon (:)
+    directive=${out##*:}
+    # Remove the directive
+    out=${out%:*}
+    if [[ ${directive} == "${out}" ]]; then
+        # There is no directive specified
+        directive=0
+    fi
+    __gy_debug "The completion directive is: ${directive}"
+    __gy_debug "The completions are: ${out}"
+}
+
+__gy_process_completion_results() {
+    local shellCompDirectiveError=1
+    local shellCompDirectiveNoSpace=2
+    local shellCompDirectiveNoFileComp=4
+    local shellCompDirectiveFilterFileExt=8
+    local shellCompDirectiveFilterDirs=16
+    local shellCompDirectiveKeepOrder=32
+
+    if (((directive & shellCompDirectiveError) != 0)); then
+        # Error code.  No completion.
+        __gy_debug "Received error from custom completion go code"
+        return
+    else
+        if (((directive & shellCompDirectiveNoSpace) != 0)); then
+            if [[ $(type -t compopt) == builtin ]]; then
+                __gy_debug "Activating no space"
+                compopt -o nospace
+            else
+                __gy_debug "No space directive not supported in this version of bash"
+            fi
+        fi
+        if (((directive & shellCompDirectiveKeepOrder) != 0)); then
+            if [[ $(type -t compopt) == builtin ]]; then
+                # nosort isn't supported for bash versions older than 4.4
+                if [[ ${BASH_VERSINFO[0]} -lt 4 || ( ${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -lt 4 ) ]]; then
+                    __gy_debug "No sort directive not supported in this version of bash"
+                else
+                    __gy_debug "Activating keep order"
+                    compopt -o nosort
+                fi
+            else
+                __gy_debug "No sort directive not supported in this version of bash"
+            fi
+        fi
+        if (((directive & shellCompDirectiveNoFileComp) != 0)); then
+            if [[ $(type -t compopt) == builtin ]]; then
+                __gy_debug "Activating no file completion"
+                compopt +o default
+            else
+                __gy_debug "No file completion directive not supported in this version of bash"
+            fi
+        fi
+    fi
+
+    # Separate activeHelp from normal completions
+    local completions=()
+    local activeHelp=()
+    __gy_extract_activeHelp
+
+    if (((directive & shellCompDirectiveFilterFileExt) != 0)); then
+        # File extension filtering
+        local fullFilter="" filter filteringCmd
+
+        # Do not use quotes around the $completions variable or else newline
+        # characters will be kept.
+        for filter in ${completions[*]}; do
+            fullFilter+="$filter|"
+        done
+
+        filteringCmd="_filedir $fullFilter"
+        __gy_debug "File filtering command: $filteringCmd"
+        $filteringCmd
+    elif (((directive & shellCompDirectiveFilterDirs) != 0)); then
+        # File completion for directories only
+
+        local subdir
+        subdir=${completions[0]}
+        if [[ -n $subdir ]]; then
+            __gy_debug "Listing directories in $subdir"
+            pushd "$subdir" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 || return
+        else
+            __gy_debug "Listing directories in ."
+            _filedir -d
+        fi
+    else
+        __gy_handle_completion_types
+    fi
+
+    __gy_handle_special_char "$cur" :
+    __gy_handle_special_char "$cur" =
+
+    # Print the activeHelp statements before we finish
+    __gy_handle_activeHelp
+}
+
+__gy_handle_activeHelp() {
+    # Print the activeHelp statements
+    if ((${#activeHelp[*]} != 0)); then
+        if [ -z $COMP_TYPE ]; then
+            # Bash v3 does not set the COMP_TYPE variable.
+            printf "\n";
+            printf "%s\n" "${activeHelp[@]}"
+            printf "\n"
+            __gy_reprint_commandLine
+            return
+        fi
+
+        # Only print ActiveHelp on the second TAB press
+        if [ $COMP_TYPE -eq 63 ]; then
+            printf "\n"
+            printf "%s\n" "${activeHelp[@]}"
+
+            if ((${#COMPREPLY[*]} == 0)); then
+                # When there are no completion choices from the program, file completion
+                # may kick in if the program has not disabled it; in such a case, we want
+                # to know if any files will match what the user typed, so that we know if
+                # there will be completions presented, so that we know how to handle ActiveHelp.
+                # To find out, we actually trigger the file completion ourselves;
+                # the call to _filedir will fill COMPREPLY if files match.
+                if (((directive & shellCompDirectiveNoFileComp) == 0)); then
+                    __gy_debug "Listing files"
+                    _filedir
+                fi
+            fi
+
+            if ((${#COMPREPLY[*]} != 0)); then
+                # If there are completion choices to be shown, print a delimiter.
+                # Re-printing the command-line will automatically be done
+                # by the shell when it prints the completion choices.
+                printf -- "--"
+            else
+                # When there are no completion choices at all, we need
+                # to re-print the command-line since the shell will
+                # not be doing it itself.
+                __gy_reprint_commandLine
+            fi
+        elif [ $COMP_TYPE -eq 37 ] || [ $COMP_TYPE -eq 42 ]; then
+            # For completion type: menu-complete/menu-complete-backward and insert-completions
+            # the completions are immediately inserted into the command-line, so we first
+            # print the activeHelp message and reprint the command-line since the shell won't.
+            printf "\n"
+            printf "%s\n" "${activeHelp[@]}"
+
+            __gy_reprint_commandLine
+        fi
+    fi
+}
+
+__gy_reprint_commandLine() {
+    # The prompt format is only available from bash 4.4.
+    # We test if it is available before using it.
+    if (x=${PS1@P}) 2> /dev/null; then
+        printf "%s" "${PS1@P}${COMP_LINE[@]}"
+    else
+        # Can't print the prompt.  Just print the
+        # text the user had typed, it is workable enough.
+        printf "%s" "${COMP_LINE[@]}"
+    fi
+}
+
+# Separate activeHelp lines from real completions.
+# Fills the $activeHelp and $completions arrays.
+__gy_extract_activeHelp() {
+    local activeHelpMarker="_activeHelp_ "
+    local endIndex=${#activeHelpMarker}
+
+    while IFS='' read -r comp; do
+        [[ -z $comp ]] && continue
+
+        if [[ ${comp:0:endIndex} == $activeHelpMarker ]]; then
+            comp=${comp:endIndex}
+            __gy_debug "ActiveHelp found: $comp"
+            if [[ -n $comp ]]; then
+                activeHelp+=("$comp")
+            fi
+        else
+            # Not an activeHelp line but a normal completion
+            completions+=("$comp")
+        fi
+    done <<<"${out}"
+}
+
+__gy_handle_completion_types() {
+    __gy_debug "__gy_handle_completion_types: COMP_TYPE is $COMP_TYPE"
+
+    case $COMP_TYPE in
+    37|42)
+        # Type: menu-complete/menu-complete-backward and insert-completions
+        # If the user requested inserting one completion at a time, or all
+        # completions at once on the command-line we must remove the descriptions.
+        # https://github.com/spf13/cobra/issues/1508
+
+        # If there are no completions, we don't need to do anything
+        (( ${#completions[@]} == 0 )) && return 0
+
+        local tab=$'\t'
+
+        # Strip any description and escape the completion to handle special characters
+        IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]%%$tab*}")
+
+        # Only consider the completions that match
+        IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
+
+        # compgen loses the escaping so we need to escape all completions again since they will
+        # all be inserted on the command-line.
+        IFS=$'\n' read -ra COMPREPLY -d '' < <(printf "%q\n" "${COMPREPLY[@]}")
+        ;;
+
+    *)
+        # Type: complete (normal completion)
+        __gy_handle_standard_completion_case
+        ;;
+    esac
+}
+
+__gy_handle_standard_completion_case() {
+    local tab=$'\t'
+
+    # If there are no completions, we don't need to do anything
+    (( ${#completions[@]} == 0 )) && return 0
+
+    # Short circuit to optimize if we don't have descriptions
+    if [[ "${completions[*]}" != *$tab* ]]; then
+        # First, escape the completions to handle special characters
+        IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]}")
+        # Only consider the completions that match what the user typed
+        IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
+
+        # compgen loses the escaping so, if there is only a single completion, we need to
+        # escape it again because it will be inserted on the command-line.  If there are multiple
+        # completions, we don't want to escape them because they will be printed in a list
+        # and we don't want to show escape characters in that list.
+        if (( ${#COMPREPLY[@]} == 1 )); then
+            COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]}")
+        fi
+        return 0
+    fi
+
+    local longest=0
+    local compline
+    # Look for the longest completion so that we can format things nicely
+    while IFS='' read -r compline; do
+        [[ -z $compline ]] && continue
+
+        # Before checking if the completion matches what the user typed,
+        # we need to strip any description and escape the completion to handle special
+        # characters because those escape characters are part of what the user typed.
+        # Don't call "printf" in a sub-shell because it will be much slower
+        # since we are in a loop.
+        printf -v comp "%q" "${compline%%$tab*}" &>/dev/null || comp=$(printf "%q" "${compline%%$tab*}")
+
+        # Only consider the completions that match
+        [[ $comp == "$cur"* ]] || continue
+
+        # The completion matches.  Add it to the list of full completions including
+        # its description.  We don't escape the completion because it may get printed
+        # in a list if there is more than one and we don't want to show escape characters
+        # in that list.
+        COMPREPLY+=("$compline")
+
+        # Strip any description before checking the length, and again, don't escape
+        # the completion because this length is only used when printing the completions
+        # in a list and we don't want to show escape characters in that list.
+        comp=${compline%%$tab*}
+        if ((${#comp}>longest)); then
+            longest=${#comp}
+        fi
+    done < <(printf "%s\n" "${completions[@]}")
+
+    # If there is a single completion left, remove the description text and escape any special characters
+    if ((${#COMPREPLY[*]} == 1)); then
+        __gy_debug "COMPREPLY[0]: ${COMPREPLY[0]}"
+        COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]%%$tab*}")
+        __gy_debug "Removed description from single completion, which is now: ${COMPREPLY[0]}"
+    else
+        # Format the descriptions
+        __gy_format_comp_descriptions $longest
+    fi
+}
+
+__gy_handle_special_char()
+{
+    local comp="$1"
+    local char=$2
+    if [[ "$comp" == *${char}* && "$COMP_WORDBREAKS" == *${char}* ]]; then
+        local word=${comp%"${comp##*${char}}"}
+        local idx=${#COMPREPLY[*]}
+        while ((--idx >= 0)); do
+            COMPREPLY[idx]=${COMPREPLY[idx]#"$word"}
+        done
+    fi
+}
+
+__gy_format_comp_descriptions()
+{
+    local tab=$'\t'
+    local comp desc maxdesclength
+    local longest=$1
+
+    local i ci
+    for ci in ${!COMPREPLY[*]}; do
+        comp=${COMPREPLY[ci]}
+        # Properly format the description string which follows a tab character if there is one
+        if [[ "$comp" == *$tab* ]]; then
+            __gy_debug "Original comp: $comp"
+            desc=${comp#*$tab}
+            comp=${comp%%$tab*}
+
+            # $COLUMNS stores the current shell width.
+            # Remove an extra 4 because we add 2 spaces and 2 parentheses.
+            maxdesclength=$(( COLUMNS - longest - 4 ))
+
+            # Make sure we can fit a description of at least 8 characters
+            # if we are to align the descriptions.
+            if ((maxdesclength > 8)); then
+                # Add the proper number of spaces to align the descriptions
+                for ((i = ${#comp} ; i < longest ; i++)); do
+                    comp+=" "
+                done
+            else
+                # Don't pad the descriptions so we can fit more text after the completion
+                maxdesclength=$(( COLUMNS - ${#comp} - 4 ))
+            fi
+
+            # If there is enough space for any description text,
+            # truncate the descriptions that are too long for the shell width
+            if ((maxdesclength > 0)); then
+                if ((${#desc} > maxdesclength)); then
+                    desc=${desc:0:$(( maxdesclength - 1 ))}
+                    desc+="…"
+                fi
+                comp+="  ($desc)"
+            fi
+            COMPREPLY[ci]=$comp
+            __gy_debug "Final comp: $comp"
+        fi
+    done
+}
+
+__start_gy()
+{
+    local cur prev words cword split
+
+    COMPREPLY=()
+
+    # Call _init_completion from the bash-completion package
+    # to prepare the arguments properly
+    if declare -F _init_completion >/dev/null 2>&1; then
+        _init_completion -n =: || return
+    else
+        __gy_init_completion -n =: || return
+    fi
+
+    __gy_debug
+    __gy_debug "========= starting completion logic =========="
+    __gy_debug "cur is ${cur}, words[*] is ${words[*]}, #words[@] is ${#words[@]}, cword is $cword"
+
+    # The user could have moved the cursor backwards on the command-line.
+    # We need to trigger completion from the $cword location, so we need
+    # to truncate the command-line ($words) up to the $cword location.
+    words=("${words[@]:0:$cword+1}")
+    __gy_debug "Truncated words[*]: ${words[*]},"
+
+    local out directive
+    __gy_get_completion_results
+    __gy_process_completion_results
+}
+
+if [[ $(type -t compopt) = "builtin" ]]; then
+    complete -o default -F __start_gy gy
+else
+    complete -o default -o nospace -F __start_gy gy
+fi
+
+# ex: ts=4 sw=4 et filetype=sh
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cdf3dbd
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,11 @@
+uvicorn
+requests
+python-dotenv
+openai
+fastapi
+pydantic
+langchain
+langchain-community
+sentence-transformers
+torch
+transformers
diff --git a/rerank_test.html b/rerank_test.html
new file mode 100644
index 0000000..23227c9
--- /dev/null
+++ b/rerank_test.html
@@ -0,0 +1,179 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>RAG Re-ranking Test</title>
+    <style>
+        body {
+            font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
+            margin: 0;
+            padding: 20px;
+            background-color: #f7f7f7;
+            color: #333;
+        }
+        .container {
+            max-width: 1200px;
+            margin: 0 auto;
+            background: #fff;
+            padding: 25px;
+            border-radius: 8px;
+            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
+        }
+        h1 {
+            text-align: center;
+            color: #444;
+        }
+        .query-form {
+            display: flex;
+            gap: 10px;
+            margin-bottom: 30px;
+        }
+        #query-input {
+            flex-grow: 1;
+            padding: 10px 15px;
+            border: 1px solid #ccc;
+            border-radius: 4px;
+            font-size: 16px;
+        }
+        #query-button {
+            padding: 10px 20px;
+            border: none;
+            background-color: #007bff;
+            color: white;
+            border-radius: 4px;
+            font-size: 16px;
+            cursor: pointer;
+            transition: background-color 0.3s;
+        }
+        #query-button:hover {
+            background-color: #0056b3;
+        }
+        .results-container {
+            display: flex;
+            gap: 20px;
+            justify-content: space-between;
+        }
+        .results-column {
+            width: 48%;
+        }
+        h2 {
+            color: #555;
+            border-bottom: 2px solid #eee;
+            padding-bottom: 10px;
+        }
+        .result-item {
+            background: #fafafa;
+            border: 1px solid #eee;
+            border-radius: 5px;
+            padding: 15px;
+            margin-bottom: 10px;
+            box-shadow: 0 1px 3px rgba(0,0,0,0.05);
+        }
+        .result-item p {
+            margin: 0 0 10px 0;
+            white-space: pre-wrap; /* Preserve whitespace and newlines */
+        }
+        .result-item .score {
+            font-weight: bold;
+            color: #007bff;
+        }
+        .loader {
+            text-align: center;
+            padding: 20px;
+            font-size: 18px;
+            display: none; /* Hidden by default */
+        }
+    </style>
+</head>
+<body>
+
+    <div class="container">
+        <h1>RAG Re-ranking Visualizer</h1>
+        <div class="query-form">
+            <input type="text" id="query-input" placeholder="Enter your query..." aria-label="Search query">
+            <button id="query-button">Search</button>
+        </div>
+
+        <div class="loader" id="loader">Loading...</div>
+
+        <div class="results-container">
+            <div class="results-column" id="initial-results-col">
+                <h2>Initial Retrieval (Before Re-ranking)</h2>
+                <div id="initial-results"></div>
+            </div>
+            <div class="results-column" id="reranked-results-col">
+                <h2>Re-ranked Results (Top 5)</h2>
+                <div id="reranked-results"></div>
+            </div>
+        </div>
+    </div>
+
+    <script>
+        const queryInput = document.getElementById('query-input');
+        const queryButton = document.getElementById('query-button');
+        const initialResultsDiv = document.getElementById('initial-results');
+        const rerankedResultsDiv = document.getElementById('reranked-results');
+        const loader = document.getElementById('loader');
+        // On click: POST the query to the re-rank test endpoint and render both returned result lists.
+        queryButton.addEventListener('click', async () => {
+            const query = queryInput.value.trim();  // whitespace-only input counts as empty
+            if (!query) {
+                alert('Please enter a query.');
+                return;
+            }
+            // Block further clicks until this request settles so overlapping responses can't both append.
+            queryButton.disabled = true;
+            initialResultsDiv.innerHTML = '';
+            rerankedResultsDiv.innerHTML = '';
+            loader.style.display = 'block';
+            try {
+                const response = await fetch('http://127.0.0.1:8000/test-rerank/', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json',
+                    },
+                    body: JSON.stringify({ query: query, k: 5 }),
+                });
+                if (!response.ok) {
+                    throw new Error(`HTTP error! status: ${response.status}`);
+                }
+
+                const data = await response.json();
+                displayResults(data.initial_results, initialResultsDiv);
+                displayResults(data.reranked_results, rerankedResultsDiv);
+            } catch (error) {
+                console.error('Error fetching data:', error);
+                alert('Failed to fetch results. Check the console for details.');
+            } finally {
+                // Runs on success and failure alike: hide the loader and accept clicks again.
+                loader.style.display = 'none';
+                queryButton.disabled = false;
+            }
+        });
+
+        function displayResults(results, element) {
+            // Render one card per result into `element`, replacing whatever it held before.
+            // results: array of { score, text } objects (may be empty or undefined); element: container node.
+            element.innerHTML = '';
+            if (!results || results.length === 0) {
+                element.innerHTML = '<p>No results found.</p>';
+                return;
+            }
+            results.forEach(item => {
+                const div = document.createElement('div');
+                div.className = 'result-item';
+                // A missing or non-numeric score must not abort rendering of the remaining items.
+                const scoreText = Number.isFinite(item.score) ? item.score.toFixed(4) : 'n/a';
+                const scoreP = document.createElement('p');
+                scoreP.innerHTML = `<span class="score">Score: ${scoreText}</span>`;
+                const textP = document.createElement('p');
+                textP.textContent = item.text;
+                div.append(scoreP, textP);
+                element.appendChild(div);
+            });
+        }
+    </script>
+
+</body>
+</html>