Initial commit of the RAG API application

This commit is contained in:
2025-09-16 16:20:12 +03:00
parent 6b544cb36e
commit b1cf9ca408
9 changed files with 841 additions and 57 deletions

.dockerignore Normal file (+10)

@@ -0,0 +1,10 @@
# Ignore Python cache
__pycache__/
*.pyc
# Ignore Git directory
.git/
.gitignore
# Ignore environment files
.env

.gitignore Normal file (+29)

@@ -0,0 +1,29 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.pyo
*.pyd
# Distribution / packaging
.Python
build/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Environment variables
.env
.venv
# Other
*.log

Dockerfile Normal file (+24)

@@ -0,0 +1,24 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
COPY requirements.txt .
# Install any needed packages specified in requirements.txt
# We use --no-cache-dir to keep the image size down
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
# Copy the rest of the application's code into the container at /app
COPY . .
# Make port 8000 available to the world outside this container
EXPOSE 8000
# Define environment variable to ensure python prints things without buffering
ENV PYTHONUNBUFFERED=1
# Run the application
CMD ["uvicorn", "doc_rag_app:app", "--host", "0.0.0.0", "--port", "8000"]

README.md Normal file (+34)

@@ -0,0 +1,34 @@
# Ghaymah Docs RAG API
This project implements a Retrieval-Augmented Generation (RAG) API using FastAPI to answer questions about Ghaymah Cloud documentation. It features a sophisticated two-stage retrieval process involving an initial vector search followed by a more precise re-ranking step to ensure high-quality answers.
## Key Features
- **FastAPI Backend:** A robust and fast API for serving the RAG pipeline.
- **Two-Stage Retrieval:**
  1. **Initial Search:** Embeds the query with a `sentence-transformers` model and retrieves a broad set of candidate chunks from the remote vector store.
  2. **Re-ranking:** Scores each (query, chunk) pair with a `CrossEncoder` model and keeps only the most relevant candidates (see the sketch after this list).
- **Dockerized:** Comes with a `Dockerfile` for easy, repeatable deployment on any platform that supports containers.
- **Visualization:** Includes a `rerank_test.html` page to visually compare the results before and after the re-ranking step.
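
A minimal sketch of this retrieve-then-re-rank flow, using the same model names the app loads; the query and corpus below are made-up stand-ins for what the remote vector store would hold:

```python
from sentence_transformers import SentenceTransformer, CrossEncoder, util

# Stage 1: embed the query and a small in-memory corpus, then take the broadest matches.
# (The real app sends the query vector to the remote GitPasha /search endpoint instead.)
embedder = SentenceTransformer("sentence-transformers/distiluse-base-multilingual-cased")
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

query = "How do I deploy a container?"  # hypothetical query
corpus = [                               # hypothetical documentation chunks
    "Push your code to Git and connect the repository to the platform.",
    "Billing is calculated per hour of container uptime.",
    "The platform builds your image from the Dockerfile and deploys it.",
]
hits = util.semantic_search(
    embedder.encode(query, convert_to_tensor=True),
    embedder.encode(corpus, convert_to_tensor=True),
    top_k=3,
)[0]
candidates = [corpus[hit["corpus_id"]] for hit in hits]

# Stage 2: score each (query, chunk) pair with the CrossEncoder and keep the best ones.
scores = cross_encoder.predict([(query, chunk) for chunk in candidates])
reranked = sorted(zip(scores, candidates), key=lambda pair: float(pair[0]), reverse=True)
print([chunk for _, chunk in reranked[:2]])
```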
## Getting Started
### Prerequisites
- Docker
- A Git client
### Deployment
This application is designed to be deployed as a Docker container. It can be deployed via a Git-based workflow on a platform like Ghaymah Cloud.
1. **Push to Git:** Push the code to a GitHub or GitLab repository.
2. **Connect Platform:** Connect your cloud platform to the Git repository.
3. **Build and Deploy:** The platform will use the included `Dockerfile` to automatically build and deploy the application.
### Configuration
The application requires the following environment variables to be set in the deployment environment:
- `GITPASHA_HOST`: The URL for the remote vector store (GitPasha).
- `OPENAI_API_KEY`: Your API key for the LLM provider (e.g., OpenAI).
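
With those variables set and the container running, a quick way to exercise the API is to POST to the `/query/` endpoint; the base URL below is a placeholder for wherever the service is deployed:

```python
import requests

BASE_URL = "http://localhost:8000"  # placeholder; use the deployed service address

resp = requests.post(
    f"{BASE_URL}/query/",
    json={"query": "How do I deploy my app on Ghaymah Cloud?", "k": 5},
    timeout=120,
)
resp.raise_for_status()
data = resp.json()

# The endpoint returns the generated answer plus the re-ranked chunks and their scores.
print(data["answer"])
for score, chunk in zip(data.get("scores", []), data.get("context", [])):
    print(f"{score:.4f}  {chunk[:80]}")
```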

bool Normal file (+0)

doc_rag_app.py

@@ -4,7 +4,7 @@ import json
import uvicorn
import requests
from dotenv import load_dotenv
from typing import Optional
from typing import Optional, List
from openai import OpenAI
from fastapi import FastAPI, HTTPException
from fastapi.responses import JSONResponse
@@ -12,13 +12,14 @@ from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder
# Load .env
load_dotenv()
# -----------------------
# -----------------------
# Configuration
# -----------------------
# -----------------------
GITPASHA_HOST = os.getenv(
"GITPASHA_HOST",
"https://app1-f06df021060b.hosted.ghaymah.systems"
@@ -26,9 +27,9 @@ GITPASHA_HOST = os.getenv(
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # used only for final LLM summarization if needed
DOC_FILE = os.getenv("DOC_FILE", "full_ghaymah_docs.txt")
# -----------------------
# -----------------------
# FastAPI + client
# -----------------------
# -----------------------
app = FastAPI(title="Ghaymah Docs RAG API (Restarted)", version="1.0")
app.add_middleware(
@@ -44,33 +45,37 @@ client = None
if OPENAI_API_KEY:
client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://genai.ghaymah.systems")
# -----------------------
# Embedding model (512 dims)
# -----------------------
# -----------------------
# Models (Embedding + Reranking)
# -----------------------
print("Initializing local embedding model (sentence-transformers/distiluse-base-multilingual-cased)...")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased")
print("Embedding model loaded.")
# -----------------------
print("Initializing local CrossEncoder model (ms-marco-MiniLM-L-6-v2)...")
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
print("CrossEncoder model loaded.")
# -----------------------
# Request Models
# -----------------------
# -----------------------
class QueryRequest(BaseModel):
query: str
k: Optional[int] = 10 # allow overriding k
k: Optional[int] = 5 # final number of chunks to use
class IngestRequest(BaseModel):
# kept for future use, in case dynamic filename ingestion is added
filename: Optional[str] = None
# -----------------------
# -----------------------
# Helpers
# -----------------------
def _embed_texts(texts):
# -----------------------
def _embed_texts(texts: List[str]) -> List[List[float]]:
"""Return list of embeddings for given texts."""
return embeddings.embed_documents(texts)
def _embed_query(text):
"""Return single embedding for query (list)."""
def _embed_query(text: str) -> List[float]:
"""Return single embedding for query."""
return embeddings.embed_query(text)
def store_text_chunks_remote(text: str) -> bool:
@@ -114,7 +119,7 @@ def store_text_chunks_remote(text: str) -> bool:
print(f"[store] Error calling remote insert: {e} / Response: {getattr(e, 'response', None)}")
raise HTTPException(status_code=500, detail=f"Failed to insert to remote vector store: {e}")
def search_remote_by_vector(vector, k=10):
def search_remote_by_vector(vector: List[float], k: int = 10):
"""Call remote /search with given vector and return parsed JSON (raw)."""
try:
resp = requests.post(
@@ -129,24 +134,9 @@ def search_remote_by_vector(vector, k=10):
print(f"[search] Error calling remote search: {e}")
raise HTTPException(status_code=500, detail=f"Remote search failed: {e}")
def build_context_from_search_results(search_results, min_score: Optional[float] = None):
"""Given remote search results, optionally filter by min_score and return context text and metadata."""
if not search_results or "results" not in search_results:
return "", []
items = []
for r in search_results["results"]:
score = r.get("score", None)
payload = r.get("payload", {})
text_chunk = payload.get("text_chunk", "")
if min_score is None or (score is not None and score >= min_score):
items.append({"score": score, "text": text_chunk})
context = "\n\n".join([it["text"] for it in items])
return context, items
# -----------------------
# -----------------------
# Startup: optionally auto-ingest file on startup
# -----------------------
# -----------------------
@app.on_event("startup")
def startup_ingest():
"""On startup, attempt to ingest DOC_FILE automatically (non-fatal)."""
@@ -164,9 +154,9 @@ def startup_ingest():
# do not prevent server from starting
print(f"[startup] Ingest error (non-fatal): {e}")
# -----------------------
# -----------------------
# Endpoints
# -----------------------
# -----------------------
@app.post("/ingest-docs/")
async def ingest_docs(req: IngestRequest = None):
"""Read full_ghaymah_docs.txt and store it remotely. Returns success message."""
@@ -181,54 +171,135 @@ async def ingest_docs(req: IngestRequest = None):
if ok:
return JSONResponse(content={"message": f"Successfully ingested '{filename}' into vector store."})
raise HTTPException(status_code=500, detail="Ingestion failed.")
@app.post("/query/")
async def query_docs(request: QueryRequest):
query = request.query
k = request.k or 10
print(f"[query] Received query: {query} (k={k})")
k_final = request.k or 5 # The final number of documents to use
k_initial = 25 # The number of documents to retrieve initially
print(f"[query] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")
# Embed query
# 1. Embed query
qvec = _embed_query(query)
# Remote vector search
search_results = search_remote_by_vector(qvec, k=k)
payloads = [p["text_chunk"] for p in search_results.get("payloads", [])]
# 2. Initial Retrieval from vector store
search_results = search_remote_by_vector(qvec, k=k_initial)
initial_chunks = [p.get("text_chunk", "") for p in search_results.get("payloads", [])]
if not payloads:
if not initial_chunks:
return {"answer": "No relevant chunks found.", "search_results": search_results}
# Deduplicate chunks (keep first occurrence)
# Deduplicate initial chunks before re-ranking
seen = set()
context_chunks = []
for chunk in payloads:
unique_chunks = []
for chunk in initial_chunks:
if chunk not in seen:
context_chunks.append(chunk)
unique_chunks.append(chunk)
seen.add(chunk)
print(f"[query] Retrieved {len(unique_chunks)} unique chunks for re-ranking.")
context = "\n\n".join(context_chunks)
# 3. Re-ranking with CrossEncoder
# Create pairs of (query, chunk) for the model
rerank_pairs = [(query, chunk) for chunk in unique_chunks]
# Predict new relevance scores
rerank_scores = cross_encoder.predict(rerank_pairs)
# Combine chunks with their new scores
reranked_results = list(zip(rerank_scores, unique_chunks))
# Sort by the new score in descending order
reranked_results.sort(key=lambda x: x[0], reverse=True)
# 4. Select top k_final results after re-ranking
top_k_chunks = [chunk for score, chunk in reranked_results[:k_final]]
top_k_scores = [float(score) for score, chunk in reranked_results[:k_final]]
# Use LLM if available
context = "\n\n".join(top_k_chunks)
print(f"[query] Built context with {len(top_k_chunks)} re-ranked chunks.")
# 5. Use LLM if available to generate a final answer
if client:
try:
completion = client.chat.completions.create(
model="DeepSeek-V3-0324",
messages=[
{"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."},
{"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."},
{"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
],
temperature=0.0,
)
answer = completion.choices[0].message.content
return {"answer": answer, "context": context_chunks, "scores": search_results.get("scores", [])}
return {"answer": answer, "context": top_k_chunks, "scores": top_k_scores}
except Exception as e:
print(f"[query] LLM failed: {e}")
return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
# Fallback to returning the context directly
return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}
else:
return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
# If no LLM, return the context as the answer
return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}
@app.post("/test-rerank/")
async def test_rerank(request: QueryRequest):
"""
Endpoint for visualization. Returns initial and re-ranked results.
"""
query = request.query
k_final = request.k or 5
k_initial = 25
print(f"[test-rerank] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")
# 1. Embed query
qvec = _embed_query(query)
# 2. Initial Retrieval
search_results = search_remote_by_vector(qvec, k=k_initial)
initial_payloads = search_results.get("payloads", [])
initial_scores = search_results.get("scores", [])
# Ensure we have the same number of scores and payloads
min_len = min(len(initial_payloads), len(initial_scores))
initial_results = [
{"text": p.get("text_chunk", ""), "score": s}
for p, s in zip(initial_payloads[:min_len], initial_scores[:min_len])
]
# Deduplicate
seen_texts = set()
unique_initial_results = []
for res in initial_results:
if res["text"] not in seen_texts:
unique_initial_results.append(res)
seen_texts.add(res["text"])
unique_chunks = [res["text"] for res in unique_initial_results]
if not unique_chunks:
return {"initial_results": [], "reranked_results": []}
# 3. Re-ranking
rerank_pairs = [(query, chunk) for chunk in unique_chunks]
rerank_scores = cross_encoder.predict(rerank_pairs)
reranked_results_with_scores = [
{"text": chunk, "score": float(score)}
for score, chunk in zip(rerank_scores, unique_chunks)
]
# Sort by new score
reranked_results_with_scores.sort(key=lambda x: x["score"], reverse=True)
return {
"initial_results": unique_initial_results,
"reranked_results": reranked_results_with_scores[:k_final]
}
@app.post("/debug-search/")
async def debug_search(request: QueryRequest):
def debug_search(request: QueryRequest):
"""
Debug endpoint: returns raw search response from remote vector store for the provided query.
Use this to inspect exact 'results' and scores returned remotely.
@@ -250,8 +321,8 @@ async def debug_search(request: QueryRequest):
def read_root():
return {"message": "Ghaymah Docs RAG API. Use /docs for interactive UI."}
# -----------------------
# -----------------------
# Run
# -----------------------
# -----------------------
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8000)

gyC.sh Normal file (+426)

@@ -0,0 +1,426 @@
# bash completion V2 for gy -*- shell-script -*-
__gy_debug()
{
if [[ -n ${BASH_COMP_DEBUG_FILE-} ]]; then
echo "$*" >> "${BASH_COMP_DEBUG_FILE}"
fi
}
# Macs have bash3 for which the bash-completion package doesn't include
# _init_completion. This is a minimal version of that function.
__gy_init_completion()
{
COMPREPLY=()
_get_comp_words_by_ref "$@" cur prev words cword
}
# This function calls the gy program to obtain the completion
# results and the directive. It fills the 'out' and 'directive' vars.
__gy_get_completion_results() {
local requestComp lastParam lastChar args
# Prepare the command to request completions for the program.
# Calling ${words[0]} instead of directly gy allows handling aliases
args=("${words[@]:1}")
requestComp="${words[0]} __complete ${args[*]}"
lastParam=${words[$((${#words[@]}-1))]}
lastChar=${lastParam:$((${#lastParam}-1)):1}
__gy_debug "lastParam ${lastParam}, lastChar ${lastChar}"
if [[ -z ${cur} && ${lastChar} != = ]]; then
# If the last parameter is complete (there is a space following it)
# We add an extra empty parameter so we can indicate this to the go method.
__gy_debug "Adding extra empty parameter"
requestComp="${requestComp} ''"
fi
# When completing a flag with an = (e.g., gy -n=<TAB>)
# bash focuses on the part after the =, so we need to remove
# the flag part from $cur
if [[ ${cur} == -*=* ]]; then
cur="${cur#*=}"
fi
__gy_debug "Calling ${requestComp}"
# Use eval to handle any environment variables and such
out=$(eval "${requestComp}" 2>/dev/null)
# Extract the directive integer at the very end of the output following a colon (:)
directive=${out##*:}
# Remove the directive
out=${out%:*}
if [[ ${directive} == "${out}" ]]; then
# There is no directive specified
directive=0
fi
__gy_debug "The completion directive is: ${directive}"
__gy_debug "The completions are: ${out}"
}
__gy_process_completion_results() {
local shellCompDirectiveError=1
local shellCompDirectiveNoSpace=2
local shellCompDirectiveNoFileComp=4
local shellCompDirectiveFilterFileExt=8
local shellCompDirectiveFilterDirs=16
local shellCompDirectiveKeepOrder=32
if (((directive & shellCompDirectiveError) != 0)); then
# Error code. No completion.
__gy_debug "Received error from custom completion go code"
return
else
if (((directive & shellCompDirectiveNoSpace) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
__gy_debug "Activating no space"
compopt -o nospace
else
__gy_debug "No space directive not supported in this version of bash"
fi
fi
if (((directive & shellCompDirectiveKeepOrder) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
# no sort isn't supported for bash less than < 4.4
if [[ ${BASH_VERSINFO[0]} -lt 4 || ( ${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -lt 4 ) ]]; then
__gy_debug "No sort directive not supported in this version of bash"
else
__gy_debug "Activating keep order"
compopt -o nosort
fi
else
__gy_debug "No sort directive not supported in this version of bash"
fi
fi
if (((directive & shellCompDirectiveNoFileComp) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
__gy_debug "Activating no file completion"
compopt +o default
else
__gy_debug "No file completion directive not supported in this version of bash"
fi
fi
fi
# Separate activeHelp from normal completions
local completions=()
local activeHelp=()
__gy_extract_activeHelp
if (((directive & shellCompDirectiveFilterFileExt) != 0)); then
# File extension filtering
local fullFilter="" filter filteringCmd
# Do not use quotes around the $completions variable or else newline
# characters will be kept.
for filter in ${completions[*]}; do
fullFilter+="$filter|"
done
filteringCmd="_filedir $fullFilter"
__gy_debug "File filtering command: $filteringCmd"
$filteringCmd
elif (((directive & shellCompDirectiveFilterDirs) != 0)); then
# File completion for directories only
local subdir
subdir=${completions[0]}
if [[ -n $subdir ]]; then
__gy_debug "Listing directories in $subdir"
pushd "$subdir" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 || return
else
__gy_debug "Listing directories in ."
_filedir -d
fi
else
__gy_handle_completion_types
fi
__gy_handle_special_char "$cur" :
__gy_handle_special_char "$cur" =
# Print the activeHelp statements before we finish
__gy_handle_activeHelp
}
__gy_handle_activeHelp() {
# Print the activeHelp statements
if ((${#activeHelp[*]} != 0)); then
if [ -z $COMP_TYPE ]; then
# Bash v3 does not set the COMP_TYPE variable.
printf "\n";
printf "%s\n" "${activeHelp[@]}"
printf "\n"
__gy_reprint_commandLine
return
fi
# Only print ActiveHelp on the second TAB press
if [ $COMP_TYPE -eq 63 ]; then
printf "\n"
printf "%s\n" "${activeHelp[@]}"
if ((${#COMPREPLY[*]} == 0)); then
# When there are no completion choices from the program, file completion
# may kick in if the program has not disabled it; in such a case, we want
# to know if any files will match what the user typed, so that we know if
# there will be completions presented, so that we know how to handle ActiveHelp.
# To find out, we actually trigger the file completion ourselves;
# the call to _filedir will fill COMPREPLY if files match.
if (((directive & shellCompDirectiveNoFileComp) == 0)); then
__gy_debug "Listing files"
_filedir
fi
fi
if ((${#COMPREPLY[*]} != 0)); then
# If there are completion choices to be shown, print a delimiter.
# Re-printing the command-line will automatically be done
# by the shell when it prints the completion choices.
printf -- "--"
else
# When there are no completion choices at all, we need
# to re-print the command-line since the shell will
# not be doing it itself.
__gy_reprint_commandLine
fi
elif [ $COMP_TYPE -eq 37 ] || [ $COMP_TYPE -eq 42 ]; then
# For completion type: menu-complete/menu-complete-backward and insert-completions
# the completions are immediately inserted into the command-line, so we first
# print the activeHelp message and reprint the command-line since the shell won't.
printf "\n"
printf "%s\n" "${activeHelp[@]}"
__gy_reprint_commandLine
fi
fi
}
__gy_reprint_commandLine() {
# The prompt format is only available from bash 4.4.
# We test if it is available before using it.
if (x=${PS1@P}) 2> /dev/null; then
printf "%s" "${PS1@P}${COMP_LINE[@]}"
else
# Can't print the prompt. Just print the
# text the user had typed, it is workable enough.
printf "%s" "${COMP_LINE[@]}"
fi
}
# Separate activeHelp lines from real completions.
# Fills the $activeHelp and $completions arrays.
__gy_extract_activeHelp() {
local activeHelpMarker="_activeHelp_ "
local endIndex=${#activeHelpMarker}
while IFS='' read -r comp; do
[[ -z $comp ]] && continue
if [[ ${comp:0:endIndex} == $activeHelpMarker ]]; then
comp=${comp:endIndex}
__gy_debug "ActiveHelp found: $comp"
if [[ -n $comp ]]; then
activeHelp+=("$comp")
fi
else
# Not an activeHelp line but a normal completion
completions+=("$comp")
fi
done <<<"${out}"
}
__gy_handle_completion_types() {
__gy_debug "__gy_handle_completion_types: COMP_TYPE is $COMP_TYPE"
case $COMP_TYPE in
37|42)
# Type: menu-complete/menu-complete-backward and insert-completions
# If the user requested inserting one completion at a time, or all
# completions at once on the command-line we must remove the descriptions.
# https://github.com/spf13/cobra/issues/1508
# If there are no completions, we don't need to do anything
(( ${#completions[@]} == 0 )) && return 0
local tab=$'\t'
# Strip any description and escape the completion to handled special characters
IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]%%$tab*}")
# Only consider the completions that match
IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
# compgen loses the escaping so we need to escape all completions again since they will
# all be inserted on the command-line.
IFS=$'\n' read -ra COMPREPLY -d '' < <(printf "%q\n" "${COMPREPLY[@]}")
;;
*)
# Type: complete (normal completion)
__gy_handle_standard_completion_case
;;
esac
}
__gy_handle_standard_completion_case() {
local tab=$'\t'
# If there are no completions, we don't need to do anything
(( ${#completions[@]} == 0 )) && return 0
# Short circuit to optimize if we don't have descriptions
if [[ "${completions[*]}" != *$tab* ]]; then
# First, escape the completions to handle special characters
IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]}")
# Only consider the completions that match what the user typed
IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
# compgen loses the escaping so, if there is only a single completion, we need to
# escape it again because it will be inserted on the command-line. If there are multiple
# completions, we don't want to escape them because they will be printed in a list
# and we don't want to show escape characters in that list.
if (( ${#COMPREPLY[@]} == 1 )); then
COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]}")
fi
return 0
fi
local longest=0
local compline
# Look for the longest completion so that we can format things nicely
while IFS='' read -r compline; do
[[ -z $compline ]] && continue
# Before checking if the completion matches what the user typed,
# we need to strip any description and escape the completion to handle special
# characters because those escape characters are part of what the user typed.
# Don't call "printf" in a sub-shell because it will be much slower
# since we are in a loop.
printf -v comp "%q" "${compline%%$tab*}" &>/dev/null || comp=$(printf "%q" "${compline%%$tab*}")
# Only consider the completions that match
[[ $comp == "$cur"* ]] || continue
# The completions matches. Add it to the list of full completions including
# its description. We don't escape the completion because it may get printed
# in a list if there are more than one and we don't want show escape characters
# in that list.
COMPREPLY+=("$compline")
# Strip any description before checking the length, and again, don't escape
# the completion because this length is only used when printing the completions
# in a list and we don't want show escape characters in that list.
comp=${compline%%$tab*}
if ((${#comp}>longest)); then
longest=${#comp}
fi
done < <(printf "%s\n" "${completions[@]}")
# If there is a single completion left, remove the description text and escape any special characters
if ((${#COMPREPLY[*]} == 1)); then
__gy_debug "COMPREPLY[0]: ${COMPREPLY[0]}"
COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]%%$tab*}")
__gy_debug "Removed description from single completion, which is now: ${COMPREPLY[0]}"
else
# Format the descriptions
__gy_format_comp_descriptions $longest
fi
}
__gy_handle_special_char()
{
local comp="$1"
local char=$2
if [[ "$comp" == *${char}* && "$COMP_WORDBREAKS" == *${char}* ]]; then
local word=${comp%"${comp##*${char}}"}
local idx=${#COMPREPLY[*]}
while ((--idx >= 0)); do
COMPREPLY[idx]=${COMPREPLY[idx]#"$word"}
done
fi
}
__gy_format_comp_descriptions()
{
local tab=$'\t'
local comp desc maxdesclength
local longest=$1
local i ci
for ci in ${!COMPREPLY[*]}; do
comp=${COMPREPLY[ci]}
# Properly format the description string which follows a tab character if there is one
if [[ "$comp" == *$tab* ]]; then
__gy_debug "Original comp: $comp"
desc=${comp#*$tab}
comp=${comp%%$tab*}
# $COLUMNS stores the current shell width.
# Remove an extra 4 because we add 2 spaces and 2 parentheses.
maxdesclength=$(( COLUMNS - longest - 4 ))
# Make sure we can fit a description of at least 8 characters
# if we are to align the descriptions.
if ((maxdesclength > 8)); then
# Add the proper number of spaces to align the descriptions
for ((i = ${#comp} ; i < longest ; i++)); do
comp+=" "
done
else
# Don't pad the descriptions so we can fit more text after the completion
maxdesclength=$(( COLUMNS - ${#comp} - 4 ))
fi
# If there is enough space for any description text,
# truncate the descriptions that are too long for the shell width
if ((maxdesclength > 0)); then
if ((${#desc} > maxdesclength)); then
desc=${desc:0:$(( maxdesclength - 1 ))}
desc+="…"
fi
comp+=" ($desc)"
fi
COMPREPLY[ci]=$comp
__gy_debug "Final comp: $comp"
fi
done
}
__start_gy()
{
local cur prev words cword split
COMPREPLY=()
# Call _init_completion from the bash-completion package
# to prepare the arguments properly
if declare -F _init_completion >/dev/null 2>&1; then
_init_completion -n =: || return
else
__gy_init_completion -n =: || return
fi
__gy_debug
__gy_debug "========= starting completion logic =========="
__gy_debug "cur is ${cur}, words[*] is ${words[*]}, #words[@] is ${#words[@]}, cword is $cword"
# The user could have moved the cursor backwards on the command-line.
# We need to trigger completion from the $cword location, so we need
# to truncate the command-line ($words) up to the $cword location.
words=("${words[@]:0:$cword+1}")
__gy_debug "Truncated words[*]: ${words[*]},"
local out directive
__gy_get_completion_results
__gy_process_completion_results
}
if [[ $(type -t compopt) = "builtin" ]]; then
complete -o default -F __start_gy gy
else
complete -o default -o nospace -F __start_gy gy
fi
# ex: ts=4 sw=4 et filetype=sh

requirements.txt Normal file (+11)

@@ -0,0 +1,11 @@
uvicorn
requests
python-dotenv
openai
fastapi
pydantic
langchain
langchain-community
sentence-transformers
torch
transformers

rerank_test.html Normal file (+179)

@@ -0,0 +1,179 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RAG Re-ranking Test</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: #f7f7f7;
color: #333;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: #fff;
padding: 25px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
text-align: center;
color: #444;
}
.query-form {
display: flex;
gap: 10px;
margin-bottom: 30px;
}
#query-input {
flex-grow: 1;
padding: 10px 15px;
border: 1px solid #ccc;
border-radius: 4px;
font-size: 16px;
}
#query-button {
padding: 10px 20px;
border: none;
background-color: #007bff;
color: white;
border-radius: 4px;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
#query-button:hover {
background-color: #0056b3;
}
.results-container {
display: flex;
gap: 20px;
justify-content: space-between;
}
.results-column {
width: 48%;
}
h2 {
color: #555;
border-bottom: 2px solid #eee;
padding-bottom: 10px;
}
.result-item {
background: #fafafa;
border: 1px solid #eee;
border-radius: 5px;
padding: 15px;
margin-bottom: 10px;
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
}
.result-item p {
margin: 0 0 10px 0;
white-space: pre-wrap; /* Preserve whitespace and newlines */
}
.result-item .score {
font-weight: bold;
color: #007bff;
}
.loader {
text-align: center;
padding: 20px;
font-size: 18px;
display: none; /* Hidden by default */
}
</style>
</head>
<body>
<div class="container">
<h1>RAG Re-ranking Visualizer</h1>
<div class="query-form">
<input type="text" id="query-input" placeholder="Enter your query...">
<button id="query-button">Search</button>
</div>
<div class="loader" id="loader">Loading...</div>
<div class="results-container">
<div class="results-column" id="initial-results-col">
<h2>Initial Retrieval (Before Re-ranking)</h2>
<div id="initial-results"></div>
</div>
<div class="results-column" id="reranked-results-col">
<h2>Re-ranked Results (Top 5)</h2>
<div id="reranked-results"></div>
</div>
</div>
</div>
<script>
const queryInput = document.getElementById('query-input');
const queryButton = document.getElementById('query-button');
const initialResultsDiv = document.getElementById('initial-results');
const rerankedResultsDiv = document.getElementById('reranked-results');
const loader = document.getElementById('loader');
queryButton.addEventListener('click', async () => {
const query = queryInput.value;
if (!query) {
alert('Please enter a query.');
return;
}
initialResultsDiv.innerHTML = '';
rerankedResultsDiv.innerHTML = '';
loader.style.display = 'block';
try {
const response = await fetch('http://127.0.0.1:8000/test-rerank/', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ query: query, k: 5 }),
});
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
displayResults(data.initial_results, initialResultsDiv);
displayResults(data.reranked_results, rerankedResultsDiv);
} catch (error) {
console.error('Error fetching data:', error);
alert('Failed to fetch results. Check the console for details.');
} finally {
loader.style.display = 'none';
}
});
function displayResults(results, element) {
if (!results || results.length === 0) {
element.innerHTML = '<p>No results found.</p>';
return;
}
results.forEach(item => {
const div = document.createElement('div');
div.className = 'result-item';
const scoreP = document.createElement('p');
scoreP.innerHTML = `<span class="score">Score: ${item.score.toFixed(4)}</span>`;
const textP = document.createElement('p');
textP.textContent = item.text;
div.appendChild(scoreP);
div.appendChild(textP);
element.appendChild(div);
});
}
</script>
</body>
</html>