Initial commit of the RAG API application

Commit b1cf9ca408 (parent 6b544cb36e)
2025-09-16 16:20:12 +03:00
9 changed files with 841 additions and 57 deletions

.dockerignore Normal file (10 lines)

@@ -0,0 +1,10 @@
# Ignore Python cache
__pycache__/
*.pyc
# Ignore Git directory
.git/
.gitignore
# Ignore environment files
.env

.gitignore Normal file (29 lines)

@@ -0,0 +1,29 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.pyc
*.pyo
*.pyd
# Distribution / packaging
.Python
build/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Environment variables
.env
.venv
# Other
*.log

Dockerfile Normal file (24 lines)

@@ -0,0 +1,24 @@
# Use an official Python runtime as a parent image
FROM python:3.11-slim
# Set the working directory in the container
WORKDIR /app
# Copy the requirements file into the container at /app
COPY requirements.txt .
# Install any needed packages specified in requirements.txt
# We use --no-cache-dir to keep the image size down
RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
# Copy the rest of the application's code into the container at /app
COPY . .
# Make port 8000 available to the world outside this container
EXPOSE 8000
# Define environment variable to ensure python prints things without buffering
ENV PYTHONUNBUFFERED=1
# Run the application
CMD ["uvicorn", "doc_rag_app:app", "--host", "0.0.0.0", "--port", "8000"]

README.md Normal file (34 lines)

@@ -0,0 +1,34 @@
# Ghaymah Docs RAG API
This project implements a Retrieval-Augmented Generation (RAG) API using FastAPI to answer questions about Ghaymah Cloud documentation. It uses a two-stage retrieval pipeline: a broad initial vector search followed by a more precise re-ranking step, which keeps answer quality high.
## Key Features
- **FastAPI Backend:** A robust and fast API for serving the RAG pipeline.
- **Two-Stage Retrieval** (sketched after this list):
1. **Initial Search:** Uses `sentence-transformers` to perform a broad vector search and retrieve an initial set of candidate documents.
2. **Re-ranking:** Employs a `CrossEncoder` model to re-rank the initial candidates for greater relevance and precision.
- **Dockerized:** Comes with a `Dockerfile` for easy, repeatable deployment on any platform that supports containers.
- **Visualization:** Includes a `rerank_test.html` page to visually compare the results before and after the re-ranking step.
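
A minimal sketch of the two-stage flow in isolation, assuming the same public models the app loads and a toy in-memory corpus standing in for the remote vector store (the corpus strings are invented for illustration):

```python
# Two-stage retrieval sketch: bi-encoder search, then cross-encoder re-ranking.
from sentence_transformers import SentenceTransformer, CrossEncoder, util

corpus = [
    "Ghaymah Cloud builds and deploys apps from a Dockerfile in your Git repository.",
    "Environment variables are set in the deployment environment.",
    "The weather in Cairo is sunny today.",
]
query = "How do I deploy an app on Ghaymah Cloud?"

# Stage 1: broad vector search with the bi-encoder
bi_encoder = SentenceTransformer("sentence-transformers/distiluse-base-multilingual-cased")
corpus_emb = bi_encoder.encode(corpus, convert_to_tensor=True)
query_emb = bi_encoder.encode(query, convert_to_tensor=True)
hits = util.semantic_search(query_emb, corpus_emb, top_k=3)[0]
candidates = [corpus[h["corpus_id"]] for h in hits]

# Stage 2: the cross-encoder scores each (query, candidate) pair jointly
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
scores = cross_encoder.predict([(query, c) for c in candidates])

# Sort candidates by the new, more precise scores
reranked = sorted(zip(scores, candidates), key=lambda x: x[0], reverse=True)
print(reranked[0][1])  # best-ranked chunk
```

The cross-encoder reads the query and a candidate together, which is slower but markedly more precise than comparing two independently computed embeddings; that is why it is applied only to the small candidate set from stage 1.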
## Getting Started
### Prerequisites
- Docker
- A Git client
### Deployment
The application is designed to run as a Docker container and can be deployed via a Git-based workflow on a platform such as Ghaymah Cloud.
1. **Push to Git:** Push the code to a GitHub or GitLab repository.
2. **Connect Platform:** Connect your cloud platform to the Git repository.
3. **Build and Deploy:** The platform will use the included `Dockerfile` to automatically build and deploy the application.
### Configuration
The application requires the following environment variables to be set in the deployment environment:
- `GITPASHA_HOST`: The URL for the remote vector store (GitPasha).
- `OPENAI_API_KEY`: Your API key for the LLM provider (e.g., OpenAI).
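
With those variables set and the container running, the API can be exercised from any HTTP client. A hedged example against the `/query/` endpoint, using a placeholder base URL:

```python
# Example request to the /query/ endpoint; the base URL is a placeholder
# for wherever the container is reachable.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/query/",
    json={"query": "How do I deploy an app on Ghaymah Cloud?", "k": 5},
    timeout=120,  # the first call can be slow while the models load
)
resp.raise_for_status()
data = resp.json()
print(data["answer"])  # LLM answer, or the raw context if no LLM key is configured
print(data["scores"])  # cross-encoder relevance scores for the chunks used
```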

bool Normal file (0 lines)

doc_rag_app.py (modified)

@@ -4,7 +4,7 @@ import json
 import uvicorn
 import requests
 from dotenv import load_dotenv
-from typing import Optional
+from typing import Optional, List
 from openai import OpenAI
 from fastapi import FastAPI, HTTPException
 from fastapi.responses import JSONResponse
@@ -12,6 +12,7 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
+from sentence_transformers import CrossEncoder

 # Load .env
 load_dotenv()
@@ -45,18 +46,22 @@ if OPENAI_API_KEY:
     client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://genai.ghaymah.systems")

 # -----------------------
-# Embedding model (512 dims)
+# Models (Embedding + Reranking)
 # -----------------------
 print("Initializing local embedding model (sentence-transformers/distiluse-base-multilingual-cased)...")
 embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased")
 print("Embedding model loaded.")
+
+print("Initializing local CrossEncoder model (ms-marco-MiniLM-L-6-v2)...")
+cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
+print("CrossEncoder model loaded.")

 # -----------------------
 # Request Models
 # -----------------------
 class QueryRequest(BaseModel):
     query: str
-    k: Optional[int] = 10  # allow overriding k
+    k: Optional[int] = 5  # final number of chunks to use

 class IngestRequest(BaseModel):
     # keep for future if want dynamic file name content ingestion
@@ -65,12 +70,12 @@ class IngestRequest(BaseModel):
 # -----------------------
 # Helpers
 # -----------------------
-def _embed_texts(texts):
+def _embed_texts(texts: List[str]) -> List[List[float]]:
     """Return list of embeddings for given texts."""
     return embeddings.embed_documents(texts)

-def _embed_query(text):
-    """Return single embedding for query (list)."""
+def _embed_query(text: str) -> List[float]:
+    """Return single embedding for query."""
     return embeddings.embed_query(text)

 def store_text_chunks_remote(text: str) -> bool:
@@ -114,7 +119,7 @@ def store_text_chunks_remote(text: str) -> bool:
         print(f"[store] Error calling remote insert: {e} / Response: {getattr(e, 'response', None)}")
         raise HTTPException(status_code=500, detail=f"Failed to insert to remote vector store: {e}")

-def search_remote_by_vector(vector, k=10):
+def search_remote_by_vector(vector: List[float], k: int = 10):
     """Call remote /search with given vector and return parsed JSON (raw)."""
     try:
         resp = requests.post(
@@ -129,21 +134,6 @@ def search_remote_by_vector(vector, k=10):
         print(f"[search] Error calling remote search: {e}")
         raise HTTPException(status_code=500, detail=f"Remote search failed: {e}")

-def build_context_from_search_results(search_results, min_score: Optional[float] = None):
-    """Given remote search results, optionally filter by min_score and return context text and metadata."""
-    if not search_results or "results" not in search_results:
-        return "", []
-    items = []
-    for r in search_results["results"]:
-        score = r.get("score", None)
-        payload = r.get("payload", {})
-        text_chunk = payload.get("text_chunk", "")
-        if min_score is None or (score is not None and score >= min_score):
-            items.append({"score": score, "text": text_chunk})
-    context = "\n\n".join([it["text"] for it in items])
-    return context, items
-
 # -----------------------
 # Startup: optionally auto-ingest file on startup
 # -----------------------
@@ -181,33 +171,55 @@ async def ingest_docs(req: IngestRequest = None):
     if ok:
         return JSONResponse(content={"message": f"Successfully ingested '{filename}' into vector store."})
     raise HTTPException(status_code=500, detail="Ingestion failed.")

 @app.post("/query/")
 async def query_docs(request: QueryRequest):
     query = request.query
-    k = request.k or 10
-    print(f"[query] Received query: {query} (k={k})")
+    k_final = request.k or 5  # The final number of documents to use
+    k_initial = 25  # The number of documents to retrieve initially
+    print(f"[query] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")

-    # Embed query
+    # 1. Embed query
     qvec = _embed_query(query)

-    # Remote vector search
-    search_results = search_remote_by_vector(qvec, k=k)
-    payloads = [p["text_chunk"] for p in search_results.get("payloads", [])]
-    if not payloads:
+    # 2. Initial Retrieval from vector store
+    search_results = search_remote_by_vector(qvec, k=k_initial)
+    initial_chunks = [p.get("text_chunk", "") for p in search_results.get("payloads", [])]
+    if not initial_chunks:
         return {"answer": "No relevant chunks found.", "search_results": search_results}

-    # Deduplicate chunks (keep first occurrence)
+    # Deduplicate initial chunks before re-ranking
     seen = set()
-    context_chunks = []
-    for chunk in payloads:
+    unique_chunks = []
+    for chunk in initial_chunks:
         if chunk not in seen:
-            context_chunks.append(chunk)
+            unique_chunks.append(chunk)
             seen.add(chunk)
-    context = "\n\n".join(context_chunks)
+    print(f"[query] Retrieved {len(unique_chunks)} unique chunks for re-ranking.")

-    # Use LLM if available
+    # 3. Re-ranking with CrossEncoder
+    # Create pairs of (query, chunk) for the model
+    rerank_pairs = [(query, chunk) for chunk in unique_chunks]
+    # Predict new relevance scores
+    rerank_scores = cross_encoder.predict(rerank_pairs)
+    # Combine chunks with their new scores
+    reranked_results = list(zip(rerank_scores, unique_chunks))
+    # Sort by the new score in descending order
+    reranked_results.sort(key=lambda x: x[0], reverse=True)
+
+    # 4. Select top k_final results after re-ranking
+    top_k_chunks = [chunk for score, chunk in reranked_results[:k_final]]
+    top_k_scores = [float(score) for score, chunk in reranked_results[:k_final]]
+    context = "\n\n".join(top_k_chunks)
+    print(f"[query] Built context with {len(top_k_chunks)} re-ranked chunks.")
+
+    # 5. Use LLM if available to generate a final answer
     if client:
         try:
             completion = client.chat.completions.create(
@@ -219,16 +231,75 @@ async def query_docs(request: QueryRequest):
                 temperature=0.0,
             )
             answer = completion.choices[0].message.content
-            return {"answer": answer, "context": context_chunks, "scores": search_results.get("scores", [])}
+            return {"answer": answer, "context": top_k_chunks, "scores": top_k_scores}
         except Exception as e:
             print(f"[query] LLM failed: {e}")
-            return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
+            # Fallback to returning the context directly
+            return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}
     else:
-        return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])}
+        # If no LLM, return the context as the answer
+        return {"answer": context, "context": top_k_chunks, "scores": top_k_scores}

+@app.post("/test-rerank/")
+async def test_rerank(request: QueryRequest):
+    """
+    Endpoint for visualization. Returns initial and re-ranked results.
+    """
+    query = request.query
+    k_final = request.k or 5
+    k_initial = 25
+    print(f"[test-rerank] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})")
+
+    # 1. Embed query
+    qvec = _embed_query(query)
+
+    # 2. Initial Retrieval
+    search_results = search_remote_by_vector(qvec, k=k_initial)
+    initial_payloads = search_results.get("payloads", [])
+    initial_scores = search_results.get("scores", [])
+
+    # Ensure we have the same number of scores and payloads
+    min_len = min(len(initial_payloads), len(initial_scores))
+    initial_results = [
+        {"text": p.get("text_chunk", ""), "score": s}
+        for p, s in zip(initial_payloads[:min_len], initial_scores[:min_len])
+    ]
+
+    # Deduplicate
+    seen_texts = set()
+    unique_initial_results = []
+    for res in initial_results:
+        if res["text"] not in seen_texts:
+            unique_initial_results.append(res)
+            seen_texts.add(res["text"])
+    unique_chunks = [res["text"] for res in unique_initial_results]
+    if not unique_chunks:
+        return {"initial_results": [], "reranked_results": []}
+
+    # 3. Re-ranking
+    rerank_pairs = [(query, chunk) for chunk in unique_chunks]
+    rerank_scores = cross_encoder.predict(rerank_pairs)
+    reranked_results_with_scores = [
+        {"text": chunk, "score": float(score)}
+        for score, chunk in zip(rerank_scores, unique_chunks)
+    ]
+
+    # Sort by new score
+    reranked_results_with_scores.sort(key=lambda x: x["score"], reverse=True)
+
+    return {
+        "initial_results": unique_initial_results,
+        "reranked_results": reranked_results_with_scores[:k_final]
+    }

 @app.post("/debug-search/")
-async def debug_search(request: QueryRequest):
+def debug_search(request: QueryRequest):
     """
     Debug endpoint: returns raw search response from remote vector store for the provided query.
     Use this to inspect exact 'results' and scores returned remotely.
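
A note on the `scores` field returned by `/query/` and `/test-rerank/`: `cross-encoder/ms-marco-MiniLM-L-6-v2` emits raw logits rather than probabilities, so the values are only meaningful relative to one another. If a bounded 0-1 score is wanted for display, a sigmoid can be applied client-side; a small sketch (this post-processing is an assumption, not something the app does):

```python
# Hypothetical client-side post-processing: squash raw cross-encoder logits
# into (0, 1) with a sigmoid. The API itself returns the raw scores.
import math

def to_probability(logit: float) -> float:
    """Map a raw relevance logit to a pseudo-probability via the sigmoid."""
    return 1.0 / (1.0 + math.exp(-logit))

raw_scores = [7.23, 1.05, -4.60]  # example logits from cross_encoder.predict()
print([round(to_probability(s), 3) for s in raw_scores])  # -> [0.999, 0.741, 0.01]
```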

gyC.sh Normal file (426 lines)

@@ -0,0 +1,426 @@
# bash completion V2 for gy -*- shell-script -*-
__gy_debug()
{
if [[ -n ${BASH_COMP_DEBUG_FILE-} ]]; then
echo "$*" >> "${BASH_COMP_DEBUG_FILE}"
fi
}
# Macs have bash3 for which the bash-completion package doesn't include
# _init_completion. This is a minimal version of that function.
__gy_init_completion()
{
COMPREPLY=()
_get_comp_words_by_ref "$@" cur prev words cword
}
# This function calls the gy program to obtain the completion
# results and the directive. It fills the 'out' and 'directive' vars.
__gy_get_completion_results() {
local requestComp lastParam lastChar args
# Prepare the command to request completions for the program.
# Calling ${words[0]} instead of directly gy allows handling aliases
args=("${words[@]:1}")
requestComp="${words[0]} __complete ${args[*]}"
lastParam=${words[$((${#words[@]}-1))]}
lastChar=${lastParam:$((${#lastParam}-1)):1}
__gy_debug "lastParam ${lastParam}, lastChar ${lastChar}"
if [[ -z ${cur} && ${lastChar} != = ]]; then
# If the last parameter is complete (there is a space following it)
# We add an extra empty parameter so we can indicate this to the go method.
__gy_debug "Adding extra empty parameter"
requestComp="${requestComp} ''"
fi
# When completing a flag with an = (e.g., gy -n=<TAB>)
# bash focuses on the part after the =, so we need to remove
# the flag part from $cur
if [[ ${cur} == -*=* ]]; then
cur="${cur#*=}"
fi
__gy_debug "Calling ${requestComp}"
# Use eval to handle any environment variables and such
out=$(eval "${requestComp}" 2>/dev/null)
# Extract the directive integer at the very end of the output following a colon (:)
directive=${out##*:}
# Remove the directive
out=${out%:*}
if [[ ${directive} == "${out}" ]]; then
# There is no directive specified
directive=0
fi
__gy_debug "The completion directive is: ${directive}"
__gy_debug "The completions are: ${out}"
}
__gy_process_completion_results() {
local shellCompDirectiveError=1
local shellCompDirectiveNoSpace=2
local shellCompDirectiveNoFileComp=4
local shellCompDirectiveFilterFileExt=8
local shellCompDirectiveFilterDirs=16
local shellCompDirectiveKeepOrder=32
if (((directive & shellCompDirectiveError) != 0)); then
# Error code. No completion.
__gy_debug "Received error from custom completion go code"
return
else
if (((directive & shellCompDirectiveNoSpace) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
__gy_debug "Activating no space"
compopt -o nospace
else
__gy_debug "No space directive not supported in this version of bash"
fi
fi
if (((directive & shellCompDirectiveKeepOrder) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
# nosort isn't supported for bash versions older than 4.4
if [[ ${BASH_VERSINFO[0]} -lt 4 || ( ${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -lt 4 ) ]]; then
__gy_debug "No sort directive not supported in this version of bash"
else
__gy_debug "Activating keep order"
compopt -o nosort
fi
else
__gy_debug "No sort directive not supported in this version of bash"
fi
fi
if (((directive & shellCompDirectiveNoFileComp) != 0)); then
if [[ $(type -t compopt) == builtin ]]; then
__gy_debug "Activating no file completion"
compopt +o default
else
__gy_debug "No file completion directive not supported in this version of bash"
fi
fi
fi
# Separate activeHelp from normal completions
local completions=()
local activeHelp=()
__gy_extract_activeHelp
if (((directive & shellCompDirectiveFilterFileExt) != 0)); then
# File extension filtering
local fullFilter="" filter filteringCmd
# Do not use quotes around the $completions variable or else newline
# characters will be kept.
for filter in ${completions[*]}; do
fullFilter+="$filter|"
done
filteringCmd="_filedir $fullFilter"
__gy_debug "File filtering command: $filteringCmd"
$filteringCmd
elif (((directive & shellCompDirectiveFilterDirs) != 0)); then
# File completion for directories only
local subdir
subdir=${completions[0]}
if [[ -n $subdir ]]; then
__gy_debug "Listing directories in $subdir"
pushd "$subdir" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 || return
else
__gy_debug "Listing directories in ."
_filedir -d
fi
else
__gy_handle_completion_types
fi
__gy_handle_special_char "$cur" :
__gy_handle_special_char "$cur" =
# Print the activeHelp statements before we finish
__gy_handle_activeHelp
}
__gy_handle_activeHelp() {
# Print the activeHelp statements
if ((${#activeHelp[*]} != 0)); then
if [ -z $COMP_TYPE ]; then
# Bash v3 does not set the COMP_TYPE variable.
printf "\n";
printf "%s\n" "${activeHelp[@]}"
printf "\n"
__gy_reprint_commandLine
return
fi
# Only print ActiveHelp on the second TAB press
if [ $COMP_TYPE -eq 63 ]; then
printf "\n"
printf "%s\n" "${activeHelp[@]}"
if ((${#COMPREPLY[*]} == 0)); then
# When there are no completion choices from the program, file completion
# may kick in if the program has not disabled it; in such a case, we want
# to know if any files will match what the user typed, so that we know if
# there will be completions presented, so that we know how to handle ActiveHelp.
# To find out, we actually trigger the file completion ourselves;
# the call to _filedir will fill COMPREPLY if files match.
if (((directive & shellCompDirectiveNoFileComp) == 0)); then
__gy_debug "Listing files"
_filedir
fi
fi
if ((${#COMPREPLY[*]} != 0)); then
# If there are completion choices to be shown, print a delimiter.
# Re-printing the command-line will automatically be done
# by the shell when it prints the completion choices.
printf -- "--"
else
# When there are no completion choices at all, we need
# to re-print the command-line since the shell will
# not be doing it itself.
__gy_reprint_commandLine
fi
elif [ $COMP_TYPE -eq 37 ] || [ $COMP_TYPE -eq 42 ]; then
# For completion type: menu-complete/menu-complete-backward and insert-completions
# the completions are immediately inserted into the command-line, so we first
# print the activeHelp message and reprint the command-line since the shell won't.
printf "\n"
printf "%s\n" "${activeHelp[@]}"
__gy_reprint_commandLine
fi
fi
}
__gy_reprint_commandLine() {
# The prompt format is only available from bash 4.4.
# We test if it is available before using it.
if (x=${PS1@P}) 2> /dev/null; then
printf "%s" "${PS1@P}${COMP_LINE[@]}"
else
# Can't print the prompt. Just print the
# text the user had typed, it is workable enough.
printf "%s" "${COMP_LINE[@]}"
fi
}
# Separate activeHelp lines from real completions.
# Fills the $activeHelp and $completions arrays.
__gy_extract_activeHelp() {
local activeHelpMarker="_activeHelp_ "
local endIndex=${#activeHelpMarker}
while IFS='' read -r comp; do
[[ -z $comp ]] && continue
if [[ ${comp:0:endIndex} == $activeHelpMarker ]]; then
comp=${comp:endIndex}
__gy_debug "ActiveHelp found: $comp"
if [[ -n $comp ]]; then
activeHelp+=("$comp")
fi
else
# Not an activeHelp line but a normal completion
completions+=("$comp")
fi
done <<<"${out}"
}
__gy_handle_completion_types() {
__gy_debug "__gy_handle_completion_types: COMP_TYPE is $COMP_TYPE"
case $COMP_TYPE in
37|42)
# Type: menu-complete/menu-complete-backward and insert-completions
# If the user requested inserting one completion at a time, or all
# completions at once on the command-line we must remove the descriptions.
# https://github.com/spf13/cobra/issues/1508
# If there are no completions, we don't need to do anything
(( ${#completions[@]} == 0 )) && return 0
local tab=$'\t'
# Strip any description and escape the completion to handle special characters
IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]%%$tab*}")
# Only consider the completions that match
IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
# compgen loses the escaping, so we need to escape all completions again since they will
# all be inserted on the command-line.
IFS=$'\n' read -ra COMPREPLY -d '' < <(printf "%q\n" "${COMPREPLY[@]}")
;;
*)
# Type: complete (normal completion)
__gy_handle_standard_completion_case
;;
esac
}
__gy_handle_standard_completion_case() {
local tab=$'\t'
# If there are no completions, we don't need to do anything
(( ${#completions[@]} == 0 )) && return 0
# Short circuit to optimize if we don't have descriptions
if [[ "${completions[*]}" != *$tab* ]]; then
# First, escape the completions to handle special characters
IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]}")
# Only consider the completions that match what the user typed
IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}")
# compgen loses the escaping so, if there is only a single completion, we need to
# escape it again because it will be inserted on the command-line. If there are multiple
# completions, we don't want to escape them because they will be printed in a list
# and we don't want to show escape characters in that list.
if (( ${#COMPREPLY[@]} == 1 )); then
COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]}")
fi
return 0
fi
local longest=0
local compline
# Look for the longest completion so that we can format things nicely
while IFS='' read -r compline; do
[[ -z $compline ]] && continue
# Before checking if the completion matches what the user typed,
# we need to strip any description and escape the completion to handle special
# characters because those escape characters are part of what the user typed.
# Don't call "printf" in a sub-shell because it will be much slower
# since we are in a loop.
printf -v comp "%q" "${compline%%$tab*}" &>/dev/null || comp=$(printf "%q" "${compline%%$tab*}")
# Only consider the completions that match
[[ $comp == "$cur"* ]] || continue
# The completion matches. Add it to the list of full completions including
# its description. We don't escape the completion because it may get printed
# in a list if there are more than one and we don't want to show escape characters
# in that list.
COMPREPLY+=("$compline")
# Strip any description before checking the length, and again, don't escape
# the completion because this length is only used when printing the completions
# in a list and we don't want to show escape characters in that list.
comp=${compline%%$tab*}
if ((${#comp}>longest)); then
longest=${#comp}
fi
done < <(printf "%s\n" "${completions[@]}")
# If there is a single completion left, remove the description text and escape any special characters
if ((${#COMPREPLY[*]} == 1)); then
__gy_debug "COMPREPLY[0]: ${COMPREPLY[0]}"
COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]%%$tab*}")
__gy_debug "Removed description from single completion, which is now: ${COMPREPLY[0]}"
else
# Format the descriptions
__gy_format_comp_descriptions $longest
fi
}
__gy_handle_special_char()
{
local comp="$1"
local char=$2
if [[ "$comp" == *${char}* && "$COMP_WORDBREAKS" == *${char}* ]]; then
local word=${comp%"${comp##*${char}}"}
local idx=${#COMPREPLY[*]}
while ((--idx >= 0)); do
COMPREPLY[idx]=${COMPREPLY[idx]#"$word"}
done
fi
}
__gy_format_comp_descriptions()
{
local tab=$'\t'
local comp desc maxdesclength
local longest=$1
local i ci
for ci in ${!COMPREPLY[*]}; do
comp=${COMPREPLY[ci]}
# Properly format the description string which follows a tab character if there is one
if [[ "$comp" == *$tab* ]]; then
__gy_debug "Original comp: $comp"
desc=${comp#*$tab}
comp=${comp%%$tab*}
# $COLUMNS stores the current shell width.
# Remove an extra 4 because we add 2 spaces and 2 parentheses.
maxdesclength=$(( COLUMNS - longest - 4 ))
# Make sure we can fit a description of at least 8 characters
# if we are to align the descriptions.
if ((maxdesclength > 8)); then
# Add the proper number of spaces to align the descriptions
for ((i = ${#comp} ; i < longest ; i++)); do
comp+=" "
done
else
# Don't pad the descriptions so we can fit more text after the completion
maxdesclength=$(( COLUMNS - ${#comp} - 4 ))
fi
# If there is enough space for any description text,
# truncate the descriptions that are too long for the shell width
if ((maxdesclength > 0)); then
if ((${#desc} > maxdesclength)); then
desc=${desc:0:$(( maxdesclength - 1 ))}
desc+="…"
fi
comp+=" ($desc)"
fi
COMPREPLY[ci]=$comp
__gy_debug "Final comp: $comp"
fi
done
}
__start_gy()
{
local cur prev words cword split
COMPREPLY=()
# Call _init_completion from the bash-completion package
# to prepare the arguments properly
if declare -F _init_completion >/dev/null 2>&1; then
_init_completion -n =: || return
else
__gy_init_completion -n =: || return
fi
__gy_debug
__gy_debug "========= starting completion logic =========="
__gy_debug "cur is ${cur}, words[*] is ${words[*]}, #words[@] is ${#words[@]}, cword is $cword"
# The user could have moved the cursor backwards on the command-line.
# We need to trigger completion from the $cword location, so we need
# to truncate the command-line ($words) up to the $cword location.
words=("${words[@]:0:$cword+1}")
__gy_debug "Truncated words[*]: ${words[*]},"
local out directive
__gy_get_completion_results
__gy_process_completion_results
}
if [[ $(type -t compopt) = "builtin" ]]; then
complete -o default -F __start_gy gy
else
complete -o default -o nospace -F __start_gy gy
fi
# ex: ts=4 sw=4 et filetype=sh

requirements.txt Normal file (11 lines)

@@ -0,0 +1,11 @@
uvicorn
requests
python-dotenv
openai
fastapi
pydantic
langchain
langchain-community
sentence-transformers
torch
transformers

rerank_test.html Normal file (179 lines)

@@ -0,0 +1,179 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RAG Re-ranking Test</title>
<style>
body {
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
margin: 0;
padding: 20px;
background-color: #f7f7f7;
color: #333;
}
.container {
max-width: 1200px;
margin: 0 auto;
background: #fff;
padding: 25px;
border-radius: 8px;
box-shadow: 0 2px 10px rgba(0,0,0,0.1);
}
h1 {
text-align: center;
color: #444;
}
.query-form {
display: flex;
gap: 10px;
margin-bottom: 30px;
}
#query-input {
flex-grow: 1;
padding: 10px 15px;
border: 1px solid #ccc;
border-radius: 4px;
font-size: 16px;
}
#query-button {
padding: 10px 20px;
border: none;
background-color: #007bff;
color: white;
border-radius: 4px;
font-size: 16px;
cursor: pointer;
transition: background-color 0.3s;
}
#query-button:hover {
background-color: #0056b3;
}
.results-container {
display: flex;
gap: 20px;
justify-content: space-between;
}
.results-column {
width: 48%;
}
h2 {
color: #555;
border-bottom: 2px solid #eee;
padding-bottom: 10px;
}
.result-item {
background: #fafafa;
border: 1px solid #eee;
border-radius: 5px;
padding: 15px;
margin-bottom: 10px;
box-shadow: 0 1px 3px rgba(0,0,0,0.05);
}
.result-item p {
margin: 0 0 10px 0;
white-space: pre-wrap; /* Preserve whitespace and newlines */
}
.result-item .score {
font-weight: bold;
color: #007bff;
}
.loader {
text-align: center;
padding: 20px;
font-size: 18px;
display: none; /* Hidden by default */
}
</style>
</head>
<body>
<div class="container">
<h1>RAG Re-ranking Visualizer</h1>
<div class="query-form">
<input type="text" id="query-input" placeholder="Enter your query...">
<button id="query-button">Search</button>
</div>
<div class="loader" id="loader">Loading...</div>
<div class="results-container">
<div class="results-column" id="initial-results-col">
<h2>Initial Retrieval (Before Re-ranking)</h2>
<div id="initial-results"></div>
</div>
<div class="results-column" id="reranked-results-col">
<h2>Re-ranked Results (Top 5)</h2>
<div id="reranked-results"></div>
</div>
</div>
</div>
<script>
const queryInput = document.getElementById('query-input');
const queryButton = document.getElementById('query-button');
const initialResultsDiv = document.getElementById('initial-results');
const rerankedResultsDiv = document.getElementById('reranked-results');
const loader = document.getElementById('loader');
queryButton.addEventListener('click', async () => {
const query = queryInput.value;
if (!query) {
alert('Please enter a query.');
return;
}
initialResultsDiv.innerHTML = '';
rerankedResultsDiv.innerHTML = '';
loader.style.display = 'block';
try {
const response = await fetch('http://127.0.0.1:8000/test-rerank/', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify({ query: query, k: 5 }),
});
if (!response.ok) {
throw new Error(`HTTP error! status: ${response.status}`);
}
const data = await response.json();
displayResults(data.initial_results, initialResultsDiv);
displayResults(data.reranked_results, rerankedResultsDiv);
} catch (error) {
console.error('Error fetching data:', error);
alert('Failed to fetch results. Check the console for details.');
} finally {
loader.style.display = 'none';
}
});
function displayResults(results, element) {
if (!results || results.length === 0) {
element.innerHTML = '<p>No results found.</p>';
return;
}
results.forEach(item => {
const div = document.createElement('div');
div.className = 'result-item';
const scoreP = document.createElement('p');
scoreP.innerHTML = `<span class="score">Score: ${item.score.toFixed(4)}</span>`;
const textP = document.createElement('p');
textP.textContent = item.text;
div.appendChild(scoreP);
div.appendChild(textP);
element.appendChild(div);
});
}
</script>
</body>
</html>