diff --git a/.dockerignore b/.dockerignore
new file mode 100644
index 0000000..766faad
--- /dev/null
+++ b/.dockerignore
@@ -0,0 +1,10 @@
+# Ignore Python cache
+__pycache__/
+*.pyc
+
+# Ignore Git directory
+.git/
+.gitignore
+
+# Ignore environment files
+.env
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..0e98b28
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.pyc
+*.pyo
+*.pyd
+
+# Distribution / packaging
+.Python
+build/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Environment variables
+.env
+.venv
+
+# Other
+*.log
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c850b3f
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,24 @@
+# Use an official Python runtime as a parent image
+FROM python:3.11-slim
+
+# Set the working directory in the container
+WORKDIR /app
+
+# Copy the requirements file into the container at /app
+COPY requirements.txt .
+
+# Install any needed packages specified in requirements.txt
+# We use --no-cache-dir to keep the image size down
+RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt
+
+# Copy the rest of the application's code into the container at /app
+COPY . .
+
+# Make port 8000 available to the world outside this container
+EXPOSE 8000
+
+# Define environment variable to ensure python prints things without buffering
+ENV PYTHONUNBUFFERED=1
+
+# Run the application
+CMD ["uvicorn", "doc_rag_app:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..acddb02
--- /dev/null
+++ b/README.md
@@ -0,0 +1,34 @@
+# Ghaymah Docs RAG API
+
+This project implements a Retrieval-Augmented Generation (RAG) API using FastAPI to answer questions about Ghaymah Cloud documentation. It features a two-stage retrieval process: a broad initial vector search followed by a more precise re-ranking step, which together yield higher-quality answers.
+
+## Key Features
+
+- **FastAPI Backend:** A robust and fast API for serving the RAG pipeline.
+- **Two-Stage Retrieval:**
+  1. **Initial Search:** Embeds the query with `sentence-transformers` and performs a broad vector search against the remote vector store to retrieve an initial set of candidate documents.
+  2. **Re-ranking:** Employs a `CrossEncoder` model to re-rank the initial candidates for greater relevance and precision.
+- **Dockerized:** Comes with a `Dockerfile` for easy, repeatable deployment on any platform that supports containers.
+- **Visualization:** Includes a `rerank_test.html` page (backed by the `/test-rerank/` endpoint) to visually compare the results before and after the re-ranking step.
+
+## Getting Started
+
+### Prerequisites
+
+- Docker
+- A Git client
+
+### Deployment
+
+This application is designed to be deployed as a Docker container. It can be deployed via a Git-based workflow on a platform like Ghaymah Cloud.
+
+1. **Push to Git:** Push the code to a GitHub or GitLab repository.
+2. **Connect Platform:** Connect your cloud platform to the Git repository.
+3. **Build and Deploy:** The platform will use the included `Dockerfile` to automatically build and deploy the application.
+
+### Configuration
+
+The application requires the following environment variables to be set in the deployment environment:
+
+- `GITPASHA_HOST`: The URL for the remote vector store (GitPasha).
+- `OPENAI_API_KEY`: Your API key for the OpenAI-compatible LLM endpoint (the app defaults to the Ghaymah GenAI service).
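To make the README concrete, here is a minimal client sketch for exercising the two endpoints this PR serves. It is an illustration, not part of the diff: the base URL assumes a local run of the container, the sample query is made up, and it presumes the docs were already ingested on startup.

```python
# Hypothetical client for the RAG API; BASE_URL is an assumption (local container).
import requests

BASE_URL = "http://localhost:8000"
QUESTION = "How do I deploy a container on Ghaymah Cloud?"

# /query/ returns a generated answer plus the re-ranked context chunks and scores.
resp = requests.post(f"{BASE_URL}/query/",
                     json={"query": QUESTION, "k": 5},
                     timeout=120)
resp.raise_for_status()
body = resp.json()
print(body["answer"])

# /test-rerank/ returns both result lists so the two retrieval stages can be compared.
resp = requests.post(f"{BASE_URL}/test-rerank/",
                     json={"query": QUESTION, "k": 5},
                     timeout=120)
resp.raise_for_status()
for item in resp.json()["reranked_results"]:
    print(f"{item['score']:.3f}  {item['text'][:80]}")
```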
diff --git a/bool b/bool new file mode 100644 index 0000000..e69de29 diff --git a/doc_rag_app.py b/doc_rag_app.py index ded5e52..a4cc85c 100644 --- a/doc_rag_app.py +++ b/doc_rag_app.py @@ -4,7 +4,7 @@ import json import uvicorn import requests from dotenv import load_dotenv -from typing import Optional +from typing import Optional, List from openai import OpenAI from fastapi import FastAPI, HTTPException from fastapi.responses import JSONResponse @@ -12,13 +12,14 @@ from fastapi.middleware.cors import CORSMiddleware from pydantic import BaseModel from langchain.text_splitter import RecursiveCharacterTextSplitter from langchain_community.embeddings import HuggingFaceEmbeddings +from sentence_transformers import CrossEncoder # Load .env load_dotenv() -# ----------------------- +# ----------------------- # Configuration -# ----------------------- +# ----------------------- GITPASHA_HOST = os.getenv( "GITPASHA_HOST", "https://app1-f06df021060b.hosted.ghaymah.systems" @@ -26,9 +27,9 @@ GITPASHA_HOST = os.getenv( OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # used only for final LLM summarization if needed DOC_FILE = os.getenv("DOC_FILE", "full_ghaymah_docs.txt") -# ----------------------- +# ----------------------- # FastAPI + client -# ----------------------- +# ----------------------- app = FastAPI(title="Ghaymah Docs RAG API (Restarted)", version="1.0") app.add_middleware( @@ -44,33 +45,37 @@ client = None if OPENAI_API_KEY: client = OpenAI(api_key=OPENAI_API_KEY, base_url="https://genai.ghaymah.systems") -# ----------------------- -# Embedding model (512 dims) -# ----------------------- +# ----------------------- +# Models (Embedding + Reranking) +# ----------------------- print("Initializing local embedding model (sentence-transformers/distiluse-base-multilingual-cased)...") embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/distiluse-base-multilingual-cased") print("Embedding model loaded.") -# ----------------------- +print("Initializing local CrossEncoder model (ms-marco-MiniLM-L-6-v2)...") +cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2') +print("CrossEncoder model loaded.") + +# ----------------------- # Request Models -# ----------------------- +# ----------------------- class QueryRequest(BaseModel): query: str - k: Optional[int] = 10 # allow overriding k + k: Optional[int] = 5 # final number of chunks to use class IngestRequest(BaseModel): # keep for future if want dynamic file name content ingestion filename: Optional[str] = None -# ----------------------- +# ----------------------- # Helpers -# ----------------------- -def _embed_texts(texts): +# ----------------------- +def _embed_texts(texts: List[str]) -> List[List[float]]: """Return list of embeddings for given texts.""" return embeddings.embed_documents(texts) -def _embed_query(text): - """Return single embedding for query (list).""" +def _embed_query(text: str) -> List[float]: + """Return single embedding for query.""" return embeddings.embed_query(text) def store_text_chunks_remote(text: str) -> bool: @@ -114,7 +119,7 @@ def store_text_chunks_remote(text: str) -> bool: print(f"[store] Error calling remote insert: {e} / Response: {getattr(e, 'response', None)}") raise HTTPException(status_code=500, detail=f"Failed to insert to remote vector store: {e}") -def search_remote_by_vector(vector, k=10): +def search_remote_by_vector(vector: List[float], k: int = 10): """Call remote /search with given vector and return parsed JSON (raw).""" try: resp = requests.post( @@ -129,24 +134,9 @@ def 
search_remote_by_vector(vector, k=10): print(f"[search] Error calling remote search: {e}") raise HTTPException(status_code=500, detail=f"Remote search failed: {e}") -def build_context_from_search_results(search_results, min_score: Optional[float] = None): - """Given remote search results, optionally filter by min_score and return context text and metadata.""" - if not search_results or "results" not in search_results: - return "", [] - - items = [] - for r in search_results["results"]: - score = r.get("score", None) - payload = r.get("payload", {}) - text_chunk = payload.get("text_chunk", "") - if min_score is None or (score is not None and score >= min_score): - items.append({"score": score, "text": text_chunk}) - context = "\n\n".join([it["text"] for it in items]) - return context, items - -# ----------------------- +# ----------------------- # Startup: optionally auto-ingest file on startup -# ----------------------- +# ----------------------- @app.on_event("startup") def startup_ingest(): """On startup, attempt to ingest DOC_FILE automatically (non-fatal).""" @@ -164,9 +154,9 @@ def startup_ingest(): # do not prevent server from starting print(f"[startup] Ingest error (non-fatal): {e}") -# ----------------------- +# ----------------------- # Endpoints -# ----------------------- +# ----------------------- @app.post("/ingest-docs/") async def ingest_docs(req: IngestRequest = None): """Read full_ghaymah_docs.txt and store it remotely. Returns success message.""" @@ -181,54 +171,135 @@ async def ingest_docs(req: IngestRequest = None): if ok: return JSONResponse(content={"message": f"Successfully ingested '{filename}' into vector store."}) raise HTTPException(status_code=500, detail="Ingestion failed.") + @app.post("/query/") async def query_docs(request: QueryRequest): query = request.query - k = request.k or 10 - print(f"[query] Received query: {query} (k={k})") + k_final = request.k or 5 # The final number of documents to use + k_initial = 25 # The number of documents to retrieve initially + print(f"[query] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})") - # Embed query + # 1. Embed query qvec = _embed_query(query) - # Remote vector search - search_results = search_remote_by_vector(qvec, k=k) - payloads = [p["text_chunk"] for p in search_results.get("payloads", [])] + # 2. Initial Retrieval from vector store + search_results = search_remote_by_vector(qvec, k=k_initial) + initial_chunks = [p.get("text_chunk", "") for p in search_results.get("payloads", [])] - if not payloads: + if not initial_chunks: return {"answer": "No relevant chunks found.", "search_results": search_results} - - # Deduplicate chunks (keep first occurrence) + + # Deduplicate initial chunks before re-ranking seen = set() - context_chunks = [] - for chunk in payloads: + unique_chunks = [] + for chunk in initial_chunks: if chunk not in seen: - context_chunks.append(chunk) + unique_chunks.append(chunk) seen.add(chunk) + + print(f"[query] Retrieved {len(unique_chunks)} unique chunks for re-ranking.") - context = "\n\n".join(context_chunks) + # 3. Re-ranking with CrossEncoder + # Create pairs of (query, chunk) for the model + rerank_pairs = [(query, chunk) for chunk in unique_chunks] + + # Predict new relevance scores + rerank_scores = cross_encoder.predict(rerank_pairs) + + # Combine chunks with their new scores + reranked_results = list(zip(rerank_scores, unique_chunks)) + + # Sort by the new score in descending order + reranked_results.sort(key=lambda x: x[0], reverse=True) + + # 4. 
Select top k_final results after re-ranking + top_k_chunks = [chunk for score, chunk in reranked_results[:k_final]] + top_k_scores = [float(score) for score, chunk in reranked_results[:k_final]] - # Use LLM if available + context = "\n\n".join(top_k_chunks) + print(f"[query] Built context with {len(top_k_chunks)} re-ranked chunks.") + + # 5. Use LLM if available to generate a final answer if client: try: completion = client.chat.completions.create( model="DeepSeek-V3-0324", messages=[ - {"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."}, + {"role": "system", "content": "You are a helpful assistant for Ghaymah Cloud. Answer the question using the context provided."}, {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"} ], temperature=0.0, ) answer = completion.choices[0].message.content - return {"answer": answer, "context": context_chunks, "scores": search_results.get("scores", [])} + return {"answer": answer, "context": top_k_chunks, "scores": top_k_scores} except Exception as e: print(f"[query] LLM failed: {e}") - return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])} + # Fallback to returning the context directly + return {"answer": context, "context": top_k_chunks, "scores": top_k_scores} else: - return {"answer": context, "context": context_chunks, "scores": search_results.get("scores", [])} + # If no LLM, return the context as the answer + return {"answer": context, "context": top_k_chunks, "scores": top_k_scores} + +@app.post("/test-rerank/") +async def test_rerank(request: QueryRequest): + """ + Endpoint for visualization. Returns initial and re-ranked results. + """ + query = request.query + k_final = request.k or 5 + k_initial = 25 + print(f"[test-rerank] Received query: '{query}' (k_initial={k_initial}, k_final={k_final})") + + # 1. Embed query + qvec = _embed_query(query) + + # 2. Initial Retrieval + search_results = search_remote_by_vector(qvec, k=k_initial) + + initial_payloads = search_results.get("payloads", []) + initial_scores = search_results.get("scores", []) + + # Ensure we have the same number of scores and payloads + min_len = min(len(initial_payloads), len(initial_scores)) + + initial_results = [ + {"text": p.get("text_chunk", ""), "score": s} + for p, s in zip(initial_payloads[:min_len], initial_scores[:min_len]) + ] + + # Deduplicate + seen_texts = set() + unique_initial_results = [] + for res in initial_results: + if res["text"] not in seen_texts: + unique_initial_results.append(res) + seen_texts.add(res["text"]) + + unique_chunks = [res["text"] for res in unique_initial_results] + + if not unique_chunks: + return {"initial_results": [], "reranked_results": []} + + # 3. Re-ranking + rerank_pairs = [(query, chunk) for chunk in unique_chunks] + rerank_scores = cross_encoder.predict(rerank_pairs) + + reranked_results_with_scores = [ + {"text": chunk, "score": float(score)} + for score, chunk in zip(rerank_scores, unique_chunks) + ] + + # Sort by new score + reranked_results_with_scores.sort(key=lambda x: x["score"], reverse=True) + + return { + "initial_results": unique_initial_results, + "reranked_results": reranked_results_with_scores[:k_final] + } @app.post("/debug-search/") -async def debug_search(request: QueryRequest): +def debug_search(request: QueryRequest): """ Debug endpoint: returns raw search response from remote vector store for the provided query. Use this to inspect exact 'results' and scores returned remotely. 
@@ -250,8 +321,8 @@ async def debug_search(request: QueryRequest): def read_root(): return {"message": "Ghaymah Docs RAG API. Use /docs for interactive UI."} -# ----------------------- +# ----------------------- # Run -# ----------------------- +# ----------------------- if __name__ == "__main__": uvicorn.run(app, host="0.0.0.0", port=8000) diff --git a/gyC.sh b/gyC.sh new file mode 100644 index 0000000..1f01ab8 --- /dev/null +++ b/gyC.sh @@ -0,0 +1,426 @@ +# bash completion V2 for gy -*- shell-script -*- + +__gy_debug() +{ + if [[ -n ${BASH_COMP_DEBUG_FILE-} ]]; then + echo "$*" >> "${BASH_COMP_DEBUG_FILE}" + fi +} + +# Macs have bash3 for which the bash-completion package doesn't include +# _init_completion. This is a minimal version of that function. +__gy_init_completion() +{ + COMPREPLY=() + _get_comp_words_by_ref "$@" cur prev words cword +} + +# This function calls the gy program to obtain the completion +# results and the directive. It fills the 'out' and 'directive' vars. +__gy_get_completion_results() { + local requestComp lastParam lastChar args + + # Prepare the command to request completions for the program. + # Calling ${words[0]} instead of directly gy allows handling aliases + args=("${words[@]:1}") + requestComp="${words[0]} __complete ${args[*]}" + + lastParam=${words[$((${#words[@]}-1))]} + lastChar=${lastParam:$((${#lastParam}-1)):1} + __gy_debug "lastParam ${lastParam}, lastChar ${lastChar}" + + if [[ -z ${cur} && ${lastChar} != = ]]; then + # If the last parameter is complete (there is a space following it) + # We add an extra empty parameter so we can indicate this to the go method. + __gy_debug "Adding extra empty parameter" + requestComp="${requestComp} ''" + fi + + # When completing a flag with an = (e.g., gy -n=) + # bash focuses on the part after the =, so we need to remove + # the flag part from $cur + if [[ ${cur} == -*=* ]]; then + cur="${cur#*=}" + fi + + __gy_debug "Calling ${requestComp}" + # Use eval to handle any environment variables and such + out=$(eval "${requestComp}" 2>/dev/null) + + # Extract the directive integer at the very end of the output following a colon (:) + directive=${out##*:} + # Remove the directive + out=${out%:*} + if [[ ${directive} == "${out}" ]]; then + # There is not directive specified + directive=0 + fi + __gy_debug "The completion directive is: ${directive}" + __gy_debug "The completions are: ${out}" +} + +__gy_process_completion_results() { + local shellCompDirectiveError=1 + local shellCompDirectiveNoSpace=2 + local shellCompDirectiveNoFileComp=4 + local shellCompDirectiveFilterFileExt=8 + local shellCompDirectiveFilterDirs=16 + local shellCompDirectiveKeepOrder=32 + + if (((directive & shellCompDirectiveError) != 0)); then + # Error code. No completion. 
+ __gy_debug "Received error from custom completion go code" + return + else + if (((directive & shellCompDirectiveNoSpace) != 0)); then + if [[ $(type -t compopt) == builtin ]]; then + __gy_debug "Activating no space" + compopt -o nospace + else + __gy_debug "No space directive not supported in this version of bash" + fi + fi + if (((directive & shellCompDirectiveKeepOrder) != 0)); then + if [[ $(type -t compopt) == builtin ]]; then + # no sort isn't supported for bash less than < 4.4 + if [[ ${BASH_VERSINFO[0]} -lt 4 || ( ${BASH_VERSINFO[0]} -eq 4 && ${BASH_VERSINFO[1]} -lt 4 ) ]]; then + __gy_debug "No sort directive not supported in this version of bash" + else + __gy_debug "Activating keep order" + compopt -o nosort + fi + else + __gy_debug "No sort directive not supported in this version of bash" + fi + fi + if (((directive & shellCompDirectiveNoFileComp) != 0)); then + if [[ $(type -t compopt) == builtin ]]; then + __gy_debug "Activating no file completion" + compopt +o default + else + __gy_debug "No file completion directive not supported in this version of bash" + fi + fi + fi + + # Separate activeHelp from normal completions + local completions=() + local activeHelp=() + __gy_extract_activeHelp + + if (((directive & shellCompDirectiveFilterFileExt) != 0)); then + # File extension filtering + local fullFilter="" filter filteringCmd + + # Do not use quotes around the $completions variable or else newline + # characters will be kept. + for filter in ${completions[*]}; do + fullFilter+="$filter|" + done + + filteringCmd="_filedir $fullFilter" + __gy_debug "File filtering command: $filteringCmd" + $filteringCmd + elif (((directive & shellCompDirectiveFilterDirs) != 0)); then + # File completion for directories only + + local subdir + subdir=${completions[0]} + if [[ -n $subdir ]]; then + __gy_debug "Listing directories in $subdir" + pushd "$subdir" >/dev/null 2>&1 && _filedir -d && popd >/dev/null 2>&1 || return + else + __gy_debug "Listing directories in ." + _filedir -d + fi + else + __gy_handle_completion_types + fi + + __gy_handle_special_char "$cur" : + __gy_handle_special_char "$cur" = + + # Print the activeHelp statements before we finish + __gy_handle_activeHelp +} + +__gy_handle_activeHelp() { + # Print the activeHelp statements + if ((${#activeHelp[*]} != 0)); then + if [ -z $COMP_TYPE ]; then + # Bash v3 does not set the COMP_TYPE variable. + printf "\n"; + printf "%s\n" "${activeHelp[@]}" + printf "\n" + __gy_reprint_commandLine + return + fi + + # Only print ActiveHelp on the second TAB press + if [ $COMP_TYPE -eq 63 ]; then + printf "\n" + printf "%s\n" "${activeHelp[@]}" + + if ((${#COMPREPLY[*]} == 0)); then + # When there are no completion choices from the program, file completion + # may kick in if the program has not disabled it; in such a case, we want + # to know if any files will match what the user typed, so that we know if + # there will be completions presented, so that we know how to handle ActiveHelp. + # To find out, we actually trigger the file completion ourselves; + # the call to _filedir will fill COMPREPLY if files match. + if (((directive & shellCompDirectiveNoFileComp) == 0)); then + __gy_debug "Listing files" + _filedir + fi + fi + + if ((${#COMPREPLY[*]} != 0)); then + # If there are completion choices to be shown, print a delimiter. + # Re-printing the command-line will automatically be done + # by the shell when it prints the completion choices. 
+ printf -- "--" + else + # When there are no completion choices at all, we need + # to re-print the command-line since the shell will + # not be doing it itself. + __gy_reprint_commandLine + fi + elif [ $COMP_TYPE -eq 37 ] || [ $COMP_TYPE -eq 42 ]; then + # For completion type: menu-complete/menu-complete-backward and insert-completions + # the completions are immediately inserted into the command-line, so we first + # print the activeHelp message and reprint the command-line since the shell won't. + printf "\n" + printf "%s\n" "${activeHelp[@]}" + + __gy_reprint_commandLine + fi + fi +} + +__gy_reprint_commandLine() { + # The prompt format is only available from bash 4.4. + # We test if it is available before using it. + if (x=${PS1@P}) 2> /dev/null; then + printf "%s" "${PS1@P}${COMP_LINE[@]}" + else + # Can't print the prompt. Just print the + # text the user had typed, it is workable enough. + printf "%s" "${COMP_LINE[@]}" + fi +} + +# Separate activeHelp lines from real completions. +# Fills the $activeHelp and $completions arrays. +__gy_extract_activeHelp() { + local activeHelpMarker="_activeHelp_ " + local endIndex=${#activeHelpMarker} + + while IFS='' read -r comp; do + [[ -z $comp ]] && continue + + if [[ ${comp:0:endIndex} == $activeHelpMarker ]]; then + comp=${comp:endIndex} + __gy_debug "ActiveHelp found: $comp" + if [[ -n $comp ]]; then + activeHelp+=("$comp") + fi + else + # Not an activeHelp line but a normal completion + completions+=("$comp") + fi + done <<<"${out}" +} + +__gy_handle_completion_types() { + __gy_debug "__gy_handle_completion_types: COMP_TYPE is $COMP_TYPE" + + case $COMP_TYPE in + 37|42) + # Type: menu-complete/menu-complete-backward and insert-completions + # If the user requested inserting one completion at a time, or all + # completions at once on the command-line we must remove the descriptions. + # https://github.com/spf13/cobra/issues/1508 + + # If there are no completions, we don't need to do anything + (( ${#completions[@]} == 0 )) && return 0 + + local tab=$'\t' + + # Strip any description and escape the completion to handled special characters + IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]%%$tab*}") + + # Only consider the completions that match + IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}") + + # compgen looses the escaping so we need to escape all completions again since they will + # all be inserted on the command-line. + IFS=$'\n' read -ra COMPREPLY -d '' < <(printf "%q\n" "${COMPREPLY[@]}") + ;; + + *) + # Type: complete (normal completion) + __gy_handle_standard_completion_case + ;; + esac +} + +__gy_handle_standard_completion_case() { + local tab=$'\t' + + # If there are no completions, we don't need to do anything + (( ${#completions[@]} == 0 )) && return 0 + + # Short circuit to optimize if we don't have descriptions + if [[ "${completions[*]}" != *$tab* ]]; then + # First, escape the completions to handle special characters + IFS=$'\n' read -ra completions -d '' < <(printf "%q\n" "${completions[@]}") + # Only consider the completions that match what the user typed + IFS=$'\n' read -ra COMPREPLY -d '' < <(IFS=$'\n'; compgen -W "${completions[*]}" -- "${cur}") + + # compgen looses the escaping so, if there is only a single completion, we need to + # escape it again because it will be inserted on the command-line. 
If there are multiple + # completions, we don't want to escape them because they will be printed in a list + # and we don't want to show escape characters in that list. + if (( ${#COMPREPLY[@]} == 1 )); then + COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]}") + fi + return 0 + fi + + local longest=0 + local compline + # Look for the longest completion so that we can format things nicely + while IFS='' read -r compline; do + [[ -z $compline ]] && continue + + # Before checking if the completion matches what the user typed, + # we need to strip any description and escape the completion to handle special + # characters because those escape characters are part of what the user typed. + # Don't call "printf" in a sub-shell because it will be much slower + # since we are in a loop. + printf -v comp "%q" "${compline%%$tab*}" &>/dev/null || comp=$(printf "%q" "${compline%%$tab*}") + + # Only consider the completions that match + [[ $comp == "$cur"* ]] || continue + + # The completions matches. Add it to the list of full completions including + # its description. We don't escape the completion because it may get printed + # in a list if there are more than one and we don't want show escape characters + # in that list. + COMPREPLY+=("$compline") + + # Strip any description before checking the length, and again, don't escape + # the completion because this length is only used when printing the completions + # in a list and we don't want show escape characters in that list. + comp=${compline%%$tab*} + if ((${#comp}>longest)); then + longest=${#comp} + fi + done < <(printf "%s\n" "${completions[@]}") + + # If there is a single completion left, remove the description text and escape any special characters + if ((${#COMPREPLY[*]} == 1)); then + __gy_debug "COMPREPLY[0]: ${COMPREPLY[0]}" + COMPREPLY[0]=$(printf "%q" "${COMPREPLY[0]%%$tab*}") + __gy_debug "Removed description from single completion, which is now: ${COMPREPLY[0]}" + else + # Format the descriptions + __gy_format_comp_descriptions $longest + fi +} + +__gy_handle_special_char() +{ + local comp="$1" + local char=$2 + if [[ "$comp" == *${char}* && "$COMP_WORDBREAKS" == *${char}* ]]; then + local word=${comp%"${comp##*${char}}"} + local idx=${#COMPREPLY[*]} + while ((--idx >= 0)); do + COMPREPLY[idx]=${COMPREPLY[idx]#"$word"} + done + fi +} + +__gy_format_comp_descriptions() +{ + local tab=$'\t' + local comp desc maxdesclength + local longest=$1 + + local i ci + for ci in ${!COMPREPLY[*]}; do + comp=${COMPREPLY[ci]} + # Properly format the description string which follows a tab character if there is one + if [[ "$comp" == *$tab* ]]; then + __gy_debug "Original comp: $comp" + desc=${comp#*$tab} + comp=${comp%%$tab*} + + # $COLUMNS stores the current shell width. + # Remove an extra 4 because we add 2 spaces and 2 parentheses. + maxdesclength=$(( COLUMNS - longest - 4 )) + + # Make sure we can fit a description of at least 8 characters + # if we are to align the descriptions. 
+ if ((maxdesclength > 8)); then + # Add the proper number of spaces to align the descriptions + for ((i = ${#comp} ; i < longest ; i++)); do + comp+=" " + done + else + # Don't pad the descriptions so we can fit more text after the completion + maxdesclength=$(( COLUMNS - ${#comp} - 4 )) + fi + + # If there is enough space for any description text, + # truncate the descriptions that are too long for the shell width + if ((maxdesclength > 0)); then + if ((${#desc} > maxdesclength)); then + desc=${desc:0:$(( maxdesclength - 1 ))} + desc+="…" + fi + comp+=" ($desc)" + fi + COMPREPLY[ci]=$comp + __gy_debug "Final comp: $comp" + fi + done +} + +__start_gy() +{ + local cur prev words cword split + + COMPREPLY=() + + # Call _init_completion from the bash-completion package + # to prepare the arguments properly + if declare -F _init_completion >/dev/null 2>&1; then + _init_completion -n =: || return + else + __gy_init_completion -n =: || return + fi + + __gy_debug + __gy_debug "========= starting completion logic ==========" + __gy_debug "cur is ${cur}, words[*] is ${words[*]}, #words[@] is ${#words[@]}, cword is $cword" + + # The user could have moved the cursor backwards on the command-line. + # We need to trigger completion from the $cword location, so we need + # to truncate the command-line ($words) up to the $cword location. + words=("${words[@]:0:$cword+1}") + __gy_debug "Truncated words[*]: ${words[*]}," + + local out directive + __gy_get_completion_results + __gy_process_completion_results +} + +if [[ $(type -t compopt) = "builtin" ]]; then + complete -o default -F __start_gy gy +else + complete -o default -o nospace -F __start_gy gy +fi + +# ex: ts=4 sw=4 et filetype=sh diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..cdf3dbd --- /dev/null +++ b/requirements.txt @@ -0,0 +1,11 @@ +uvicorn +requests +python-dotenv +openai +fastapi +pydantic +langchain +langchain-community +sentence-transformers +torch +transformers diff --git a/rerank_test.html b/rerank_test.html new file mode 100644 index 0000000..23227c9 --- /dev/null +++ b/rerank_test.html @@ -0,0 +1,179 @@ + + + + + + RAG Re-ranking Test + + + + +
+<body>
+    <div class="container">
+        <h1>RAG Re-ranking Visualizer</h1>
+        <div class="controls">
+            <input type="text" id="query-input" placeholder="Ask about Ghaymah Cloud docs...">
+            <button onclick="runTest()">Compare</button>
+        </div>
+        <div id="loading" class="hidden">Loading...</div>
+        <div class="columns">
+            <div class="column">
+                <h2>Initial Retrieval (Before Re-ranking)</h2>
+                <div id="initial-results"></div>
+            </div>
+            <div class="column">
+                <h2>Re-ranked Results (Top 5)</h2>
+                <div id="reranked-results"></div>
+            </div>
+        </div>
+    </div>
+    <script>
+        async function runTest() {
+            const query = document.getElementById('query-input').value;
+            const loading = document.getElementById('loading');
+            loading.classList.remove('hidden');
+            const resp = await fetch('/test-rerank/', {
+                method: 'POST',
+                headers: { 'Content-Type': 'application/json' },
+                body: JSON.stringify({ query: query, k: 5 })
+            });
+            const data = await resp.json();
+            loading.classList.add('hidden');
+            render('initial-results', data.initial_results);
+            render('reranked-results', data.reranked_results);
+        }
+        function render(id, results) {
+            document.getElementById(id).innerHTML = results.map(r =>
+                `<div class="result"><span class="score">${r.score.toFixed(3)}</span> ${r.text}</div>`
+            ).join('');
+        }
+    </script>
+</body>
+</html>
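The core of this PR is the cross-encoder stage added to `doc_rag_app.py`. Distilled from the endpoint code above into a standalone form, the pattern looks roughly like this; the model name is the one the diff pins, while the `rerank` helper, its `candidates` argument, and the `k_final` default are illustrative rather than part of the change.

```python
# Second-stage re-ranking sketch: score (query, chunk) pairs with a cross-encoder
# and keep only the best chunks for the LLM context.
from sentence_transformers import CrossEncoder

cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def rerank(query: str, candidates: list[str], k_final: int = 5) -> list[tuple[float, str]]:
    """Return the k_final most relevant chunks as (score, chunk) pairs."""
    pairs = [(query, chunk) for chunk in candidates]       # one pair per candidate
    scores = cross_encoder.predict(pairs)                  # one relevance score per pair
    ranked = sorted(zip(scores, candidates), key=lambda x: x[0], reverse=True)
    return [(float(s), c) for s, c in ranked[:k_final]]
```

The design trades a cheap, broad first pass (the bi-encoder retrieves `k_initial=25` candidates from the remote store) for a slower but more precise second pass that only has to score those few pairs, so latency stays bounded while precision improves.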