AdithyaSK committed on
Commit bb60941 · 1 Parent(s): 0bb07b7

Refactor requirements.txt: streamline dependencies and update package sources

Files changed (3):
  1. README.md +48 -5
  2. app.py +868 -529
  3. requirements.txt +9 -14
README.md CHANGED
@@ -1,6 +1,6 @@
  ---
- title: VARAG
- emoji: 🏠
+ title: NetraEmbed
+ emoji: 👁️
  colorFrom: yellow
  colorTo: purple
  sdk: gradio
@@ -8,9 +8,52 @@ sdk_version: 4.44.0
  app_file: app.py
  pinned: false
  license: mit
- short_description: Vision First RAG engine
+ short_description: Universal Multilingual Multimodal Document Retrieval
+ hardware: zero-gpu
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
-
- First commit
+ # NetraEmbed - Universal Multilingual Multimodal Document Retrieval
+
+ This Space demonstrates **NetraEmbed** and **ColNetraEmbed**, state-of-the-art multilingual multimodal document retrieval models based on the BiGemma3 and ColGemma3 architectures.
+
+ ## Features
+
+ - **NetraEmbed (BiGemma3)**: Single-vector embedding with Matryoshka representation for fast retrieval
+ - **ColNetraEmbed (ColGemma3)**: Multi-vector embedding with late interaction for high-quality retrieval with attention heatmaps
+ - **ZeroGPU Integration**: Efficient dynamic GPU allocation for on-demand model loading
+ - **PDF Document Support**: Upload PDFs and perform semantic search across pages
+ - **Side-by-side Comparison**: Compare both models simultaneously
+
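As a rough mental model of the difference between the two retrieval regimes, here is a minimal PyTorch sketch with illustrative shapes; it is not the models' actual API:

```python
import torch

# Single-vector (NetraEmbed-style): one normalized vector per page,
# so relevance is a single cosine similarity per page.
query_vec = torch.nn.functional.normalize(torch.randn(768), dim=-1)
page_vecs = torch.nn.functional.normalize(torch.randn(10, 768), dim=-1)
single_scores = page_vecs @ query_vec  # (10,) cosine similarities

# Multi-vector late interaction (ColNetraEmbed-style): one vector per token;
# each query token takes its best-matching page token (MaxSim), summed over the query.
query_toks = torch.randn(20, 128)      # 20 query-token vectors
page_toks = torch.randn(10, 700, 128)  # 10 pages x 700 patch vectors
late_scores = torch.einsum("qd,pnd->pqn", query_toks, page_toks).max(-1).values.sum(-1)  # (10,)
```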
+ ## Citation
+
+ If you use NetraEmbed or ColNetraEmbed in your research, please cite:
+
+ ```bibtex
+ @misc{kolavi2025m3druniversalmultilingualmultimodal,
+     title={M3DR: Towards Universal Multilingual Multimodal Document Retrieval},
+     author={Adithya S Kolavi and Vyoman Jain},
+     year={2025},
+     eprint={2512.03514},
+     archivePrefix={arXiv},
+     primaryClass={cs.IR},
+     url={https://arxiv.org/abs/2512.03514}
+ }
+ ```
+
+ ## Links
+
+ - 📄 [Paper](https://arxiv.org/abs/2512.03514)
+ - 💻 [GitHub](https://github.com/adithya-s-k/colpali)
+ - 🤗 [Models on Hugging Face](https://huggingface.co/Cognitive-Lab)
+ - 🌐 [CognitiveLab Website](https://www.cognitivelab.in)
+
+ ## Usage
+
+ 1. **Load Model**: Select your preferred model (NetraEmbed, ColNetraEmbed, or Both) and click "Load Model"
+ 2. **Upload PDF**: Upload a PDF document to index
+ 3. **Index Document**: Click "Index Document" to process and embed the pages
+ 4. **Query**: Enter your search query and click "Search" to retrieve relevant pages
+
+ This Space uses ZeroGPU for dynamic GPU allocation. Models are loaded on-demand when functions are called.
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
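The on-demand loading the README describes is what the `spaces` ZeroGPU decorator provides, as the new `app.py` below shows. A minimal sketch of the pattern, with a cached `torch.nn.Linear` standing in for a real model load:

```python
import spaces
import torch

_model = None  # cached at module level so only the first call pays the load cost

@spaces.GPU(duration=120)  # a ZeroGPU device is attached only while this function runs
def embed(batch: torch.Tensor) -> torch.Tensor:
    global _model
    device = "cuda" if torch.cuda.is_available() else "cpu"
    if _model is None:
        # stand-in for a real `from_pretrained(...)` load
        _model = torch.nn.Linear(512, 768).to(device)
    with torch.no_grad():
        return _model(batch.to(device))
```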
app.py CHANGED
@@ -1,577 +1,916 @@
- import gradio as gr
- import os
- import lancedb
- from sentence_transformers import SentenceTransformer
- from dotenv import load_dotenv
- from typing import List
- from PIL import Image
- import base64
  import io
- import time
- from collections import namedtuple
- import pandas as pd
- import concurrent.futures
- from varag.rag import SimpleRAG, VisionRAG, ColpaliRAG, HybridColpaliRAG
- from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
- from varag.chunking import FixedTokenChunker
- from varag.utils import get_model_colpali
- import argparse
- import spaces
  import torch
- from docling.document_converter import DocumentConverter
-
- load_dotenv()
-
- # Initialize shared database
- shared_db = lancedb.connect("~/rag_demo_db")
-
- # Initialize embedding models
- # text_embedding_model = SentenceTransformer("all-MiniLM-L6-v2", trust_remote_code=True)
- text_embedding_model = SentenceTransformer(
-     "BAAI/bge-base-en", trust_remote_code=True
- )
- # text_embedding_model = SentenceTransformer("BAAI/bge-large-en-v1.5", trust_remote_code=True)
- # text_embedding_model = SentenceTransformer("BAAI/bge-small-en-v1.5", trust_remote_code=True)
- image_embedding_model = SentenceTransformer(
-     "jinaai/jina-clip-v1", trust_remote_code=True
- )
- colpali_model, colpali_processor = get_model_colpali("vidore/colpali-v1.2")
-
- converter = DocumentConverter()
-
- # Initialize RAG instances
- simple_rag = SimpleRAG(
-     text_embedding_model=text_embedding_model, db=shared_db, table_name="simpleDemo"
- )
- vision_rag = VisionRAG(
-     image_embedding_model=image_embedding_model, db=shared_db, table_name="visionDemo"
- )
- colpali_rag = ColpaliRAG(
-     colpali_model=colpali_model,
-     colpali_processor=colpali_processor,
-     db=shared_db,
-     table_name="colpaliDemo",
- )
- hybrid_rag = HybridColpaliRAG(
-     colpali_model=colpali_model,
-     colpali_processor=colpali_processor,
-     image_embedding_model=image_embedding_model,
-     db=shared_db,
-     table_name="hybridDemo",
- )
-
-
- IngestResult = namedtuple("IngestResult", ["status_text", "progress_table"])
-
-
- # @spaces.GPU(duration=120)
- # def ingest_data(pdf_files, use_ocr, chunk_size, progress=gr.Progress()):
- #     file_paths = [pdf_file.name for pdf_file in pdf_files]
- #     total_start_time = time.time()
- #     progress_data = []
-
- #     # SimpleRAG
- #     yield IngestResult(
- #         status_text="Starting SimpleRAG ingestion...\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     start_time = time.time()
- #     simple_rag.index(
- #         file_paths,
- #         recursive=False,
- #         chunking_strategy=FixedTokenChunker(chunk_size=chunk_size),
- #         metadata={"source": "gradio_upload"},
- #         overwrite=True,
- #         verbose=True,
- #         ocr=use_ocr,
- #     )
- #     simple_time = time.time() - start_time
- #     progress_data.append(
- #         {"Technique": "SimpleRAG", "Time Taken (s)": f"{simple_time:.2f}"}
- #     )
- #     yield IngestResult(
- #         status_text=f"SimpleRAG ingestion complete. Time taken: {simple_time:.2f} seconds\n\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     # progress(0.25, desc="SimpleRAG complete")
-
- #     # VisionRAG
- #     yield IngestResult(
- #         status_text="Starting VisionRAG ingestion...\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     start_time = time.time()
- #     vision_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
- #     vision_time = time.time() - start_time
- #     progress_data.append(
- #         {"Technique": "VisionRAG", "Time Taken (s)": f"{vision_time:.2f}"}
- #     )
- #     yield IngestResult(
- #         status_text=f"VisionRAG ingestion complete. Time taken: {vision_time:.2f} seconds\n\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     # progress(0.5, desc="VisionRAG complete")
-
- #     # ColpaliRAG
- #     yield IngestResult(
- #         status_text="Starting ColpaliRAG ingestion...\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     start_time = time.time()
- #     colpali_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
- #     colpali_time = time.time() - start_time
- #     progress_data.append(
- #         {"Technique": "ColpaliRAG", "Time Taken (s)": f"{colpali_time:.2f}"}
- #     )
- #     yield IngestResult(
- #         status_text=f"ColpaliRAG ingestion complete. Time taken: {colpali_time:.2f} seconds\n\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     # progress(0.75, desc="ColpaliRAG complete")
-
- #     # HybridColpaliRAG
- #     yield IngestResult(
- #         status_text="Starting HybridColpaliRAG ingestion...\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     start_time = time.time()
- #     hybrid_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
- #     hybrid_time = time.time() - start_time
- #     progress_data.append(
- #         {"Technique": "HybridColpaliRAG", "Time Taken (s)": f"{hybrid_time:.2f}"}
- #     )
- #     yield IngestResult(
- #         status_text=f"HybridColpaliRAG ingestion complete. Time taken: {hybrid_time:.2f} seconds\n\n",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
- #     # progress(1.0, desc="HybridColpaliRAG complete")
-
- #     total_time = time.time() - total_start_time
- #     progress_data.append({"Technique": "Total", "Time Taken (s)": f"{total_time:.2f}"})
- #     yield IngestResult(
- #         status_text=f"Total ingestion time: {total_time:.2f} seconds",
- #         progress_table=pd.DataFrame(progress_data),
- #     )
-
-
- def ingest_data(pdf_files, use_ocr, chunk_size, progress=gr.Progress()):
-     file_paths = [pdf_file.name for pdf_file in pdf_files]
-     total_start_time = time.time()
-     progress_data = []
-
-     @spaces.GPU(duration=120)
-     def ingest_simple_rag():
-         yield IngestResult(
-             status_text="Starting SimpleRAG ingestion...\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-         start_time = time.time()
-         simple_rag.index(
-             file_paths,
-             recursive=False,
-             chunking_strategy=FixedTokenChunker(chunk_size=chunk_size),
-             metadata={"source": "gradio_upload"},
-             overwrite=True,
-             verbose=True,
-             ocr=use_ocr,
-         )
-         simple_time = time.time() - start_time
-         progress_data.append(
-             {"Technique": "SimpleRAG", "Time Taken (s)": f"{simple_time:.2f}"}
-         )
-         yield IngestResult(
-             status_text=f"SimpleRAG ingestion complete. Time taken: {simple_time:.2f} seconds\n\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-
-     @spaces.GPU(duration=120)
-     def ingest_vision_rag():
-         yield IngestResult(
-             status_text="Starting VisionRAG ingestion...\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-         start_time = time.time()
-         vision_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
-         vision_time = time.time() - start_time
-         progress_data.append(
-             {"Technique": "VisionRAG", "Time Taken (s)": f"{vision_time:.2f}"}
-         )
-         yield IngestResult(
-             status_text=f"VisionRAG ingestion complete. Time taken: {vision_time:.2f} seconds\n\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-
-     @spaces.GPU(duration=120)
-     def ingest_colpali_rag():
-         yield IngestResult(
-             status_text="Starting ColpaliRAG ingestion...\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-         start_time = time.time()
-         colpali_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
-         colpali_time = time.time() - start_time
-         progress_data.append(
-             {"Technique": "ColpaliRAG", "Time Taken (s)": f"{colpali_time:.2f}"}
-         )
-         yield IngestResult(
-             status_text=f"ColpaliRAG ingestion complete. Time taken: {colpali_time:.2f} seconds\n\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-
-     @spaces.GPU(duration=120)
-     def ingest_hybrid_rag():
-         yield IngestResult(
-             status_text="Starting HybridColpaliRAG ingestion...\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-         start_time = time.time()
-         hybrid_rag.index(file_paths, overwrite=False, recursive=False, verbose=True)
-         hybrid_time = time.time() - start_time
-         progress_data.append(
-             {"Technique": "HybridColpaliRAG", "Time Taken (s)": f"{hybrid_time:.2f}"}
-         )
-         yield IngestResult(
-             status_text=f"HybridColpaliRAG ingestion complete. Time taken: {hybrid_time:.2f} seconds\n\n",
-             progress_table=pd.DataFrame(progress_data),
-         )
-
-     # Call each ingestion function
-     yield from ingest_simple_rag()
-     yield from ingest_vision_rag()
-     yield from ingest_colpali_rag()
-     yield from ingest_hybrid_rag()
-
-     total_time = time.time() - total_start_time
-     progress_data.append({"Technique": "Total", "Time Taken (s)": f"{total_time:.2f}"})
-     yield IngestResult(
-         status_text=f"Total ingestion time: {total_time:.2f} seconds",
-         progress_table=pd.DataFrame(progress_data),
-     )
-
-
- @spaces.GPU(duration=120)
- def retrieve_data(query, top_k, sequential=False):
-     results = {}
-     timings = {}
-
-     def retrieve_simple():
-         start_time = time.time()
-         simple_results = simple_rag.search(query, k=top_k)
-
-         print(simple_results)
-
-         simple_context = []
-         for i, r in enumerate(simple_results, 1):
-             context_piece = f"Result {i}:\n"
-             context_piece += f"Source: {r.get('document_name', 'Unknown')}\n"
-             context_piece += f"Chunk Index: {r.get('chunk_index', 'Unknown')}\n"
-             context_piece += f"Content:\n{r['text']}\n"
-             context_piece += "-" * 40 + "\n"  # Separator
-             simple_context.append(context_piece)
-
-         simple_context = "\n".join(simple_context)
-         end_time = time.time()
-         return "SimpleRAG", simple_context, end_time - start_time
-
-     def retrieve_vision():
-         start_time = time.time()
-         vision_results = vision_rag.search(query, k=top_k)
-         vision_images = [r["image"] for r in vision_results]
-         end_time = time.time()
-         return "VisionRAG", vision_images, end_time - start_time
-
-     def retrieve_colpali():
-         start_time = time.time()
-         colpali_results = colpali_rag.search(query, k=top_k)
-         colpali_images = [r["image"] for r in colpali_results]
-         end_time = time.time()
-         return "ColpaliRAG", colpali_images, end_time - start_time
-
-     def retrieve_hybrid():
-         start_time = time.time()
-         hybrid_results = hybrid_rag.search(query, k=top_k, use_image_search=True)
-         hybrid_images = [r["image"] for r in hybrid_results]
-         end_time = time.time()
-         return "HybridColpaliRAG", hybrid_images, end_time - start_time
-
-     retrieval_functions = [
-         retrieve_simple,
-         retrieve_vision,
-         retrieve_colpali,
-         retrieve_hybrid,
-     ]
-
-     if sequential:
-         for func in retrieval_functions:
-             rag_type, content, timing = func()
-             results[rag_type] = content
-             timings[rag_type] = timing
-     else:
-         with concurrent.futures.ThreadPoolExecutor() as executor:
-             future_results = [executor.submit(func) for func in retrieval_functions]
-             for future in concurrent.futures.as_completed(future_results):
-                 rag_type, content, timing = future.result()
-                 results[rag_type] = content
-                 timings[rag_type] = timing
-
-     return results, timings
-
-
- # @spaces.GPU
- # def query_data(query, retrieved_results):
- #     results = {}
-
- #     # SimpleRAG
- #     simple_context = retrieved_results["SimpleRAG"]
- #     simple_response = llm.query(
- #         context=simple_context,
- #         system_prompt="Given the below information answer the questions",
- #         query=query,
- #     )
- #     results["SimpleRAG"] = {"response": simple_response, "context": simple_context}
-
- #     # VisionRAG
- #     vision_images = retrieved_results["VisionRAG"]
- #     vision_context = f"Query: {query}\n\nRelevant image information:\n" + "\n".join(
- #         [f"Image {i+1}" for i in range(len(vision_images))]
- #     )
- #     vision_response = vlm.query(vision_context, vision_images, max_tokens=500)
- #     results["VisionRAG"] = {
- #         "response": vision_response,
- #         "context": vision_context,
- #         "images": vision_images,
- #     }
-
- #     # ColpaliRAG
- #     colpali_images = retrieved_results["ColpaliRAG"]
- #     colpali_context = f"Query: {query}\n\nRelevant image information:\n" + "\n".join(
- #         [f"Image {i+1}" for i in range(len(colpali_images))]
- #     )
- #     colpali_response = vlm.query(colpali_context, colpali_images, max_tokens=500)
- #     results["ColpaliRAG"] = {
- #         "response": colpali_response,
- #         "context": colpali_context,
- #         "images": colpali_images,
- #     }
-
- #     # HybridColpaliRAG
- #     hybrid_images = retrieved_results["HybridColpaliRAG"]
- #     hybrid_context = f"Query: {query}\n\nRelevant image information:\n" + "\n".join(
- #         [f"Image {i+1}" for i in range(len(hybrid_images))]
- #     )
- #     hybrid_response = vlm.query(hybrid_context, hybrid_images, max_tokens=500)
- #     results["HybridColpaliRAG"] = {
- #         "response": hybrid_response,
- #         "context": hybrid_context,
- #         "images": hybrid_images,
- #     }
-
- #     return results
-
-
- def update_api_key(api_key):
-     os.environ["OPENAI_API_KEY"] = api_key
-     return "API key updated successfully."
-
-
- def change_table(simple_table, vision_table, colpali_table, hybrid_table):
-     simple_rag.change_table(simple_table)
-     vision_rag.change_table(vision_table)
-     colpali_rag.change_table(colpali_table)
-     hybrid_rag.change_table(hybrid_table)
-     return "Table names updated successfully."
-
-
- def gradio_interface():
-     with gr.Blocks(
-         theme=gr.themes.Monochrome(radius_size=gr.themes.sizes.radius_none)
-     ) as demo:
-         gr.Markdown(
-             """
-             # 👁️👁️ Vision RAG Playground
-
-             ### Explore and Compare Vision-Augmented Retrieval Techniques
-             Built on [VARAG](https://github.com/adithya-s-k/VARAG) - Vision-Augmented Retrieval and Generation
-
-             **[⭐ Star the Repository](https://github.com/adithya-s-k/VARAG)** to support the project!
-
-             1. **Simple RAG**: Text-based retrieval with OCR support for scanned documents.
-             2. **Vision RAG**: Combines text and image retrieval using cross-modal embeddings.
-             3. **ColPali RAG**: Embeds entire document pages as images for layout-aware retrieval.
-             4. **Hybrid ColPali RAG**: Two-stage retrieval combining image embeddings and ColPali's token-level matching.
-             """
-         )
-
-         with gr.Tab("Ingest Data"):
-             gr.Markdown(
-                 """
-                 ## ⚠️ Important Note on Data Ingestion
-
-                 This Space has a maximum GPU-enabled time of 120 seconds. It's recommended to try ingesting only 1 or 2 pdfs at a time.
-
-                 If you want to ingest a larger amount of data, please try it out in a Google Colab notebook:
-
-                 [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/adithya-s-k/VARAG/blob/main/docs/demo.ipynb)
-                 """
-             )
-             pdf_input = gr.File(
-                 label="Upload PDF(s)", file_count="multiple", file_types=["pdf"]
-             )
-             use_ocr = gr.Checkbox(label="Use OCR (for SimpleRAG)")
-             chunk_size = gr.Slider(
-                 50, 5000, value=200, step=10, label="Chunk Size (for SimpleRAG)"
-             )
-             ingest_button = gr.Button("Ingest PDFs")
-             ingest_output = gr.Markdown(
-                 label="Ingestion Status :",
-             )
-             progress_table = gr.DataFrame(
-                 label="Ingestion Progress", headers=["Technique", "Time Taken (s)"]
-             )
-
-         with gr.Tab("Retrieve and Query Data"):
-             query_input = gr.Textbox(label="Enter your query")
-             top_k_slider = gr.Slider(1, 10, value=3, step=1, label="Top K Results")
-             sequential_checkbox = gr.Checkbox(label="Sequential Retrieval", value=False)
-             retrieve_button = gr.Button("Retrieve")
-             query_button = gr.Button("Query")
-
-             retrieval_timing = gr.DataFrame(
-                 label="Retrieval Timings", headers=["RAG Type", "Time (s)"]
-             )
-
-             with gr.Row():
-                 with gr.Column():
-                     with gr.Accordion("SimpleRAG", open=True):
-                         simple_content = gr.Textbox(
-                             label="SimpleRAG Content", lines=10, max_lines=10
-                         )
-                         simple_response = gr.Markdown(label="SimpleRAG Response")
-                 with gr.Column():
-                     with gr.Accordion("VisionRAG", open=True):
-                         vision_gallery = gr.Gallery(label="VisionRAG Images")
-                         vision_response = gr.Markdown(label="VisionRAG Response")
-
-             with gr.Row():
-                 with gr.Column():
-                     with gr.Accordion("ColpaliRAG", open=True):
-                         colpali_gallery = gr.Gallery(label="ColpaliRAG Images")
-                         colpali_response = gr.Markdown(label="ColpaliRAG Response")
-                 with gr.Column():
-                     with gr.Accordion("HybridColpaliRAG", open=True):
-                         hybrid_gallery = gr.Gallery(label="HybridColpaliRAG Images")
-                         hybrid_response = gr.Markdown(label="HybridColpaliRAG Response")
-
-         with gr.Tab("Settings"):
-             api_key_input = gr.Textbox(label="OpenAI API Key", type="password")
-             update_api_button = gr.Button("Update API Key")
-             api_update_status = gr.Textbox(label="API Update Status")
-
-             simple_table_input = gr.Textbox(
-                 label="SimpleRAG Table Name", value="simpleDemo"
-             )
-             vision_table_input = gr.Textbox(
-                 label="VisionRAG Table Name", value="visionDemo"
-             )
-             colpali_table_input = gr.Textbox(
-                 label="ColpaliRAG Table Name", value="colpaliDemo"
-             )
-             hybrid_table_input = gr.Textbox(
-                 label="HybridColpaliRAG Table Name", value="hybridDemo"
-             )
-             update_table_button = gr.Button("Update Table Names")
-             table_update_status = gr.Textbox(label="Table Update Status")
-
-         retrieved_results = gr.State({})
-
-         def update_retrieval_results(query, top_k, sequential):
-             results, timings = retrieve_data(query, top_k, sequential)
-             timing_df = pd.DataFrame(
-                 list(timings.items()), columns=["RAG Type", "Time (s)"]
-             )
-             return (
-                 results["SimpleRAG"],
-                 results["VisionRAG"],
-                 results["ColpaliRAG"],
-                 results["HybridColpaliRAG"],
-                 timing_df,
-                 results,
-             )
-
-         retrieve_button.click(
-             update_retrieval_results,
-             inputs=[query_input, top_k_slider, sequential_checkbox],
-             outputs=[
-                 simple_content,
-                 vision_gallery,
-                 colpali_gallery,
-                 hybrid_gallery,
-                 retrieval_timing,
-                 retrieved_results,
-             ],
-         )
-
-         # def update_query_results(query, retrieved_results):
-         #     results = query_data(query, retrieved_results)
-         #     return (
-         #         results["SimpleRAG"]["response"],
-         #         results["VisionRAG"]["response"],
-         #         results["ColpaliRAG"]["response"],
-         #         results["HybridColpaliRAG"]["response"],
-         #     )
-
-         # query_button.click(
-         #     update_query_results,
-         #     inputs=[query_input, retrieved_results],
-         #     outputs=[
-         #         simple_response,
-         #         vision_response,
-         #         colpali_response,
-         #         hybrid_response,
-         #     ],
-         # )
-
-         ingest_button.click(
-             ingest_data,
-             inputs=[pdf_input, use_ocr, chunk_size],
-             outputs=[ingest_output, progress_table],
-         )
-
-         update_api_button.click(
-             update_api_key, inputs=[api_key_input], outputs=api_update_status
-         )
-
-         update_table_button.click(
-             change_table,
-             inputs=[
-                 simple_table_input,
-                 vision_table_input,
-                 colpali_table_input,
-                 hybrid_table_input,
-             ],
-             outputs=table_update_status,
-         )
-
-     return demo
-
-
- # Parse command-line arguments
- def parse_args():
-     parser = argparse.ArgumentParser(description="VisionRAG Gradio App")
-     parser.add_argument(
-         "--share", action="store_true", help="Enable Gradio share feature"
-     )
-     return parser.parse_args()
-
  # Launch the app
  if __name__ == "__main__":
-     args = parse_args()
-     app = gradio_interface()
-     app.launch(share=args.share)
+ """
+ Gradio Demo for Document Retrieval - Hugging Face Spaces with ZeroGPU
+
+ This script creates a Gradio interface for testing both BiGemma3 and ColGemma3 models
+ with PDF document upload, automatic conversion to images, and query-based retrieval.
+
+ Features:
+ - PDF upload with automatic conversion to images
+ - Model selection: NetraEmbed (BiGemma3), ColNetraEmbed (ColGemma3), or Both
+ - Query input with top-k selection (default: 5)
+ - Similarity score display
+ - Side-by-side comparison when both models are selected
+ - Progressive loading with real-time updates
+ - Proper error handling
+ - ZeroGPU integration for efficient GPU usage
+ """
+
  import io
+ import gc
+ import math
+ from typing import Iterator, List, Optional, Tuple
+
+ import gradio as gr
  import torch
+ import spaces
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import matplotlib.pyplot as plt
+ import numpy as np
+ import seaborn as sns
+ from einops import rearrange
+
+ # Import from colpali_engine
+ from colpali_engine.models import BiGemma3, BiGemmaProcessor3, ColGemma3, ColGemmaProcessor3
+ from colpali_engine.interpretability import get_similarity_maps_from_embeddings
+ from colpali_engine.interpretability.similarity_map_utils import normalize_similarity_map
+
+ # Configuration
+ MAX_BATCH_SIZE = 32  # Maximum pages to process at once
+ DEFAULT_DURATION = 120  # Default GPU duration in seconds
+
+ # Global state for models and indexed documents
+ class DocumentIndex:
+     def __init__(self):
+         self.images: List[Image.Image] = []
+         self.bigemma_embeddings = None
+         self.colgemma_embeddings = None
+         self.bigemma_model = None
+         self.bigemma_processor = None
+         self.colgemma_model = None
+         self.colgemma_processor = None
+         self.models_loaded = {"bigemma": False, "colgemma": False}
+
+ doc_index = DocumentIndex()
+
+ # Helper functions
+ def get_loaded_models() -> List[str]:
+     """Get list of currently loaded models."""
+     loaded = []
+     if doc_index.bigemma_model is not None:
+         loaded.append("BiGemma3")
+     if doc_index.colgemma_model is not None:
+         loaded.append("ColGemma3")
+     return loaded
+
+ def get_model_choice_from_loaded() -> str:
+     """Determine model choice string based on what's loaded."""
+     loaded = get_loaded_models()
+     if "BiGemma3" in loaded and "ColGemma3" in loaded:
+         return "Both"
+     elif "BiGemma3" in loaded:
+         return "NetraEmbed (BiGemma3)"
+     elif "ColGemma3" in loaded:
+         return "ColNetraEmbed (ColGemma3)"
+     else:
+         return ""
+
+ @spaces.GPU(duration=DEFAULT_DURATION)
+ def load_bigemma_model():
+     """Load BiGemma3 model and processor."""
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     if doc_index.bigemma_model is None:
+         print("Loading BiGemma3 (NetraEmbed)...")
+         try:
+             doc_index.bigemma_processor = BiGemmaProcessor3.from_pretrained(
+                 "Cognitive-Lab/NetraEmbed",
+                 use_fast=True,
+             )
+             doc_index.bigemma_model = BiGemma3.from_pretrained(
+                 "Cognitive-Lab/NetraEmbed",
+                 torch_dtype=torch.bfloat16,
+                 device_map=device,
+             )
+             doc_index.bigemma_model.eval()
+             doc_index.models_loaded["bigemma"] = True
+             print("✓ BiGemma3 loaded successfully")
+         except Exception as e:
+             print(f"❌ Failed to load BiGemma3: {str(e)}")
+             raise
+     return doc_index.bigemma_model, doc_index.bigemma_processor
+
+ @spaces.GPU(duration=DEFAULT_DURATION)
+ def load_colgemma_model():
+     """Load ColGemma3 model and processor."""
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+     if doc_index.colgemma_model is None:
+         print("Loading ColGemma3 (ColNetraEmbed)...")
+         try:
+             doc_index.colgemma_model = ColGemma3.from_pretrained(
+                 "Cognitive-Lab/ColNetraEmbed",
+                 dtype=torch.bfloat16,
+                 device_map=device,
+             )
+             doc_index.colgemma_model.eval()
+             doc_index.colgemma_processor = ColGemmaProcessor3.from_pretrained(
+                 "Cognitive-Lab/ColNetraEmbed",
+                 use_fast=True,
+             )
+             doc_index.models_loaded["colgemma"] = True
+             print("✓ ColGemma3 loaded successfully")
+         except Exception as e:
+             print(f"❌ Failed to load ColGemma3: {str(e)}")
+             raise
+     return doc_index.colgemma_model, doc_index.colgemma_processor
+
+ def unload_models():
+     """Unload models and free GPU memory."""
+     try:
+         if doc_index.bigemma_model is not None:
+             del doc_index.bigemma_model
+             del doc_index.bigemma_processor
+             doc_index.bigemma_model = None
+             doc_index.bigemma_processor = None
+             doc_index.models_loaded["bigemma"] = False
+
+         if doc_index.colgemma_model is not None:
+             del doc_index.colgemma_model
+             del doc_index.colgemma_processor
+             doc_index.colgemma_model = None
+             doc_index.colgemma_processor = None
+             doc_index.models_loaded["colgemma"] = False
+
+         # Clear embeddings and images
+         doc_index.bigemma_embeddings = None
+         doc_index.colgemma_embeddings = None
+         doc_index.images = []
+
+         # Force garbage collection
+         gc.collect()
+         if torch.cuda.is_available():
+             torch.cuda.empty_cache()
+             torch.cuda.synchronize()
+
+         return "✅ Models unloaded and GPU memory cleared"
+     except Exception as e:
+         return f"❌ Error unloading models: {str(e)}"
+
+ def clear_incompatible_embeddings(model_choice: str) -> str:
+     """Clear embeddings that are incompatible with currently loading models."""
+     cleared = []
+
+     # If loading only BiGemma3, clear ColGemma3 embeddings
+     if model_choice == "NetraEmbed (BiGemma3)":
+         if doc_index.colgemma_embeddings is not None:
+             doc_index.colgemma_embeddings = None
+             doc_index.images = []
+             cleared.append("ColGemma3")
+             print("Cleared ColGemma3 embeddings")
+
+     # If loading only ColGemma3, clear BiGemma3 embeddings
+     elif model_choice == "ColNetraEmbed (ColGemma3)":
+         if doc_index.bigemma_embeddings is not None:
+             doc_index.bigemma_embeddings = None
+             doc_index.images = []
+             cleared.append("BiGemma3")
+             print("Cleared BiGemma3 embeddings")
+
+     if cleared:
+         return f"Cleared {', '.join(cleared)} embeddings - please re-index"
+     return ""
+
+ def pdf_to_images(pdf_path: str) -> List[Image.Image]:
+     """Convert PDF to list of PIL Images with error handling."""
+     try:
+         print(f"Converting PDF to images: {pdf_path}")
+         images = convert_from_path(pdf_path, dpi=200)
+         print(f"Converted {len(images)} pages")
+         return images
+     except Exception as e:
+         print(f"❌ PDF conversion error: {str(e)}")
+         raise Exception(f"Failed to convert PDF: {str(e)}")
+
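Worth noting for this function: `pdf2image.convert_from_path` shells out to Poppler's `pdftoppm`/`pdftocairo` binaries, which pip cannot install; on Spaces they usually come from a `packages.txt` entry such as `poppler-utils`. A small pre-flight check, as a sketch:

```python
import shutil

def poppler_available() -> bool:
    """pdf2image needs Poppler binaries on PATH; fail early with a clear message."""
    return shutil.which("pdftoppm") is not None or shutil.which("pdftocairo") is not None

if not poppler_available():
    print("Poppler not found - install it (e.g. add poppler-utils to packages.txt on Spaces)")
```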
+ @spaces.GPU(duration=DEFAULT_DURATION)
+ def generate_colgemma_heatmap(
+     image: Image.Image,
+     query: str,
+     query_embedding: torch.Tensor,
+     image_embedding: torch.Tensor,
+     model,
+     processor,
+ ) -> Image.Image:
+     """Generate heatmap overlay for ColGemma3 results."""
+     try:
+         device = "cuda" if torch.cuda.is_available() else "cpu"
+
+         # Re-process the single image to get the proper batch_images dict for image mask
+         batch_images = processor.process_images([image]).to(device)
+
+         # Create image mask manually (ColGemmaProcessor3 doesn't have get_image_mask)
+         if "input_ids" in batch_images and hasattr(model.config, "image_token_id"):
+             image_token_id = model.config.image_token_id
+             image_mask = batch_images["input_ids"] == image_token_id
+         else:
+             # Fallback: all tokens are image tokens
+             image_mask = torch.ones(
+                 image_embedding.shape[0], image_embedding.shape[1], dtype=torch.bool, device=device
+             )
+
+         # Calculate n_patches from actual number of image tokens
+         num_image_tokens = image_mask.sum().item()
+         n_side = int(math.sqrt(num_image_tokens))
+
+         if n_side * n_side == num_image_tokens:
+             n_patches = (n_side, n_side)
+         else:
+             # Fallback: use default calculation
+             n_patches = (16, 16)
+
+         # Generate similarity maps (returns a list of tensors)
+         similarity_maps_list = get_similarity_maps_from_embeddings(
+             image_embeddings=image_embedding,
+             query_embeddings=query_embedding,
+             n_patches=n_patches,
+             image_mask=image_mask,
+         )
+
+         # Get the similarity map for our image (returns a list, get first element)
+         similarity_map = similarity_maps_list[0]  # (query_length, n_patches_x, n_patches_y)
+
+         # Aggregate across all query tokens (mean)
+         if similarity_map.dtype == torch.bfloat16:
+             similarity_map = similarity_map.float()
+         aggregated_map = torch.mean(similarity_map, dim=0)
+
+         # Convert the image to an array
+         img_array = np.array(image.convert("RGBA"))
+
+         # Normalize the similarity map and convert to numpy
+         similarity_map_array = normalize_similarity_map(aggregated_map).to(torch.float32).cpu().numpy()
+
+         # Reshape to match PIL convention
+         similarity_map_array = rearrange(similarity_map_array, "h w -> w h")
+
+         # Create PIL image from similarity map
+         similarity_map_image = Image.fromarray((similarity_map_array * 255).astype("uint8")).resize(
+             image.size, Image.Resampling.BICUBIC
+         )
+
+         # Create matplotlib figure
+         fig, ax = plt.subplots(figsize=(10, 10))
+         ax.imshow(img_array)
+         ax.imshow(
+             similarity_map_image,
+             cmap=sns.color_palette("mako", as_cmap=True),
+             alpha=0.5,
+         )
+         ax.set_axis_off()
+         plt.tight_layout()
+
+         # Convert to PIL Image
+         buffer = io.BytesIO()
+         plt.savefig(buffer, format="png", dpi=150, bbox_inches="tight", pad_inches=0)
+         buffer.seek(0)
+         heatmap_image = Image.open(buffer).copy()
+         plt.close()
+
+         return heatmap_image
+
+     except Exception as e:
+         print(f"❌ Heatmap generation error: {str(e)}")
+         # Return original image if heatmap generation fails
+         return image
+
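The similarity map built above is per query token, shape `(query_length, n_patches_x, n_patches_y)`, and the app averages over tokens before overlaying. Individual token maps can also be inspected, for example to see which query word drives the match; a sketch of that (not app behavior):

```python
import torch

def per_token_peaks(similarity_map: torch.Tensor) -> torch.Tensor:
    """similarity_map: (query_length, n_patches_x, n_patches_y).
    Returns each query token's strongest patch activation."""
    return similarity_map.float().amax(dim=(1, 2))  # (query_length,)
```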
286
+ @spaces.GPU(duration=DEFAULT_DURATION)
287
+ def index_bigemma_images(images: List[Image.Image]) -> torch.Tensor:
288
+ """Index images with BiGemma3 model."""
289
+ device = "cuda" if torch.cuda.is_available() else "cpu"
290
+ model, processor = doc_index.bigemma_model, doc_index.bigemma_processor
291
+
292
+ batch_images = processor.process_images(images).to(device)
293
+ embeddings = model(**batch_images, embedding_dim=768)
294
+
295
+ return embeddings
296
+
297
+ @spaces.GPU(duration=DEFAULT_DURATION)
298
+ def index_colgemma_images(images: List[Image.Image]) -> torch.Tensor:
299
+ """Index images with ColGemma3 model."""
300
+ device = "cuda" if torch.cuda.is_available() else "cpu"
301
+ model, processor = doc_index.colgemma_model, doc_index.colgemma_processor
302
+
303
+ batch_images = processor.process_images(images).to(device)
304
+ embeddings = model(**batch_images)
305
+
306
+ return embeddings
307
+
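Note that `MAX_BATCH_SIZE` only drives a warning in `index_document` below; both indexing functions still encode every page in one call. If chunked encoding were wanted, a sketch along these lines would fit the same API (assuming equal per-page token counts so the chunks concatenate):

```python
import torch

def index_images_in_batches(images, encode_fn, batch_size=32):
    """Encode pages in chunks (e.g. MAX_BATCH_SIZE) to bound peak GPU memory."""
    chunks = [
        encode_fn(images[i : i + batch_size])
        for i in range(0, len(images), batch_size)
    ]
    # For BiGemma3 this is a plain (pages, dim) concat; for ColGemma3 the
    # per-page token counts must match across chunks for torch.cat to apply.
    return torch.cat(chunks, dim=0)
```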
+ def index_document(pdf_file, model_choice: str) -> Iterator[str]:
+     """Upload and index a PDF document with progress updates."""
+     if pdf_file is None:
+         yield "⚠️ Please upload a PDF document first."
+         return
+
+     try:
+         status_messages = []
+
+         # Convert PDF to images
+         status_messages.append("⏳ Converting PDF to images...")
+         yield "\n".join(status_messages)
+
+         doc_index.images = pdf_to_images(pdf_file.name)
+         num_pages = len(doc_index.images)
+
+         status_messages.append(f"✓ Converted PDF to {num_pages} images")
+
+         # Check if we need to batch process
+         if num_pages > MAX_BATCH_SIZE:
+             status_messages.append(f"⚠️ Large PDF ({num_pages} pages). Processing in batches of {MAX_BATCH_SIZE}...")
+             yield "\n".join(status_messages)
+
+         # Index with BiGemma3
+         if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
+             if doc_index.bigemma_model is None:
+                 status_messages.append("⏳ Loading BiGemma3 model...")
+                 yield "\n".join(status_messages)
+                 load_bigemma_model()
+                 status_messages.append("✓ BiGemma3 loaded")
+             else:
+                 status_messages.append("✓ Using cached BiGemma3 model")
+
+             yield "\n".join(status_messages)
+
+             status_messages.append("⏳ Encoding images with BiGemma3...")
+             yield "\n".join(status_messages)
+
+             doc_index.bigemma_embeddings = index_bigemma_images(doc_index.images)
+
+             status_messages.append("✓ Indexed with BiGemma3 (shape: {})".format(doc_index.bigemma_embeddings.shape))
+             yield "\n".join(status_messages)
+
+         # Index with ColGemma3
+         if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
+             if doc_index.colgemma_model is None:
+                 status_messages.append("⏳ Loading ColGemma3 model...")
+                 yield "\n".join(status_messages)
+                 load_colgemma_model()
+                 status_messages.append("✓ ColGemma3 loaded")
+             else:
+                 status_messages.append("✓ Using cached ColGemma3 model")
+
+             yield "\n".join(status_messages)
+
+             status_messages.append("⏳ Encoding images with ColGemma3...")
+             yield "\n".join(status_messages)
+
+             doc_index.colgemma_embeddings = index_colgemma_images(doc_index.images)
+
+             status_messages.append(
+                 "✓ Indexed with ColGemma3 (shape: {})".format(doc_index.colgemma_embeddings.shape)
+             )
+             yield "\n".join(status_messages)
+
+         final_status = "\n".join(status_messages) + "\n\n✅ Document ready for querying!"
+         yield final_status
+
+     except Exception as e:
+         import traceback
+
+         error_details = traceback.format_exc()
+         print(f"Indexing error: {error_details}")
+         yield f"❌ Error indexing document: {str(e)}"
+
+ @spaces.GPU(duration=DEFAULT_DURATION)
+ def query_bigemma(query: str, top_k: int) -> Tuple[str, List]:
+     """Query indexed documents with BiGemma3."""
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     model, processor = doc_index.bigemma_model, doc_index.bigemma_processor
+
+     # Encode query
+     batch_query = processor.process_texts([query]).to(device)
+     query_embedding = model(**batch_query, embedding_dim=768)
+
+     # Compute scores (cosine similarity)
+     scores = processor.score(
+         qs=query_embedding,
+         ps=doc_index.bigemma_embeddings,
+     )
+
+     # Get top-k results
+     top_k_actual = min(top_k, len(doc_index.images))
+     top_indices = scores[0].argsort(descending=True)[:top_k_actual]
+
+     # Format results
+     results_text = "### BiGemma3 (NetraEmbed) Results\n\n"
+     gallery_images = []
+
+     for rank, idx in enumerate(top_indices):
+         score = scores[0, idx].item()
+         results_text += f"**Rank {rank + 1}:** Page {idx.item() + 1} - Score: {score:.4f}\n"
+         gallery_images.append(
+             (doc_index.images[idx.item()], f"Rank {rank + 1} - Page {idx.item() + 1} (Score: {score:.4f})")
+         )
+
+     return results_text, gallery_images
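The `embedding_dim=768` argument leans on the Matryoshka property the README mentions: a truncated prefix of the full embedding remains usable after re-normalization. Illustratively (the full width of 2048 here is hypothetical, not the model's actual size):

```python
import torch
import torch.nn.functional as F

def truncate_matryoshka(emb: torch.Tensor, dim: int) -> torch.Tensor:
    """Keep the first `dim` dimensions and re-normalize so cosine scores stay valid."""
    return F.normalize(emb[..., :dim], dim=-1)

docs = truncate_matryoshka(torch.randn(10, 2048), 768)   # cheaper page vectors
query = truncate_matryoshka(torch.randn(1, 2048), 768)
scores = query @ docs.T  # (1, 10) cosine similarities, the regime processor.score uses
```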
 
 
 
+ @spaces.GPU(duration=DEFAULT_DURATION)
+ def query_colgemma(query: str, top_k: int, show_heatmap: bool = False) -> Tuple[str, List]:
+     """Query indexed documents with ColGemma3."""
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     model, processor = doc_index.colgemma_model, doc_index.colgemma_processor
+
+     # Encode query
+     batch_query = processor.process_queries([query]).to(device)
+     query_embedding = model(**batch_query)
+
+     # Compute scores (MaxSim)
+     scores = processor.score_multi_vector(
+         qs=query_embedding,
+         ps=doc_index.colgemma_embeddings,
+     )
+
+     # Get top-k results
+     top_k_actual = min(top_k, len(doc_index.images))
+     top_indices = scores[0].argsort(descending=True)[:top_k_actual]
+
+     # Format results
+     results_text = "### ColGemma3 (ColNetraEmbed) Results\n\n"
+     gallery_images = []
+
+     for rank, idx in enumerate(top_indices):
+         score = scores[0, idx].item()
+         results_text += f"**Rank {rank + 1}:** Page {idx.item() + 1} - Score: {score:.2f}\n"
+
+         # Generate heatmap if requested
+         if show_heatmap:
+             heatmap_image = generate_colgemma_heatmap(
+                 image=doc_index.images[idx.item()],
+                 query=query,
+                 query_embedding=query_embedding,
+                 image_embedding=doc_index.colgemma_embeddings[idx.item()].unsqueeze(0),
+                 model=model,
+                 processor=processor,
+             )
+             gallery_images.append(
+                 (heatmap_image, f"Rank {rank + 1} - Page {idx.item() + 1} (Score: {score:.2f})")
+             )
+         else:
+             gallery_images.append(
+                 (
+                     doc_index.images[idx.item()],
+                     f"Rank {rank + 1} - Page {idx.item() + 1} (Score: {score:.2f})",
+                 )
+             )
+
+     return results_text, gallery_images
+
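`score_multi_vector` computes the late-interaction (MaxSim) score the comment names. For same-length, unpadded embeddings, a minimal equivalent looks like this sketch (the library call additionally handles batching and padding):

```python
import torch

def maxsim(qs: torch.Tensor, ps: torch.Tensor) -> torch.Tensor:
    """qs: (num_queries, q_tokens, dim); ps: (num_pages, p_tokens, dim).
    Each query token keeps its best-matching page token; scores sum over query tokens."""
    sim = torch.einsum("bqd,pnd->bpqn", qs, ps)
    return sim.max(dim=-1).values.sum(dim=-1)  # (num_queries, num_pages)
```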
+ def query_documents(
+     query: str, model_choice: str, top_k: int, show_heatmap: bool = False
+ ) -> Tuple[Optional[str], Optional[str], Optional[List], Optional[List]]:
+     """Query the indexed documents."""
+     if not doc_index.images:
+         return "⚠️ Please upload and index a document first.", None, None, None
+
+     if not query.strip():
+         return "⚠️ Please enter a query.", None, None, None
+
+     try:
+         results_bi = None
+         results_col = None
+         gallery_images_bi = []
+         gallery_images_col = []
+
+         # Query with BiGemma3
+         if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
+             if doc_index.bigemma_embeddings is None:
+                 return "⚠️ Please index the document with BiGemma3 first.", None, None, None
+
+             results_bi, gallery_images_bi = query_bigemma(query, top_k)
+
+         # Query with ColGemma3
+         if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
+             if doc_index.colgemma_embeddings is None:
+                 return "⚠️ Please index the document with ColGemma3 first.", None, None, None
+
+             results_col, gallery_images_col = query_colgemma(query, top_k, show_heatmap)
+
+         # Return results based on model choice
+         if model_choice == "NetraEmbed (BiGemma3)":
+             return results_bi, None, gallery_images_bi, None
+         elif model_choice == "ColNetraEmbed (ColGemma3)":
+             return results_col, None, None, gallery_images_col
+         else:  # Both
+             return results_bi, results_col, gallery_images_bi, gallery_images_col
+
+     except Exception as e:
+         import traceback
+
+         error_details = traceback.format_exc()
+         print(f"Query error: {error_details}")
+         return f"❌ Error during query: {str(e)}", None, None, None
+
+ def load_models_with_progress(model_choice: str) -> Iterator[Tuple]:
+     """Load models with progress updates."""
+     if not model_choice:
+         yield (
+             "❌ Please select a model first.",
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(value="Load model first"),
+         )
+         return
+
+     try:
+         status_messages = []
+
+         # Clear incompatible embeddings
+         clear_msg = clear_incompatible_embeddings(model_choice)
+         if clear_msg:
+             status_messages.append(f"⚠️ {clear_msg}")
+
+         # Load BiGemma3
+         if model_choice in ["NetraEmbed (BiGemma3)", "Both"]:
+             status_messages.append("⏳ Loading BiGemma3 (NetraEmbed)...")
+             yield (
+                 "\n".join(status_messages),
+                 gr.update(visible=True),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(value="Loading models..."),
+             )
+
+             load_bigemma_model()
+             status_messages[-1] = "✅ BiGemma3 loaded successfully"
+             yield (
+                 "\n".join(status_messages),
+                 gr.update(visible=True),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(value="Loading models..."),
+             )
+
+         # Load ColGemma3
+         if model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]:
+             status_messages.append("⏳ Loading ColGemma3 (ColNetraEmbed)...")
+             yield (
+                 "\n".join(status_messages),
+                 gr.update(visible=True),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(value="Loading models..."),
+             )
+
+             load_colgemma_model()
+             status_messages[-1] = "✅ ColGemma3 loaded successfully"
+             yield (
+                 "\n".join(status_messages),
+                 gr.update(visible=True),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(visible=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(interactive=False),
+                 gr.update(value="Loading models..."),
+             )
+
+         # Determine column visibility based on loaded models
+         show_bigemma = model_choice in ["NetraEmbed (BiGemma3)", "Both"]
+         show_colgemma = model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]
+         show_heatmap_checkbox = model_choice in ["ColNetraEmbed (ColGemma3)", "Both"]
+
+         final_status = "\n".join(status_messages) + "\n\n✅ Ready!"
+         yield (
+             final_status,
+             gr.update(visible=False),
+             gr.update(visible=True),
+             gr.update(visible=show_bigemma),
+             gr.update(visible=show_colgemma),
+             gr.update(visible=show_heatmap_checkbox),
+             gr.update(interactive=True),
+             gr.update(interactive=True),
+             gr.update(interactive=True),
+             gr.update(interactive=True),
+             gr.update(interactive=True),
+             gr.update(value="Ready to index"),
+         )
+
+     except Exception as e:
+         import traceback
+
+         error_details = traceback.format_exc()
+         print(f"Model loading error: {error_details}")
+         yield (
+             f"❌ Failed to load models: {str(e)}",
+             gr.update(visible=True),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(visible=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(interactive=False),
+             gr.update(value="Load model first"),
+         )
+
+ def unload_models_and_hide_ui():
+     """Unload models and hide main UI."""
+     status = unload_models()
+     return (
+         status,
+         gr.update(visible=True),
+         gr.update(visible=False),
+         gr.update(visible=False),
+         gr.update(visible=False),
+         gr.update(visible=False),
+         gr.update(interactive=False),
+         gr.update(interactive=False),
+         gr.update(interactive=False),
+         gr.update(interactive=False),
+         gr.update(interactive=False),
+         gr.update(value="Load model first"),
+     )
+
+ # Create Gradio interface
+ with gr.Blocks(
+     title="NetraEmbed Demo",
+ ) as demo:
+     # Header section with model info and banner
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("# NetraEmbed")
+             gr.HTML(
+                 """
+                 <div style="display: flex; gap: 8px; flex-wrap: wrap; margin-bottom: 15px;">
+                     <a href="https://arxiv.org/abs/2512.03514" target="_blank">
+                         <img src="https://img.shields.io/badge/arXiv-2512.03514-b31b1b.svg" alt="Paper">
+                     </a>
+                     <a href="https://github.com/adithya-s-k/colpali" target="_blank">
+                         <img src="https://img.shields.io/badge/GitHub-colpali-181717?logo=github" alt="GitHub">
+                     </a>
+                     <a href="https://huggingface.co/Cognitive-Lab/ColNetraEmbed" target="_blank">
+                         <img src="https://img.shields.io/badge/🤗%20HuggingFace-Model-yellow" alt="Model">
+                     </a>
+                     <a href="https://www.cognitivelab.in/blog/introducing-netraembed" target="_blank">
+                         <img src="https://img.shields.io/badge/Blog-CognitiveLab-blue" alt="Blog">
+                     </a>
+                     <a href="https://cloud.cognitivelab.in" target="_blank">
+                         <img src="https://img.shields.io/badge/Demo-Try%20it%20out-green" alt="Demo">
+                     </a>
+                 </div>
+                 """
+             )
+             gr.Markdown(
+                 """
+                 **🚀 Universal Multilingual Multimodal Document Retrieval**
+
+                 Upload a PDF document, select your model(s), and query using semantic search.
+
+                 **Available Models:**
+                 - **NetraEmbed (BiGemma3)**: Single-vector embedding with Matryoshka representation
+                   Fast retrieval with cosine similarity
+                 - **ColNetraEmbed (ColGemma3)**: Multi-vector embedding with late interaction
+                   High-quality retrieval with MaxSim scoring and attention heatmaps
+                 """
+             )
+
+         with gr.Column(scale=1):
+             gr.HTML(
+                 """
+                 <div style="text-align: center;">
+                     <img src="https://cdn-uploads.huggingface.co/production/uploads/6442d975ad54813badc1ddf7/-fYMikXhSuqRqm-UIdulK.png"
+                          alt="NetraEmbed Banner"
+                          style="width: 100%; height: auto; border-radius: 8px;">
+                 </div>
+                 """
+             )
+
+     gr.Markdown("---")
+
+     # Compact 3-column layout
+     with gr.Row():
+         # Column 1: Model Management
+         with gr.Column(scale=1):
+             gr.Markdown("### 🤖 Model Management")
+             model_select = gr.Radio(
+                 choices=["NetraEmbed (BiGemma3)", "ColNetraEmbed (ColGemma3)", "Both"],
+                 value="Both",
+                 label="Select Model(s)",
+             )
+
+             load_model_btn = gr.Button("🔄 Load Model", variant="primary", size="sm")
+             unload_model_btn = gr.Button("🗑️ Unload", variant="secondary", size="sm")
+
+             model_status = gr.Textbox(
+                 label="Status",
+                 lines=6,
+                 interactive=False,
+                 value="Select and load a model",
+             )
+
+             loading_info = gr.Markdown(
+                 """
+                 **First load:** 2-3 min
+                 **Cached:** ~30 sec
+                 """,
+                 visible=True,
+             )
+
+         # Column 2: Document Upload & Indexing
+         with gr.Column(scale=1):
+             gr.Markdown("### 📄 Upload & Index")
+             pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], interactive=False)
+             index_btn = gr.Button("📥 Index Document", variant="primary", size="sm", interactive=False)
+
+             index_status = gr.Textbox(
+                 label="Indexing Status",
+                 lines=6,
+                 interactive=False,
+                 value="Load model first",
+             )
+
+         # Column 3: Query
+         with gr.Column(scale=1):
+             gr.Markdown("### 🔎 Query Document")
+             query_input = gr.Textbox(
+                 label="Enter Query",
+                 placeholder="e.g., financial report, organizational structure...",
+                 lines=2,
+                 interactive=False,
+             )
+
+             with gr.Row():
+                 top_k_slider = gr.Slider(
+                     minimum=1,
+                     maximum=10,
+                     value=5,
+                     step=1,
+                     label="Top K",
+                     scale=2,
+                     interactive=False,
+                 )
+                 heatmap_checkbox = gr.Checkbox(
+                     label="Heatmaps",
+                     value=False,
+                     visible=False,
+                     scale=1,
+                 )
+
+             query_btn = gr.Button("🔍 Search", variant="primary", size="sm", interactive=False)
+
+     gr.Markdown("---")
+
+     # Results section (always visible after model load)
+     with gr.Column(visible=False) as main_interface:
+         gr.Markdown("### 📊 Results")
+
+         with gr.Row(equal_height=True):
+             with gr.Column(scale=1, visible=False) as bigemma_column:
+                 bigemma_results = gr.Markdown(
+                     value="*BiGemma3 results will appear here...*",
+                 )
+                 bigemma_gallery = gr.Gallery(
+                     label="BiGemma3 - Top Retrieved Pages",
+                     show_label=True,
+                     columns=2,
+                     height="auto",
+                     object_fit="contain",
+                 )
+             with gr.Column(scale=1, visible=False) as colgemma_column:
+                 colgemma_results = gr.Markdown(
+                     value="*ColGemma3 results will appear here...*",
+                 )
+                 colgemma_gallery = gr.Gallery(
+                     label="ColGemma3 - Top Retrieved Pages",
+                     show_label=True,
+                     columns=2,
+                     height="auto",
+                     object_fit="contain",
+                 )
+
+     # Tips
+     with gr.Accordion("💡 Tips", open=False):
+         gr.Markdown(
+             """
+             - **Both models**: Compare results side-by-side
+             - **Scores**: BiGemma3 uses cosine similarity (-1 to 1), ColGemma3 uses MaxSim (higher is better)
+             - **Heatmaps**: Enable to visualize ColGemma3 attention patterns (brighter = higher attention)
+             """
+         )
+
+     # Event handlers - Model Management
+     load_model_btn.click(
+         fn=load_models_with_progress,
+         inputs=[model_select],
+         outputs=[
+             model_status,
+             loading_info,
+             main_interface,
+             bigemma_column,
+             colgemma_column,
+             heatmap_checkbox,
+             pdf_upload,
+             index_btn,
+             query_input,
+             top_k_slider,
+             query_btn,
+             index_status,
+         ],
+     )
+
+     unload_model_btn.click(
+         fn=unload_models_and_hide_ui,
+         outputs=[
+             model_status,
+             loading_info,
+             main_interface,
+             bigemma_column,
+             colgemma_column,
+             heatmap_checkbox,
+             pdf_upload,
+             index_btn,
+             query_input,
+             top_k_slider,
+             query_btn,
+             index_status,
+         ],
+     )
+
+     # Event handlers - Main Interface
+     def index_with_current_models(pdf_file):
+         """Index document with currently loaded models."""
+         if pdf_file is None:
+             yield "⚠️ Please upload a PDF document first."
+             return
+
+         model_choice = get_model_choice_from_loaded()
+         if not model_choice:
+             yield "⚠️ No models loaded. Please load a model first."
+             return
+
+         # Use generator from index_document
+         for status in index_document(pdf_file, model_choice):
+             yield status
+
+     def query_with_current_models(query, top_k, show_heatmap):
+         """Query with currently loaded models."""
+         model_choice = get_model_choice_from_loaded()
+         if not model_choice:
+             return "⚠️ No models loaded. Please load a model first.", None, None, None
+
+         return query_documents(query, model_choice, top_k, show_heatmap)
+
+     index_btn.click(
+         fn=index_with_current_models,
+         inputs=[pdf_upload],
+         outputs=[index_status],
+     )
+
+     query_btn.click(
+         fn=query_with_current_models,
+         inputs=[query_input, top_k_slider, heatmap_checkbox],
+         outputs=[bigemma_results, colgemma_results, bigemma_gallery, colgemma_gallery],
+     )
+
+ # Enable queue for handling multiple requests
+ demo.queue(max_size=20)
+
  # Launch the app
  if __name__ == "__main__":
+     demo.launch()
requirements.txt CHANGED
@@ -1,17 +1,12 @@
- git+https://github.com/huggingface/transformers
- torch
- lancedb
- colpali-engine
- pypdf
- pymupdf
- timm
- sentence-transformers
- tiktoken
- docling==1.16.1
- pdf2image
- GPUtil
- accelerate==0.30.1
- git+https://github.com/adithya-s-k/VARAG
+ spaces
+ git+https://github.com/adithya-s-k/colpali.git
+ gradio
  pdf2image
+ Pillow
+ matplotlib
+ seaborn
  einops
+ numpy
+ torch
  torchvision
+ accelerate
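A quick way to confirm the trimmed dependency set resolves is an import smoke test; note that pip names differ from module names (`Pillow` imports as `PIL`, and the colpali checkout provides `colpali_engine`, which `app.py` above imports). A sketch:

```python
import importlib

# pip names vs import names: Pillow -> PIL; the colpali git dependency -> colpali_engine
for mod in ["gradio", "spaces", "torch", "torchvision", "pdf2image", "PIL",
            "matplotlib", "seaborn", "einops", "numpy", "colpali_engine", "accelerate"]:
    importlib.import_module(mod)
    print(f"ok: {mod}")
```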