Spaces:

timothytzkung
/

SFU-IT-Chatbot-V2

Running

App Files Files Community

timothytzkung committed 10 days ago

Commit

d84e52f

verified ·

1 Parent(s): 9beca5a

Init commit

Browse files

Files changed (6) hide show

README.md +18 -12
app.py +138 -0
data.jsonl +0 -0
preload-data +0 -0
requirements.txt +22 -0
unchunked_data.json +0 -0

README.md CHANGED Viewed

@@ -1,12 +1,18 @@
----
-title: SFU IT Chatbot V2
-emoji: 🦀
-colorFrom: indigo
-colorTo: pink
-sdk: gradio
-sdk_version: 6.0.1
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+# SFU IT Chatbot w/ Ollama
+This chatbot runs locally on your computer! Yay! This RAG chatbot uses the Gemma3-4b LLM for generation and all-MiniLM-L6-v2 for vector embeddings.
+To run the app, you need Ollama installed, which can be downloaded here: <br>
+https://ollama.com/download
+Then, you need to download Gemma3-4b from your terminal:<br>
+`ollama pull gemma3:4b`
+Next, set up a virtual environment:<br>
+`python3 -m venv venv`<br>
+`source venv/bin/activate`
+Then install requirements:<br>
+`pip install -r requirements.txt`
+Now run the app (Note: This uses about 3-4 GB of RAM, and `app.py` requires the `V2_TOKEN` environment variable to be set to a Hugging Face token):<br>
+`python app.py`

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+import json
+import numpy as np
+import pandas as pd
+from transformers import pipeline
+from sentence_transformers import SentenceTransformer
+import gradio as gr
+import torch
+from huggingface_hub import login
+import os
+# Sanity Check
+hf_token = os.getenv("V2_TOKEN")
+if hf_token is None:
+    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")
+# Explicit login
+login(token=hf_token)
+# --- Configuration ---
+print("Loading RAG system on your device...")
+# Load Knowledge base
+FILE_PATH = "data.jsonl"
+PRELOAD_FILE_PATH = "preload-data"
+# File path readings
+if not os.path.exists(FILE_PATH):
+    # Dummy data for testing if you don't have the file yet
+    print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
+    data = [{"text": "To reset your password, visit password.sfu.ca and click 'Forgot Password'."}]
+elif os.path.exists(PRELOAD_FILE_PATH):
+    print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
+    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
+        data = json.load(f)
+else:
+    with open(FILE_PATH, "r", encoding="utf-8") as f:
+        print(f"No Preloaded Data Found. Using {FILE_PATH}...")
+        data = pd.read_json(path_or_buf=f, lines=True)
+# Writes in data embedding
+if not os.path.exists(PRELOAD_FILE_PATH):
+    documents = list(data["text"])
+    print(f"Creating {PRELOAD_FILE_PATH}...")
+    with open("preload-data", "w") as fp:
+        json.dump(documents, fp)
+else:
+    documents = data
+# Embeddings
+embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+embeddings = embedding_model.encode(documents, convert_to_numpy=True)
+# Use pandas dataframe
+df = pd.DataFrame(
+    {
+        "Document": documents,
+        "Embedding": list(embeddings),  # store as list
+    }
+)
+# Load LLM Pipeline
+llm = pipeline(
+    "text-generation",
+    model="google/flan-t5-xl", # Might not have enough storage ngl
+    token=hf_token
+)
+# Retrieve w Pandas
+def retrieve_with_pandas(query: str, top_k: int = 10):
+    """
+    Embed the query, compute cosine similarity to each document,
+    and return the top_k most similar documents (as a DataFrame).
+    """
+    query_embedding = embedding_model.encode([query])[0]
+    def cosine_sim(x):
+        x = np.array(x)
+        return float(
+            np.dot(query_embedding, x)
+            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
+        )
+    df["Similarity"] = df["Embedding"].apply(cosine_sim)
+    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
+    return results[["Document", "Similarity"]]
+def generate_with_rag(query, top_k=5):
+    # Retrieve context as a pandas Series of document texts
+    docs = retrieve_with_pandas(query)  # whatever you currently return
+    context_series = docs["Document"] if "Document" in docs else docs
+    # Turn the Series into a single string of text
+    # (each doc separated by a divider)
+    context_str = "\n\n---\n\n".join(context_series.tolist())
+    # Build a clean prompt
+    input_text = f"""You are an IT helpdesk assistant.
+If the user asked a question, answer the user's question with detailed step by step instructions: consider all the articles below.
+If the user asked a question and the answer is not in the contexts, say you don't know and suggest contacting SFU IT.
+If the user DID NOT ask a question, be friendly and ask how you can help them.
+Question:
+{query}
+-- Start of Articles --
+{context_str}
+-- End of Articles --
+Answer:"""
+    # Call the LLM
+    response = llm(
+        input_text,
+        max_new_tokens=1024,
+        do_sample=False,
+        return_full_text=False
+    )
+    return response[0]["generated_text"].strip()
+def chat_fn(message, history):
+    """
+    Chat Interface callback
+    """
+    answer = generate_with_rag(message, top_k=2)
+    return answer
+demo = gr.ChatInterface(
+    fn=chat_fn,
+    title="SFU IT Chatbot",
+    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
+)
+# share=True
+if __name__ == "__main__":
+    demo.launch()

data.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

preload-data ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt ADDED Viewed

	@@ -0,0 +1,22 @@

+gradio==5.6.0
+gradio_client==1.4.3
+huggingface-hub
+keras
+libclang
+numpy
+pandas
+pydantic
+pydantic_core
+safetensors
+scikit-learn
+scipy
+sentence-transformers
+tensorboard
+tensorflow
+tf_keras
+tokenizers
+torch
+transformers
+wheel

unchunked_data.json ADDED Viewed

The diff for this file is too large to render. See raw diff