timothytzkung committed on
Commit 4e657f7 · verified · 1 Parent(s): 1a48548

Rollback to Gemma-3-4B commit

Files changed (1)
app.py +100 -60
app.py CHANGED
@@ -1,113 +1,153 @@
  import json
  import numpy as np
  import pandas as pd
- from transformers import pipeline, BitsAndBytesConfig
  from sentence_transformers import SentenceTransformer
  import gradio as gr
  import torch
  from huggingface_hub import login
  import os

- # --- Setup & Configuration ---
  hf_token = os.getenv("V2_TOKEN")
  if hf_token is None:
-     raise RuntimeError("V2_TOKEN environment variable is not set.")

  login(token=hf_token)
- PRELOAD_PARQUET = "preload.parquet"

- print("Loading RAG system...")

- # Optimization: ensure we aren't re-embedding on every restart if possible.
  FILE_PATH = "data.jsonl"
  PRELOAD_FILE_PATH = "preload-data.json"

- print(f"Loading data from {PRELOAD_FILE_PATH}...")
  with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
-     documents = json.load(f)
-
- # Load embedding model
- embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-
- # Pre-calculate embeddings once and stack them into a numpy matrix for fast math
- print("Generating/Loading embeddings...")
- doc_embeddings = embedding_model.encode(documents, convert_to_numpy=True)

- # Normalize embeddings now so we only need a dot product later (faster than a full cosine calculation every time)
- doc_embeddings = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)

- # Create a DataFrame just for text storage (we use numpy for the math)
- df = pd.DataFrame({"Document": documents})

- # Load LLM
- print("Loading LLM...")
  llm = pipeline(
      "text-generation",
-     model="google/gemma-3-1b-it",
-     token=hf_token,
  )

- # --- Optimized Retrieval Function ---
- def retrieve_vectorized(query: str, top_k: int = 5):
      """
-     Uses matrix multiplication instead of row-by-row iteration.
      """
-     # Encode query
      query_embedding = embedding_model.encode([query])[0]
-
-     # Normalize query
-     query_norm = query_embedding / np.linalg.norm(query_embedding)
-     scores = np.dot(doc_embeddings, query_norm)
-     top_indices = np.argsort(scores)[::-1][:top_k]
-
-     # Retrieve documents
-     results = df.iloc[top_indices].copy()
-     return results["Document"].tolist()

- # --- Main Generation Function ---
- def generate_with_rag(query):
-     # goSFU-specific cleaning
      if "gosfu" in query.lower():
          query = query.replace("gosfu", "goSFU")

      # Retrieve
-     retrieved_docs = retrieve_vectorized(query, top_k=5)
-     context_str = "\n\n---\n\n".join(retrieved_docs)

-     # Prompt
      prompt_content = f"""
  You are an SFU IT helpdesk chatbot.
- Your task is to answer SFU IT related questions.
-
- Context Articles:
- {context_str}
-
- User Question: {query}
-
- Instructions:
- 1. Answer the question using ONLY the Context Articles above.
- 2. Provide step-by-step instructions and include relevant links found in the text.
- 3. If the answer is not in the context, suggest contacting SFU IT at 778-782-8888.
- 4. If the user is asking about mental health, redirect to SFU Health & Counselling.
-
- Answer:"""
-
      response = llm(
          prompt_content,
-         max_new_tokens=300,  # Reduced token count for speed
          do_sample=False,
          return_full_text=False
      )
      return response[0]["generated_text"].strip()

  def chat_fn(message, history):
-     return generate_with_rag(message)

  demo = gr.ChatInterface(
      fn=chat_fn,
-     title="SFU IT Chatbot (Optimized)",
      description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
  )

  if __name__ == "__main__":
      demo.launch()
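The removed version above relies on one trick worth a standalone illustration: because the document embeddings are L2-normalized once at startup, cosine similarity against every document reduces to a single matrix-vector dot product. A minimal, self-contained sketch of that pattern follows; the sample documents and query are illustrative stand-ins, not taken from the app's knowledge base.

import numpy as np
from sentence_transformers import SentenceTransformer

embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Illustrative stand-ins for the preloaded SFU IT documents
documents = [
    "How to reset your SFU computing ID password.",
    "Connecting to the SFU VPN from off campus.",
    "Setting up SFU email on a mobile device.",
]

# Embed once and normalize each row, as the removed code does
doc_embeddings = embedding_model.encode(documents, convert_to_numpy=True)
doc_embeddings = doc_embeddings / np.linalg.norm(doc_embeddings, axis=1, keepdims=True)

# Normalize the query the same way, so dot product == cosine similarity
query_embedding = embedding_model.encode(["how do I reset my password"])[0]
query_norm = query_embedding / np.linalg.norm(query_embedding)

scores = np.dot(doc_embeddings, query_norm)   # all similarities in one shot
top_indices = np.argsort(scores)[::-1][:2]    # best matches first
for i in top_indices:
    print(f"{scores[i]:.3f}  {documents[i]}")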
 
  import json
  import numpy as np
  import pandas as pd
+
+ from transformers import pipeline
  from sentence_transformers import SentenceTransformer
  import gradio as gr
  import torch
  from huggingface_hub import login
  import os

+ # Sanity check
  hf_token = os.getenv("V2_TOKEN")
  if hf_token is None:
+     raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

+ # Explicit login
  login(token=hf_token)

+ # --- Configuration ---
+ print("Loading RAG system on your device...")

+ # Load knowledge base
  FILE_PATH = "data.jsonl"
  PRELOAD_FILE_PATH = "preload-data.json"

+ # Load data
+ print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
  with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
+     data = json.load(f)

+ # Set data
+ documents = data

+ # Embeddings
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+ embeddings = embedding_model.encode(documents, convert_to_numpy=True)
+
+ # Use a pandas DataFrame
+ df = pd.DataFrame(
+     {
+         "Document": documents,
+         "Embedding": list(embeddings),  # store as list
+     }
+ )

+ # Load LLM pipeline
  llm = pipeline(
      "text-generation",
+     model="google/gemma-3-4b-it",  # Might not have enough storage for this model
+     token=hf_token
  )

+ def clean_query_with_llm(query):
+     prompt_content = f"""
+ Below is a new question asked by the user that needs to be answered by searching in a knowledge base.
+ You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
+ Generate a search query based on the user's question.
+ If you cannot generate a search query, return just the number 0.
+ User's Question:
+ {query}
+ Search Query:
+ """
+
+     response = llm(
+         prompt_content,
+         max_new_tokens=100,
+         do_sample=False,
+         return_full_text=False
+     )
+     return response[0]["generated_text"].strip()
+
+
+ # Retrieve with pandas
+ def retrieve_with_pandas(query: str, top_k: int = 5):
      """
+     Embed the query, compute cosine similarity to each document,
+     and return the top_k most similar documents (as a DataFrame).
      """
      query_embedding = embedding_model.encode([query])[0]

+     def cosine_sim(x):
+         x = np.array(x)
+         return float(
+             np.dot(query_embedding, x)
+             / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
+         )
+
+     df["Similarity"] = df["Embedding"].apply(cosine_sim)
+     results = df.sort_values(by="Similarity", ascending=False).head(top_k)
+     return results[["Document", "Similarity"]]
+
+
+ def generate_with_rag(query, top_k=5):
+     # goSFU-specific cleaning
      if "gosfu" in query.lower():
          query = query.replace("gosfu", "goSFU")

      # Retrieve
+     search_query = clean_query_with_llm(query)
+     results = retrieve_with_pandas(search_query, top_k=top_k)
+
+     # Turn the Series into a single string of text
+     # (each doc separated by a divider)
+     context_str = "\n\n---\n\n".join(results["Document"].tolist())

+     # Build a clean prompt
      prompt_content = f"""
  You are an SFU IT helpdesk chatbot.
+ Your task is to answer SFU IT related questions such as accessing various technology services or general troubleshooting.
+ Below is a new question asked by the user, along with article chunks related to that question.
+ If the user asked a question, answer it with short step-by-step instructions, considering all the articles below.
+ If there are links in the articles, provide those links in your answer.
+ If the user asked a question and the answer is not in the context, say that you're sorry you can't help them and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
+ If the user DID NOT ask a question, be friendly and ask how you can help them.
+ Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
+ If the user asked something relating to mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
+ Do not ask the user any follow-up questions after answering them.
+
+ Question:
+ {query}
+ -- Start of Articles --
+ {context_str}
+ -- End of Articles --
+ Answer:"""
+
+     # Call the LLM
      response = llm(
          prompt_content,
+         max_new_tokens=500,
          do_sample=False,
          return_full_text=False
      )
      return response[0]["generated_text"].strip()
+

  def chat_fn(message, history):
+     """
+     ChatInterface callback.
+     """
+     answer = generate_with_rag(message, top_k=5)
+     return answer
+

  demo = gr.ChatInterface(
      fn=chat_fn,
+     title="SFU IT Chatbot",
      description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
  )

+ # share=True
  if __name__ == "__main__":
      demo.launch()
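To sanity-check the new pandas-based retrieval without launching the Gradio UI, a quick interactive session could look like the sketch below; the query string is only an example, and importing app runs the module-level login and model loading first.

# e.g. in a Python shell inside the Space
from app import retrieve_with_pandas

results = retrieve_with_pandas("reset goSFU password", top_k=3)
print(results)  # DataFrame with "Document" and "Similarity" columns

The trailing "# share=True" comment presumably refers to demo.launch(share=True), which would make Gradio serve a temporary public link to the app.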