import json
import re
import numpy as np
import pandas as pd
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import gradio as gr
import torch
from huggingface_hub import login
import os
# Sanity Check
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")
# Explicit login
login(token=hf_token)
# --- Configuration ---
print("Loading RAG system on your device...")
# Load Knowledge base
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data.json"
# Load data (the precomputed preload file is expected to exist in the Space)
if not os.path.exists(PRELOAD_FILE_PATH):
    raise FileNotFoundError(f"Preloaded data not found at {PRELOAD_FILE_PATH}.")
print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)
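
# If the preload file were ever missing, the raw JSONL source could be parsed
# instead (a sketch, assuming data.jsonl holds one JSON document per line):
#
# with open(FILE_PATH, "r", encoding="utf-8") as f:
#     data = [json.loads(line) for line in f]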
# The preload file is expected to hold a flat list of document strings
documents = data
# Embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)
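
# Embeddings are recomputed on every restart. Caching them to disk is one
# option (a sketch; EMB_CACHE is a hypothetical path, not part of this Space):
#
# EMB_CACHE = "embeddings.npy"
# if os.path.exists(EMB_CACHE):
#     embeddings = np.load(EMB_CACHE)
# else:
#     embeddings = embedding_model.encode(documents, convert_to_numpy=True)
#     np.save(EMB_CACHE, embeddings)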
# Use a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # store each embedding as a list
    }
)
# Load LLM pipeline
llm = pipeline(
    "text-generation",
    model="google/gemma-3-4b-it",  # note: the Space may not have enough storage for this model
    token=hf_token,
)
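
# If the Space runs low on memory or storage, a half-precision load is one
# option (a sketch; torch_dtype and device_map are standard transformers
# pipeline kwargs, but bfloat16 support on this hardware is an assumption):
#
# llm = pipeline(
#     "text-generation",
#     model="google/gemma-3-4b-it",
#     token=hf_token,
#     torch_dtype=torch.bfloat16,
#     device_map="auto",
# )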
def clean_query_with_llm(query):
    """Rewrite the user's question into a short search query for retrieval."""
    prompt_content = f"""
Below is a new question asked by the user that needs to be answered by searching a knowledge base.
You have access to an SFU IT Knowledge Base index with hundreds of chunked documents.
Generate a search query based on the user's question.
If you cannot generate a search query, return just the number 0.
User's Question:
{query}
Search Query:
"""
    response = llm(
        prompt_content,
        max_new_tokens=100,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
# Retrieve with pandas
def retrieve_with_pandas(query: str, top_k: int = 5):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
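
# A vectorized variant of the same retrieval (a sketch, not wired into the
# app): stacking the stored embeddings into one matrix replaces the per-row
# apply with a single matrix-vector product.
def retrieve_vectorized(query: str, top_k: int = 5):
    query_embedding = embedding_model.encode([query], convert_to_numpy=True)[0]
    matrix = np.vstack(df["Embedding"].to_numpy())
    sims = (matrix @ query_embedding) / (
        np.linalg.norm(matrix, axis=1) * np.linalg.norm(query_embedding)
    )
    top_idx = np.argsort(-sims)[:top_k]
    return df.iloc[top_idx][["Document"]].assign(Similarity=sims[top_idx])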
def generate_with_rag(query, top_k=5):
    # goSFU-specific cleanup (case-insensitive, so "Gosfu" etc. also become "goSFU")
    query = re.sub("gosfu", "goSFU", query, flags=re.IGNORECASE)

    # Retrieve: rewrite the question into a search query, falling back to the
    # raw question when the LLM returns "0" (its "cannot generate" signal)
    search_query = clean_query_with_llm(query)
    if search_query == "0":
        search_query = query
    results = retrieve_with_pandas(search_query, top_k=top_k)

    # Join the retrieved documents into a single context string
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(results["Document"].tolist())

    # Build a clean prompt
    prompt_content = f"""
You are an SFU IT helpdesk chatbot.
Your task is to answer SFU IT-related questions, such as how to access various technology services, or general troubleshooting.
Below is a new question asked by the user, along with article chunks related to it.
If the user asked a question, answer it with short step-by-step instructions, considering all the articles below.
If there are links in the articles, provide those links in your answer.
If the user asked a question and the answer is not in the contexts, say that you're sorry you can't help them, and suggest contacting SFU IT at 778-782-8888 or submitting an inquiry ticket at https://www.sfu.ca/information-systems/get-help.html
If the user DID NOT ask a question, be friendly and ask how you can help them.
Do not recommend, suggest, or provide any advice on anything that is not related to SFU or SFU IT.
If the user asked about mental health or is seeking medical advice, redirect them to SFU Health & Counselling at https://www.sfu.ca/students/health.html
Do not ask the user any follow-up questions after answering them.
Question:
{query}
-- Start of Articles --
{context_str}
-- End of Articles --
Answer:"""

    # Call the LLM
    response = llm(
        prompt_content,
        max_new_tokens=500,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
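
# Quick smoke test of the full RAG path (a sketch; the sample question is
# illustrative, not from the knowledge base):
#
# print(generate_with_rag("How do I reset my SFU computing ID password?"))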
def chat_fn(message, history):
    """
    gr.ChatInterface callback; history is unused because each answer is
    generated fresh from retrieval.
    """
    answer = generate_with_rag(message, top_k=5)
    return answer
demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)
if __name__ == "__main__":
    demo.launch()  # pass share=True for a public shareable link