Spaces:
Running
Running
File size: 4,047 Bytes
d84e52f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import json
import numpy as np
import pandas as pd
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import gradio as gr
import torch
from huggingface_hub import login
import os
# Sanity check: the Space must supply a Hugging Face token through V2_TOKEN.
hf_token = os.getenv("V2_TOKEN")
# An empty value is as unusable as a missing one, so reject both here rather
# than letting login() fail later with a less specific error.
if not hf_token:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login so later Hub downloads are authenticated with this token.
login(token=hf_token)
# --- Configuration ---
print("Loading RAG system on your device...")

# Knowledge base: FILE_PATH is the raw JSON-lines source (one object per line
# with a "text" field); PRELOAD_FILE_PATH caches the extracted texts as a
# plain JSON list so later start-ups can skip the pandas parse.
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data"

# Resolve `documents` (a list of strings) from the best available source.
# The cache is checked first so it is honoured even when the raw file is gone.
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)
    documents = data
elif os.path.exists(FILE_PATH):
    print(f"No Preloaded Data Found. Using {FILE_PATH}...")
    with open(FILE_PATH, "r", encoding="utf-8") as f:
        data = pd.read_json(path_or_buf=f, lines=True)
    documents = list(data["text"])
    # Write the cache with the same encoding it is later read with.
    print(f"Creating {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "w", encoding="utf-8") as fp:
        json.dump(documents, fp)
else:
    # Dummy data for testing when the knowledge base file is not available.
    # It is deliberately NOT cached, so a real data.jsonl added later is
    # still picked up on the next start.
    print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
    data = [{"text": "To reset your password, visit password.sfu.ca and click 'Forgot Password'."}]
    documents = [row["text"] for row in data]
# Embeddings: encode every document once at start-up.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)

# Keep each text next to its vector in a DataFrame; the embedding array is
# split into one entry per row so it fits in a single column.
_embedding_rows = [vector for vector in embeddings]
df = pd.DataFrame({"Document": documents, "Embedding": _embedding_rows})
# Load the LLM pipeline used to generate answers.
# The checkpoint is large and might not fit in the available storage.
# NOTE(review): confirm that the "text-generation" task is the right one for
# this checkpoint; verify against the transformers pipeline documentation.
llm = pipeline(task="text-generation", model="google/flan-t5-xl", token=hf_token)
# Retrieve w Pandas
def retrieve_with_pandas(query: str, top_k: int = 10) -> pd.DataFrame:
    """Return the ``top_k`` documents most similar to ``query``.

    The query is embedded with the module-level ``embedding_model`` and scored
    against every row of the module-level ``df`` by cosine similarity.

    Args:
        query: Text to search for.
        top_k: Maximum number of rows to return.

    Returns:
        A DataFrame with columns ``Document`` and ``Similarity``, sorted by
        descending similarity and keeping the original row index of ``df``.
    """
    if df.empty:
        # Nothing to score; keep the documented column layout.
        return pd.DataFrame({"Document": [], "Similarity": []})

    query_embedding = np.asarray(embedding_model.encode([query])[0], dtype=float)

    # Score all documents in one vectorized pass instead of one Python call
    # per row.
    doc_matrix = np.vstack(df["Embedding"].to_list()).astype(float)
    dots = doc_matrix @ query_embedding
    norms = np.linalg.norm(doc_matrix, axis=1) * np.linalg.norm(query_embedding)
    # A zero-length vector has no direction: report similarity 0.0 rather
    # than dividing by zero and producing NaN.
    similarities = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    # Build a per-call frame instead of writing a "Similarity" column into the
    # shared module-level DataFrame, so overlapping requests cannot overwrite
    # each other's scores.
    scored = df[["Document"]].assign(Similarity=similarities)
    return scored.sort_values(by="Similarity", ascending=False).head(top_k)
def generate_with_rag(query, top_k=5):
    """Answer ``query`` with the LLM, using retrieved documents as context.

    Args:
        query: The user's message.
        top_k: Number of retrieved documents to include in the prompt.

    Returns:
        The ``generated_text`` of the first pipeline result, stripped of
        leading and trailing whitespace.
    """
    # Forward top_k so the caller actually controls how much context is
    # retrieved; without it the retriever's own default is always used.
    docs = retrieve_with_pandas(query, top_k=top_k)
    context_series = docs["Document"] if "Document" in docs else docs

    # Collapse the documents into one string, each separated by a divider.
    context_str = "\n\n---\n\n".join(context_series.tolist())

    # Build the prompt: instructions, the question, then the retrieved articles.
    input_text = f"""You are an IT helpdesk assistant.
If the user asked a question, answer the user's question with detailed step by step instructions: consider all the articles below.
If the user asked a question and the answer is not in the contexts, say you don't know and suggest contacting SFU IT.
If the user DID NOT ask a question, be friendly and ask how you can help them.
Question:
{query}
-- Start of Articles --
{context_str}
-- End of Articles --
Answer:"""

    # Greedy decoding (do_sample=False); only the newly generated text is
    # requested back, not the prompt.
    response = llm(
        input_text,
        max_new_tokens=1024,
        do_sample=False,
        return_full_text=False,
    )
    return response[0]["generated_text"].strip()
def chat_fn(message, history):
    """Chat interface callback: answer ``message`` via the RAG pipeline.

    ``history`` is accepted because the chat interface passes it, but it is
    not used; each message is answered on its own with ``top_k=2``.
    """
    return generate_with_rag(message, top_k=2)
# Chat UI: every user message is routed through chat_fn.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

# share=True is left disabled.
# Launch only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()