import json
import os

import numpy as np
import pandas as pd

import gradio as gr
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Sanity Check
hf_token = os.getenv("V2_TOKEN")
if hf_token is None:
    raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")

# Explicit login
login(token=hf_token)

# --- Configuration ---
print("Loading RAG system on your device...")

# Load knowledge base
FILE_PATH = "data.jsonl"
PRELOAD_FILE_PATH = "preload-data"

# Prefer the cached document list, then the raw JSONL file, and fall back
# to dummy data if neither exists.
if os.path.exists(PRELOAD_FILE_PATH):
    print(f"Found preloaded data! Using {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
        documents = json.load(f)
elif os.path.exists(FILE_PATH):
    print(f"No preloaded data found. Using {FILE_PATH}...")
    data = pd.read_json(path_or_buf=FILE_PATH, lines=True)
    documents = list(data["text"])
    # Cache the extracted texts so the JSONL parse is skipped on the next start
    print(f"Creating {PRELOAD_FILE_PATH}...")
    with open(PRELOAD_FILE_PATH, "w", encoding="utf-8") as fp:
        json.dump(documents, fp)
else:
    # Dummy data for testing if you don't have the file yet
    print(f"Warning: {FILE_PATH} not found. Using dummy data.")
    documents = ["To reset your password, visit password.sfu.ca and click 'Forgot Password'."]

# Embeddings
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedding_model.encode(documents, convert_to_numpy=True)
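# Note (assumption): encode(..., normalize_embeddings=True) would return
# unit-length vectors, making the cosine similarity below a plain dot product.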

# Store each document alongside its embedding in a pandas DataFrame
df = pd.DataFrame(
    {
        "Document": documents,
        "Embedding": list(embeddings),  # one vector per row, stored as a list entry
    }
)

# Load LLM pipeline. flan-t5 is an encoder-decoder (seq2seq) model, so it
# needs the "text2text-generation" task, not "text-generation".
llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-xl",  # the xl checkpoint may not fit the Space's storage
    token=hf_token,
)

# Retrieval with pandas
def retrieve_with_pandas(query: str, top_k: int = 10):
    """
    Embed the query, compute cosine similarity to each document,
    and return the top_k most similar documents (as a DataFrame).
    """
    query_embedding = embedding_model.encode([query])[0]

    def cosine_sim(x):
        x = np.array(x)
        return float(
            np.dot(query_embedding, x)
            / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
        )

    df["Similarity"] = df["Embedding"].apply(cosine_sim)
    results = df.sort_values(by="Similarity", ascending=False).head(top_k)
    return results[["Document", "Similarity"]]
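
# A minimal vectorized sketch (assumption: it reuses the same module-level
# `df` and `embedding_model`): stacking the stored embeddings once and
# scoring with a single matrix product avoids the per-row Python loop that
# DataFrame.apply incurs.
def retrieve_vectorized(query: str, top_k: int = 10):
    q = embedding_model.encode([query], convert_to_numpy=True)[0]
    mat = np.vstack(df["Embedding"].to_numpy())  # shape: (n_docs, dim)
    sims = (mat @ q) / (np.linalg.norm(mat, axis=1) * np.linalg.norm(q))
    order = np.argsort(sims)[::-1][:top_k]  # indices of the top_k highest scores
    return df.iloc[order].assign(Similarity=sims[order])[["Document", "Similarity"]]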


def generate_with_rag(query, top_k=5):
    # Retrieve the top_k most similar documents (pass top_k through;
    # otherwise the retriever falls back to its own default)
    docs = retrieve_with_pandas(query, top_k=top_k)
    context_series = docs["Document"]

    # Turn the Series into a single string of text
    # (each doc separated by a divider)
    context_str = "\n\n---\n\n".join(context_series.tolist())

    # Build a clean prompt
    input_text = f"""You are an IT helpdesk assistant.
If the user asked a question, answer it with detailed step-by-step instructions, considering all the articles below.
If the user asked a question whose answer is not in the articles, say you don't know and suggest contacting SFU IT.
If the user DID NOT ask a question, be friendly and ask how you can help them.

Question:
{query}

-- Start of Articles --
{context_str}
-- End of Articles --

Answer:"""

    # Call the LLM (a text2text pipeline returns only the generated answer,
    # so no return_full_text flag is needed here)
    response = llm(
        input_text,
        max_new_tokens=1024,
        do_sample=False,
    )
    return response[0]["generated_text"].strip()
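
# Quick smoke test (hypothetical query) before wiring up the UI:
#   print(generate_with_rag("How do I reset my SFU computing ID password?", top_k=3))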

def chat_fn(message, history):
    """
    Gradio ChatInterface callback; history is accepted but unused.
    """
    return generate_with_rag(message, top_k=2)


demo = gr.ChatInterface(
    fn=chat_fn,
    title="SFU IT Chatbot",
    description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
)

if __name__ == "__main__":
    demo.launch()  # pass share=True here to get a public link