timothytzkung committed on
Commit d84e52f · verified · 1 Parent(s): 9beca5a

Init commit

Files changed (6)
  1. README.md +18 -12
  2. app.py +138 -0
  3. data.jsonl +0 -0
  4. preload-data +0 -0
  5. requirements.txt +22 -0
  6. unchunked_data.json +0 -0
README.md CHANGED
@@ -1,12 +1,18 @@
- ---
- title: SFU IT Chatbot V2
- emoji: 🦀
- colorFrom: indigo
- colorTo: pink
- sdk: gradio
- sdk_version: 6.0.1
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
1
+ # SFU IT Chatbot w/ Ollama
2
+ This chatbot runs locally on your computer! Yay! This RAG Chatbot uses Gemma3-4b LLM and all-MiniLM-L6-v2 for vector embedding.
3
+
4
+ To run the app, you need Ollama installed which can be found here: <br>
5
+ https://ollama.com/download
6
+
7
+ Then, you need to download Gemma3-4b from your terminal:<br>
8
+ `ollama pull gemma3:4b`
9
+
10
+ Now, first set up virtual environment:<br>
11
+ `python3 -m venv venv`<br>
12
+ `source venv/bin/activate`
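+ (On Windows, activate with `venv\Scripts\activate` instead.)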
+
+ Then install the requirements:<br>
+ `pip install -r requirements.txt`
+
+ Now run the app (note: it uses about 3-4 GB of RAM):<br>
+ `python app.py`
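+
+ If the chatbot can't reach the model, a quick sanity check is to confirm Ollama is running and Gemma3-4b is downloaded:<br>
+ `ollama list`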
app.py ADDED
@@ -0,0 +1,138 @@
+ import json
+ import os
+
+ import numpy as np
+ import pandas as pd
+
+ from transformers import pipeline
+ from sentence_transformers import SentenceTransformer
+ import gradio as gr
+ import torch
+ from huggingface_hub import login
+
+ # Sanity check: this Space must provide a Hugging Face token via the V2_TOKEN secret
+ hf_token = os.getenv("V2_TOKEN")
+ if hf_token is None:
+     raise RuntimeError("V2_TOKEN environment variable is not set in this Space.")
+
+ # Explicit login: caches the token so later Hub downloads authenticate with it
+ login(token=hf_token)
+
+ # --- Configuration ---
+ print("Loading RAG system on your device...")
+
+ # Knowledge base locations
+ FILE_PATH = "data.jsonl"
+ PRELOAD_FILE_PATH = "preload-data"
+
+ # Load the knowledge base: prefer the preload cache, then the raw JSONL,
+ # then fall back to dummy data for testing
+ if os.path.exists(PRELOAD_FILE_PATH):
+     print(f"Found Preloaded Data! Using {PRELOAD_FILE_PATH}...")
+     with open(PRELOAD_FILE_PATH, "r", encoding="utf-8") as f:
+         data = json.load(f)  # a plain list of document strings
+ elif not os.path.exists(FILE_PATH):
+     # Dummy data for testing if you don't have the file yet
+     print(f"Warning: {FILE_PATH} not found. Creating dummy data.")
+     data = pd.DataFrame([{"text": "To reset your password, visit password.sfu.ca and click 'Forgot Password'."}])
+ else:
+     with open(FILE_PATH, "r", encoding="utf-8") as f:
+         print(f"No Preloaded Data Found. Using {FILE_PATH}...")
+         data = pd.read_json(path_or_buf=f, lines=True)
+
+ # Cache the plain document list so later runs can skip JSONL parsing
+ if not os.path.exists(PRELOAD_FILE_PATH):
+     documents = list(data["text"])
+     print(f"Creating {PRELOAD_FILE_PATH}...")
+     with open(PRELOAD_FILE_PATH, "w") as fp:
+         json.dump(documents, fp)
+ else:
+     documents = data
+
+ # Embeddings
+ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
+ embeddings = embedding_model.encode(documents, convert_to_numpy=True)
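+ # encode() maps each document to one fixed-length vector (384 dimensions for all-MiniLM-L6-v2)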
+
+ # Keep each document and its embedding together in a pandas DataFrame
+ df = pd.DataFrame(
+     {
+         "Document": documents,
+         "Embedding": list(embeddings),  # store each vector as a list
+     }
+ )
+
+ # Load LLM pipeline (flan-t5 is a seq2seq model, so the task is
+ # "text2text-generation" rather than "text-generation")
+ llm = pipeline(
+     "text2text-generation",
+     model="google/flan-t5-xl",  # large model; the Space may not have enough storage for it
+     token=hf_token
+ )
+
+ # Retrieve with pandas
+ def retrieve_with_pandas(query: str, top_k: int = 10):
+     """
+     Embed the query, compute cosine similarity to each document,
+     and return the top_k most similar documents (as a DataFrame).
+     """
+     query_embedding = embedding_model.encode([query])[0]
+
+     def cosine_sim(x):
+         x = np.array(x)
+         return float(
+             np.dot(query_embedding, x)
+             / (np.linalg.norm(query_embedding) * np.linalg.norm(x))
+         )
+
+     df["Similarity"] = df["Embedding"].apply(cosine_sim)
+     results = df.sort_values(by="Similarity", ascending=False).head(top_k)
+     return results[["Document", "Similarity"]]
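+ # e.g. retrieve_with_pandas("How do I reset my password?", top_k=3) returns a
+ # DataFrame of the 3 knowledge-base articles closest to the query (sample query for illustration)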
+
+
+ def generate_with_rag(query, top_k=5):
+     # Retrieve context as a pandas Series of document texts
+     docs = retrieve_with_pandas(query, top_k=top_k)
+     context_series = docs["Document"] if "Document" in docs else docs
+
+     # Turn the Series into a single string of text
+     # (each doc separated by a divider)
+     context_str = "\n\n---\n\n".join(context_series.tolist())
+
+     # Build a clean prompt
+     input_text = f"""You are an IT helpdesk assistant.
+ If the user asked a question, answer it with detailed step-by-step instructions, considering all the articles below.
+ If the user asked a question and the answer is not in the articles, say you don't know and suggest contacting SFU IT.
+ If the user DID NOT ask a question, be friendly and ask how you can help them.
+
+ Question:
+ {query}
+
+ -- Start of Articles --
+ {context_str}
+ -- End of Articles --
+
+ Answer:"""
+
+     # Call the LLM (a seq2seq pipeline returns only the generated answer,
+     # so return_full_text is neither needed nor supported here)
+     response = llm(
+         input_text,
+         max_new_tokens=1024,
+         do_sample=False,
+     )
+     return response[0]["generated_text"].strip()
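+ # e.g. generate_with_rag("My SFU wifi won't connect", top_k=5) yields an answer
+ # grounded in the retrieved articles (sample query for illustration)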
+
+ def chat_fn(message, history):
+     """
+     gr.ChatInterface callback; history is unused because each query is answered independently.
+     """
+     answer = generate_with_rag(message, top_k=2)
+     return answer
+
+
+ demo = gr.ChatInterface(
+     fn=chat_fn,
+     title="SFU IT Chatbot",
+     description="Enter your question and the SFU IT Chatbot will try to answer using retrieved SFU IT knowledge.",
+ )
+
+ # Pass share=True to launch() to expose a temporary public link
+ if __name__ == "__main__":
+     demo.launch()
data.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
preload-data ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,22 @@
+
+ gradio==5.6.0
+ gradio_client==1.4.3
+ huggingface-hub
+ keras
+ libclang
+ numpy
+ pandas
+ pydantic
+ pydantic_core
+
+ safetensors
+ scikit-learn
+ scipy
+ sentence-transformers
+ tensorboard
+ tensorflow
+ tf_keras
+ tokenizers
+ torch
+ transformers
+ wheel
unchunked_data.json ADDED
The diff for this file is too large to render. See raw diff