nuttachot committed on
Commit
50a4073
1 Parent(s): f8dd9ab

Add application file

Files changed (1)
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
+ import os
+ import gradio as gr
+ from transformers import pipeline
+ import spaces
+
+ model_name = "scb10x/typhoon2.5-qwen3-4b"
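+ # Module-level cache so the pipeline is built only once per process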
+ pipe = None
+
+ hf_token = os.getenv("HF_TOKEN")
+
+ def load_model():
+     global pipe
+     if pipe is None:
+         cache_dir = "./model-cache"
+         os.makedirs(cache_dir, exist_ok=True)
+
+         pipe = pipeline(
+             "text-generation",
+             model=model_name,
+             trust_remote_code=True,
+             device_map="auto",
+             # Force the model weights to be stored inside the Space
+             model_kwargs={"cache_dir": cache_dir},
+             token=hf_token,
+         )
+     return pipe
+
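+ # ZeroGPU: a GPU is attached only for the duration of this call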
+ @spaces.GPU
+ def predict(message, history):
+     generator = load_model()
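+     # Keep only the last 3 turns to bound the prompt length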
+     short_history = history[-3:] if history else []
+
+     chat_context = ""
+     for user, bot in short_history:
+         chat_context += f"User: {user}\nAssistant: {bot}\n"
+     chat_context += f"User: {message}\nAssistant:"
+
+     output = generator(
+         chat_context,
+         max_new_tokens=128,
+         do_sample=True,
+         temperature=0.7,
+     )
+
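+     # Keep only the text after the final "Assistant:" marker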
+     response = output[0]["generated_text"].split("Assistant:")[-1].strip()
+     return response
+
+
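+ # retry_btn / undo_btn are Gradio 4.x options (removed in Gradio 5)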
+ chat_ui = gr.ChatInterface(
+     fn=predict,
+     title="Typhoon 2.5 ZeroGPU Cache Edition",
+     description="Load the model once, reuse it, and cut startup time",
+     retry_btn="Retry",
+     undo_btn="Delete last message",
+ )
+
+ if __name__ == "__main__":
+     chat_ui.launch()