nuttachot committed on
Commit
50a4073
1 Parent(s): f8dd9ab

Add application file

Files changed (1)
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
+ import os
+ import gradio as gr
+ from transformers import pipeline
+ import spaces
+
+ model_name = "scb10x/typhoon2.5-qwen3-4b"
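+ # Module-level cache so the pipeline is built only once per process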
+ pipe = None
+
+ hf_token = os.getenv("HF_TOKEN")
+
+ def load_model():
+     global pipe
+     if pipe is None:
+         cache_dir = "./model-cache"
+         os.makedirs(cache_dir, exist_ok=True)
+
+         pipe = pipeline(
+             "text-generation",
+             model=model_name,
+             trust_remote_code=True,
+             device_map="auto",
+             # Force the model weights to be stored inside the Space
+             model_kwargs={"cache_dir": cache_dir},
+             token=hf_token,
+         )
+     return pipe
+
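+ # ZeroGPU: a GPU is attached only for the duration of this call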
+ @spaces.GPU
+ def predict(message, history):
+     generator = load_model()
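+     # Keep only the last 3 turns to bound the prompt length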
+     short_history = history[-3:] if history else []
+
+     chat_context = ""
+     for user, bot in short_history:
+         chat_context += f"User: {user}\nAssistant: {bot}\n"
+     chat_context += f"User: {message}\nAssistant:"
+
+     output = generator(
+         chat_context,
+         max_new_tokens=128,
+         do_sample=True,
+         temperature=0.7,
+     )
+
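+     # Keep only the text after the final "Assistant:" marker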
+     response = output[0]["generated_text"].split("Assistant:")[-1].strip()
+     return response
+
+
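+ # retry_btn / undo_btn are Gradio 4.x options (removed in Gradio 5)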
+ chat_ui = gr.ChatInterface(
+     fn=predict,
+     title="Typhoon 2.5 ZeroGPU Cache Edition",
+     description="Load the model once, reuse it, and cut startup time",
+     retry_btn="Retry",
+     undo_btn="Delete last message",
+ )
+
+ if __name__ == "__main__":
+     chat_ui.launch()