nuttachot
committed on
Commit · d0cd438
1 Parent(s): face026
Add application file
app.py CHANGED
@@ -11,37 +11,23 @@ hf_token = os.getenv("HF_TOKEN")
 def load_model():
     global pipe
     if pipe is None:
-        cache_dir = "./model-cache"
-        os.makedirs(cache_dir, exist_ok=True)
-
         pipe = pipeline(
             "text-generation",
             model=model_name,
             trust_remote_code=True,
             device_map="auto",
-            max_new_tokens=128,
-            temperature=0.9,
-            cache_dir=cache_dir,  # force the model weights to be stored inside the Space
             token=hf_token,
         )
     return pipe
 
 @spaces.GPU
-def predict(message, history):
+def predict(message):
     generator = load_model()
-    short_history = history[-3:] if history else []
-
-    chat_context = ""
-    for user, bot in short_history:
-        chat_context += f"User: {user}\nAssistant: {bot}\n"
-    chat_context += f"User: {message}\nAssistant:"
 
     output = generator(
-        chat_context,
-        max_new_tokens=
-        do_sample=True,
+        message,
+        max_new_tokens=256,
         temperature=0.7,
-        stream=False,
     )
 
     response = output[0]["generated_text"].split("Assistant:")[-1].strip()
@@ -51,7 +37,7 @@ def predict(message, history):
 chat_ui = gr.ChatInterface(
     fn=predict,
     title="Typhoon 2.5 ZeroGPU Cache Edition",
-    description="
+    description="A chat interface powered by Typhoon 2.5 model with ZeroGPU",
 )
 
 if __name__ == "__main__":
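
For orientation, here is a minimal sketch of what app.py plausibly looks like after this commit. The diff hides lines 1-10 and the tail of predict, so the imports, the model_name value, the pipe global, the return statement, and the launch() call are assumptions; the model id below is a hypothetical placeholder. Note also that gr.ChatInterface calls fn(message, history), so the sketch keeps an unused history parameter even though the committed predict takes only message.

import os

import gradio as gr
import spaces
from transformers import pipeline

# Assumed from the identifiers the diff references; lines 1-10 are not shown.
model_name = "scb10x/typhoon-placeholder"  # hypothetical model id
hf_token = os.getenv("HF_TOKEN")
pipe = None

def load_model():
    # Build the text-generation pipeline once and reuse it across requests.
    global pipe
    if pipe is None:
        pipe = pipeline(
            "text-generation",
            model=model_name,
            trust_remote_code=True,
            device_map="auto",
            token=hf_token,
        )
    return pipe

@spaces.GPU
def predict(message, history):
    # The commit defines predict(message), but gr.ChatInterface calls
    # fn(message, history); the unused history parameter avoids a TypeError.
    generator = load_model()
    output = generator(
        message,
        max_new_tokens=256,
        temperature=0.7,
    )
    # The hunk's context ends before the function returns; returning the
    # parsed response is assumed.
    response = output[0]["generated_text"].split("Assistant:")[-1].strip()
    return response

chat_ui = gr.ChatInterface(
    fn=predict,
    title="Typhoon 2.5 ZeroGPU Cache Edition",
    description="A chat interface powered by Typhoon 2.5 model with ZeroGPU",
)

if __name__ == "__main__":
    chat_ui.launch()  # assumed; the diff cuts off at the __main__ guard

The net effect of the commit is to drop the manual ./model-cache handling and the three-turn history prompt, and to generate directly from the latest user message with max_new_tokens=256.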