# llm1/app.py
import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
model_name = "scb10x/typhoon2.5-qwen3-4b"
token = os.getenv("HF_TOKEN")  # optional auth token, e.g. for gated/private repos

tokenizer = AutoTokenizer.from_pretrained(
    model_name, trust_remote_code=True, token=token
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,  # half precision keeps the 4B model within GPU memory
    low_cpu_mem_usage=True,
    token=token,
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
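# Note (ZeroGPU, per the HF Spaces docs): the `spaces` package manages GPU
# allocation, so moving the model to CUDA at startup is the documented pattern;
# the GPU itself is only attached while a @spaces.GPU-decorated call runs.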
def generate_chat(message):
    """Run one single-turn generation and return the assistant's reply."""
    messages = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens: splitting the full decoded text
    # on the prompt string is fragile if the model echoes or rephrases it.
    new_tokens = output_ids[0][input_ids.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
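# Optional sketch (not wired into the UI): a streaming variant built on
# transformers' TextIteratorStreamer. generate() runs in a background thread
# while the streamer yields decoded text chunks; gr.ChatInterface can consume
# such a generator. On ZeroGPU this would also need the @spaces.GPU decorator
# if used as the UI callback.
from threading import Thread
from transformers import TextIteratorStreamer

def generate_chat_stream(message):
    messages = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    Thread(
        target=model.generate,
        kwargs=dict(
            input_ids=input_ids,
            max_new_tokens=256,
            temperature=0.7,
            do_sample=True,
            top_p=0.9,
            pad_token_id=tokenizer.eos_token_id,
            streamer=streamer,
        ),
    ).start()
    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # yield the accumulated reply so the UI updates in place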
@spaces.GPU
def predict(message, history=None):  # history is unused; each turn is independent
    return generate_chat(message)
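# Optional sketch (not wired into the UI): folding `history` into the prompt
# for real multi-turn chat. This assumes Gradio's default tuple-style history
# of (user, assistant) pairs; ChatInterface(type="messages") passes
# {"role", "content"} dicts instead.
def predict_with_history(message, history):
    messages = []
    for user_msg, bot_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})
    # From here, generation proceeds as in generate_chat(), but with the full
    # conversation passed to apply_chat_template().
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)
    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()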
chat_ui = gr.ChatInterface(
    fn=predict,
    title="Typhoon 2.5 ZeroGPU",
)
if __name__ == "__main__":
    chat_ui.launch()