import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces
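# Gradio chat demo for Typhoon 2.5 (Qwen3 4B) intended to run on a
# Hugging Face ZeroGPU Space: the model is loaded once at startup and
# each request borrows a GPU via the @spaces.GPU decorator below.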
model_name = "scb10x/typhoon2.5-qwen3-4b"
token = os.getenv("HF_TOKEN")
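# HF_TOKEN is read from the environment (e.g. a Space secret) and passed to
# from_pretrained so gated or private model repos can be downloaded; it may
# be left unset for public models.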
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    token=token,
)
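# Move the model to the GPU when one is available, otherwise keep it on CPU.
# (On a ZeroGPU Space the `spaces` package arranges the actual device
# allocation around each @spaces.GPU call.)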
model.to("cuda" if torch.cuda.is_available() else "cpu")
def generate_chat(message):
    messages = [{"role": "user", "content": message}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(model.device)
    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )
    # Decode only the newly generated tokens so the prompt is not echoed
    # back and the reply does not rely on string-splitting the message.
    response_text = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()
    return response_text
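# Gradio's ChatInterface calls predict(message, history); @spaces.GPU
# requests a ZeroGPU device for the duration of each call.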
@spaces.GPU
def predict(message, history=None):  # history is not used
    response = generate_chat(message)
    return response
chat_ui = gr.ChatInterface(
    fn=predict,
    title="Typhoon 2.5 ZeroGPU",
)
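# Launch the Gradio app when the script is executed directly.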
if __name__ == "__main__":
    chat_ui.launch()