import os
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import spaces

model_name = "scb10x/typhoon2.5-qwen3-4b"
token = os.getenv("HF_TOKEN")

# Load the tokenizer and model; pass the HF token in case the model repo requires authentication.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token=token)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True,
    token=token,
)

# Move the model to the GPU when one is available, otherwise keep it on the CPU.
model.to("cuda" if torch.cuda.is_available() else "cpu")

def generate_chat(message):
    messages = [{"role": "user", "content": message}]

    # Build the prompt with the model's chat template and move it to the model's device.
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)

    # Generate a response with sampling (temperature 0.7, nucleus top_p 0.9), capped at 256 new tokens.
    output_ids = model.generate(
        input_ids,
        max_new_tokens=256,
        temperature=0.7,
        do_sample=True,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens so the prompt is not echoed back in the reply.
    response_text = tokenizer.decode(
        output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()

    return response_text

@spaces.GPU  # Request a ZeroGPU slice for the duration of this call
def predict(message, history=None):  # history is required by gr.ChatInterface but not used here
    response = generate_chat(message)
    return response

chat_ui = gr.ChatInterface(
    fn=predict,
    title="Typhoon 2.5 ZeroGPU",
)

if __name__ == "__main__":
    chat_ui.launch()