beyoru committed on
Commit
74622a6
·
verified ·
1 Parent(s): 893cf47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -4
app.py CHANGED
@@ -7,8 +7,7 @@ MODEL_NAME = "beyoru/Qwen3-0.9B-A0.6B"
7
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
  model = AutoModelForCausalLM.from_pretrained(
9
  MODEL_NAME,
10
- torch_dtype=torch.bfloat16,
11
- device_map="auto"
12
  )
13
 
14
  # --- Chat function ---
@@ -30,7 +29,7 @@ def chat_fn(message, history, num_ctx, temperature, repeat_penalty, min_p, top_k
30
 
31
  outputs = model.generate(
32
  **inputs,
33
- max_new_tokens=4096,
34
  temperature=float(temperature),
35
  top_p=float(top_p),
36
  top_k=int(top_k),
@@ -51,7 +50,7 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
51
  gr.Markdown("## Qwen3 Playground (Transformers Edition)")
52
  gr.Markdown("Model: **beyoru/Qwen3-0.9B-A0.6B** — chạy trực tiếp bằng Transformers")
53
 
54
- num_ctx = gr.Slider(512, 8192, 8192, 128, label="Context Length (num_ctx)")
55
  temperature = gr.Slider(0.1, 2.0, 0.6, 0.1, label="Temperature")
56
  repeat_penalty = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Repeat Penalty")
57
  min_p = gr.Slider(0.0, 1.0, 0.0, 0.01, label="Min P")
 
7
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
8
  model = AutoModelForCausalLM.from_pretrained(
9
  MODEL_NAME,
10
+ torch_dtype=torch.float16,
 
11
  )
12
 
13
  # --- Chat function ---
 
29
 
30
  outputs = model.generate(
31
  **inputs,
32
+ max_new_tokens=2048,
33
  temperature=float(temperature),
34
  top_p=float(top_p),
35
  top_k=int(top_k),
 
50
  gr.Markdown("## Qwen3 Playground (Transformers Edition)")
51
  gr.Markdown("Model: **beyoru/Qwen3-0.9B-A0.6B** — chạy trực tiếp bằng Transformers")
52
 
53
+ num_ctx = gr.Slider(512, 8192, 2048, 128, label="Context Length (num_ctx)")
54
  temperature = gr.Slider(0.1, 2.0, 0.6, 0.1, label="Temperature")
55
  repeat_penalty = gr.Slider(0.1, 2.0, 1.0, 0.1, label="Repeat Penalty")
56
  min_p = gr.Slider(0.0, 1.0, 0.0, 0.01, label="Min P")