beyoru committed on
Commit
b0da11a
·
verified ·
1 Parent(s): 8d2927f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -18
app.py CHANGED
@@ -3,10 +3,10 @@ import torch
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
  from threading import Thread
5
  import gradio as gr
6
- import os
7
 
8
  MODEL_NAME = os.getenv('MODEL_ID')
9
  TOKEN = os.getenv('TOKEN')
 
10
  print("Loading model...")
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=TOKEN)
12
  model = AutoModelForCausalLM.from_pretrained(
@@ -18,10 +18,10 @@ model = AutoModelForCausalLM.from_pretrained(
18
  )
19
  print("Model loaded.")
20
 
21
-
22
  def playground(
23
  message,
24
  history,
 
25
  max_new_tokens,
26
  temperature,
27
  repetition_penalty,
@@ -31,24 +31,30 @@ def playground(
31
  if not isinstance(message, str) or not message.strip():
32
  yield ""
33
  return
34
-
35
- # Build conversation
36
  conversation = []
 
 
 
 
 
 
37
  for user_msg, bot_msg in history:
38
  conversation.append({"role": "user", "content": user_msg})
39
  if bot_msg:
40
  conversation.append({"role": "assistant", "content": bot_msg})
 
41
  conversation.append({"role": "user", "content": message})
42
-
43
  if hasattr(tokenizer, "apply_chat_template"):
44
  prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
45
  else:
46
  prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation]) + "\nassistant:"
47
-
48
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
49
-
50
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
51
-
52
  generation_kwargs = dict(
53
  **inputs,
54
  streamer=streamer,
@@ -60,31 +66,39 @@ def playground(
60
  do_sample=True if temperature > 0 else False,
61
  pad_token_id=tokenizer.eos_token_id
62
  )
63
-
64
- # Start generation in a background thread
65
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
66
  thread.start()
67
-
68
  generated_text = ""
69
  for new_text in streamer:
70
  generated_text += new_text
71
  yield generated_text
72
-
73
  thread.join()
74
 
75
-
76
  with gr.Blocks(fill_height=True, fill_width=True) as app:
77
  with gr.Sidebar():
78
  gr.Markdown("## Playground by UltimaX Intelligence")
79
  gr.HTML("""
80
  Runs <b><a href="https://huggingface.co/beyoru/Qwen3-0.9B-A0.6B" target="_blank">
81
  beyoru/Qwen3-0.9B-A0.6B</a></b> via <b>Hugging Face Transformers</b>.<br><br>
82
- <b>Supprot me at:</b>.<br><br>
83
  <a href="https://www.buymeacoffee.com/ductransa0g" target="_blank">
84
  <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" width="150px">
85
- </a>
86
- </p>
87
  """)
 
 
 
 
 
 
 
 
 
 
 
88
  gr.Markdown("---")
89
  gr.Markdown("## Generation Parameters")
90
  max_new_tokens = gr.Slider(32, 4096, value=2048, step=32, label="Max New Tokens")
@@ -92,10 +106,10 @@ with gr.Blocks(fill_height=True, fill_width=True) as app:
92
  repetition_penalty = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Repetition Penalty")
93
  top_k = gr.Slider(0, 100, value=20, step=1, label="Top K (0 = off)")
94
  top_p = gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top P")
95
-
96
  gr.ChatInterface(
97
  fn=playground,
98
- additional_inputs=[max_new_tokens, temperature, repetition_penalty, top_k, top_p],
99
  chatbot=gr.Chatbot(
100
  label="Qwen3-0.9B-A0.6B",
101
  show_copy_button=True,
 
3
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
4
  from threading import Thread
5
  import gradio as gr
 
6
 
7
  MODEL_NAME = os.getenv('MODEL_ID')
8
  TOKEN = os.getenv('TOKEN')
9
+
10
  print("Loading model...")
11
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True, token=TOKEN)
12
  model = AutoModelForCausalLM.from_pretrained(
 
18
  )
19
  print("Model loaded.")
20
 
 
21
  def playground(
22
  message,
23
  history,
24
+ system_prompt,
25
  max_new_tokens,
26
  temperature,
27
  repetition_penalty,
 
31
  if not isinstance(message, str) or not message.strip():
32
  yield ""
33
  return
34
+
35
+ # Build conversation with system prompt
36
  conversation = []
37
+
38
+ # Add system prompt if provided
39
+ if system_prompt and system_prompt.strip():
40
+ conversation.append({"role": "system", "content": system_prompt.strip()})
41
+
42
+ # Add chat history
43
  for user_msg, bot_msg in history:
44
  conversation.append({"role": "user", "content": user_msg})
45
  if bot_msg:
46
  conversation.append({"role": "assistant", "content": bot_msg})
47
+
48
  conversation.append({"role": "user", "content": message})
49
+
50
  if hasattr(tokenizer, "apply_chat_template"):
51
  prompt = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
52
  else:
53
  prompt = "\n".join([f"{msg['role']}: {msg['content']}" for msg in conversation]) + "\nassistant:"
54
+
55
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
56
  streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
57
+
58
  generation_kwargs = dict(
59
  **inputs,
60
  streamer=streamer,
 
66
  do_sample=True if temperature > 0 else False,
67
  pad_token_id=tokenizer.eos_token_id
68
  )
69
+
 
70
  thread = Thread(target=model.generate, kwargs=generation_kwargs)
71
  thread.start()
72
+
73
  generated_text = ""
74
  for new_text in streamer:
75
  generated_text += new_text
76
  yield generated_text
77
+
78
  thread.join()
79
 
 
80
  with gr.Blocks(fill_height=True, fill_width=True) as app:
81
  with gr.Sidebar():
82
  gr.Markdown("## Playground by UltimaX Intelligence")
83
  gr.HTML("""
84
  Runs <b><a href="https://huggingface.co/beyoru/Qwen3-0.9B-A0.6B" target="_blank">
85
  beyoru/Qwen3-0.9B-A0.6B</a></b> via <b>Hugging Face Transformers</b>.<br><br>
86
+ <b>Support me at:</b>.<br><br>
87
  <a href="https://www.buymeacoffee.com/ductransa0g" target="_blank">
88
  <img src="https://cdn.buymeacoffee.com/buttons/v2/default-yellow.png" alt="Buy Me A Coffee" width="150px">
89
+ </a>
 
90
  """)
91
+
92
+ gr.Markdown("---")
93
+ gr.Markdown("## System Prompt")
94
+ system_prompt = gr.Textbox(
95
+ label="System Prompt",
96
+ placeholder="Enter custom system instructions here (optional)...",
97
+ lines=4,
98
+ value="You are a helpful AI assistant.",
99
+ info="AI role custome"
100
+ )
101
+
102
  gr.Markdown("---")
103
  gr.Markdown("## Generation Parameters")
104
  max_new_tokens = gr.Slider(32, 4096, value=2048, step=32, label="Max New Tokens")
 
106
  repetition_penalty = gr.Slider(0.1, 2.0, value=1.0, step=0.1, label="Repetition Penalty")
107
  top_k = gr.Slider(0, 100, value=20, step=1, label="Top K (0 = off)")
108
  top_p = gr.Slider(0.0, 1.0, value=0.95, step=0.05, label="Top P")
109
+
110
  gr.ChatInterface(
111
  fn=playground,
112
+ additional_inputs=[system_prompt, max_new_tokens, temperature, repetition_penalty, top_k, top_p],
113
  chatbot=gr.Chatbot(
114
  label="Qwen3-0.9B-A0.6B",
115
  show_copy_button=True,