markian-rybchuk committed
Commit 6acd886 · verified · 1 parent: 6a356af

Update app.py

Files changed (1): app.py (+121 −101)
app.py CHANGED
@@ -25,7 +25,7 @@ from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_
     logged_event_handler, DEBUG_MODE, DEBUG_MODEL, log_debug, log_info, log_error, log_warning
 from log_chat import log_chat

-DEFAULT_MODEL_TEMPERATURE = 0.6
+DEFAULT_MODEL_TEMPERATURE = 1.0
 BUTTON_WIDTH = 160
 DEFAULT_OPT_OUT_VALUE = DEBUG_MODE

@@ -33,7 +33,7 @@ DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
 # DEFAULT_MODEL_NAME = "Apriel-1.5-15B-thinker" if not DEBUG_MODEL else "Apriel-1.5-15B-thinker"
 DEFAULT_MODEL_NAME = "Apriel-1.6-15B-Thinker"

-SHOW_BANNER = True
+SHOW_BANNER = False
 INFO_BANNER_MARKDOWN = """
 <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
 """
@@ -110,14 +110,14 @@ def setup_model(model_key, intial=False):


 def chat_started():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
+    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort
     return (DROPDOWN_DISABLED, gr.update(value="", interactive=False),
-            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)
+            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED, gr.update(interactive=False))


 def chat_finished():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
-    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED
+    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort
+    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, gr.update(interactive=True)


 def stop_chat(state):
@@ -131,7 +131,7 @@ def toggle_opt_out(state, checkbox):
     return state


-def run_chat_inference(history, message, state):
+def run_chat_inference(history, message, state, reasoning_effort="medium"):
     global chat_start_count
     state["is_streaming"] = True
     state["stop_flag"] = False
@@ -148,7 +148,7 @@ def run_chat_inference(history, message, state):

     # Reinitialize the OpenAI client with a random endpoint from the list
     setup_model(model_config.get('MODEL_KEY'))
-    log_info(f"Using model {model_name} (temperature: {temperature}) with endpoint {model_config.get('base_url')}")
+    log_info(f"Using model {model_name} (temperature: {temperature}, reasoning_effort: {reasoning_effort}) with endpoint {model_config.get('base_url')}")

     if len(history) == 0:
         state["chat_id"] = uuid4().hex
@@ -400,15 +400,15 @@ def run_chat_inference(history, message, state):
                 api_messages.pop(idx - removed)
                 removed += 1
             gr.Warning(f"Too many images provided; keeping the latest {MAX_IMAGE_MESSAGES} and dropped {removed} older image message(s).")
-        print('model', model_name)
-        print('messages', api_messages)
+
         stream = openai_client.chat.completions.create(
             model=model_name,
             messages=api_messages,
             temperature=temperature,
+            top_p=1.0,
+            reasoning_effort=reasoning_effort,
             stream=True
         )
-        print('stream', stream)
     except Exception as e:
         log_error(f"Error:\n\t{e}\n\tInference failed for model {model_name} and endpoint {model_config['base_url']}")
         error = str(e)
@@ -444,56 +444,58 @@ def run_chat_inference(history, message, state):
     log_debug(f"History added empty assistant: {history}")
     check_format(history, "messages")

-    output = ""
+    output_reasoning = ""
+    output_content = ""
     completion_started = False
+
     for chunk in stream:
         if state["stop_flag"]:
             log_debug(f"chat_fn() --> Stopping streaming...")
             break  # Exit the loop if the stop flag is set
-        # Extract the new content from the delta field
-        content = getattr(chunk.choices[0].delta, "content", "") or ""
-        reasoning_content = getattr(chunk.choices[0].delta, "reasoning_content", "") or ""
-        output += reasoning_content + content
+
+        delta = chunk.choices[0].delta
+        new_reasoning = getattr(delta, "reasoning_content", "") or ""
+        new_content = getattr(delta, "content", "") or ""
+
+        output_reasoning += new_reasoning
+        output_content += new_content

         if is_reasoning:
-            parts = output.split(output_tag_start)
-
-            if len(parts) > 1:
-                if parts[1].endswith(output_tag_end):
-                    parts[1] = parts[1].replace(output_tag_end, "")
-                if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}"):
-                    parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}", "")
-                if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}\n"):
-                    parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}\n", "")
-                if parts[1].endswith(f"{output_stop_token}"):
-                    parts[1] = parts[1].replace(f"{output_stop_token}", "")
-                if parts[1].endswith(f"{output_stop_token}\n"):
-                    parts[1] = parts[1].replace(f"{output_stop_token}\n", "")
-
+            # Update the reasoning bubble
             history[-1 if not completion_started else -2] = gr.ChatMessage(
                 role="assistant",
-                content=parts[0],
+                content=output_reasoning,
                 metadata={"title": "🧠 Thought"}
             )
-            if completion_started:
-                history[-1] = gr.ChatMessage(
-                    role="assistant",
-                    content=parts[1]
-                )
-            elif len(parts) > 1 and not completion_started:
-                completion_started = True
-                history.append(gr.ChatMessage(
-                    role="assistant",
-                    content=parts[1]
-                ))
+
+            # Handle the content bubble
+            # Check if we have actual content or if we should start the content bubble
+            if new_content or (output_content and not completion_started):
+                # Clean up stop tokens from the content if present
+                if output_tag_end and output_content.endswith(output_tag_end):
+                    output_content = output_content.replace(output_tag_end, "")
+                if output_stop_token and output_content.endswith(output_stop_token):
+                    output_content = output_content.replace(output_stop_token, "")
+
+                if not completion_started:
+                    completion_started = True
+                    history.append(gr.ChatMessage(
+                        role="assistant",
+                        content=output_content
+                    ))
+                else:
+                    history[-1] = gr.ChatMessage(
+                        role="assistant",
+                        content=output_content
+                    )
         else:
-            if output.endswith("<|end|>"):
-                output = output.replace("<|end|>", "")
-            if output.endswith("<|end|>\n"):
-                output = output.replace("<|end|>\n", "")
+            if output_content.endswith("<|end|>"):
+                output_content = output_content.replace("<|end|>", "")
+            if output_content.endswith("<|end|>\n"):
+                output_content = output_content.replace("<|end|>\n", "")
             history[-1] = gr.ChatMessage(
                 role="assistant",
-                content=output
+                content=output_content
             )

         # log_message(f"Yielding messages: {history}")
@@ -554,7 +556,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         with gr.Column():
            gr.Markdown(BANNER_MARKDOWN, elem_classes="banner-message")

-    with gr.Row(variant="panel", elem_classes="responsive-row"):
+    with gr.Row(variant="panel", elem_classes="responsive-row", visible=False):
         with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
             model_dropdown = gr.Dropdown(
                 choices=[f"Model: {model}" for model in models_config.keys()],
@@ -568,71 +570,89 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         with gr.Column(scale=4, min_width=0):
             feedback_message_html = gr.HTML(description, elem_classes="model-message")

-    chatbot = gr.Chatbot(
-        type="messages",
-        height="calc(100svh - 320px)",
-        max_height="calc(100svh - 320px)",
-        elem_classes="chatbot",
-    )
-
-    with gr.Row():
-        with gr.Column(scale=10, min_width=400, elem_classes="user-input-container"):
-            with gr.Row():
-                user_input = gr.MultimodalTextbox(
-                    interactive=True,
-                    container=False,
-                    file_count="multiple",
-                    placeholder="Type your message here and press Enter or upload file...",
-                    show_label=False,
-                    sources=["upload"],
-                    max_plain_text_length=100000,
-                    max_lines=10
-                )
-
-                # Original text-only input
-                # user_input = gr.Textbox(
-                #     show_label=False,
-                #     placeholder="Type your message here and press Enter",
-                #     container=False
-                # )
-        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
-            with gr.Row():
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
-                    send_btn = gr.Button("Send", variant="primary", elem_classes="control-button")
-                    stop_btn = gr.Button("Stop", variant="cancel", elem_classes="control-button", visible=False)
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
-                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary", elem_classes="control-button")
-    with gr.Row():
-        with gr.Column(min_width=400, elem_classes="opt-out-container"):
-            with gr.Row():
-                gr.HTML(
-                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
-                    elem_classes="opt-out-message")
-            with gr.Row():
-                opt_out_checkbox = gr.Checkbox(
-                    label="Don’t save my chat history for improvements or training",
-                    value=DEFAULT_OPT_OUT_VALUE,
-                    elem_classes="opt-out-checkbox",
-                    interactive=True,
-                    container=False
-                )
+    with gr.Column(visible=True, elem_classes="agreement-overlay") as agreement_overlay:
+        with gr.Column(elem_classes="form"):
+            gr.Markdown("## Privacy Agreement")
+            gr.Markdown("""
+By using this app, you agree to the following terms:
+
+We record all content you submit and all model outputs (“Data”), including text, images, files, and minimal request metadata (timestamp & technical logs). We do not store IP addresses, cookies, or account identifiers, so we cannot link any submission back to a particular person. However, the text you submit may itself contain personal information (e.g., names, Social Security numbers). Please do not include sensitive personal data in your prompts. Any such information will be subject to our redaction process before any public release.
+
+Data is used for research, safety evaluation, and to improve the Service. We reserve the right to publish, share, or redistribute redacted versions of the Data under a Creative Commons Attribution (CC‑BY) or similar open license. Before any public release, we apply automated and manual redaction to remove private keys, names, contact details, and other identifiers that may appear in the content.
+
+Because we do not track user identities, individual submissions cannot be deleted or withdrawn once made. If you do not want your content used or released, do not submit it.
+            """)
+            agree_btn = gr.Button("I Agree", variant="primary")
+
+    with gr.Column(visible=True) as main_app_area:
+        chatbot = gr.Chatbot(
+            type="messages",
+            height="calc(100svh - 320px)",
+            max_height="calc(100svh - 320px)",
+            elem_classes="chatbot",
+        )
+
+        with gr.Row():
+            with gr.Column(scale=10, min_width=400, elem_classes="user-input-container"):
+                with gr.Row():
+                    # user_input = gr.MultimodalTextbox(
+                    #     interactive=True,
+                    #     container=False,
+                    #     file_count="multiple",
+                    #     placeholder="Type your message here and press Enter or upload file...",
+                    #     show_label=False,
+                    #     sources=["upload"],
+                    #     max_plain_text_length=100000,
+                    #     max_lines=10
+                    # )
+
+                    # Original text-only input
+                    user_input = gr.Textbox(
+                        show_label=False,
+                        placeholder="Type your message here and press Enter",
+                        container=False,
+                        max_lines=10
+                    )
+            with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
+                with gr.Row():
+                    with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
+                        send_btn = gr.Button("Send", variant="primary", elem_classes="control-button")
+                        stop_btn = gr.Button("Stop", variant="cancel", elem_classes="control-button", visible=False)
+                    with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
+                        clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary", elem_classes="control-button")
+        with gr.Row():
+            with gr.Column(scale=1):
+                reasoning_effort_radio = gr.Radio(
+                    choices=["low", "medium", "high"],
+                    value="medium",
+                    label="Reasoning Effort",
+                    interactive=True,
+                    container=True,
+                    elem_classes="reasoning-radio"
+                )
+
+
+    def agree_to_terms():
+        return gr.update(visible=False)
+
+    agree_btn.click(agree_to_terms, None, [agreement_overlay])

     gr.on(
         triggers=[send_btn.click, user_input.submit],
         fn=run_chat_inference,  # this generator streams results. do not use logged_event_handler wrapper
-        inputs=[chatbot, user_input, session_state],
+        inputs=[chatbot, user_input, session_state, reasoning_effort_radio],
         outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
         concurrency_limit=4,
         api_name=False
     ).then(
-        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn], queue=False)
+        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort_radio], queue=False)

     # In parallel, disable or update the UI controls
     gr.on(
         triggers=[send_btn.click, user_input.submit],
         fn=chat_started,
         inputs=None,
-        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn],
+        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort_radio],
         queue=False,
         show_progress='hidden',
         api_name=False
@@ -645,7 +665,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         api_name=False
     )

-    opt_out_checkbox.change(fn=toggle_opt_out, inputs=[session_state, opt_out_checkbox], outputs=[session_state])
+

     # Ensure the model is reset to default on page reload
     demo.load(
@@ -667,4 +687,4 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
     )

 demo.queue(default_concurrency_limit=2).launch(ssr_mode=False, show_api=False, max_file_size="10mb")
-log_info("Gradio app launched")
+log_info("Gradio app launched")
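For readers skimming the diff, here is a minimal standalone sketch of the pattern this commit adopts: forwarding the selected reasoning effort to an OpenAI-compatible streaming completion and accumulating the `reasoning_content` and `content` deltas in separate buffers. The endpoint URL, API key, and the backend's support for `reasoning_effort` and a streamed `reasoning_content` field are assumptions for illustration, not details taken from this repository.

```python
# Sketch only: assumes a recent openai SDK and an OpenAI-compatible server
# (e.g. vLLM) that streams a separate reasoning_content field.
# base_url, api_key, and the model name are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")


def stream_answer(messages, reasoning_effort="medium", temperature=1.0):
    stream = client.chat.completions.create(
        model="Apriel-1.6-15B-Thinker",
        messages=messages,
        temperature=temperature,
        top_p=1.0,
        reasoning_effort=reasoning_effort,  # forwarded as in the commit; honored only if the backend supports it
        stream=True,
    )
    reasoning, answer = "", ""
    for chunk in stream:
        delta = chunk.choices[0].delta
        # Keep the thinking trace and the final answer in separate buffers,
        # mirroring the two chat bubbles the app renders.
        reasoning += getattr(delta, "reasoning_content", "") or ""
        answer += getattr(delta, "content", "") or ""
    return reasoning, answer


if __name__ == "__main__":
    thought, reply = stream_answer([{"role": "user", "content": "What is 17 * 23?"}])
    print("Thought:", thought)
    print("Answer:", reply)
```

In the app itself these two buffers correspond to `output_reasoning` and `output_content`, which update the 🧠 Thought bubble and the answer bubble as the stream arrives.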