markian-rybchuk committed
Commit 6acd886 · verified · 1 parent: 6a356af

Update app.py

Files changed (1): app.py (+121 −101)
app.py CHANGED
@@ -25,7 +25,7 @@ from utils import COMMUNITY_POSTFIX_URL, get_model_config, check_format, models_
     logged_event_handler, DEBUG_MODE, DEBUG_MODEL, log_debug, log_info, log_error, log_warning
 from log_chat import log_chat

-DEFAULT_MODEL_TEMPERATURE = 0.6
+DEFAULT_MODEL_TEMPERATURE = 1.0
 BUTTON_WIDTH = 160
 DEFAULT_OPT_OUT_VALUE = DEBUG_MODE

@@ -33,7 +33,7 @@ DEFAULT_OPT_OUT_VALUE = DEBUG_MODE
 # DEFAULT_MODEL_NAME = "Apriel-1.5-15B-thinker" if not DEBUG_MODEL else "Apriel-1.5-15B-thinker"
 DEFAULT_MODEL_NAME = "Apriel-1.6-15B-Thinker"

-SHOW_BANNER = True
+SHOW_BANNER = False
 INFO_BANNER_MARKDOWN = """
 <span class="banner-message-text">ℹ️ This app has been updated to use the recommended temperature of 0.6. We had set it to 0.8 earlier and expect 0.6 to be better. Please provide feedback using the model link.</span>
 """
@@ -110,14 +110,14 @@ def setup_model(model_key, intial=False):


 def chat_started():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
+    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort
     return (DROPDOWN_DISABLED, gr.update(value="", interactive=False),
-            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED)
+            SEND_BUTTON_DISABLED, STOP_BUTTON_ENABLED, BUTTON_DISABLED, gr.update(interactive=False))


 def chat_finished():
-    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn
-    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED
+    # outputs: model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort
+    return DROPDOWN_ENABLED, INPUT_ENABLED, SEND_BUTTON_ENABLED, STOP_BUTTON_DISABLED, BUTTON_ENABLED, gr.update(interactive=True)


 def stop_chat(state):
@@ -131,7 +131,7 @@ def toggle_opt_out(state, checkbox):
     return state


-def run_chat_inference(history, message, state):
+def run_chat_inference(history, message, state, reasoning_effort="medium"):
     global chat_start_count
     state["is_streaming"] = True
     state["stop_flag"] = False
@@ -148,7 +148,7 @@ def run_chat_inference(history, message, state):

     # Reinitialize the OpenAI client with a random endpoint from the list
     setup_model(model_config.get('MODEL_KEY'))
-    log_info(f"Using model {model_name} (temperature: {temperature}) with endpoint {model_config.get('base_url')}")
+    log_info(f"Using model {model_name} (temperature: {temperature}, reasoning_effort: {reasoning_effort}) with endpoint {model_config.get('base_url')}")

     if len(history) == 0:
         state["chat_id"] = uuid4().hex
@@ -400,15 +400,15 @@ def run_chat_inference(history, message, state):
                 api_messages.pop(idx - removed)
                 removed += 1
             gr.Warning(f"Too many images provided; keeping the latest {MAX_IMAGE_MESSAGES} and dropped {removed} older image message(s).")
-        print('model', model_name)
-        print('messages', api_messages)
+
         stream = openai_client.chat.completions.create(
             model=model_name,
             messages=api_messages,
             temperature=temperature,
+            top_p=1.0,
+            reasoning_effort=reasoning_effort,
             stream=True
         )
-        print('stream', stream)
     except Exception as e:
         log_error(f"Error:\n\t{e}\n\tInference failed for model {model_name} and endpoint {model_config['base_url']}")
         error = str(e)
@@ -444,56 +444,58 @@ def run_chat_inference(history, message, state):
     log_debug(f"History added empty assistant: {history}")
     check_format(history, "messages")

-    output = ""
+    output_reasoning = ""
+    output_content = ""
     completion_started = False
+
     for chunk in stream:
         if state["stop_flag"]:
             log_debug(f"chat_fn() --> Stopping streaming...")
             break  # Exit the loop if the stop flag is set
-        # Extract the new content from the delta field
-        content = getattr(chunk.choices[0].delta, "content", "") or ""
-        reasoning_content = getattr(chunk.choices[0].delta, "reasoning_content", "") or ""
-        output += reasoning_content + content
+
+        delta = chunk.choices[0].delta
+        new_reasoning = getattr(delta, "reasoning_content", "") or ""
+        new_content = getattr(delta, "content", "") or ""
+
+        output_reasoning += new_reasoning
+        output_content += new_content

         if is_reasoning:
-            parts = output.split(output_tag_start)
-
-            if len(parts) > 1:
-                if parts[1].endswith(output_tag_end):
-                    parts[1] = parts[1].replace(output_tag_end, "")
-                if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}"):
-                    parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}", "")
-                if parts[1].endswith(f"{output_tag_end}\n{output_stop_token}\n"):
-                    parts[1] = parts[1].replace(f"{output_tag_end}\n{output_stop_token}\n", "")
-                if parts[1].endswith(f"{output_stop_token}"):
-                    parts[1] = parts[1].replace(f"{output_stop_token}", "")
-                if parts[1].endswith(f"{output_stop_token}\n"):
-                    parts[1] = parts[1].replace(f"{output_stop_token}\n", "")
-
+            # Update the reasoning bubble
             history[-1 if not completion_started else -2] = gr.ChatMessage(
                 role="assistant",
-                content=parts[0],
+                content=output_reasoning,
                 metadata={"title": "🧠 Thought"}
             )
-            if completion_started:
-                history[-1] = gr.ChatMessage(
-                    role="assistant",
-                    content=parts[1]
-                )
-            elif len(parts) > 1 and not completion_started:
-                completion_started = True
-                history.append(gr.ChatMessage(
-                    role="assistant",
-                    content=parts[1]
-                ))
+
+            # Handle the content bubble
+            # Check if we have actual content or if we should start the content bubble
+            if new_content or (output_content and not completion_started):
+                # Clean up stop tokens from the content if present
+                if output_tag_end and output_content.endswith(output_tag_end):
+                    output_content = output_content.replace(output_tag_end, "")
+                if output_stop_token and output_content.endswith(output_stop_token):
+                    output_content = output_content.replace(output_stop_token, "")
+
+                if not completion_started:
+                    completion_started = True
+                    history.append(gr.ChatMessage(
+                        role="assistant",
+                        content=output_content
+                    ))
+                else:
+                    history[-1] = gr.ChatMessage(
+                        role="assistant",
+                        content=output_content
+                    )
         else:
-            if output.endswith("<|end|>"):
-                output = output.replace("<|end|>", "")
-            if output.endswith("<|end|>\n"):
-                output = output.replace("<|end|>\n", "")
+            if output_content.endswith("<|end|>"):
+                output_content = output_content.replace("<|end|>", "")
+            if output_content.endswith("<|end|>\n"):
+                output_content = output_content.replace("<|end|>\n", "")
             history[-1] = gr.ChatMessage(
                 role="assistant",
-                content=output
+                content=output_content
             )

         # log_message(f"Yielding messages: {history}")
@@ -554,7 +556,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         with gr.Column():
            gr.Markdown(BANNER_MARKDOWN, elem_classes="banner-message")

-    with gr.Row(variant="panel", elem_classes="responsive-row"):
+    with gr.Row(variant="panel", elem_classes="responsive-row", visible=False):
         with gr.Column(scale=1, min_width=400, elem_classes="model-dropdown-container"):
             model_dropdown = gr.Dropdown(
                 choices=[f"Model: {model}" for model in models_config.keys()],
@@ -568,71 +570,89 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         with gr.Column(scale=4, min_width=0):
             feedback_message_html = gr.HTML(description, elem_classes="model-message")

-    chatbot = gr.Chatbot(
-        type="messages",
-        height="calc(100svh - 320px)",
-        max_height="calc(100svh - 320px)",
-        elem_classes="chatbot",
-    )
-
-    with gr.Row():
-        with gr.Column(scale=10, min_width=400, elem_classes="user-input-container"):
-            with gr.Row():
-                user_input = gr.MultimodalTextbox(
-                    interactive=True,
-                    container=False,
-                    file_count="multiple",
-                    placeholder="Type your message here and press Enter or upload file...",
-                    show_label=False,
-                    sources=["upload"],
-                    max_plain_text_length=100000,
-                    max_lines=10
-                )
-
-                # Original text-only input
-                # user_input = gr.Textbox(
-                #     show_label=False,
-                #     placeholder="Type your message here and press Enter",
-                #     container=False
-                # )
-        with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
-            with gr.Row():
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
-                    send_btn = gr.Button("Send", variant="primary", elem_classes="control-button")
-                    stop_btn = gr.Button("Stop", variant="cancel", elem_classes="control-button", visible=False)
-                with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
-                    clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary", elem_classes="control-button")
-    with gr.Row():
-        with gr.Column(min_width=400, elem_classes="opt-out-container"):
-            with gr.Row():
-                gr.HTML(
-                    "We may use your chats to improve our AI. You may opt out if you don’t want your conversations saved.",
-                    elem_classes="opt-out-message")
-            with gr.Row():
-                opt_out_checkbox = gr.Checkbox(
-                    label="Don’t save my chat history for improvements or training",
-                    value=DEFAULT_OPT_OUT_VALUE,
-                    elem_classes="opt-out-checkbox",
-                    interactive=True,
-                    container=False
-                )
+    with gr.Column(visible=True, elem_classes="agreement-overlay") as agreement_overlay:
+        with gr.Column(elem_classes="form"):
+            gr.Markdown("## Privacy Agreement")
+            gr.Markdown("""
+By using this app, you agree to the following terms:
+
+We record all content you submit and all model outputs (“Data”), including text, images, files, and minimal request metadata (timestamp & technical logs). We do not store IP addresses, cookies, or account identifiers, so we cannot link any submission back to a particular person. However, the text you submit may itself contain personal information (e.g., names, Social Security numbers). Please do not include sensitive personal data in your prompts. Any such information will be subject to our redaction process before any public release.
+
+Data is used for research, safety evaluation, and to improve the Service. We reserve the right to publish, share, or redistribute redacted versions of the Data under a Creative Commons Attribution (CC‑BY) or similar open license. Before any public release, we apply automated and manual redaction to remove private keys, names, contact details, and other identifiers that may appear in the content.
+
+Because we do not track user identities, individual submissions cannot be deleted or withdrawn once made. If you do not want your content used or released, do not submit it.
+            """)
+            agree_btn = gr.Button("I Agree", variant="primary")
+
+    with gr.Column(visible=True) as main_app_area:
+        chatbot = gr.Chatbot(
+            type="messages",
+            height="calc(100svh - 320px)",
+            max_height="calc(100svh - 320px)",
+            elem_classes="chatbot",
+        )
+
+        with gr.Row():
+            with gr.Column(scale=10, min_width=400, elem_classes="user-input-container"):
+                with gr.Row():
+                    # user_input = gr.MultimodalTextbox(
+                    #     interactive=True,
+                    #     container=False,
+                    #     file_count="multiple",
+                    #     placeholder="Type your message here and press Enter or upload file...",
+                    #     show_label=False,
+                    #     sources=["upload"],
+                    #     max_plain_text_length=100000,
+                    #     max_lines=10
+                    # )
+
+                    # Original text-only input
+                    user_input = gr.Textbox(
+                        show_label=False,
+                        placeholder="Type your message here and press Enter",
+                        container=False,
+                        max_lines=10
+                    )
+            with gr.Column(scale=1, min_width=BUTTON_WIDTH * 2 + 20):
+                with gr.Row():
+                    with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="send-button-container"):
+                        send_btn = gr.Button("Send", variant="primary", elem_classes="control-button")
+                        stop_btn = gr.Button("Stop", variant="cancel", elem_classes="control-button", visible=False)
+                    with gr.Column(scale=1, min_width=BUTTON_WIDTH, elem_classes="clear-button-container"):
+                        clear_btn = gr.ClearButton(chatbot, value="New Chat", variant="secondary", elem_classes="control-button")
+        with gr.Row():
+            with gr.Column(scale=1):
+                reasoning_effort_radio = gr.Radio(
+                    choices=["low", "medium", "high"],
+                    value="medium",
+                    label="Reasoning Effort",
+                    interactive=True,
+                    container=True,
+                    elem_classes="reasoning-radio"
+                )
+
+
+    def agree_to_terms():
+        return gr.update(visible=False)
+
+    agree_btn.click(agree_to_terms, None, [agreement_overlay])

     gr.on(
         triggers=[send_btn.click, user_input.submit],
         fn=run_chat_inference,  # this generator streams results. do not use logged_event_handler wrapper
-        inputs=[chatbot, user_input, session_state],
+        inputs=[chatbot, user_input, session_state, reasoning_effort_radio],
         outputs=[chatbot, user_input, send_btn, stop_btn, clear_btn, session_state],
         concurrency_limit=4,
         api_name=False
     ).then(
-        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn], queue=False)
+        fn=chat_finished, inputs=None, outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort_radio], queue=False)

     # In parallel, disable or update the UI controls
     gr.on(
         triggers=[send_btn.click, user_input.submit],
         fn=chat_started,
         inputs=None,
-        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn],
+        outputs=[model_dropdown, user_input, send_btn, stop_btn, clear_btn, reasoning_effort_radio],
         queue=False,
         show_progress='hidden',
         api_name=False
@@ -645,7 +665,7 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
         api_name=False
     )

-    opt_out_checkbox.change(fn=toggle_opt_out, inputs=[session_state, opt_out_checkbox], outputs=[session_state])
+

     # Ensure the model is reset to default on page reload
     demo.load(
@@ -667,4 +687,4 @@ with gr.Blocks(theme=theme, css=custom_css) as demo:
     )

 demo.queue(default_concurrency_limit=2).launch(ssr_mode=False, show_api=False, max_file_size="10mb")
-log_info("Gradio app launched")
+log_info("Gradio app launched")
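For readers skimming the diff, here is a minimal standalone sketch of the pattern this commit adopts: forwarding the selected reasoning effort to an OpenAI-compatible streaming completion and accumulating the `reasoning_content` and `content` deltas in separate buffers. The endpoint URL, API key, and the backend's support for `reasoning_effort` and a streamed `reasoning_content` field are assumptions for illustration, not details taken from this repository.

```python
# Sketch only: assumes a recent openai SDK and an OpenAI-compatible server
# (e.g. vLLM) that streams a separate reasoning_content field.
# base_url, api_key, and the model name are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")


def stream_answer(messages, reasoning_effort="medium", temperature=1.0):
    stream = client.chat.completions.create(
        model="Apriel-1.6-15B-Thinker",
        messages=messages,
        temperature=temperature,
        top_p=1.0,
        reasoning_effort=reasoning_effort,  # forwarded as in the commit; honored only if the backend supports it
        stream=True,
    )
    reasoning, answer = "", ""
    for chunk in stream:
        delta = chunk.choices[0].delta
        # Keep the thinking trace and the final answer in separate buffers,
        # mirroring the two chat bubbles the app renders.
        reasoning += getattr(delta, "reasoning_content", "") or ""
        answer += getattr(delta, "content", "") or ""
    return reasoning, answer


if __name__ == "__main__":
    thought, reply = stream_answer([{"role": "user", "content": "What is 17 * 23?"}])
    print("Thought:", thought)
    print("Answer:", reply)
```

In the app itself these two buffers correspond to `output_reasoning` and `output_content`, which update the 🧠 Thought bubble and the answer bubble as the stream arrives.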