import gradio as gr
from huggingface_hub import InferenceClient

# ============================================
# KTH ID2223 Lab 2 - Llama 3.2 ChatBot
# ============================================

# Use your fine-tuned model (safetensors format)
MODEL_ID = "Marcus719/Llama-3.2-3B-Instruct-Lab2"
client = InferenceClient(model=MODEL_ID)


def chat(message, history, system_message, max_tokens, temperature, top_p):
    """Generate a streamed response using the HuggingFace Inference API."""
    messages = [{"role": "system", "content": system_message}]

    # Add conversation history
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Stream the response token by token
    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if chunk.choices and chunk.choices[0].delta.content:
            token = chunk.choices[0].delta.content
            response += token
            yield response


# ============================================
# Gradio UI
# ============================================

DEFAULT_SYSTEM_PROMPT = "You are a helpful, respectful and honest assistant."

with gr.Blocks(theme=gr.themes.Soft(), title="🦙 Llama 3.2 ChatBot") as demo:
    gr.Markdown(
        """
        # 🦙 Llama 3.2 3B Instruct - Fine-tuned on FineTome

        **KTH ID2223 Scalable Machine Learning - Lab 2**

        This chatbot uses my fine-tuned Llama 3.2 3B model trained on the FineTome-100k dataset.

        📦 Model: [Marcus719/Llama-3.2-3B-Instruct-Lab2](https://huggingface.co/Marcus719/Llama-3.2-3B-Instruct-Lab2)
        """
    )

    chatbot = gr.Chatbot(label="Chat", height=450, show_copy_button=True)

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message here...",
            scale=4,
            container=False,
            autofocus=True,
        )
        submit_btn = gr.Button("Send 🚀", scale=1, variant="primary")

    with gr.Accordion("⚙️ Settings", open=False):
        system_prompt = gr.Textbox(
            label="System Prompt", value=DEFAULT_SYSTEM_PROMPT, lines=2
        )
        with gr.Row():
            max_tokens = gr.Slider(64, 1024, value=512, step=32, label="Max Tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")
            top_p = gr.Slider(0.1, 1.0, value=0.95, step=0.05, label="Top-p")

    with gr.Row():
        clear_btn = gr.Button("🗑️ Clear Chat")
        retry_btn = gr.Button("🔄 Regenerate")

    gr.Examples(
        examples=[
            "Hello! Can you introduce yourself?",
            "Explain machine learning in simple terms.",
            "What is the difference between fine-tuning and pre-training?",
            "Write a short poem about AI.",
        ],
        inputs=msg,
        label="💡 Try these examples",
    )

    # Event handlers
    def user_input(message, history):
        # Clear the textbox and append the user's turn with an empty assistant slot
        return "", history + [[message, None]]

    def bot_response(history, system_prompt, max_tokens, temperature, top_p):
        if not history:
            yield history
            return
        message = history[-1][0]
        history_for_model = history[:-1]
        for response in chat(
            message, history_for_model, system_prompt, max_tokens, temperature, top_p
        ):
            history[-1][1] = response
            yield history

    def retry_last(history, system_prompt, max_tokens, temperature, top_p):
        if not history:
            yield history
            return
        # Discard the previous answer and regenerate it for the last user message
        history[-1][1] = None
        message = history[-1][0]
        history_for_model = history[:-1]
        for response in chat(
            message, history_for_model, system_prompt, max_tokens, temperature, top_p
        ):
            history[-1][1] = response
            yield history

    msg.submit(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
    )
    submit_btn.click(user_input, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot_response, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
    )
    clear_btn.click(lambda: [], None, chatbot, queue=False)
    retry_btn.click(
        retry_last, [chatbot, system_prompt, max_tokens, temperature, top_p], chatbot
    )

    gr.Markdown(
        """
        ---
        ### 📝 About This Project

        **Fine-tuning Details:**
        - Base Model: `meta-llama/Llama-3.2-3B-Instruct`
        - Dataset: [FineTome-100k](https://huggingface.co/datasets/mlabonne/FineTome-100k)
        - Method: QLoRA (4-bit quantization + LoRA)
        - Framework: [Unsloth](https://github.com/unslothai/unsloth)

        Built with ❤️ for KTH ID2223 Lab 2
        """
    )


if __name__ == "__main__":
    demo.launch()
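

# --------------------------------------------
# Optional: quick smoke test without the UI
# --------------------------------------------
# A minimal sketch for exercising the `chat` generator directly, assuming the
# Hugging Face Inference API is reachable and MODEL_ID is served there.
# Run it from a REPL or a separate script; the final accumulated response is
# the last value the generator yields.
#
#   final = ""
#   for partial in chat(
#       message="Hello! Can you introduce yourself?",
#       history=[],
#       system_message=DEFAULT_SYSTEM_PROMPT,
#       max_tokens=128,
#       temperature=0.7,
#       top_p=0.95,
#   ):
#       final = partial
#   print(final)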