Spaces: Runtime error
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre-quantized models we support for 4x faster downloading + no OOMs.
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",      # New Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",           # Llama-3 15 trillion tokens model 2x faster!
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",        # Phi-3 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",             # Gemma 2.2x faster!
    # "netmouse/Llama-3-Taiwan-8B-Instruct-finetuning-by-promisedchat",     # conversational chat model
    # "netmouse/Llama-3-Taiwan-8B-finetuning-by-promisedchat-Instruction",  # instruction model
]  # More models at https://huggingface.co/unsloth
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "netmouse/Llama-3-Taiwan-8B-finetuning-by-promisedchat-Instruction",  # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference
import transformers

# The first message is a persona prompt: "You are a member of the Facebook group
# 應許之地 ('Promised Land'); members call each other 應友." The user turn says the
# group's spirit is 混沌 ("chaos"). Using the "system" role for the persona avoids
# two consecutive "user" turns.
message = [
    {"role": "system", "content": "你是一個在臉書社團「應許之地」的社團成員,大家會互相稱為「應友」"},
    {"role": "user", "content": "應許的精神就是「混沌」"},
]
prompt = tokenizer.apply_chat_template(message, add_generation_prompt=True, tokenize=False)
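# For reference, the rendered prompt should look roughly like the standard Llama-3
# chat format (an illustration, assuming this fine-tune keeps the base template):
#   <|begin_of_text|><|start_header_id|>system<|end_header_id|>
#   ...persona...<|eot_id|><|start_header_id|>user<|end_header_id|>
#   ...user text...<|eot_id|><|start_header_id|>assistant<|end_header_id|>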
# Create a text-generation pipeline around the Unsloth model
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Stop on either the regular EOS token or Llama-3's end-of-turn token <|eot_id|>
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]
# Generate text
sequences = pipeline(
    prompt,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    eos_token_id=terminators,
    num_return_sequences=1,
    max_new_tokens=256,  # cap new tokens instead of max_length so the prompt is never truncated
)
print(sequences[0]['generated_text'][len(prompt):])
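# Alternative sketch (not required by the rest of this script): Unsloth models can
# also be driven directly with model.generate, and transformers' TextStreamer prints
# tokens as they are produced. The sampling settings below mirror the pipeline call
# above and are illustrative only.
from transformers import TextStreamer

# add_special_tokens=False because apply_chat_template already inserted <|begin_of_text|>
inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device)
streamer = TextStreamer(tokenizer, skip_prompt=True)
_ = model.generate(
    **inputs,
    streamer=streamer,
    max_new_tokens=256,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    eos_token_id=terminators,
)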
import gradio as gr

messages = []  # running chat history as {"role": ..., "content": ...} dicts

def add_text(history, text):
    global messages
    # Use a list (not a tuple) for the new chat pair so generate() can append to it in place
    history = history + [[text, '']]
    messages = messages + [{"role": 'user', 'content': text}]
    return history, ""
def generate(history):
    global messages
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]
    outputs = pipeline(
        prompt,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )
    response_msg = outputs[0]["generated_text"][len(prompt):]
    # Keep the assistant reply in the history so later turns see the full conversation
    messages.append({"role": "assistant", "content": response_msg})
    # Stream the reply into the chat window one character at a time
    for char in response_msg:
        history[-1][1] += char
        yield history
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot")
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="請輸入聊天內容",  # "Please enter your message"
        )
    txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then(
        generate, inputs=[chatbot], outputs=chatbot,
    )

demo.queue()
demo.launch(debug=True)