# Page-header residue (not code): sovetskiysn's picture / with / a473ae5
import os
from huggingface_hub import login
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
# Token-based authorization: log in to the Hugging Face Hub only when the
# HF_TOKEN environment variable is set to a non-empty value.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token)
# Hub repository id of the checkpoint used for both tokenizer and model.
model_path = "inceptionai/Llama-3.1-Sherkala-8B-Chat"

# Jinja chat template: every message is rendered as
# '<|start_header_id|>ROLE<|end_header_id|>\n\n' + trimmed content + '<|eot_id|>',
# the first message is prefixed with bos_token, and an assistant header is
# appended when add_generation_prompt is set.
_CHAT_TEMPLATE = (
    "{% set loop_messages = messages %}"
    "{% for message in loop_messages %}"
    "{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %}"
    "{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}"
    "{{ content }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
)

tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)

# Load the weights in bfloat16 and let device_map="auto" decide placement.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    token=hf_token,
)

# Replace whatever template the tokenizer shipped with by the one above.
tokenizer.chat_template = _CHAT_TEMPLATE
def chat_fn(user_input: str) -> str:
    """Generate a single-turn reply to ``user_input``.

    The text is wrapped as one ``user`` message, rendered through the
    tokenizer's chat template with the assistant generation prompt appended,
    and passed to ``model.generate`` with sampling enabled
    (``temperature=0.7``, at most 512 new tokens).

    Args:
        user_input: Message text entered by the user.

    Returns:
        The decoded newly generated tokens, with the prompt and special
        tokens removed. An empty string is returned when ``user_input`` is
        empty or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) generation for blank input.
    if not user_input or not user_input.strip():
        return ""

    conversation = [{"role": "user", "content": user_input}]

    # return_dict=True yields input_ids together with attention_mask, so
    # generate() receives an explicit mask instead of having to infer one.
    inputs = tokenizer.apply_chat_template(
        conversation=conversation,
        tokenize=True,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    output_ids = model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        temperature=0.7,
    )

    # The output sequence starts with the prompt tokens; decode only what
    # follows them.
    new_tokens = output_ids[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Build the single text-in / text-out web UI around chat_fn, then start it.
demo = gr.Interface(
    fn=chat_fn,
    inputs="text",
    outputs="text",
    title="Sherkala-8B Chat",
    description="Kazakh-Russian-English multilingual chat model",
)
demo.launch()