sovetskiysn committed on
Commit
a473ae5
·
1 Parent(s): 071e653
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -1,17 +1,22 @@
 
 
1
  import torch
2
  from transformers import AutoTokenizer, AutoModelForCausalLM
3
  import gradio as gr
4
 
 
 
 
 
 
5
  model_path = "inceptionai/Llama-3.1-Sherkala-8B-Chat"
6
- tokenizer = AutoTokenizer.from_pretrained(model_path)
7
  model = AutoModelForCausalLM.from_pretrained(
8
- model_path, torch_dtype=torch.bfloat16, device_map="auto"
9
  )
10
 
11
  tokenizer.chat_template = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
12
 
13
- device = "cuda" if torch.cuda.is_available() else "cpu"
14
-
15
  def chat_fn(user_input):
16
  conversation = [{"role": "user", "content": user_input}]
17
  input_ids = tokenizer.apply_chat_template(
@@ -19,7 +24,7 @@ def chat_fn(user_input):
19
  tokenize=True,
20
  add_generation_prompt=True,
21
  return_tensors="pt"
22
- ).to(device)
23
 
24
  output_ids = model.generate(
25
  input_ids,
@@ -35,6 +40,5 @@ gr.Interface(
35
  inputs="text",
36
  outputs="text",
37
  title="Sherkala-8B Chat",
38
- description="Multilingual LLaMA-3.1 based model (Kazakh, Russian, English)",
39
- theme="default"
40
  ).launch()
 
1
+ import os
2
+ from huggingface_hub import login
3
  import torch
4
  from transformers import AutoTokenizer, AutoModelForCausalLM
5
  import gradio as gr
6
 
7
+ # Авторизация через токен
8
+ hf_token = os.environ.get("HF_TOKEN")
9
+ if hf_token:
10
+ login(token=hf_token)
11
+
12
  model_path = "inceptionai/Llama-3.1-Sherkala-8B-Chat"
13
+ tokenizer = AutoTokenizer.from_pretrained(model_path, token=hf_token)
14
  model = AutoModelForCausalLM.from_pretrained(
15
+ model_path, torch_dtype=torch.bfloat16, device_map="auto", token=hf_token
16
  )
17
 
18
  tokenizer.chat_template = "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}"
19
 
 
 
20
  def chat_fn(user_input):
21
  conversation = [{"role": "user", "content": user_input}]
22
  input_ids = tokenizer.apply_chat_template(
 
24
  tokenize=True,
25
  add_generation_prompt=True,
26
  return_tensors="pt"
27
+ ).to(model.device)
28
 
29
  output_ids = model.generate(
30
  input_ids,
 
40
  inputs="text",
41
  outputs="text",
42
  title="Sherkala-8B Chat",
43
+ description="Kazakh-Russian-English multilingual chat model",
 
44
  ).launch()