update
README.md CHANGED

@@ -1,6 +1,5 @@
 ---
-title: Llamacpp-
-emoji: ⚡
+title: Llamacpp-t5-query-reformulation-RL
 colorFrom: indigo
 colorTo: blue
 sdk: gradio
@@ -8,7 +7,7 @@ sdk_version: 5.20.1
 app_file: app.py
 pinned: false
 license: mit
-short_description:
+short_description: t5-query-reformulation-RL on Llama.cpp CPU
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED

@@ -25,16 +25,20 @@ from exception import CustomExceptionHandling
 huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
 os.makedirs("models",exist_ok=True)

+
+
 hf_hub_download(
     repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
     filename="t5-query-reformulation-RL-q8_0.gguf",
     local_dir="./models",
 )

+
+
 # Set the title and description
 title = "t5-query-reformulation-RL Llama.cpp"
 description = """
-I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python support t5
+I'm using [fairydreaming/T5-branch](https://github.com/fairydreaming/llama-cpp-python/tree/t5), I'm not sure current llama-cpp-python server support t5

 [Model-Q8_0-GGUF](https://huggingface.co/AnanyaPathak/t5-query-reformulation-RL-GGUF), [Reference1](https://huggingface.co/spaces/sitammeur/Gemma-llamacpp)
 """
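Aside (not part of the commit): `hf_hub_download` returns the resolved local path of the fetched file, so the download step above can be sanity-checked before the model is constructed. A minimal sketch, reusing the repo and filename from the hunk:

```python
# Sketch only: repo_id and filename are taken from the hunk above; the
# assertion is an illustrative check, not part of the Space's app.py.
import os

from huggingface_hub import hf_hub_download

model_path = hf_hub_download(
    repo_id="AnanyaPathak/t5-query-reformulation-RL-GGUF",
    filename="t5-query-reformulation-RL-q8_0.gguf",
    local_dir="./models",
)
assert os.path.isfile(model_path)
print(model_path)  # e.g. ./models/t5-query-reformulation-RL-q8_0.gguf
```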
@@ -77,16 +81,18 @@ def respond(
     Returns:
         str: The response to the message.
     """
+
     try:
         global llama
         if llama == None:
-
+            model_id = "t5-query-reformulation-RL-q8_0.gguf"
+            llama = Llama(f"models/{model_id}",flash_attn=False,
             n_gpu_layers=0,
-            n_batch=
+            #n_batch=16,#batch sometime make error
             n_ctx=512,
             n_threads=2,
             n_threads_batch=2)
-
+
         tokens = llama.tokenize(f"{message}".encode("utf-8"))
         llama.encode(tokens)
         tokens = [llama.decoder_start_token()]
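This hunk lazy-loads the GGUF on the first request and then drives T5's encoder/decoder split by hand: tokenize and encode the source once, then decode starting from `decoder_start_token()`. A minimal sketch of the complete loop, assuming the fairydreaming T5 branch (`encode` and `decoder_start_token` come from that branch; `eval`, `sample`, and `detokenize` are stock llama-cpp-python; the 64-token cap and temperature are illustrative):

```python
# Sketch of the encoder-decoder loop, assuming the fairydreaming T5 branch
# of llama-cpp-python; model path and sampling settings mirror the diff.
from llama_cpp import Llama

llama = Llama("models/t5-query-reformulation-RL-q8_0.gguf",
              n_gpu_layers=0, n_ctx=512, n_threads=2, n_threads_batch=2)

prompt = "who invented the telephone"
llama.encode(llama.tokenize(prompt.encode("utf-8")))  # run the encoder once

out = []
step = [llama.decoder_start_token()]  # decoder begins from its own start token
while len(out) < 64:
    llama.eval(step)                  # one decoder step over the new token(s)
    tok = llama.sample(temp=0.4)
    if tok == llama.token_eos():
        break
    out.append(tok)
    step = [tok]

print(llama.detokenize(out).decode("utf-8", errors="ignore"))
```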
@@ -114,7 +120,7 @@ def respond(
 # Create a chat interface
 demo = gr.ChatInterface(
     respond,
-    examples=[["What is the capital of France?"], ["
+    examples=[["What is the capital of France?"], ["What real child was raised by wolves?"], ["What is gravity?"]],
     additional_inputs_accordion=gr.Accordion(
         label="⚙️ Parameters", open=False, render=False
     ),
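For context, a generic sketch (not the Space's actual app.py) of how `gr.ChatInterface` feeds each additional input's value to the callback after `message` and `history`; the placeholder `respond` body and slider values are illustrative:

```python
# Generic gr.ChatInterface wiring; the echo body is a placeholder, and the
# sliders mirror the diff's Max Tokens / Temperature controls.
import gradio as gr

def respond(message, history, max_tokens, temperature):
    return f"echo ({temperature=}, {max_tokens=}): {message}"

demo = gr.ChatInterface(
    respond,
    examples=[["What is the capital of France?"]],
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=1024, step=1, label="Max Tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.4, step=0.1, label="Temperature"),
    ],
)

if __name__ == "__main__":
    demo.launch()
```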
@@ -140,12 +146,12 @@ demo = gr.ChatInterface(
         value=1024,
         step=1,
         label="Max Tokens",
-        info="Maximum length of response (higher = longer replies)",
+        info="Maximum length of response (higher = longer replies)",visible=False
     ),
     gr.Slider(
         minimum=0.1,
         maximum=2.0,
-        value=0.
+        value=0.4,
         step=0.1,
         label="Temperature",
         info="Creativity level (higher = more creative, lower = more focused)",
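Note on the Max Tokens change: a Gradio component created with `visible=False` is hidden from the UI but still passes its value to the callback, so the response length stays pinned at 1024 while only Temperature remains user-adjustable.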