Spaces:
Sleeping
Sleeping
Commit
·
b160907
1
Parent(s):
ca13c5e
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
import time
|
| 2 |
import gradio as gr
|
| 3 |
-
from transformers import AutoTokenizer
|
| 4 |
import json
|
| 5 |
import requests
|
| 6 |
|
|
@@ -32,11 +31,6 @@ def update(prompt, llm, nctx, max_tokens):
|
|
| 32 |
|
| 33 |
answer = {}
|
| 34 |
|
| 35 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
|
| 36 |
-
|
| 37 |
-
# Tokenize the input text
|
| 38 |
-
tokenized_input = tokenizer.encode(prompt, return_tensors="pt")
|
| 39 |
-
|
| 40 |
# Measure processing time
|
| 41 |
start_time = time.time()
|
| 42 |
result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
|
|
@@ -47,8 +41,6 @@ def update(prompt, llm, nctx, max_tokens):
|
|
| 47 |
duration = end_time - start_time
|
| 48 |
answer['Duration'] = duration
|
| 49 |
print("Duration: "+str(duration))
|
| 50 |
-
tokens_per_second = len(tokenized_input) / duration
|
| 51 |
-
answer['Tokens Per Second'] = tokens_per_second
|
| 52 |
answer['answer'] = result
|
| 53 |
return json.dumps(answer)
|
| 54 |
|
|
|
|
| 1 |
import time
|
| 2 |
import gradio as gr
|
|
|
|
| 3 |
import json
|
| 4 |
import requests
|
| 5 |
|
|
|
|
| 31 |
|
| 32 |
answer = {}
|
| 33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Measure processing time
|
| 35 |
start_time = time.time()
|
| 36 |
result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
|
|
|
|
| 41 |
duration = end_time - start_time
|
| 42 |
answer['Duration'] = duration
|
| 43 |
print("Duration: "+str(duration))
|
|
|
|
|
|
|
| 44 |
answer['answer'] = result
|
| 45 |
return json.dumps(answer)
|
| 46 |
|