import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

# Directory containing the fine-tuned model and tokenizer files
model_dir = "Model"

# Load the tokenizer and model weights from the local directory
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(model_dir)

# Run on the GPU when available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()  # switch to inference mode (disables dropout)
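
# Optional (an assumption, not part of the original script): on a CUDA GPU the model could
# instead be loaded in half precision to reduce memory use, e.g.
#   model = AutoModelForCausalLM.from_pretrained(model_dir, torch_dtype=torch.float16)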


def generate_answer(question, max_new_tokens=128, temperature=0.8, top_p=0.9):
    # Wrap the question in the same "Question: ... / Réponse:" template used during fine-tuning
    prompt = f"Question: {question}\nRéponse:"

    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt").to(device)

    # Sample a continuation without tracking gradients
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
        )

    # Decode the full sequence (prompt + generated answer)
    full_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Keep only the text generated after the "Réponse:" marker, dropping the echoed prompt
    if "Réponse:" in full_text:
        answer_part = full_text.split("Réponse:", 1)[1]
    else:
        answer_part = full_text

    # The fine-tuned model emits a literal "<EOS>" marker at the end of an answer;
    # truncate everything after it
    if "<EOS>" in answer_part:
        answer_part = answer_part.split("<EOS>")[0]

    return answer_part.strip()
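
# Illustrative sanity check outside the web UI (the example question below is only an
# assumption for demonstration, not part of the original script):
#   print(generate_answer("Quelle est la capitale de la France ?", max_new_tokens=64))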


# Gradio UI: the textbox and sliders map one-to-one onto generate_answer's arguments
iface = gr.Interface(
    fn=generate_answer,
    inputs=[
        gr.Textbox(lines=2, label="Your question"),
        gr.Slider(16, 512, value=128, step=16, label="max_new_tokens"),
        gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="temperature"),
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="top_p"),
    ],
    outputs=gr.Textbox(lines=8, label="AI answer"),
    title="Trained QA LLM",
    description="Ask a question in French. The model answers and stops cleanly at <EOS>, without showing the internal prompt.",
)

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio link in addition to the local URL
    iface.launch(share=True)