"""Minimal FastAPI service exposing a causal language model over HTTP.

Endpoints:
    GET  /      -- liveness message.
    POST /chat  -- generate text for the ``message`` field of a JSON body.
"""

from fastapi import FastAPI, HTTPException
from transformers import AutoModelForCausalLM, AutoTokenizer
import uvicorn

MODEL_ID = "Qwen/Qwen2.5-0.5B-Instruct"
# Upper bound on tokens generated per request, not counting the prompt.
MAX_NEW_TOKENS = 200

# Loaded once at import time so every request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

app = FastAPI()


@app.get("/")
def root():
    """Return a static payload confirming the service is running."""
    return {"message": "AI API is running"}


@app.post("/chat")
def chat(data: dict):
    """Generate text from the ``message`` field of the request body.

    Args:
        data: Parsed JSON body; must contain a non-empty string under
            the key ``"message"``.

    Returns:
        A dict ``{"response": <decoded text>}``. The whole output sequence
        is decoded, so the text starts with the prompt itself.

    Raises:
        HTTPException: 422 when ``"message"`` is missing, not a string,
            or empty.
    """
    msg = data.get("message")
    # Reject bad input explicitly; a bare data["message"] turned a missing
    # key into an opaque 500 response.
    if not isinstance(msg, str) or not msg:
        raise HTTPException(
            status_code=422,
            detail='Request body must contain a non-empty string field "message".',
        )

    # NOTE(review): the prompt is tokenized as raw text; an instruct model
    # presumably expects its chat template -- confirm before changing.
    inputs = tokenizer(msg, return_tensors="pt")
    # max_new_tokens bounds only the generated part; max_length also counted
    # the prompt, leaving no room to generate for long prompts.
    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    res = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return {"response": res}


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=7860)