import os

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM


class Model:
    def __init__(self, model_url) -> None:
        self.model_url = model_url
        self.tokenizer = None
        self.model = None
        self.device = "cpu"
        self.dir_name = "model"  # local directory for cached weights

    def download_model(self) -> bool:
        """Download the model and tokenizer from the Hub if not cached locally."""
        if not os.path.exists(self.dir_name) or not os.listdir(self.dir_name):
            # exist_ok avoids FileExistsError when the directory exists but is empty
            os.makedirs(self.dir_name, exist_ok=True)
            tokenizer = AutoTokenizer.from_pretrained(self.model_url)
            model = AutoModelForCausalLM.from_pretrained(self.model_url)
            model.save_pretrained(self.dir_name)
            tokenizer.save_pretrained(self.dir_name)
            print(f"Model saved in '{self.dir_name}' directory.")
            return True
        print("Model is already downloaded and ready to use.")
        return False

    def load_local_model(self) -> None:
        """Load the tokenizer and model from the local directory onto the current device."""
        self.tokenizer = AutoTokenizer.from_pretrained(self.dir_name)
        self.model = AutoModelForCausalLM.from_pretrained(self.dir_name)
        # self.device is guaranteed valid by set_cuda/set_cpu, so the model and
        # the inputs built in inference() always end up on the same device.
        self.model.to(self.device)

    def inference(self, prompt_list) -> list:
        """Generate a completion for each prompt; returns an empty list if no model is loaded."""
        if self.model is None or self.tokenizer is None:
            print("Model was not able to run inference; make sure you've loaded the model.")
            return []
        self.model.eval()
        model_inferences = []
        for prompt in prompt_list:
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
            with torch.no_grad():
                outputs = self.model.generate(input_ids=inputs["input_ids"], max_new_tokens=512)
            # batch_decode accepts the output tensor directly; no numpy round-trip needed
            response = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
            model_inferences.append(response)
        return model_inferences

    def set_cuda(self) -> None:
        # Fall back to CPU when CUDA is unavailable, so later .to(self.device) calls never fail.
        self.device = "cuda" if torch.cuda.is_available() else "cpu"

    def set_cpu(self) -> None:
        self.device = "cpu"