import os
from typing import Optional

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig
# ── 1) Configuration ──────────────────────────────────────────────────────────
BASE_DIR = "MAS-AI-0000/Authentica"
MODEL_DIR = os.path.join(BASE_DIR, "Lib/Models/Text")  # Update this path to your model location
MAX_LEN = 512
# ── 2) Load model & tokenizer ─────────────────────────────────────────────────
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Text prediction device: {device}")

# Global variables for the model and tokenizer; they stay None if loading fails
tokenizer = None
model = None
ID2LABEL = {0: "human", 1: "ai"}
try:
    # The config carries id2label/label2id if they were saved with the model
    config = AutoConfig.from_pretrained(MODEL_DIR)
    # Loads tokenizer.json + special_tokens_map.json automatically
    tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR, use_fast=True)
    # Loads model.safetensors automatically (no extra flags needed)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_DIR, config=config)
    model.eval().to(device)
    # Update the label mapping from the config if available
    ID2LABEL = model.config.id2label if getattr(model.config, "id2label", None) else {0: "human", 1: "ai"}
    print("Text classification model loaded successfully")
    print("Labels:", ID2LABEL)
except Exception as e:
    print(f"Error loading text model: {e}")
    print("Text prediction will return fallback responses")
# ── 3) Inference function ─────────────────────────────────────────────────────
def predict_text(text: str, max_length: Optional[int] = None):
    """
    Predict whether the given text is human-written or AI-generated.

    Args:
        text (str): The text to classify
        max_length (int): Maximum sequence length for tokenization (defaults to MAX_LEN)

    Returns:
        dict: Contains predicted_class and confidence
    """
    if model is None or tokenizer is None:
        return {"predicted_class": "Human", "confidence": 0.0}
    if max_length is None:
        max_length = MAX_LEN
    try:
        # Tokenize the input text
        enc = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
        )
        enc = {k: v.to(device) for k, v in enc.items()}
        # Run the forward pass without tracking gradients (inference only)
        with torch.no_grad():
            logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).squeeze(0).cpu().numpy()
        pred_id = int(probs.argmax(-1))
        # Get the label (capitalize the first letter for consistency)
        label = ID2LABEL.get(pred_id, str(pred_id))
        label = label.capitalize()  # "human" -> "Human", "ai" -> "Ai"
        return {
            "predicted_class": label,
            "confidence": float(probs[pred_id]),
        }
    except Exception as e:
        print(f"Error during text prediction: {e}")
        return {"predicted_class": "Human", "confidence": 0.0}
# ── 4) Batch prediction (optional, for future use) ────────────────────────────
def predict_batch(texts, batch_size=16):
    """
    Predict multiple texts in batches.

    Args:
        texts (list): List of text strings to classify
        batch_size (int): Batch size for processing

    Returns:
        list: List of prediction dictionaries
    """
    if model is None or tokenizer is None:
        return [{"predicted_class": "Human", "confidence": 0.0} for _ in texts]
    results = []
    for i in range(0, len(texts), batch_size):
        chunk = texts[i:i + batch_size]
        enc = tokenizer(
            chunk,
            return_tensors="pt",
            truncation=True,
            max_length=MAX_LEN,
            padding=True,
        )
        enc = {k: v.to(device) for k, v in enc.items()}
        # Run the forward pass without tracking gradients (inference only)
        with torch.no_grad():
            logits = model(**enc).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()
        ids = probs.argmax(-1)
        for t, pid, p in zip(chunk, ids, probs):
            label = ID2LABEL.get(int(pid), str(int(pid))).capitalize()
            results.append({
                "text": t,
                "predicted_class": label,
                "confidence": float(p[int(pid)]),
            })
    return results
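
# ── 5) Smoke test ─────────────────────────────────────────────────────────────
# A minimal sketch for running this module directly; the sample texts below are
# hypothetical placeholders, not drawn from any dataset, and real outputs depend
# on the model loaded from MODEL_DIR.
if __name__ == "__main__":
    samples = [
        "I walked to the corner shop this morning and chatted with the owner.",
        "As an AI language model, I can generate text on a wide range of topics.",
    ]
    # Single prediction
    print(predict_text(samples[0]))
    # Batch prediction
    for r in predict_batch(samples, batch_size=2):
        print(r)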