xxmaranxx committed on
Commit
8f028d3
·
verified ·
1 Parent(s): 233841c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -10
app.py CHANGED
@@ -1,8 +1,18 @@
1
- import pickle, numpy as np
2
  from fastapi import FastAPI
3
  from sentence_transformers import SentenceTransformer
4
  from transformers import pipeline
5
 
 
 
 
 
 
 
 
 
 
 
6
  lw = pickle.load(open("predictor.pkl", "rb"))
7
  sbert = SentenceTransformer(lw["model_name"])
8
 
@@ -13,34 +23,59 @@ cids = sorted(centroides.keys())
13
 
14
  meta = lw.get("meta", {})
15
 
16
- sentiment = pipeline("text-classification", model="UMUTeam/roberta-spanish-sentiment-analysis")
17
- emotion = pipeline("zero-shot-classification", model="MoritzLaurer/mDeBERTa-v3-base-xnli-multilingual-nli-2mil7")
 
 
 
 
 
18
  EMOTIONS = ["alegría","tristeza","ira","asco","miedo","sorpresa","neutral"]
 
 
 
 
 
19
 
20
  app = FastAPI()
21
 
22
- def _encode(text):
23
  emb = sbert.encode(text, convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
24
  return emb[None, :] if emb.ndim == 1 else emb
25
 
26
- def _assign(vec):
27
  dists = [np.linalg.norm(vec - centroides[c]) for c in cids]
28
  return cids[int(np.argmin(dists))]
29
 
 
 
 
 
 
 
 
 
 
30
  @app.post("/predict")
31
  def predict(payload: dict):
32
  item = payload.get("data", [{}])[0]
33
- text = f"{item.get('subject','')} — {item.get('body','')}"
 
 
 
34
  emb = _encode(text)[0]
35
  cid = _assign(emb)
36
 
37
- s = sentiment(text)[0]["label"]
38
- e = emotion(text, candidate_labels=EMOTIONS, hypothesis_template="El texto expresa {}.")["labels"][0]
 
 
 
39
 
40
  m = meta.get(str(cid), meta.get(cid, {}))
41
  return {
42
- "subject": item.get("subject",""),
43
- "body": item.get("body",""),
44
  "cluster": cid,
45
  "cluster_nombre": (m or {}).get("nombre"),
46
  "cluster_desc": (m or {}).get("descripcion"),
 
1
+ import os, pickle, numpy as np
2
  from fastapi import FastAPI
3
  from sentence_transformers import SentenceTransformer
4
  from transformers import pipeline
5
 
6
# ---- Performance flags ----
# Turn off the tokenizers library's own parallelism for this process.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

try:
    import torch
except ImportError:
    # torch is treated as optional here: without it there is nothing to tune.
    pass
else:
    # Keep torch on a single thread to avoid thrashing on a basic CPU.
    torch.set_num_threads(1)
14
+
15
# ---- Load artifacts once, at import time ----
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# predictor.pkl must come from a trusted source.
with open("predictor.pkl", "rb") as _predictor_file:  # closes the handle after loading
    lw = pickle.load(_predictor_file)
# The pickled bundle names the sentence-embedding model to load.
sbert = SentenceTransformer(lw["model_name"])
18
 
 
23
 
24
  meta = lw.get("meta", {})
25
 
26
# Sentiment classifier, built once at import time. Inputs are capped to
# 256 tokens where the pipeline is called, not here.
# NOTE(review): device=-1 is expected to select the CPU - confirm against the
# installed transformers version.
sentiment = pipeline(
    task="text-classification",
    model="UMUTeam/roberta-spanish-sentiment-analysis",
    device=-1,
)
32
+
33
EMOTIONS = ["alegría","tristeza","ira","asco","miedo","sorpresa","neutral"]
HYP = "El texto expresa {}."

# Embed one hypothesis sentence per emotion, once, with the same encoder used
# for incoming texts, so emotion lookup later needs no second model.
_emotion_texts = list(map(HYP.format, EMOTIONS))
_emotion_embs = sbert.encode(
    _emotion_texts,
    convert_to_numpy=True,
    normalize_embeddings=True,
).astype(np.float32)
39
 
40
  app = FastAPI()
41
 
42
def _encode(text: str) -> np.ndarray:
    """Embed *text* with the shared SBERT encoder as float32.

    The encoder is asked for normalized NumPy output. A 1-D result gets a
    leading axis of length 1 so callers can index rows; any other shape is
    returned unchanged.
    """
    vectors = sbert.encode(
        text,
        convert_to_numpy=True,
        normalize_embeddings=True,
    ).astype(np.float32)
    if vectors.ndim == 1:
        return vectors[None, :]
    return vectors
45
 
46
def _assign(vec: np.ndarray) -> int:
    """Return the cluster id whose centroid is closest to *vec*.

    Closeness is the Euclidean norm of ``vec - centroid``. Candidates are
    visited in ``cids`` order, and the first minimum wins on ties.
    """
    distances = np.array(
        [np.linalg.norm(vec - centroides[cluster_id]) for cluster_id in cids]
    )
    nearest = int(distances.argmin())
    return cids[nearest]
49
 
50
+ def _truncate_for_classifier(text: str, max_chars: int = 1000) -> str:
51
+ # evita tokenizaciones eternas en CPU (≈256 tokens)
52
+ return text if len(text) <= max_chars else text[:max_chars]
53
+
54
def _fast_emotion(emb: np.ndarray) -> str:
    """Return the entry of ``EMOTIONS`` whose precomputed embedding scores highest against *emb*.

    The score is a plain dot product. It equals cosine similarity when *emb*
    is normalized, as the emotion embeddings are at encode time.
    """
    column = emb.reshape(-1, 1)
    scores = np.squeeze(_emotion_embs @ column, axis=-1)
    best = int(np.argmax(scores))
    return EMOTIONS[best]
58
+
59
  @app.post("/predict")
60
  def predict(payload: dict):
61
  item = payload.get("data", [{}])[0]
62
+ subject = item.get("subject", "")
63
+ body = item.get("body", "")
64
+ text = f"{subject} — {body}"
65
+
66
  emb = _encode(text)[0]
67
  cid = _assign(emb)
68
 
69
+ # RÁPIDO: sentimiento con truncado
70
+ s = sentiment(_truncate_for_classifier(text), truncation=True, max_length=256)[0]["label"]
71
+
72
+ # RÁPIDO: emoción por similitud con SBERT (sin segundo Transformer)
73
+ e = _fast_emotion(emb)
74
 
75
  m = meta.get(str(cid), meta.get(cid, {}))
76
  return {
77
+ "subject": subject,
78
+ "body": body,
79
  "cluster": cid,
80
  "cluster_nombre": (m or {}).get("nombre"),
81
  "cluster_desc": (m or {}).get("descripcion"),