xxmaranxx committed on
Commit
b02461f
·
verified ·
1 Parent(s): 3d8f0ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -20
app.py CHANGED
@@ -1,11 +1,15 @@
 
 
 
1
  import os, pickle, numpy as np
 
2
  from fastapi import FastAPI
 
3
  from sentence_transformers import SentenceTransformer
4
  from transformers import pipeline
5
 
6
  # ---- Performance flags ----
7
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
8
-
9
  try:
10
  import torch
11
  torch.set_num_threads(1) # evita thrashing en CPU básica
@@ -13,12 +17,14 @@ except Exception:
13
  pass
14
 
15
  # ---- Carga artefactos una vez ----
16
- lw = pickle.load(open("predictor.pkl", "rb"))
17
  sbert = SentenceTransformer(lw["model_name"])
18
 
 
19
  centroides = {int(k): np.array(v, dtype=np.float32) for k, v in lw["centroides"].items()}
20
  for k, v in centroides.items():
21
- centroides[k] = v / (np.linalg.norm(v) + 1e-12)
 
22
  cids = sorted(centroides.keys())
23
 
24
  meta = lw.get("meta", {})
@@ -30,38 +36,50 @@ sentiment = pipeline(
30
  device=-1
31
  )
32
 
33
- EMOTIONS = ["alegría","tristeza","ira","asco","miedo","sorpresa","neutral"]
34
  HYP = "El texto expresa {}."
35
-
36
- # Precompute embeddings de las emociones con tu mismo encoder (muy rápido)
37
  _emotion_texts = [HYP.format(e) for e in EMOTIONS]
38
- _emotion_embs = sbert.encode(_emotion_texts, convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
 
 
39
 
40
- app = FastAPI()
41
 
 
42
  def _encode(text: str) -> np.ndarray:
43
  emb = sbert.encode(text, convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
44
  return emb[None, :] if emb.ndim == 1 else emb
45
 
46
  def _assign(vec: np.ndarray) -> int:
 
47
  dists = [np.linalg.norm(vec - centroides[c]) for c in cids]
48
  return cids[int(np.argmin(dists))]
49
 
50
  def _truncate_for_classifier(text: str, max_chars: int = 1000) -> str:
51
- # evita tokenizaciones eternas en CPU (≈256 tokens)
52
  return text if len(text) <= max_chars else text[:max_chars]
53
 
54
  def _fast_emotion(emb: np.ndarray) -> str:
55
- # cos sim porque ya están normalizados
56
  sims = (_emotion_embs @ emb.reshape(-1, 1)).squeeze(-1)
57
  return EMOTIONS[int(np.argmax(sims))]
58
 
 
 
 
 
 
 
 
 
 
 
 
59
  @app.post("/predict")
60
- def predict(payload: dict):
61
- item = payload.get("data", {})
62
- subject = item.get("subject", "")
63
- body = item.get("body", "")
64
- text = f"{subject} — {body}"
65
 
66
  emb = _encode(text)[0]
67
  cid = _assign(emb)
@@ -72,13 +90,18 @@ def predict(payload: dict):
72
  # RÁPIDO: emoción por similitud con SBERT (sin segundo Transformer)
73
  e = _fast_emotion(emb)
74
 
75
- m = meta.get(str(cid), meta.get(cid, {}))
76
  return {
77
- "subject": subject,
78
- "body": body,
79
  "cluster": cid,
80
- "cluster_nombre": (m or {}).get("nombre"),
81
- "cluster_desc": (m or {}).get("descripcion"),
82
  "sentimiento": s,
83
  "emocion": e
84
  }
 
 
 
 
 
 
1
+ # app.py
2
+ # -*- coding: utf-8 -*-
3
+
4
  import os, pickle, numpy as np
5
+ from typing import Dict
6
  from fastapi import FastAPI
7
+ from pydantic import BaseModel, Field
8
  from sentence_transformers import SentenceTransformer
9
  from transformers import pipeline
10
 
11
  # ---- Performance flags ----
12
  os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
13
  try:
14
  import torch
15
  torch.set_num_threads(1) # evita thrashing en CPU básica
 
17
  pass
18
 
19
  # ---- Carga artefactos una vez ----
20
# Load the pickled predictor bundle once, at import time.
# The context manager closes the file handle deterministically instead of
# leaving it open until the garbage collector gets to it.
# NOTE(review): unpickling can execute arbitrary code — predictor.pkl must
# come from a trusted build step, never from user-supplied data.
with open("predictor.pkl", "rb") as _artifact_file:
    lw: Dict = pickle.load(_artifact_file)
21
  sbert = SentenceTransformer(lw["model_name"])
22
 
23
+ # centroides normalizados
24
  centroides = {int(k): np.array(v, dtype=np.float32) for k, v in lw["centroides"].items()}
25
  for k, v in centroides.items():
26
+ n = np.linalg.norm(v) + 1e-12
27
+ centroides[k] = (v / n).astype(np.float32)
28
  cids = sorted(centroides.keys())
29
 
30
  meta = lw.get("meta", {})
 
36
  device=-1
37
  )
38
 
39
# Closed set of emotion labels the service can return.
EMOTIONS = ["alegría", "tristeza", "ira", "asco", "miedo", "sorpresa", "neutral"]
# Sentence template; each label is embedded as a full sentence.
HYP = "El texto expresa {}."
# Embed the label sentences once at import time with the shared `sbert`
# encoder, so that scoring a request is only a matrix product.
_emotion_texts = [HYP.format(e) for e in EMOTIONS]
_emotion_embs = sbert.encode(
    _emotion_texts, convert_to_numpy=True, normalize_embeddings=True
).astype(np.float32)

app = FastAPI(title="Predicción de clusters/sentimiento/emoción")
48
 
49
+ # -------- Helpers --------
50
  def _encode(text: str) -> np.ndarray:
51
  emb = sbert.encode(text, convert_to_numpy=True, normalize_embeddings=True).astype(np.float32)
52
  return emb[None, :] if emb.ndim == 1 else emb
53
 
54
  def _assign(vec: np.ndarray) -> int:
55
+ # como están normalizados, L2 o cos son equivalentes (hasta constante)
56
  dists = [np.linalg.norm(vec - centroides[c]) for c in cids]
57
  return cids[int(np.argmin(dists))]
58
 
59
  def _truncate_for_classifier(text: str, max_chars: int = 1000) -> str:
 
60
  return text if len(text) <= max_chars else text[:max_chars]
61
 
62
  def _fast_emotion(emb: np.ndarray) -> str:
63
+ # cos sim (embs normalizados)
64
  sims = (_emotion_embs @ emb.reshape(-1, 1)).squeeze(-1)
65
  return EMOTIONS[int(np.argmax(sims))]
66
 
67
+ # -------- Schema de entrada --------
68
class Entrada(BaseModel):
    """Request body for the ``/predict`` endpoint.

    Each field can be supplied under its Spanish attribute name or under its
    English alias; both default to an empty string when omitted.
    """

    # Pydantic v2 configuration: accept the attribute names ("asunto",
    # "cuerpo") in addition to the aliases. Declared through ``model_config``
    # because the nested ``class Config`` form is deprecated in Pydantic v2,
    # which is the version the ``populate_by_name`` key belongs to.
    model_config = {"populate_by_name": True}

    # Accepts "asunto" or "subject".
    asunto: str = Field(default="", alias="subject")
    # Accepts "cuerpo" or "body".
    cuerpo: str = Field(default="", alias="body")
76
+
77
+ # -------- Endpoint --------
78
  @app.post("/predict")
79
+ def predict(item: Entrada):
80
+ subject = (item.asunto or "").strip()
81
+ body = (item.cuerpo or "").strip()
82
+ text = f"{subject} — {body}".strip(" ")
 
83
 
84
  emb = _encode(text)[0]
85
  cid = _assign(emb)
 
90
  # RÁPIDO: emoción por similitud con SBERT (sin segundo Transformer)
91
  e = _fast_emotion(emb)
92
 
93
+ m = meta.get(str(cid), meta.get(cid, {})) or {}
94
  return {
95
+ "asunto": subject,
96
+ "cuerpo": body,
97
  "cluster": cid,
98
+ "cluster_nombre": m.get("nombre"),
99
+ "cluster_desc": m.get("descripcion"),
100
  "sentimiento": s,
101
  "emocion": e
102
  }
103
+
104
+ # -------- Entrypoint opcional --------
105
# Optional entry point: when the module is executed directly, serve the app
# with uvicorn on all interfaces. The port is read from $PORT (default 8000).
if __name__ == "__main__":
    import uvicorn

    uvicorn.run("app:app", host="0.0.0.0", port=int(os.getenv("PORT", "8000")))