Disable quantization
Browse files

With a GPU, torch raises: AssertionError: Embedding quantization is only supported with float_qparams_weight_only_qconfig.
- handler.py +1 -2
handler.py
CHANGED
|
@@ -5,8 +5,7 @@ from torch.quantization import quantize_dynamic
|
|
| 5 |
|
| 6 |
class EndpointHandler():
|
| 7 |
def __init__(self, path=""):
|
| 8 |
-
|
| 9 |
-
self.model = quantize_dynamic(slowmodel, {Linear, Embedding})
|
| 10 |
|
| 11 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 12 |
"""
|
|
|
|
| 5 |
|
| 6 |
class EndpointHandler():
|
| 7 |
def __init__(self, path=""):
|
| 8 |
+
self.model = SentenceTransformer('sentence-transformers/multi-qa-MiniLM-L6-cos-v1')
|
|
|
|
| 9 |
|
| 10 |
def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 11 |
"""
|