AJ50 committed
Commit 1b05367 · 1 Parent(s): f152556

Add on-demand XTTS model download via setup_models.py


- setup_models.py runs at startup before gunicorn
- Downloads XTTS-v2 model to /app/backend/models/ (first deployment only)
- Uses TTS_HOME env var for caching
- Stdin suppression eliminates TOS prompts
- Subsequent deployments use cached model (instant startup)

This approach (sketched below):
- Eliminates TOS prompts entirely
- Prevents repository bloat (no 1.8GB in git)
- Maintains instant loading (after first deployment)
- Works with HF Spaces persistent storage
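
A minimal Python sketch of the caching contract these bullets describe, assuming (as the commit message states) that the Coqui TTS library honours the TTS_HOME environment variable; the /app/backend/models path is the one named in this commit and is not re-verified here:

    import os
    from pathlib import Path
    from TTS.api import TTS

    # Persistent model directory named in this commit (assumption: it survives
    # restarts via HF Spaces persistent storage).
    models_dir = Path("/app/backend/models")
    os.environ["TTS_HOME"] = str(models_dir)  # point the TTS cache at persistent storage

    # First deployment: downloads ~1.8GB under $TTS_HOME/tts/...
    # Later deployments: the same call finds the cached files and starts quickly.
    tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2", gpu=False)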

Dockerfile CHANGED
@@ -17,13 +17,15 @@ COPY . .
 # Install Python dependencies
 RUN pip install --no-cache-dir -r backend/requirements.txt
 
-# Note: Models will be downloaded on first request
-# Skipping download_models.py to avoid build timeout on HF Spaces
-# - English models: Downloaded via hf_hub_download on first voice enrollment/synthesis
-# - Hindi XTTS: Downloaded via TTS library on first Hindi synthesis request
+# Note: XTTS model downloads on first startup (via setup_models.py)
+# - English models: Local (already in repo)
+# - Hindi XTTS: Downloaded from HF Hub to /app/backend/models/ on first deployment
 
 # Expose port (HF Spaces uses 7860)
 EXPOSE 7860
 
+# Setup models and start app
+RUN echo "#!/bin/bash\nset -e\necho '[Startup] Ensuring models are available...'\npython /app/backend/setup_models.py\necho '[Startup] Models ready, starting gunicorn...'\nexec gunicorn --bind 0.0.0.0:7860 --workers 1 --timeout 300 backend.wsgi:app" > /app/start.sh && chmod +x /app/start.sh
+
 # Start the application
-CMD ["gunicorn", "--bind", "0.0.0.0:7860", "--workers", "1", "--timeout", "300", "backend.wsgi:app"]
+CMD ["/app/start.sh"]
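
For readability, a Python rendering of what the generated /app/start.sh does (a sketch of the startup sequence, not the script the Dockerfile actually writes): run setup_models.py to completion, then replace the current process with gunicorn.

    import os
    import subprocess
    import sys

    # Equivalent of: set -e; python /app/backend/setup_models.py
    subprocess.run([sys.executable, "/app/backend/setup_models.py"], check=True)

    # Equivalent of: exec gunicorn --bind 0.0.0.0:7860 --workers 1 --timeout 300 backend.wsgi:app
    os.execvp("gunicorn", [
        "gunicorn", "--bind", "0.0.0.0:7860",
        "--workers", "1", "--timeout", "300",
        "backend.wsgi:app",
    ])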
backend/app/multilingual_tts.py CHANGED
@@ -110,28 +110,26 @@ class MultilingualTTSService:
         print("[MultilingualTTSService] ✓ English vocoder loaded")
 
     def _load_hindi_models(self):
-        """Load Hindi XTTS model (lazy load with auto-download via TTS library)."""
+        """Load Hindi XTTS model - uses local cached version or downloads on first run."""
         if self._xtts_model is None:
             print("[MultilingualTTSService] Loading Hindi XTTS model...")
             try:
                 from TTS.api import TTS
                 import io
 
-                print("[MultilingualTTSService] Loading XTTS-v2 model (may auto-download if needed)...")
-
-                # Suppress stdin to prevent interactive prompts
-                # This is the most reliable way that works in Docker/HF Spaces
+                # Model will be cached in /app/backend/models/tts/ after first download
+                # Suppress stdin to prevent interactive TOS prompts
                 old_stdin = sys.stdin
-                sys.stdin = io.StringIO("y\n")  # Auto-answer "y" if prompted
+                sys.stdin = io.StringIO("y\n")  # Auto-answer "y" to TOS
 
                 try:
                     self._xtts_model = TTS(
                         model_name="tts_models/multilingual/multi-dataset/xtts_v2",
-                        gpu=False  # Set to True if CUDA available and needed
+                        gpu=False
                     )
                     print("[MultilingualTTSService] ✓ Hindi XTTS loaded successfully")
                 finally:
-                    sys.stdin = old_stdin  # Restore stdin
+                    sys.stdin = old_stdin
 
             except ImportError:
                 raise ImportError(
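
The stdin swap above is the whole trick that avoids the interactive license prompt; a standalone sketch of the same pattern, using a hypothetical helper name (with_auto_yes) for illustration:

    import io
    import sys

    def with_auto_yes(fn):
        # Run fn() with sys.stdin replaced by an in-memory stream containing "y",
        # so any input() call (e.g. a model license/TOS prompt) is answered
        # automatically; the real stdin is restored afterwards.
        old_stdin = sys.stdin
        sys.stdin = io.StringIO("y\n")
        try:
            return fn()
        finally:
            sys.stdin = old_stdin  # always restore, even if fn() raises

    # e.g. answer = with_auto_yes(lambda: input("Agree to the model terms? [y/n] "))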
backend/setup_models.py ADDED
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+"""Download XTTS model on-demand (runs once on first deployment)."""
+
+import os
+import sys
+from pathlib import Path
+
+def setup_models():
+    """Ensure all required models are available."""
+
+    print("[Setup] Checking model requirements...")
+
+    # Ensure backend/models directory exists
+    models_dir = Path(__file__).parent.parent / "models"
+    models_dir.mkdir(parents=True, exist_ok=True)
+
+    tts_model_dir = models_dir / "tts" / "tts_models--multilingual--multi-dataset--xtts_v2"
+
+    if tts_model_dir.exists() and (tts_model_dir / "model.pth").exists():
+        print(f"[Setup] ✓ XTTS model already present: {tts_model_dir}")
+        return True
+
+    print("[Setup] Downloading XTTS-v2 model (1.8GB, first time only)...")
+    print("[Setup] This may take 5-10 minutes on first deployment...")
+
+    try:
+        from TTS.api import TTS
+        import io
+
+        os.environ['TTS_HOME'] = str(models_dir)
+
+        # Suppress interactive prompts
+        old_stdin = sys.stdin
+        sys.stdin = io.StringIO("y\n")
+
+        try:
+            tts = TTS(
+                model_name="tts_models/multilingual/multi-dataset/xtts_v2",
+                gpu=False
+            )
+            print("[Setup] ✓ XTTS model downloaded successfully")
+
+            # Verify model exists
+            if (tts_model_dir / "model.pth").exists():
+                print(f"[Setup] ✓ Model verified at: {tts_model_dir}")
+                return True
+            else:
+                print(f"[Setup] ✗ Model not found at expected location: {tts_model_dir}")
+                return False
+
+        finally:
+            sys.stdin = old_stdin
+
+    except Exception as e:
+        print(f"[Setup] ✗ Failed to download XTTS model: {e}")
+        print("[Setup] Hindi synthesis will not be available")
+        return False
+
+if __name__ == "__main__":
+    success = setup_models()
+    sys.exit(0 if success else 1)
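
A hypothetical local smoke test for the script above (it assumes backend is importable as a package, which the gunicorn target backend.wsgi:app suggests); the second call should return immediately via the model.pth early-return check:

    from backend.setup_models import setup_models

    # First call: finds the cached model or downloads ~1.8GB (5-10 minutes).
    assert setup_models() is True

    # Second call: hits the "already present" branch and returns at once.
    assert setup_models() is True

In the container it is instead invoked as python /app/backend/setup_models.py from start.sh, exiting 0 on success and 1 on failure.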