fuvty committed
Commit fefa510 · 1 Parent(s): ac4eb5f

[debug] zeroGPU

Files changed (35)
  1. app.py +31 -3
  2. rosetta/baseline/__pycache__/multi_stage.cpython-310.pyc +0 -0
  3. rosetta/baseline/__pycache__/multi_stage.cpython-312.pyc +0 -0
  4. rosetta/baseline/__pycache__/two_stage_rosetta.cpython-310.pyc +0 -0
  5. rosetta/baseline/__pycache__/two_stage_rosetta.cpython-312.pyc +0 -0
  6. rosetta/model/__pycache__/__init__.cpython-310.pyc +0 -0
  7. rosetta/model/__pycache__/aggregator.cpython-310.pyc +0 -0
  8. rosetta/model/__pycache__/aggregator.cpython-312.pyc +0 -0
  9. rosetta/model/__pycache__/aligner.cpython-310.pyc +0 -0
  10. rosetta/model/__pycache__/aligner.cpython-312.pyc +0 -0
  11. rosetta/model/__pycache__/all_in_one_projector.cpython-310.pyc +0 -0
  12. rosetta/model/__pycache__/all_in_one_projector.cpython-312.pyc +0 -0
  13. rosetta/model/__pycache__/cache.cpython-310.pyc +0 -0
  14. rosetta/model/__pycache__/oracle.cpython-310.pyc +0 -0
  15. rosetta/model/__pycache__/oracle.cpython-312.pyc +0 -0
  16. rosetta/model/__pycache__/projector.cpython-310.pyc +0 -0
  17. rosetta/model/__pycache__/projector.cpython-312.pyc +0 -0
  18. rosetta/model/__pycache__/sampling.cpython-310.pyc +0 -0
  19. rosetta/model/__pycache__/sampling.cpython-312.pyc +0 -0
  20. rosetta/model/__pycache__/wrapper.cpython-310.pyc +0 -0
  21. rosetta/model/__pycache__/wrapper.cpython-312.pyc +0 -0
  22. rosetta/train/__pycache__/__init__.cpython-310.pyc +0 -0
  23. rosetta/train/__pycache__/__init__.cpython-312.pyc +0 -0
  24. rosetta/train/__pycache__/dataset_adapters.cpython-310.pyc +0 -0
  25. rosetta/train/__pycache__/dataset_adapters.cpython-312.pyc +0 -0
  26. rosetta/train/__pycache__/model_utils.cpython-310.pyc +0 -0
  27. rosetta/train/__pycache__/model_utils.cpython-312.pyc +0 -0
  28. rosetta/train/__pycache__/trainer.cpython-310.pyc +0 -0
  29. rosetta/train/__pycache__/trainer.cpython-312.pyc +0 -0
  30. rosetta/utils/__pycache__/evaluate.cpython-310.pyc +0 -0
  31. rosetta/utils/__pycache__/evaluate.cpython-312.pyc +0 -0
  32. rosetta/utils/__pycache__/multi_stage.cpython-310.pyc +0 -0
  33. rosetta/utils/__pycache__/multi_stage.cpython-312.pyc +0 -0
  34. rosetta/utils/__pycache__/registry.cpython-310.pyc +0 -0
  35. rosetta/utils/__pycache__/registry.cpython-312.pyc +0 -0
app.py CHANGED

@@ -57,7 +57,11 @@ class ModelManager:
             self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         else:
             self.device = torch.device(device)
+
+        # Debug information
         print(f"Using device: {self.device}")
+        print(f"CUDA available: {torch.cuda.is_available()}")
+        print(f"CUDA device count: {torch.cuda.device_count() if torch.cuda.is_available() else 0}")
         if ZEROGPU_AVAILABLE:
             print("ZeroGPU detected: Models will be loaded to CUDA (decorator handles allocation)")
 
@@ -104,8 +108,10 @@ class ModelManager:
         self.single_model, self.single_tokenizer = load_hf_model(
             self.single_model_name, self.device
         )
+        # Explicitly move model to device (required for ZeroGPU)
+        self.single_model = self.single_model.to(self.device)
         set_default_chat_template(self.single_tokenizer, self.single_model_name)
-        print("[Single] ✓ Model loaded")
+        print(f"[Single] ✓ Model loaded on {self.single_model.device}")
 
     def _load_t2t_model(self):
         """Load two-stage model."""
@@ -121,7 +127,10 @@ class ModelManager:
             device=str(self.device),
             background_prompt=self.t2t_background_prompt
         )
-        print("[T2T] Model loaded")
+        # Explicitly move models to device (required for ZeroGPU)
+        self.t2t_model.context_model = self.t2t_model.context_model.to(self.device)
+        self.t2t_model.answer_model = self.t2t_model.answer_model.to(self.device)
+        print(f"[T2T] ✓ Models loaded on {self.t2t_model.context_model.device} and {self.t2t_model.answer_model.device}")
 
     def _load_c2c_model(self):
         """Load Rosetta (C2C) model."""
@@ -178,7 +187,9 @@ class ModelManager:
         self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
             model_config, eval_config, self.device
         )
-        print("[C2C] Model loaded")
+        # Explicitly move model to device (required for ZeroGPU)
+        self.c2c_model = self.c2c_model.to(self.device)
+        print(f"[C2C] ✓ Model loaded on {self.c2c_model.device}")
 
     def _load_all_models(self):
         """Load all models sequentially."""
@@ -221,6 +232,11 @@ class ModelManager:
     def generate_single(self, user_input: str) -> Generator[str, None, None]:
         """Generate response from single model with streaming."""
         # @spaces.GPU decorator handles GPU allocation automatically
+        # Ensure model is on correct device (ZeroGPU may move it)
+        if self.single_model.device != self.device:
+            print(f"[Single] Moving model from {self.single_model.device} to {self.device}")
+            self.single_model = self.single_model.to(self.device)
+
         messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
         text = self.single_tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
@@ -256,6 +272,13 @@ class ModelManager:
     def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
         """Generate response from T2T model with streaming (returns context, answer)."""
         # @spaces.GPU decorator handles GPU allocation automatically
+        # Ensure models are on correct device (ZeroGPU may move them)
+        if self.t2t_model.context_model.device != self.device:
+            print(f"[T2T] Moving context model from {self.t2t_model.context_model.device} to {self.device}")
+            self.t2t_model.context_model = self.t2t_model.context_model.to(self.device)
+        if self.t2t_model.answer_model.device != self.device:
+            print(f"[T2T] Moving answer model from {self.t2t_model.answer_model.device} to {self.device}")
+            self.t2t_model.answer_model = self.t2t_model.answer_model.to(self.device)
 
         # Stage 1: Context generation
         context_streamer = TextIteratorStreamer(
@@ -342,6 +365,11 @@ class ModelManager:
     def generate_c2c(self, user_input: str) -> Generator[str, None, None]:
         """Generate response from C2C model with streaming."""
         # @spaces.GPU decorator handles GPU allocation automatically
+        # Ensure model is on correct device (ZeroGPU may move it)
+        if self.c2c_model.device != self.device:
+            print(f"[C2C] Moving model from {self.c2c_model.device} to {self.device}")
+            self.c2c_model = self.c2c_model.to(self.device)
+
         messages = [{"role": "system", "content": ""}, {"role": "user", "content": user_input}]
         text = self.c2c_tokenizer.apply_chat_template(
             messages, tokenize=False, add_generation_prompt=True, enable_thinking=False
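
Note: the "# @spaces.GPU decorator handles GPU allocation automatically" comments refer to wrapper functions outside these hunks. Below is a minimal sketch of that ZeroGPU pattern, assuming the `spaces` package that Hugging Face ZeroGPU Spaces provide; the wrapper name and `manager` instance are hypothetical, not taken from app.py.

    # Minimal sketch, not the app.py implementation. `manager` and the wrapper
    # name are hypothetical; `spaces` is the package installed on ZeroGPU Spaces.
    import spaces

    manager = ModelManager()  # hypothetical: the class patched in this commit

    @spaces.GPU(duration=120)  # GPU is attached only while this call runs
    def gpu_generate_single(user_input: str):
        # Inside the decorated call CUDA is available; the device guards added
        # in generate_single() re-home the weights if ZeroGPU moved them.
        yield from manager.generate_single(user_input)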
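The context lines also show the `TextIteratorStreamer` two-thread streaming pattern that all three `generate_*` methods rely on. For reference, a self-contained sketch of that pattern; the checkpoint name is a placeholder, not the model this Space loads.

    # Standard transformers streaming pattern: generate() blocks, so it runs
    # in a worker thread while the main thread consumes decoded text chunks.
    from threading import Thread

    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    name = "Qwen/Qwen2.5-0.5B-Instruct"  # placeholder checkpoint
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name)

    inputs = tokenizer("Hello, how are you?", return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=64),
    )
    thread.start()
    for chunk in streamer:  # yields text incrementally as tokens are generated
        print(chunk, end="", flush=True)
    thread.join()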
rosetta/baseline/__pycache__/multi_stage.cpython-310.pyc DELETED
Binary file (24.3 kB)
 
rosetta/baseline/__pycache__/multi_stage.cpython-312.pyc DELETED
Binary file (34.7 kB)
 
rosetta/baseline/__pycache__/two_stage_rosetta.cpython-310.pyc DELETED
Binary file (9.34 kB)
 
rosetta/baseline/__pycache__/two_stage_rosetta.cpython-312.pyc DELETED
Binary file (13.2 kB)
 
rosetta/model/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (419 Bytes)
 
rosetta/model/__pycache__/aggregator.cpython-310.pyc DELETED
Binary file (5.83 kB)
 
rosetta/model/__pycache__/aggregator.cpython-312.pyc DELETED
Binary file (8.53 kB)
 
rosetta/model/__pycache__/aligner.cpython-310.pyc DELETED
Binary file (16.3 kB)
 
rosetta/model/__pycache__/aligner.cpython-312.pyc DELETED
Binary file (24.2 kB)
 
rosetta/model/__pycache__/all_in_one_projector.cpython-310.pyc DELETED
Binary file (27.8 kB)
 
rosetta/model/__pycache__/all_in_one_projector.cpython-312.pyc DELETED
Binary file (53.5 kB)
 
rosetta/model/__pycache__/cache.cpython-310.pyc DELETED
Binary file (980 Bytes)
 
rosetta/model/__pycache__/oracle.cpython-310.pyc DELETED
Binary file (13.9 kB)
 
rosetta/model/__pycache__/oracle.cpython-312.pyc DELETED
Binary file (23.5 kB)
 
rosetta/model/__pycache__/projector.cpython-310.pyc DELETED
Binary file (30.9 kB)
 
rosetta/model/__pycache__/projector.cpython-312.pyc DELETED
Binary file (56 kB)
 
rosetta/model/__pycache__/sampling.cpython-310.pyc DELETED
Binary file (2.09 kB)
 
rosetta/model/__pycache__/sampling.cpython-312.pyc DELETED
Binary file (3.49 kB)
 
rosetta/model/__pycache__/wrapper.cpython-310.pyc DELETED
Binary file (18.6 kB)
 
rosetta/model/__pycache__/wrapper.cpython-312.pyc DELETED
Binary file (33.8 kB)
 
rosetta/train/__pycache__/__init__.cpython-310.pyc DELETED
Binary file (505 Bytes)
 
rosetta/train/__pycache__/__init__.cpython-312.pyc DELETED
Binary file (616 Bytes)
 
rosetta/train/__pycache__/dataset_adapters.cpython-310.pyc DELETED
Binary file (50 kB)
 
rosetta/train/__pycache__/dataset_adapters.cpython-312.pyc DELETED
Binary file (73.3 kB)
 
rosetta/train/__pycache__/model_utils.cpython-310.pyc DELETED
Binary file (3.99 kB)
 
rosetta/train/__pycache__/model_utils.cpython-312.pyc DELETED
Binary file (5.58 kB)
 
rosetta/train/__pycache__/trainer.cpython-310.pyc DELETED
Binary file (2.56 kB)
 
rosetta/train/__pycache__/trainer.cpython-312.pyc DELETED
Binary file (3.74 kB)
 
rosetta/utils/__pycache__/evaluate.cpython-310.pyc DELETED
Binary file (15.8 kB)
 
rosetta/utils/__pycache__/evaluate.cpython-312.pyc DELETED
Binary file (24 kB)
 
rosetta/utils/__pycache__/multi_stage.cpython-310.pyc DELETED
Binary file (5.22 kB)
 
rosetta/utils/__pycache__/multi_stage.cpython-312.pyc DELETED
Binary file (7.49 kB)
 
rosetta/utils/__pycache__/registry.cpython-310.pyc DELETED
Binary file (7.43 kB)
 
rosetta/utils/__pycache__/registry.cpython-312.pyc DELETED
Binary file (9.98 kB)
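
Every file deleted above is a compiled Python bytecode cache. As an aside (this commit only removes them), such caches are usually kept out of version control with .gitignore entries like:

    # .gitignore entries that keep bytecode caches out of the repo
    __pycache__/
    *.pyc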