Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -306,8 +306,40 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
|
|
| 306 |
if len(qa_features_dataset) == 0:
|
| 307 |
return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
|
| 308 |
|
| 309 |
-
|
| 310 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
qa_dataloader = DataLoader(
|
| 312 |
qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
|
| 313 |
)
|
|
|
|
| 306 |
if len(qa_features_dataset) == 0:
|
| 307 |
return "錯誤: 無法為選定段落生成QA特徵 (可能段落太短或內容問題)。", f"選中的段落 (索引 {selected_idx}):\n{selected_paragraph}", "N/A"
|
| 308 |
|
| 309 |
+
logger.info(f"--- In two_stage_qa, about to create DataLoader for question_id: {question_id} ---")
|
| 310 |
+
logger.info(f"Number of features in qa_features_dataset: {len(qa_features_dataset)}")
|
| 311 |
+
|
| 312 |
+
for i in range(len(qa_features_dataset)):
|
| 313 |
+
feature_item = qa_features_dataset[i]
|
| 314 |
+
logger.info(f" Inspecting feature {i} from qa_features_dataset:")
|
| 315 |
+
for key_to_check in ["input_ids", "attention_mask", "token_type_ids"]:
|
| 316 |
+
if key_to_check not in feature_item:
|
| 317 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' is MISSING in feature {i}! Features: {feature_item.keys()}")
|
| 318 |
+
# 這是一個嚴重問題,會導致後續 collate 失敗
|
| 319 |
+
return f"錯誤: 特徵準備失敗,缺少 {key_to_check}", "N/A", "N/A"
|
| 320 |
+
|
| 321 |
+
val_list = feature_item[key_to_check]
|
| 322 |
+
if val_list is None:
|
| 323 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is None!")
|
| 324 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 為 None", "N/A", "N/A"
|
| 325 |
+
|
| 326 |
+
if not isinstance(val_list, list):
|
| 327 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} is not a list, but {type(val_list)}!")
|
| 328 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 不是列表", "N/A", "N/A"
|
| 329 |
+
|
| 330 |
+
if not val_list: # 如果列表為空
|
| 331 |
+
logger.warning(f" Feature {i} has an empty list for '{key_to_check}'. This might be okay if handled by collator for padding, but check if intended.")
|
| 332 |
+
|
| 333 |
+
# 檢查列表內部元素
|
| 334 |
+
for elem_idx, elem in enumerate(val_list):
|
| 335 |
+
if elem is None:
|
| 336 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains None at index {elem_idx}!")
|
| 337 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 內部有 None", "N/A", "N/A"
|
| 338 |
+
if not isinstance(elem, int):
|
| 339 |
+
logger.error(f" !!!!!! CRITICAL: Key '{key_to_check}' in feature {i} contains non-integer {elem} (type: {type(elem)}) at index {elem_idx}!")
|
| 340 |
+
return f"錯誤: 特徵準備失敗,{key_to_check} 內部有非整數", "N/A", "N/A"
|
| 341 |
+
logger.info(f" Feature {i}, key '{key_to_check}' passed inspection. Length: {len(val_list)}")
|
| 342 |
+
|
| 343 |
qa_dataloader = DataLoader(
|
| 344 |
qa_features_dataset, collate_fn=default_data_collator, batch_size=8 # batch_size可以小一些
|
| 345 |
)
|