Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -361,10 +361,56 @@ def two_stage_qa(question, candidate_paragraphs_str, max_seq_len_mc=512, max_seq
|
|
| 361 |
logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
|
| 362 |
return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
|
| 363 |
|
| 364 |
-
logger.info(
|
| 365 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 366 |
qa_dataloader = DataLoader(
|
| 367 |
-
|
|
|
|
|
|
|
| 368 |
)
|
| 369 |
|
| 370 |
all_start_logits = []
|
|
|
|
| 361 |
logger.error(f"從 qa_features_dataset 選擇列時出錯: {e}. Features: {qa_features_dataset.features}")
|
| 362 |
return f"錯誤: 準備模型輸入時出錯 (列選擇)。 Error: {e}", "N/A", "N/A"
|
| 363 |
|
| 364 |
+
logger.info("--- 手動檢查 features_for_dataloader 以模擬 default_data_collator ---")
|
| 365 |
+
if len(features_for_dataloader) > 0:
|
| 366 |
+
# default_data_collator 會接收一個 features 列表,這裡我們模擬只有一個 feature 的情況
|
| 367 |
+
# 因為對於第一個 test_item,qa_features_dataset (以及 features_for_dataloader) 只有一行
|
| 368 |
+
|
| 369 |
+
# features_list_for_collator 將是 [features_for_dataloader[0]]
|
| 370 |
+
# 如果 qa_batch_size > 1 且 features_for_dataloader 行數也 > 1,這裡會更複雜
|
| 371 |
+
# 但錯誤發生在第一個批次,所以檢查第一個特徵就夠了。
|
| 372 |
+
|
| 373 |
+
single_feature_to_collate = features_for_dataloader[0]
|
| 374 |
+
keys_to_tensorize_by_collator = ["input_ids", "attention_mask", "token_type_ids"]
|
| 375 |
+
|
| 376 |
+
for k_collate in keys_to_tensorize_by_collator:
|
| 377 |
+
if k_collate in single_feature_to_collate:
|
| 378 |
+
value_to_tensorize = single_feature_to_collate[k_collate]
|
| 379 |
+
logger.info(f" 準備轉換鍵 '{k_collate}' 的值: {str(value_to_tensorize)[:100]}...") # 打印部分值
|
| 380 |
+
if value_to_tensorize is None:
|
| 381 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值是 None!")
|
| 382 |
+
return f"錯誤: 預整理時發現 {k_collate} 為 None", "N/A", "N/A"
|
| 383 |
+
if not isinstance(value_to_tensorize, list):
|
| 384 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate} 的值不是列表,類型為 {type(value_to_tensorize)}!")
|
| 385 |
+
return f"錯誤: 預整理時發現 {k_collate} 不是列表", "N/A", "N/A"
|
| 386 |
+
if not value_to_tensorize: # 空列表
|
| 387 |
+
logger.warning(f" Pre-Collate: {k_collate} 的值是空列表。")
|
| 388 |
+
|
| 389 |
+
problem_found_in_list = False
|
| 390 |
+
for elem_idx, elem_val in enumerate(value_to_tensorize):
|
| 391 |
+
if elem_val is None:
|
| 392 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 是 None!")
|
| 393 |
+
problem_found_in_list = True
|
| 394 |
+
break
|
| 395 |
+
if not isinstance(elem_val, int):
|
| 396 |
+
logger.error(f" !!!!!! CRITICAL (Pre-Collate): {k_collate}[{elem_idx}] 不是整數,值: {elem_val}, 類型: {type(elem_val)}!")
|
| 397 |
+
problem_found_in_list = True
|
| 398 |
+
break
|
| 399 |
+
if problem_found_in_list:
|
| 400 |
+
return f"錯誤: 預整理時在 {k_collate} 內部發現問題", "N/A", "N/A"
|
| 401 |
+
|
| 402 |
+
logger.info(f" 鍵 '{k_collate}' 的預整理檢查通過。")
|
| 403 |
+
else:
|
| 404 |
+
logger.warning(f" 鍵 '{k_collate}' 不在 features_for_dataloader[0] 中。")
|
| 405 |
+
else:
|
| 406 |
+
logger.error("features_for_dataloader 為空,無法進行手動檢查。")
|
| 407 |
+
return "錯誤: features_for_dataloader 為空", "N/A", "N/A"
|
| 408 |
+
|
| 409 |
+
|
| 410 |
qa_dataloader = DataLoader(
|
| 411 |
+
features_for_dataloader,
|
| 412 |
+
collate_fn=default_data_collator,
|
| 413 |
+
batch_size=8 # 或者 args.qa_batch_size
|
| 414 |
)
|
| 415 |
|
| 416 |
all_start_logits = []
|