# Spaces: Build error
| import pandas as pd | |
| import json | |
| import re | |
def remove_prefix(text, prefix_pattern):
    """
    Remove a leading match of ``prefix_pattern`` from ``text``.

    Only a match that begins at the very start of ``text`` is removed; any
    later occurrence of the pattern is left untouched. The result always has
    leading and trailing whitespace stripped.

    Args:
        text: The string to clean.
        prefix_pattern: Regular expression (pattern string or compiled
            pattern) describing the prefix to drop.

    Returns:
        ``text`` without the matched prefix, stripped of surrounding
        whitespace. If the pattern does not match at the start, the stripped
        ``text`` is returned unchanged otherwise.
    """
    # re.match only matches at position 0, so an unanchored pattern cannot
    # delete text from the middle of the string the way re.sub would.
    match = re.match(prefix_pattern, text)
    if match is not None:
        text = text[match.end():]
    return text.strip()
def _cell_to_text(value):
    """Return a cell value as text, mapping missing cells to an empty string."""
    # str() of a missing cell (NaN) is the literal "nan", which would end up
    # in the JSON output as if it were real question/answer text.
    if pd.isna(value):
        return ""
    return str(value)


def main(excel_path="data/manabi.xlsx", json_path="data/qa_data.json"):
    """
    Convert a question/answer Excel sheet into a JSON list.

    Reads ``excel_path`` with pandas, strips the "Q<number>." prefix from
    each question and the "A." prefix from each answer, and writes a list of
    ``{"question": ..., "answer": ...}`` objects to ``json_path`` as UTF-8
    JSON. Progress and errors are reported with ``print``; on any failure the
    function returns early without raising.

    Args:
        excel_path: Path of the Excel workbook to read.
        json_path: Path of the JSON file to write.

    Returns:
        None.
    """
    # NOTE(review): these column names look like mis-decoded UTF-8 (probably
    # the Japanese headers for "question" and "answer") -- confirm against the
    # real workbook before relying on them.
    question_col = "θ³ͺε"
    answer_col = "εη"

    # Read the Excel file. This is the script's top-level boundary, so any
    # reader failure is reported and turned into an early return.
    try:
        df = pd.read_excel(excel_path)
    except FileNotFoundError:
        print(f"The file '{excel_path}' was not found. Please check the file path.")
        return
    except Exception as e:
        print(f"An error occurred while reading the Excel file: {e}")
        return
    print("Excel file read successfully.")

    # Both columns are required before any row can be processed.
    if question_col not in df.columns or answer_col not in df.columns:
        print(f"The Excel file must contain '{question_col}' and '{answer_col}' columns.")
        return

    # Walk the two columns in lockstep, normalising missing cells to "" and
    # stripping the "Q<n>." / "A." prefixes.
    qa_list = [
        {
            "question": remove_prefix(_cell_to_text(raw_question), r"^Q\d+\.\s*"),
            "answer": remove_prefix(_cell_to_text(raw_answer), r"^A\.\s*"),
        }
        for raw_question, raw_answer in zip(df[question_col], df[answer_col])
    ]

    # Save the list to a JSON file; ensure_ascii=False keeps non-ASCII text
    # readable in the output instead of escaping it.
    try:
        with open(json_path, "w", encoding="utf-8") as json_file:
            json.dump(qa_list, json_file, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"An error occurred while writing to JSON file: {e}")
    else:
        print(f"Data has been successfully saved to '{json_path}'.")
# Run the conversion only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()