diff --git a/src/pai_rag/tools/data_process/dataset/file_dataset.py b/src/pai_rag/tools/data_process/dataset/file_dataset.py index 8884449c..21658b42 100644 --- a/src/pai_rag/tools/data_process/dataset/file_dataset.py +++ b/src/pai_rag/tools/data_process/dataset/file_dataset.py @@ -10,7 +10,7 @@ class FileDataset(ABC): def __init__(self, dataset_path: str = None, cfg=None) -> None: logger.info(f"Loading file dataset from {dataset_path}.") - self.data = get_input_files(dataset_path) + self.data, _ = get_input_files(dataset_path) if cfg: self.export_path = cfg.export_path diff --git a/src/pai_rag/tools/data_process/utils/download_utils.py b/src/pai_rag/tools/data_process/utils/download_utils.py index 2f748172..4c6e0ee5 100644 --- a/src/pai_rag/tools/data_process/utils/download_utils.py +++ b/src/pai_rag/tools/data_process/utils/download_utils.py @@ -18,9 +18,9 @@ def download_models_via_lock(model_dir, model_name, accelerator="cpu"): # 检查模型文件是否已经下载 if os.path.exists(model_path): - logger.info(f"进程 {os.getpid()} 检查到: 模型已下载完成") + logger.info(f"进程 {os.getpid()} 检查到: 模型已下载完成,环境: {accelerator}。") else: - logger.info(f"进程 {os.getpid()} 开始下载模型") + logger.info(f"进程 {os.getpid()} 开始下载模型,环境: {accelerator}。") ModelScopeDownloader( fetch_config=True, download_directory_path=model_dir, @@ -30,7 +30,7 @@ def download_models_via_lock(model_dir, model_name, accelerator="cpu"): fetch_config=True, download_directory_path=model_dir, ).load_mineru_config(accelerator) - logger.info(f"进程 {os.getpid()} 下载模型完成") + logger.info(f"进程 {os.getpid()} 下载模型完成,环境: {accelerator}。") # 释放锁并结束循环 fcntl.flock(lock_file, fcntl.LOCK_UN)