def load_data(root_dir: Optional[str] = None) -> CSVDataset:
    """Load and return the Montgomery dataset.

    Sourced from http://openi.nlm.nih.gov/imgs/collections/NLM-MontgomeryCXRSet.zip. This method will
    download the data to local storage if the data has not been previously downloaded.

    Args:
        root_dir: The path to store the downloaded data. When `root_dir` is not provided, the data will be saved into
            `fastestimator_data` under the user's home directory.

    Returns:
        A CSVDataset whose rows contain relative paths for 'image', 'mask_left', and 'mask_right'.
    """
    home = str(Path.home())

    # Resolve the storage directory: default under the user's home, otherwise under the provided path.
    if root_dir is None:
        root_dir = os.path.join(home, 'fastestimator_data', 'Montgomery')
    else:
        root_dir = os.path.join(os.path.abspath(root_dir), 'Montgomery')
    os.makedirs(root_dir, exist_ok=True)

    csv_path = os.path.join(root_dir, "montgomery.csv")
    data_compressed_path = os.path.join(root_dir, 'NLM-MontgomeryCXRSet.zip')
    extract_folder_path = os.path.join(root_dir, 'MontgomerySet')

    if not os.path.exists(extract_folder_path):
        # download
        if not os.path.exists(data_compressed_path):
            print("Downloading data to {}".format(root_dir))
            wget.download('http://openi.nlm.nih.gov/imgs/collections/NLM-MontgomeryCXRSet.zip',
                          root_dir,
                          bar=bar_custom)
        # extract
        print("\nExtracting file ...")
        with zipfile.ZipFile(data_compressed_path, 'r') as zip_file:
            # There's some garbage data from macOS in the zip file that gets filtered out here
            zip_file.extractall(root_dir, filter(lambda x: x.startswith("MontgomerySet/"), zip_file.namelist()))

    # glob and generate csv
    if not os.path.exists(csv_path):
        img_list = glob(os.path.join(extract_folder_path, 'CXR_png', '*.png'))
        df = pd.DataFrame(data={'image': img_list})
        # Store paths relative to root_dir so the CSV stays valid if root_dir is moved.
        df['image'] = df['image'].apply(lambda x: os.path.relpath(x, root_dir))
        df['image'] = df['image'].apply(os.path.normpath)
        # Mask paths mirror the image paths, swapping the CXR_png folder for the mask folders.
        df['mask_left'] = df['image'].apply(lambda x: x.replace('CXR_png', os.path.join('ManualMask', 'leftMask')))
        df['mask_right'] = df['image'].apply(lambda x: x.replace('CXR_png', os.path.join('ManualMask', 'rightMask')))
        df.to_csv(csv_path, index=False)

    return CSVDataset(csv_path)