Download Unnatural Instruction tuning data and return its downloaded file path.
The data contains 68,478 instruction-output pairs. More in https://github.com/orhonovich/unnatural-instructions.
Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| root_dir | Optional[str] | Download parent path. Defaults to None. | None |
Returns:

| Type | Description |
|------|-------------|
| str | Json file path. |
Source code in fastestimator/fastestimator/dataset/data/unnatural_instructions.py
def load_data(root_dir: Optional[str] = None) -> str:
    """Fetch the Unnatural Instructions core data and return the path of the extracted file.

    The dataset holds 68,478 instruction-output pairs; see
    https://github.com/orhonovich/unnatural-instructions for details. The archive is downloaded and
    unpacked into an `unnatural_instructions` folder. Each step is skipped when its output file is
    already present on disk, so repeated calls reuse the existing files.

    Args:
        root_dir: Parent directory for the dataset folder. When None, the `fastestimator_data`
            folder inside the user's home directory is used.

    Returns:
        Path of the `core_data.jsonl` file inside the dataset folder.
    """
    source_url = "https://github.com/orhonovich/unnatural-instructions/raw/main/data/core_data.zip"

    # Resolve the dataset folder; the home directory is looked up before the branch either way.
    user_home = str(Path.home())
    if root_dir is not None:
        parent_dir = os.path.abspath(root_dir)
    else:
        parent_dir = os.path.join(user_home, 'fastestimator_data')
    root_dir = os.path.join(parent_dir, 'unnatural_instructions')
    os.makedirs(root_dir, exist_ok=True)

    archive_path = os.path.join(root_dir, "core_data.zip")
    jsonl_path = os.path.join(root_dir, "core_data.jsonl")

    # Download the archive only if it is not already on disk.
    archive_present = os.path.exists(archive_path)
    if not archive_present:
        print("Downloading data to {}".format(archive_path))
        wget.download(source_url, archive_path, bar=bar_custom)

    # Unpack the archive only if the extracted file is not already on disk.
    already_extracted = os.path.exists(jsonl_path)
    if not already_extracted:
        print("Extracting {}".format(archive_path))
        with zipfile.ZipFile(archive_path, 'r') as archive:
            archive.extractall(root_dir)

    return jsonl_path
|