Skip to content

unnatural_instructions

load_data

Download the Unnatural Instructions tuning data and return the path to the downloaded file.

The data contains 68,478 instruction-output pairs. More information is available at https://github.com/orhonovich/unnatural-instructions.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `root_dir` | `Optional[str]` | Download parent path. Defaults to None. | `None` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `str` | `str` | Json file path. |

Source code in fastestimator/fastestimator/dataset/data/unnatural_instructions.py
def load_data(root_dir: Optional[str] = None) -> str:
    """Fetch the Unnatural Instructions core dataset and return the path to its extracted file.

    The archive is downloaded only when it is not already present on disk, and it is unpacked only when the
    extracted file is missing, so repeated calls reuse the cached copy. The data contains 68,478
    instruction-output pairs. More in https://github.com/orhonovich/unnatural-instructions.

    Args:
        root_dir: Parent directory for the download. An 'unnatural_instructions' sub-folder is created inside it.
            When None, '<home>/fastestimator_data' is used as the parent.

    Returns:
        Path to the extracted 'core_data.jsonl' file.
    """
    # Resolve the parent directory; the dataset always lives in its own sub-folder beneath it.
    user_home = str(Path.home())
    if root_dir is not None:
        parent_dir = os.path.abspath(root_dir)
    else:
        parent_dir = os.path.join(user_home, 'fastestimator_data')
    data_dir = os.path.join(parent_dir, 'unnatural_instructions')
    os.makedirs(data_dir, exist_ok=True)

    archive_path = os.path.join(data_dir, "core_data.zip")
    jsonl_path = os.path.join(data_dir, "core_data.jsonl")

    # Fetch the archive unless a previous call already stored it.
    if not os.path.exists(archive_path):
        print("Downloading data to {}".format(archive_path))
        source_url = "https://github.com/orhonovich/unnatural-instructions/raw/main/data/core_data.zip"
        wget.download(source_url, archive_path, bar=bar_custom)

    # Nothing left to do when the archive has already been unpacked.
    if os.path.exists(jsonl_path):
        return jsonl_path

    print("Extracting {}".format(archive_path))
    with zipfile.ZipFile(archive_path, 'r') as archive:
        archive.extractall(data_dir)
    return jsonl_path