Skip to content

system

System

A class which tracks state information while the fe.Estimator is running.

Parameters:

Name Type Description Default
network BaseNetwork

The network instance being used by the current fe.Estimator.

required
mode Optional[str]

The current execution mode (or None for warmup).

None
num_devices int

How many GPUs are available for training.

torch.cuda.device_count()
log_steps Optional[int]

Log every n steps (0 to disable train logging, None to disable all logging).

None
total_epochs int

How many epochs training is expected to run for.

0
max_train_steps_per_epoch Optional[int]

Training epochs will be cut short after this many steps (or use None if they should run to completion).

None

Attributes:

Name Type Description
mode

What is the current execution mode of the estimator ('train', 'eval', 'test'), None if warmup.

global_step Optional[int]

How many training steps have elapsed.

num_devices

How many GPUs are available for training.

log_steps

Log every n steps (0 to disable train logging, None to disable all logging).

total_epochs

How many epochs training is expected to run for.

epoch_idx Optional[int]

The current epoch index for the training (starting from 1).

batch_idx

The current batch index within an epoch (starting from 1).

stop_training

A flag to signal that training should abort.

network

A reference to the network being used this epoch

max_train_steps_per_epoch

Training will complete after n steps even if loader is not yet exhausted.

max_eval_steps_per_epoch

Evaluation will complete after n steps even if loader is not yet exhausted.

summary

An object to write experiment results to.

experiment_time

A timestamp indicating when this model was trained.

Source code in fastestimator\fastestimator\summary\system.py
class System:
    """A class which tracks state information while the fe.Estimator is running.

    Args:
        network: The network instance being used by the current fe.Estimator.
        mode: The current execution mode (or None for warmup).
        num_devices: How many GPUs are available for training.
        log_steps: Log every n steps (0 to disable train logging, None to disable all logging).
        total_epochs: How many epochs training is expected to run for.
        max_train_steps_per_epoch: Training will complete after n steps even if loader is not yet exhausted (or use None
            if epochs will run to completion).
        max_eval_steps_per_epoch: Evaluation will complete after n steps even if loader is not yet exhausted.

    Attributes:
        mode: What is the current execution mode of the estimator ('train', 'eval', 'test'), None if warmup.
        global_step: How many training steps have elapsed (None until the first step has been recorded).
        num_devices: How many GPUs are available for training.
        log_steps: Log every n steps (0 to disable train logging, None to disable all logging).
        total_epochs: How many epochs training is expected to run for.
        epoch_idx: The current epoch index for the training (starting from 1).
        batch_idx: The current batch index within an epoch (starting from 1).
        stop_training: A flag to signal that training should abort.
        network: A reference to the network being used this epoch.
        max_train_steps_per_epoch: Training will complete after n steps even if loader is not yet exhausted.
        max_eval_steps_per_epoch: Evaluation will complete after n steps even if loader is not yet exhausted.
        summary: An object to write experiment results to.
        experiment_time: A timestamp indicating when this model was trained.
    """

    mode: Optional[str]
    global_step: Optional[int]
    num_devices: int
    log_steps: Optional[int]
    total_epochs: int
    epoch_idx: Optional[int]
    batch_idx: Optional[int]
    stop_training: bool
    network: BaseNetwork
    max_train_steps_per_epoch: Optional[int]
    max_eval_steps_per_epoch: Optional[int]
    summary: Summary
    experiment_time: str

    # NOTE: the `num_devices` default is evaluated once, when this `def` statement is executed, not on every call.
    def __init__(self,
                 network: BaseNetwork,
                 mode: Optional[str] = None,
                 num_devices: int = torch.cuda.device_count(),
                 log_steps: Optional[int] = None,
                 total_epochs: int = 0,
                 max_train_steps_per_epoch: Optional[int] = None,
                 max_eval_steps_per_epoch: Optional[int] = None) -> None:

        self.network = network
        self.mode = mode
        self.num_devices = num_devices
        self.log_steps = log_steps
        self.total_epochs = total_epochs
        self.batch_idx = None
        self.max_train_steps_per_epoch = max_train_steps_per_epoch
        self.max_eval_steps_per_epoch = max_eval_steps_per_epoch
        self.stop_training = False
        self.summary = Summary(None)
        self.experiment_time = ""
        self._initialize_state()

    def _initialize_state(self) -> None:
        """Initialize the training state.

        Sets `global_step` to None and `epoch_idx` to 0. These are the two values persisted by `save_state`.
        """
        self.global_step = None
        self.epoch_idx = 0

    def load_state(self, json_path: str) -> None:
        """Load training state.

        Only `epoch_idx` and `global_step` are restored.

        Args:
            json_path: The json file path to load from.

        Raises:
            KeyError: If the file lacks an "epoch_idx" or "global_step" entry.
        """
        # JSON is a UTF-8 format, so do not rely on the platform's default text encoding.
        with open(json_path, 'r', encoding='utf-8') as fp:
            state = json.load(fp)
        self.epoch_idx = state["epoch_idx"]
        self.global_step = state["global_step"]

    def save_state(self, json_path: str) -> None:
        """Save training state.

        Only `epoch_idx` and `global_step` are written.

        Args:
            json_path: The json file path to save to.
        """
        # TODO "summary" and "experiment_time" needs to be saved in the future
        state = {"epoch_idx": self.epoch_idx, "global_step": self.global_step}
        with open(json_path, 'w', encoding='utf-8') as fp:
            json.dump(state, fp, indent=4)

    def update_global_step(self) -> None:
        """Increment the current `global_step`.

        A `global_step` of None is treated as "no steps yet", so the first call sets it to 1.
        """
        if self.global_step is None:
            self.global_step = 1
        else:
            self.global_step += 1

    def update_batch_idx(self) -> None:
        """Increment the current `batch_idx`.

        A `batch_idx` of None is treated as "no batches yet", so the first call sets it to 1.
        """
        if self.batch_idx is None:
            self.batch_idx = 1
        else:
            self.batch_idx += 1

    def reset(self, summary_name: Optional[str] = None) -> None:
        """Reset the current `System` for a new round of training, including a new `Summary` object.

        Args:
            summary_name: The name of the experiment. The `Summary` object will store information iff name is not None.
        """
        self.experiment_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.mode = "train"
        self._initialize_state()
        self.batch_idx = None
        self.stop_training = False
        self.summary = Summary(summary_name)

    def reset_for_test(self, summary_name: Optional[str] = None) -> None:
        """Partially reset the current `System` object for a new round of testing.

        Args:
            summary_name: The name of the experiment. If not provided, the system will re-use the previous summary name.
        """
        # Keep the timestamp from training if there is one, so that testing is attributed to the same experiment.
        self.experiment_time = self.experiment_time or datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        self.mode = "test"
        # If training was stopped early, leave `epoch_idx` where it stopped rather than jumping to the final epoch.
        if not self.stop_training:
            self.epoch_idx = self.total_epochs
        self.stop_training = False
        self.summary.name = summary_name or self.summary.name  # Keep old experiment name if new one not provided
        # Discard any results left over from an earlier test run.
        self.summary.history.pop('test', None)

    def write_summary(self, key: str, value: Any) -> None:
        """Write an entry into the `Summary` object (iff the experiment was named).

        The entry is stored under the current `mode` and `key`, indexed by `global_step` (or 0 if no step has been
        taken yet).

        Args:
            key: The key to write into the summary object.
            value: The value to write into the summary object.
        """
        # This relies on the truthiness of the `Summary` object to decide whether anything should be recorded.
        if self.summary:
            self.summary.history[self.mode][key][self.global_step or 0] = value

load_state

Load training state.

Parameters:

Name Type Description Default
json_path

The json file path to load from.

required
Source code in fastestimator\fastestimator\summary\system.py
def load_state(self, json_path: str) -> None:
    """Load training state.

    Only `epoch_idx` and `global_step` are restored.

    Args:
        json_path: The json file path to load from.

    Raises:
        KeyError: If the file lacks an "epoch_idx" or "global_step" entry.
    """
    # JSON is a UTF-8 format, so do not rely on the platform's default text encoding.
    with open(json_path, 'r', encoding='utf-8') as fp:
        state = json.load(fp)
    self.epoch_idx = state["epoch_idx"]
    self.global_step = state["global_step"]

reset

Reset the current System for a new round of training, including a new Summary object.

Parameters:

Name Type Description Default
summary_name Optional[str]

The name of the experiment. The Summary object will store information iff name is not None.

None
Source code in fastestimator\fastestimator\summary\system.py
def reset(self, summary_name: Optional[str] = None) -> None:
    """Prepare this `System` for a fresh round of training.

    Stamps a new experiment time, switches to 'train' mode, re-initializes the training state, clears the batch index
    and the stop flag, and swaps in a brand new `Summary` object.

    Args:
        summary_name: The name of the experiment. The `Summary` object will store information iff name is not None.
    """
    started_at = datetime.datetime.now()
    self.experiment_time = started_at.strftime("%Y%m%d-%H%M%S")
    self.mode = "train"
    self._initialize_state()
    self.batch_idx, self.stop_training = None, False
    self.summary = Summary(summary_name)

reset_for_test

Partially reset the current System object for a new round of testing.

Parameters:

Name Type Description Default
summary_name Optional[str]

The name of the experiment. If not provided, the system will re-use the previous summary name.

None
Source code in fastestimator\fastestimator\summary\system.py
def reset_for_test(self, summary_name: Optional[str] = None) -> None:
    """Get this `System` ready for a round of testing without discarding everything from training.

    Args:
        summary_name: The name of the experiment. If not provided, the system will re-use the previous summary name.
    """
    # Re-use the existing timestamp when there is one; otherwise stamp the current time.
    timestamp = self.experiment_time
    if not timestamp:
        timestamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    self.experiment_time = timestamp
    self.mode = "test"
    # Only jump to the final epoch when training was not flagged to stop.
    if not self.stop_training:
        self.epoch_idx = self.total_epochs
    self.stop_training = False
    # A missing / empty name means "keep whatever name the summary already has".
    self.summary.name = summary_name if summary_name else self.summary.name
    # Drop any previously recorded test history.
    self.summary.history.pop('test', None)

save_state

Save training state.

Parameters:

Name Type Description Default
json_path

The json file path to save to.

required
Source code in fastestimator\fastestimator\summary\system.py
def save_state(self, json_path: str) -> None:
    """Save training state.

    Only `epoch_idx` and `global_step` are written.

    Args:
        json_path: The json file path to save to.
    """
    # TODO "summary" and "experiment_time" needs to be saved in the future
    state = {"epoch_idx": self.epoch_idx, "global_step": self.global_step}
    # Write with an explicit encoding so the file does not depend on the platform's default.
    with open(json_path, 'w', encoding='utf-8') as fp:
        json.dump(state, fp, indent=4)

update_batch_idx

Increment the current batch_idx.

Source code in fastestimator\fastestimator\summary\system.py
def update_batch_idx(self) -> None:
    """Advance `batch_idx` by one, starting the count at 1 if it is currently None.
    """
    current = self.batch_idx
    self.batch_idx = 1 if current is None else current + 1

update_global_step

Increment the current global_step.

Source code in fastestimator\fastestimator\summary\system.py
def update_global_step(self) -> None:
    """Advance `global_step` by one, starting the count at 1 if it is currently None.
    """
    current = self.global_step
    self.global_step = 1 if current is None else current + 1

write_summary

Write an entry into the Summary object (iff the experiment was named).

Parameters:

Name Type Description Default
key str

The key to write into the summary object.

required
value Any

The value to write into the summary object.

required
Source code in fastestimator\fastestimator\summary\system.py
def write_summary(self, key: str, value: Any) -> None:
    """Record a value in the `Summary` object, provided that the summary object is truthy.

    The value is filed under the current `mode` and the given `key`, indexed by `global_step` (0 when `global_step`
    is None or 0).

    Args:
        key: The key to write into the summary object.
        value: The value to write into the summary object.
    """
    if not self.summary:
        return
    step = self.global_step or 0
    self.summary.history[self.mode][key][step] = value