base ¶

Classes¶

fastvideo.train.models.base.CausalModelBase ¶

Bases: ModelBase

Extension for causal / streaming model plugins.

Cache state is internal to the model instance and keyed by cache_tag (no role handle needed).

Functions¶

fastvideo.train.models.base.CausalModelBase.clear_caches `abstractmethod` ¶

clear_caches(*, cache_tag: str = 'pos') -> None

Clear internal caches before starting a new rollout.

Source code in fastvideo/train/models/base.py

@abstractmethod
def clear_caches(self, *, cache_tag: str = "pos") -> None:
    """Clear internal caches before starting a new rollout.

    Abstract: the base class provides no implementation.

    Args:
        cache_tag: Keyword-only tag identifying the cache state
            (default ``"pos"``). Which caches a given tag covers is
            up to the implementation.
    """

fastvideo.train.models.base.CausalModelBase.predict_noise_streaming `abstractmethod` ¶

predict_noise_streaming(noisy_latents: Tensor, timestep: Tensor, batch: TrainingBatch, *, conditional: bool, cache_tag: str = 'pos', store_kv: bool = False, cur_start_frame: int = 0, cfg_uncond: dict[str, Any] | None = None, attn_kind: Literal['dense', 'vsa'] = 'dense') -> Tensor | None

Streaming predict-noise that may update internal caches.

Source code in fastvideo/train/models/base.py

@abstractmethod
def predict_noise_streaming(
    self,
    noisy_latents: torch.Tensor,
    timestep: torch.Tensor,
    batch: TrainingBatch,
    *,
    conditional: bool,
    cache_tag: str = "pos",
    store_kv: bool = False,
    cur_start_frame: int = 0,
    cfg_uncond: dict[str, Any] | None = None,
    attn_kind: Literal["dense", "vsa"] = "dense",
) -> torch.Tensor | None:
    """Streaming predict-noise that may update internal caches.

    Abstract: the base class provides no implementation.

    Args:
        noisy_latents: Noisy latents to run the prediction on.
        timestep: Timestep tensor for the prediction.
        batch: Training batch handed through to the implementation.
        conditional: Keyword-only flag; presumably selects the
            conditional vs. unconditional branch -- TODO confirm
            against implementations.
        cache_tag: Tag identifying the internal cache state to use
            (default ``"pos"``).
        store_kv: Presumably whether this call writes key/value
            entries into the cache -- TODO confirm.
        cur_start_frame: Presumably the index of the first frame
            handled by this call -- TODO confirm.
        cfg_uncond: Optional dict; its schema is defined by the
            implementation.
        attn_kind: Either ``"dense"`` or ``"vsa"``.

    Returns:
        A tensor, or ``None``. The annotation allows an
        implementation to return no prediction.
    """

fastvideo.train.models.base.CausalModelBase.predict_x0_streaming ¶

predict_x0_streaming(noisy_latents: Tensor, timestep: Tensor, batch: TrainingBatch, *, conditional: bool, cache_tag: str = 'pos', store_kv: bool = False, cur_start_frame: int = 0, cfg_uncond: dict[str, Any] | None = None, attn_kind: Literal['dense', 'vsa'] = 'dense') -> Tensor | None

Predict x0 streaming via predict_noise_streaming + conversion.

Source code in fastvideo/train/models/base.py

def predict_x0_streaming(
    self,
    noisy_latents: torch.Tensor,
    timestep: torch.Tensor,
    batch: TrainingBatch,
    *,
    conditional: bool,
    cache_tag: str = "pos",
    store_kv: bool = False,
    cur_start_frame: int = 0,
    cfg_uncond: dict[str, Any] | None = None,
    attn_kind: Literal["dense", "vsa"] = "dense",
) -> torch.Tensor | None:
    """Streaming x0 prediction built on ``predict_noise_streaming``.

    Forwards every argument unchanged to ``predict_noise_streaming``
    and converts the result with ``pred_noise_to_pred_video`` using
    ``self.noise_scheduler``.

    Returns:
        ``None`` when ``predict_noise_streaming`` returns ``None``;
        otherwise the converted tensor, with the two leading
        dimensions of the noise prediction restored.
    """
    noise_pred = self.predict_noise_streaming(
        noisy_latents,
        timestep,
        batch,
        conditional=conditional,
        cache_tag=cache_tag,
        store_kv=store_kv,
        cur_start_frame=cur_start_frame,
        cfg_uncond=cfg_uncond,
        attn_kind=attn_kind,
    )
    # No prediction was produced, so there is nothing to convert.
    if noise_pred is None:
        return None

    # The converter is fed tensors with dims 0 and 1 merged; remember
    # the original leading sizes so they can be restored afterwards.
    leading_dims = noise_pred.shape[:2]
    x0_flat = pred_noise_to_pred_video(
        pred_noise=noise_pred.flatten(0, 1),
        noise_input_latent=noisy_latents.flatten(0, 1),
        timestep=timestep,
        scheduler=self.noise_scheduler,
    )
    return x0_flat.unflatten(0, leading_dims)

fastvideo.train.models.base.ModelBase ¶

Bases: ABC

Per-role model instance.

Every role (student, teacher, critic, …) gets its own ModelBase instance. Each instance owns its own transformer and noise_scheduler. Heavyweight resources (VAE, dataloader, RNG seeds) are loaded lazily via init_preprocessors, which the method's __init__ calls only on the student.

Attributes¶

fastvideo.train.models.base.ModelBase.device `property` ¶

device: device

The local CUDA device for this rank.

fastvideo.train.models.base.ModelBase.num_train_timesteps `property` ¶

num_train_timesteps: int

Return the scheduler's training timestep horizon.

Functions¶

fastvideo.train.models.base.ModelBase.add_noise `abstractmethod` ¶

add_noise(clean_latents: Tensor, noise: Tensor, timestep: Tensor) -> Tensor

Apply forward-process noise at timestep.

Source code in fastvideo/train/models/base.py

@abstractmethod
def add_noise(
    self,
    clean_latents: torch.Tensor,
    noise: torch.Tensor,
    timestep: torch.Tensor,
) -> torch.Tensor:
    """Apply forward-process noise at *timestep*.

    Abstract: the base class provides no implementation.

    Args:
        clean_latents: Latents to be noised.
        noise: Noise tensor to combine with *clean_latents*.
        timestep: Timestep tensor at which the noise is applied.

    Returns:
        The noised latents.
    """

fastvideo.train.models.base.ModelBase.backward `abstractmethod` ¶

backward(loss: Tensor, ctx: Any, *, grad_accum_rounds: int) -> None

Backward that may restore forward-context.

Source code in fastvideo/train/models/base.py

@abstractmethod
def backward(
    self,
    loss: torch.Tensor,
    ctx: Any,
    *,
    grad_accum_rounds: int,
) -> None:
    """Backward that may restore forward-context.

    Abstract: the base class provides no implementation.

    Args:
        loss: Loss tensor to backpropagate.
        ctx: Opaque forward-context object; its type and contents
            are defined by the implementation.
        grad_accum_rounds: Keyword-only; presumably the number of
            gradient-accumulation rounds, used to scale the loss
            -- TODO confirm against implementations.
    """

fastvideo.train.models.base.ModelBase.init_preprocessors ¶

init_preprocessors(training_config: TrainingConfig) -> None

Load VAE, build dataloader, seed RNGs.

Called only on the student by the method's __init__. Default is a no-op so teacher/critic instances skip this.

Source code in fastvideo/train/models/base.py

def init_preprocessors(  # noqa: B027
        self,
        training_config: TrainingConfig,
) -> None:
    """Load VAE, build dataloader, seed RNGs.

    Called only on the student by the method's ``__init__``.
    Default is a no-op so teacher/critic instances skip this.

    Deliberately not abstract (hence the ``B027`` suppression):
    subclasses override it only when they need these resources.

    Args:
        training_config: Training configuration made available to
            overriding implementations; unused by this default.
    """

fastvideo.train.models.base.ModelBase.on_train_start ¶

on_train_start() -> None

Called once before the training loop begins.

Source code in fastvideo/train/models/base.py

def on_train_start(self) -> None:  # noqa: B027
    """Called once before the training loop begins.

    The default implementation does nothing; it is deliberately not
    abstract (hence the ``B027`` suppression) so subclasses override
    it only when they need start-of-training work.
    """

fastvideo.train.models.base.ModelBase.predict_noise `abstractmethod` ¶

predict_noise(noisy_latents: Tensor, timestep: Tensor, batch: TrainingBatch, *, conditional: bool, cfg_uncond: dict[str, Any] | None = None, attn_kind: Literal['dense', 'vsa'] = 'dense') -> Tensor

Predict noise/flow for the given noisy latents.

Source code in fastvideo/train/models/base.py

@abstractmethod
def predict_noise(
    self,
    noisy_latents: torch.Tensor,
    timestep: torch.Tensor,
    batch: TrainingBatch,
    *,
    conditional: bool,
    cfg_uncond: dict[str, Any] | None = None,
    attn_kind: Literal["dense", "vsa"] = "dense",
) -> torch.Tensor:
    """Predict noise/flow for the given noisy latents.

    Abstract: the base class provides no implementation.

    Args:
        noisy_latents: Noisy latents to run the prediction on.
        timestep: Timestep tensor for the prediction.
        batch: Training batch handed through to the implementation.
        conditional: Keyword-only flag; presumably selects the
            conditional vs. unconditional branch -- TODO confirm
            against implementations.
        cfg_uncond: Optional dict; its schema is defined by the
            implementation.
        attn_kind: Either ``"dense"`` or ``"vsa"``.

    Returns:
        The predicted noise/flow tensor.
    """

fastvideo.train.models.base.ModelBase.predict_x0 ¶

predict_x0(noisy_latents: Tensor, timestep: Tensor, batch: TrainingBatch, *, conditional: bool, cfg_uncond: dict[str, Any] | None = None, attn_kind: Literal['dense', 'vsa'] = 'dense') -> Tensor

Predict x0 via predict_noise + conversion.

Source code in fastvideo/train/models/base.py

def predict_x0(
    self,
    noisy_latents: torch.Tensor,
    timestep: torch.Tensor,
    batch: TrainingBatch,
    *,
    conditional: bool,
    cfg_uncond: dict[str, Any] | None = None,
    attn_kind: Literal["dense", "vsa"] = "dense",
) -> torch.Tensor:
    """Predict x0 by converting the output of ``predict_noise``.

    Forwards every argument unchanged to ``predict_noise`` and
    converts the result with ``pred_noise_to_pred_video`` using
    ``self.noise_scheduler``.

    Returns:
        The converted tensor, with the two leading dimensions of the
        noise prediction restored.
    """
    noise_pred = self.predict_noise(
        noisy_latents,
        timestep,
        batch,
        conditional=conditional,
        cfg_uncond=cfg_uncond,
        attn_kind=attn_kind,
    )

    # The converter is fed tensors with dims 0 and 1 merged; remember
    # the original leading sizes so they can be restored afterwards.
    leading_dims = noise_pred.shape[:2]
    x0_flat = pred_noise_to_pred_video(
        pred_noise=noise_pred.flatten(0, 1),
        noise_input_latent=noisy_latents.flatten(0, 1),
        timestep=timestep,
        scheduler=self.noise_scheduler,
    )
    return x0_flat.unflatten(0, leading_dims)

fastvideo.train.models.base.ModelBase.prepare_batch `abstractmethod` ¶

prepare_batch(raw_batch: dict[str, Any], *, generator: Generator, latents_source: Literal['data', 'zeros'] = 'data') -> TrainingBatch

Convert a dataloader batch into forward primitives.

Source code in fastvideo/train/models/base.py

@abstractmethod
def prepare_batch(
    self,
    raw_batch: dict[str, Any],
    *,
    generator: torch.Generator,
    latents_source: Literal["data", "zeros"] = "data",
) -> TrainingBatch:
    """Convert a dataloader batch into forward primitives.

    Abstract: the base class provides no implementation.

    Args:
        raw_batch: Batch dict as produced by the dataloader; its
            keys are defined by the implementation.
        generator: Keyword-only ``torch.Generator`` supplied for
            any random sampling the implementation performs.
        latents_source: ``"data"`` or ``"zeros"``; presumably
            chooses between latents taken from *raw_batch* and
            zero-filled latents -- TODO confirm.

    Returns:
        The prepared ``TrainingBatch``.
    """

fastvideo.train.models.base.ModelBase.shift_and_clamp_timestep ¶

shift_and_clamp_timestep(timestep: Tensor) -> Tensor

Apply model/pipeline timestep shifting and clamp.

Source code in fastvideo/train/models/base.py

def shift_and_clamp_timestep(self, timestep: torch.Tensor) -> torch.Tensor:
    """Apply model/pipeline timestep shifting and clamp.

    This default implementation is the identity: it returns
    *timestep* unchanged. Subclasses that need shifting or clamping
    override it.
    """
    return timestep

base ¶

Classes¶

fastvideo.train.models.base.CausalModelBase ¶

Functions¶

fastvideo.train.models.base.CausalModelBase.clear_caches abstractmethod ¶

fastvideo.train.models.base.CausalModelBase.predict_noise_streaming abstractmethod ¶

fastvideo.train.models.base.CausalModelBase.predict_x0_streaming ¶

fastvideo.train.models.base.ModelBase ¶

Attributes¶

fastvideo.train.models.base.ModelBase.device property ¶

fastvideo.train.models.base.ModelBase.num_train_timesteps property ¶

Functions¶

fastvideo.train.models.base.ModelBase.add_noise abstractmethod ¶

fastvideo.train.models.base.ModelBase.backward abstractmethod ¶

fastvideo.train.models.base.ModelBase.init_preprocessors ¶

fastvideo.train.models.base.ModelBase.on_train_start ¶

fastvideo.train.models.base.ModelBase.predict_noise abstractmethod ¶

fastvideo.train.models.base.ModelBase.predict_x0 ¶

fastvideo.train.models.base.ModelBase.prepare_batch abstractmethod ¶

fastvideo.train.models.base.ModelBase.shift_and_clamp_timestep ¶

Functions¶

fastvideo.train.models.base.CausalModelBase.clear_caches `abstractmethod` ¶

fastvideo.train.models.base.CausalModelBase.predict_noise_streaming `abstractmethod` ¶

fastvideo.train.models.base.ModelBase.device `property` ¶

fastvideo.train.models.base.ModelBase.num_train_timesteps `property` ¶

fastvideo.train.models.base.ModelBase.add_noise `abstractmethod` ¶

fastvideo.train.models.base.ModelBase.backward `abstractmethod` ¶

fastvideo.train.models.base.ModelBase.predict_noise `abstractmethod` ¶

fastvideo.train.models.base.ModelBase.prepare_batch `abstractmethod` ¶