Skip to content

datasets

Prompt-corpus datasets for end-to-end benchmark evaluation.

Public API mirrors :mod:fastvideo.eval (metrics side):

from fastvideo.eval.datasets import (
    PromptDataset, Sample,
    register_dataset, get_dataset, list_datasets,
)

A dataset is an iterable of plain dicts (one per sample). Built-in datasets self-register at import time. To add one, drop a module into this package that subclasses :class:PromptDataset and decorates with @register_dataset("name") — auto-discovery picks it up.

Classes

fastvideo.eval.datasets.PromptDataset

PromptDataset()

Iterable corpus of sample dicts. Subclasses populate self._rows.

Source code in fastvideo/eval/datasets/base.py
def __init__(self) -> None:
    """Start with an empty row list; subclasses fill ``self._rows``."""
    rows: list[dict] = []
    self._rows = rows

Functions

fastvideo.eval.datasets.PromptDataset.by_dimension
by_dimension() -> dict[str, list[dict]]

Group samples by dimension. A multi-dim sample appears under each.

Source code in fastvideo/eval/datasets/base.py
def by_dimension(self) -> dict[str, list[dict]]:
    """Bucket the stored rows by each entry of their ``"dimensions"`` value.

    A row listing several dimensions is placed in every matching bucket;
    a row without a ``"dimensions"`` key lands in none.
    """
    grouped: dict[str, list[dict]] = {}
    for row in self._rows:
        for dim in row.get("dimensions", ()):
            bucket = grouped.get(dim)
            if bucket is None:
                # First row seen for this dimension: open its bucket.
                grouped[dim] = [row]
            else:
                bucket.append(row)
    return grouped

fastvideo.eval.datasets.Sample

Bases: TypedDict

Documented schema for a row yielded by :class:PromptDataset.

Only prompt is required. Extra keys beyond these are forwarded to the runner's eval-kwargs builder verbatim, so action-conditioned or audio-bearing benchmarks can add their own fields without changing the base class.

fastvideo.eval.datasets.VBenchPromptDataset

VBenchPromptDataset(dimensions: list[str] | str = 'all', full_info_path: str | Path | None = None)

Bases: PromptDataset

VBench prompts filtered by evaluation dimension.

Parameters:

Name Type Description Default
dimensions list[str] | str

List of dimension names, or "all". Unknown dimensions raise ValueError.

'all'
full_info_path str | Path | None

Optional override for VBench_full_info.json; defaults to autodetection.

None

A prompt that belongs to several requested dimensions is yielded once; its dimensions list carries all matches so the scorer can route.

Source code in fastvideo/eval/datasets/vbench.py
def __init__(
    self,
    dimensions: list[str] | str = "all",
    full_info_path: str | Path | None = None,
) -> None:
    """Load the VBench prompt file and keep entries for the requested dimensions.

    Args:
        dimensions: ``"all"`` (every dimension found in the file), a single
            dimension name, or a list of dimension names.
        full_info_path: Optional path to ``VBench_full_info.json``. When
            falsy, the path comes from ``_locate_full_info()``.

    Raises:
        ValueError: If a requested dimension does not occur in the file.
    """
    super().__init__()
    path = Path(full_info_path) if full_info_path else _locate_full_info()
    # Read as UTF-8 explicitly so decoding does not depend on the
    # platform's locale default encoding.
    with path.open(encoding="utf-8") as f:
        entries = json.load(f)

    all_dims = sorted({d for e in entries for d in e["dimension"]})
    if dimensions == "all":
        self.dimensions: list[str] = all_dims
    else:
        # A bare string names ONE dimension. Iterating it directly would
        # split it into characters and report nonsense "unknown" names.
        requested = [dimensions] if isinstance(dimensions, str) else list(dimensions)
        unknown = set(requested) - set(all_dims)
        if unknown:
            raise ValueError(f"Unknown VBench dimensions: {sorted(unknown)}. "
                             f"Available: {all_dims}")
        self.dimensions = requested

    wanted = set(self.dimensions)
    for entry in entries:
        relevant = [d for d in entry["dimension"] if d in wanted]
        if not relevant:
            continue
        n = (TEMPORAL_FLICKERING_SAMPLES if "temporal_flickering" in relevant else DEFAULT_SAMPLES)

        # Strip the outer {dim_name: ...} wrapper from upstream's aux
        # schema so every metric reads its inputs from a flat dict.
        #
        # This unwraps exactly one level — the dimension key. Whatever
        # shape lives inside is the metric's contract:
        #
        #   color:                {"color": {"color": "red"}}
        #     → flat: {"color": "red"}                  (scalar)
        #
        #   object_class:         {"object_class": {"object": "person"}}
        #     → flat: {"object": "person"}              (scalar)
        #
        #   multiple_objects:     {"multiple_objects": {"object": "a and b"}}
        #     → flat: {"object": "a and b"}             (scalar)
        #
        #   spatial_relationship: {"spatial_relationship":
        #                            {"spatial_relationship":
        #                                {"object_a": ..., "object_b": ...,
        #                                 "relationship": ...}}}
        #     → flat: {"spatial_relationship": {object_a,object_b,relationship}}
        #
        # Note the spatial_relationship case keeps a nested inner dict
        # by design — upstream double-wraps it, the SpatialRelationship
        # metric reads ``aux["spatial_relationship"]`` expecting that
        # inner dict. Don't "simplify" the wrapping away.
        raw_aux = entry.get("auxiliary_info") or {}
        flat_aux: dict = {}
        for v in raw_aux.values():
            # Non-dict values carry no inner mapping to merge; skip them.
            if isinstance(v, dict):
                flat_aux.update(v)

        self._rows.append({
            "prompt": entry["prompt_en"],
            "n_samples": n,
            "dimensions": relevant,
            "auxiliary_info": flat_aux,
        })
    self.full_info_path = path

Functions

fastvideo.eval.datasets.get_dataset

get_dataset(name: str, **kwargs: Any) -> BasePromptDataset

Instantiate a registered dataset by name.

Source code in fastvideo/eval/datasets/registry.py
def get_dataset(name: str, **kwargs: Any) -> BasePromptDataset:
    """Look up *name* in the registry and construct it with ``**kwargs``.

    Raises:
        KeyError: If nothing is registered under *name*; the message lists
            the registered names in sorted order.
    """
    dataset_cls = _REGISTRY.get(name)
    if dataset_cls is not None:
        return dataset_cls(**kwargs)
    known = ", ".join(sorted(_REGISTRY))
    raise KeyError(f"Unknown dataset '{name}'. Available: {known}")

fastvideo.eval.datasets.list_datasets

list_datasets() -> list[str]

Return sorted list of all registered dataset names.

Source code in fastvideo/eval/datasets/registry.py
def list_datasets() -> list[str]:
    """List every registered dataset name in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names

fastvideo.eval.datasets.register_dataset

register_dataset(name: str)

Decorator to register a prompt-dataset class.

Usage::

@register_dataset("vbench")
class VBenchPromptDataset(BasePromptDataset):
    ...
Source code in fastvideo/eval/datasets/registry.py
def register_dataset(name: str):
    """Build a class decorator that records the class in the dataset registry.

    The decorated class has its ``name`` attribute set to *name*, is stored
    in ``_REGISTRY`` under that key, and is then returned.

    Example::

        @register_dataset("vbench")
        class VBenchPromptDataset(BasePromptDataset):
            ...
    """

    def _register(dataset_cls):
        # Tag the class first, then publish it under the same key.
        dataset_cls.name = name
        _REGISTRY[name] = dataset_cls
        return dataset_cls

    return _register

Modules

fastvideo.eval.datasets.base

Prompt-corpus datasets.

A :class:PromptDataset is an iterable of sample dicts describing the prompts and conditions for a benchmark. Each sample is a plain dict — no dataclass, no schema enforcement — that flows directly into both generation (VideoGenerator.generate_video(**sample)) and scoring (Evaluator.evaluate(**eval_kwargs)). The runner picks well-known keys (prompt, n_samples, dimensions, auxiliary_info, ...) and passes the rest through.

This matches the surrounding FastVideo style:

  • :class:fastvideo.dataset.validation_dataset.ValidationDataset yields dicts.
  • :meth:fastvideo.VideoGenerator.generate_video consumes **kwargs.
  • :meth:fastvideo.eval.Evaluator.evaluate consumes **kwargs.

To add a new benchmark:

  1. Subclass :class:PromptDataset, populate self._rows with dicts in __init__.
  2. Decorate with @register_dataset("my_bench").

Convention for auxiliary_info: a flat dict of metric-keyed values (e.g. {"color": "red"}). Benchmarks with nested aux schemas (VBench's {dim: {key: val}}) flatten at load time so every consumer sees the same shape.

Classes

fastvideo.eval.datasets.base.PromptDataset
PromptDataset()

Iterable corpus of sample dicts. Subclasses populate self._rows.

Source code in fastvideo/eval/datasets/base.py
def __init__(self) -> None:
    """Start with an empty row list; subclasses fill ``self._rows``."""
    rows: list[dict] = []
    self._rows = rows
Functions
fastvideo.eval.datasets.base.PromptDataset.by_dimension
by_dimension() -> dict[str, list[dict]]

Group samples by dimension. A multi-dim sample appears under each.

Source code in fastvideo/eval/datasets/base.py
def by_dimension(self) -> dict[str, list[dict]]:
    """Bucket the stored rows by each entry of their ``"dimensions"`` value.

    A row listing several dimensions is placed in every matching bucket;
    a row without a ``"dimensions"`` key lands in none.
    """
    grouped: dict[str, list[dict]] = {}
    for row in self._rows:
        for dim in row.get("dimensions", ()):
            bucket = grouped.get(dim)
            if bucket is None:
                # First row seen for this dimension: open its bucket.
                grouped[dim] = [row]
            else:
                bucket.append(row)
    return grouped
fastvideo.eval.datasets.base.Sample

Bases: TypedDict

Documented schema for a row yielded by :class:PromptDataset.

Only prompt is required. Extra keys beyond these are forwarded to the runner's eval-kwargs builder verbatim, so action-conditioned or audio-bearing benchmarks can add their own fields without changing the base class.

fastvideo.eval.datasets.physics_iq

Physics-IQ benchmark prompt corpus.

Yields one sample dict per take-1 scenario, paired with its take-2 reference and both takes' real motion masks. Each row drops straight into :meth:fastvideo.eval.Evaluator.evaluate for the physics_iq metric:

{
    "prompt": <description>,
    "reference":            "<take-1 mp4>",
    "reference_take2":      "<take-2 mp4>",
    "reference_mask":       "<take-1 mask mp4>",
    "reference_take2_mask": "<take-2 mask mp4>",
    "scenario": <scenario_id>,
    "view": <camera view>,
    "auxiliary_info": { ... metadata ... },
}

Self-contained dataset: the manifest CSV is vendored under fastvideo/eval/metrics/physics_iq/_vendored/descriptions.csv; per-scenario videos/masks/switch-frames auto-fetch on first use from the public DeepMind bucket into ${FASTVIDEO_EVAL_CACHE}/datasets/physics_iq/. Pass auto_download=False (or dataset_root= pointing at a pre-downloaded copy) to opt out of network fetches.

Classes

fastvideo.eval.datasets.physics_iq.PhysicsIQPromptDataset
PhysicsIQPromptDataset(dataset_root: str | Path | None = None, *, fps: int = _DEFAULT_FPS, limit: int | None = None, generated_dir: str | Path | None = None, auto_download: bool = True)

Bases: PromptDataset

Physics-IQ benchmark prompt corpus.

Self-contained: get_dataset("physics_iq") works with no kwargs. The manifest CSV is vendored next to the metric, and per-scenario assets auto-fetch on first miss from the public bucket into ${FASTVIDEO_EVAL_CACHE}/datasets/physics_iq/.

Parameters:

Name Type Description Default
dataset_root str | Path | None

path to a pre-downloaded copy of the Physics-IQ release. Defaults to ${FASTVIDEO_EVAL_CACHE}/datasets/physics_iq; override only if you already have a local mirror.

None
fps int

target frame rate. The release ships at 30 FPS; other rates transcode once on first access into <root>/.physics_iq_cache/.

_DEFAULT_FPS
limit int | None

Optional truncation for quick smoke runs. Pass this kwarg rather than slicing the dataset after construction, so that only the assets for the requested scenarios are fetched.

None
generated_dir str | Path | None

optional directory of pre-generated videos — attaches each manifest row's expected output path to the sample dict under auxiliary_info["generated_video_path"].

None
auto_download bool

when True (the default), missing testing videos, masks, and switch frames are fetched from the public bucket into dataset_root. Set False for air-gapped runs; the loader will then raise FileNotFoundError on miss.

True
Source code in fastvideo/eval/datasets/physics_iq.py
def __init__(
    self,
    dataset_root: str | Path | None = None,
    *,
    fps: int = _DEFAULT_FPS,
    limit: int | None = None,
    generated_dir: str | Path | None = None,
    auto_download: bool = True,
) -> None:
    """Resolve the on-disk layout, then build one row per scenario.

    Args:
        dataset_root: Root of a local Physics-IQ copy; when falsy, the
            value of ``_default_dataset_root()`` is used.
        fps: Target frame rate, stored and forwarded to scenario iteration.
        limit: Optional cap forwarded to scenario iteration.
        generated_dir: Optional directory of pre-generated videos,
            forwarded to scenario iteration.
        auto_download: Stored on the instance as ``self.auto_download``.
    """
    super().__init__()

    # Normalise the root once; every other path hangs off it.
    root_arg = dataset_root or _default_dataset_root()
    self.repo_root = Path(root_arg).expanduser().resolve()
    self.dataset_dir = _resolve_dataset_dir(self.repo_root)
    self.descriptions_path = _resolve_descriptions_path(self.repo_root, self.dataset_dir)
    self.cache_dir = self.repo_root / ".physics_iq_cache"
    self.fps = fps
    self.auto_download = auto_download
    self.bucket_url = _bucket_url()

    # All attributes above are assigned before scenarios are iterated.
    resolved = self._iter_scenarios(fps=fps, generated_dir=generated_dir, limit=limit)
    self._rows = [_scenario_to_row(scenario) for scenario in resolved]
fastvideo.eval.datasets.physics_iq.PhysicsIQScenario dataclass
PhysicsIQScenario(scenario_id: str, view: str, scenario_name: str, take1_video_path: str, take2_video_path: str, switch_frame_path: str, caption: str, expected_gen_filename: str, generated_video_path: str | None = None, take1_mask_path: str | None = None, take2_mask_path: str | None = None)

One row of the Physics-IQ manifest, fully resolved on disk.

Functions

fastvideo.eval.datasets.registry

Registry for prompt-corpus datasets, mirroring :mod:fastvideo.eval.registry.

Functions

fastvideo.eval.datasets.registry.get_dataset
get_dataset(name: str, **kwargs: Any) -> BasePromptDataset

Instantiate a registered dataset by name.

Source code in fastvideo/eval/datasets/registry.py
def get_dataset(name: str, **kwargs: Any) -> BasePromptDataset:
    """Look up *name* in the registry and construct it with ``**kwargs``.

    Raises:
        KeyError: If nothing is registered under *name*; the message lists
            the registered names in sorted order.
    """
    dataset_cls = _REGISTRY.get(name)
    if dataset_cls is not None:
        return dataset_cls(**kwargs)
    known = ", ".join(sorted(_REGISTRY))
    raise KeyError(f"Unknown dataset '{name}'. Available: {known}")
fastvideo.eval.datasets.registry.list_datasets
list_datasets() -> list[str]

Return sorted list of all registered dataset names.

Source code in fastvideo/eval/datasets/registry.py
def list_datasets() -> list[str]:
    """List every registered dataset name in sorted order."""
    names = list(_REGISTRY)
    names.sort()
    return names
fastvideo.eval.datasets.registry.register_dataset
register_dataset(name: str)

Decorator to register a prompt-dataset class.

Usage::

@register_dataset("vbench")
class VBenchPromptDataset(BasePromptDataset):
    ...
Source code in fastvideo/eval/datasets/registry.py
def register_dataset(name: str):
    """Build a class decorator that records the class in the dataset registry.

    The decorated class has its ``name`` attribute set to *name*, is stored
    in ``_REGISTRY`` under that key, and is then returned.

    Example::

        @register_dataset("vbench")
        class VBenchPromptDataset(BasePromptDataset):
            ...
    """

    def _register(dataset_cls):
        # Tag the class first, then publish it under the same key.
        dataset_cls.name = name
        _REGISTRY[name] = dataset_cls
        return dataset_cls

    return _register

fastvideo.eval.datasets.vbench

VBench prompt corpus.

Single source of truth: upstream's VBench_full_info.json (946 entries, each with prompt_en, a dimension list, optional auxiliary_info keyed by dimension).

Classes

fastvideo.eval.datasets.vbench.VBenchPromptDataset
VBenchPromptDataset(dimensions: list[str] | str = 'all', full_info_path: str | Path | None = None)

Bases: PromptDataset

VBench prompts filtered by evaluation dimension.

Parameters:

Name Type Description Default
dimensions list[str] | str

List of dimension names, or "all". Unknown dimensions raise ValueError.

'all'
full_info_path str | Path | None

Optional override for VBench_full_info.json; defaults to autodetection.

None

A prompt that belongs to several requested dimensions is yielded once; its dimensions list carries all matches so the scorer can route.

Source code in fastvideo/eval/datasets/vbench.py
def __init__(
    self,
    dimensions: list[str] | str = "all",
    full_info_path: str | Path | None = None,
) -> None:
    """Load the VBench prompt file and keep entries for the requested dimensions.

    Args:
        dimensions: ``"all"`` (every dimension found in the file), a single
            dimension name, or a list of dimension names.
        full_info_path: Optional path to ``VBench_full_info.json``. When
            falsy, the path comes from ``_locate_full_info()``.

    Raises:
        ValueError: If a requested dimension does not occur in the file.
    """
    super().__init__()
    path = Path(full_info_path) if full_info_path else _locate_full_info()
    # Read as UTF-8 explicitly so decoding does not depend on the
    # platform's locale default encoding.
    with path.open(encoding="utf-8") as f:
        entries = json.load(f)

    all_dims = sorted({d for e in entries for d in e["dimension"]})
    if dimensions == "all":
        self.dimensions: list[str] = all_dims
    else:
        # A bare string names ONE dimension. Iterating it directly would
        # split it into characters and report nonsense "unknown" names.
        requested = [dimensions] if isinstance(dimensions, str) else list(dimensions)
        unknown = set(requested) - set(all_dims)
        if unknown:
            raise ValueError(f"Unknown VBench dimensions: {sorted(unknown)}. "
                             f"Available: {all_dims}")
        self.dimensions = requested

    wanted = set(self.dimensions)
    for entry in entries:
        relevant = [d for d in entry["dimension"] if d in wanted]
        if not relevant:
            continue
        n = (TEMPORAL_FLICKERING_SAMPLES if "temporal_flickering" in relevant else DEFAULT_SAMPLES)

        # Strip the outer {dim_name: ...} wrapper from upstream's aux
        # schema so every metric reads its inputs from a flat dict.
        #
        # This unwraps exactly one level — the dimension key. Whatever
        # shape lives inside is the metric's contract:
        #
        #   color:                {"color": {"color": "red"}}
        #     → flat: {"color": "red"}                  (scalar)
        #
        #   object_class:         {"object_class": {"object": "person"}}
        #     → flat: {"object": "person"}              (scalar)
        #
        #   multiple_objects:     {"multiple_objects": {"object": "a and b"}}
        #     → flat: {"object": "a and b"}             (scalar)
        #
        #   spatial_relationship: {"spatial_relationship":
        #                            {"spatial_relationship":
        #                                {"object_a": ..., "object_b": ...,
        #                                 "relationship": ...}}}
        #     → flat: {"spatial_relationship": {object_a,object_b,relationship}}
        #
        # Note the spatial_relationship case keeps a nested inner dict
        # by design — upstream double-wraps it, the SpatialRelationship
        # metric reads ``aux["spatial_relationship"]`` expecting that
        # inner dict. Don't "simplify" the wrapping away.
        raw_aux = entry.get("auxiliary_info") or {}
        flat_aux: dict = {}
        for v in raw_aux.values():
            # Non-dict values carry no inner mapping to merge; skip them.
            if isinstance(v, dict):
                flat_aux.update(v)

        self._rows.append({
            "prompt": entry["prompt_en"],
            "n_samples": n,
            "dimensions": relevant,
            "auxiliary_info": flat_aux,
        })
    self.full_info_path = path

Functions