# Source code for pytorchvideo.data.hmdb51
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
from __future__ import annotations
import logging
import os
import pathlib
from typing import Any, Callable, List, Optional, Tuple, Type, Union
import torch
import torch.utils.data
from iopath.common.file_io import g_pathmgr
from .clip_sampling import ClipSampler
from .labeled_video_dataset import LabeledVideoDataset
logger = logging.getLogger(__name__)
class Hmdb51LabeledVideoPaths:
    """
    Pre-processor for the HMDB51 dataset mentioned here -
    https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/

    This dataset consists of classwise folds with each class consisting of 3
    folds (splits).

    The videos directory is of the format,
        video_dir_path/class_x/<somevideo_name>.avi
        ...
        video_dir_path/class_y/<somevideo_name>.avi

    The splits/fold directory is of the format,
        folds_dir_path/class_x_test_split_1.txt
        folds_dir_path/class_x_test_split_2.txt
        folds_dir_path/class_x_test_split_3.txt
        ...
        folds_dir_path/class_y_test_split_1.txt
        folds_dir_path/class_y_test_split_2.txt
        folds_dir_path/class_y_test_split_3.txt

    And each text file in the splits directory class_x_test_split_<1 or 2 or 3>.txt
        <a video as in video_dir_path/class_x> <0 or 1 or 2>
    where 0, 1, 2 correspond to the unused, train and test splits respectively.

    Each video has name of format
        <some_name>_<tag1>_<tag2>_<tag_3>_<tag4>_<tag5>_<some_id>.avi
    For more details on tags -
    https://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/
    """

    # Fold ids that HMDB51 ships with.
    _allowed_splits = [1, 2, 3]
    # Integer flag used in the split files for each split name.
    _split_type_dict = {"train": 1, "test": 2, "unused": 0}

    @classmethod
    def from_dir(
        cls, data_path: str, split_id: int = 1, split_type: str = "train"
    ) -> Hmdb51LabeledVideoPaths:
        """
        Factory function that creates a Hmdb51LabeledVideoPaths object from a
        splits/folds directory.

        Args:
            data_path (str): The path to the splits/folds directory of HMDB51.
            split_id (int): Fold id to be loaded. Belongs to [1, 2, 3].
            split_type (str): Split/Fold type to be loaded. It belongs to one of the
                following,
                - "train"
                - "test"
                - "unused" (This is a small set of videos that are neither
                  a part of the test nor the train fold.)

        Raises:
            RuntimeError: If ``data_path`` is not a directory or ``split_id``
                is not an allowed fold id.
        """
        data_path = pathlib.Path(data_path)
        if not data_path.is_dir():
            # Bug fix: these errors were previously *returned* instead of
            # raised, so callers silently received an exception object.
            raise RuntimeError(f"{data_path} not found or is not a directory.")
        if int(split_id) not in cls._allowed_splits:
            raise RuntimeError(
                f"{split_id} not found in allowed split id's {cls._allowed_splits}."
            )
        # Split files are named <action_name>_test_split<id>.txt.
        file_name_format = "_test_split" + str(int(split_id))
        file_paths = sorted(
            (
                f
                for f in data_path.iterdir()
                if f.is_file() and f.suffix == ".txt" and file_name_format in f.stem
            )
        )
        return cls.from_csvs(file_paths, split_type)

    @classmethod
    def from_csvs(
        cls, file_paths: List[Union[pathlib.Path, str]], split_type: str = "train"
    ) -> Hmdb51LabeledVideoPaths:
        """
        Factory function that creates a Hmdb51LabeledVideoPaths object from a
        list of split files of .txt type.

        Args:
            file_paths (List[Union[pathlib.Path, str]]): The paths to the split
                files of HMDB51.
            split_type (str): Split/Fold type to be loaded.
                - "train"
                - "test"
                - "unused"

        Raises:
            RuntimeError: If a split file is missing or invalid, or no videos
                were found for the requested split.
        """
        video_paths_and_label = []
        for file_path in file_paths:
            file_path = pathlib.Path(file_path)
            # Raise (not assert): asserts are stripped under `python -O`.
            if not g_pathmgr.exists(file_path):
                raise RuntimeError(f"{file_path} not found.")
            if not (file_path.suffix == ".txt" and "_test_split" in file_path.stem):
                # Bug fix: raised instead of returned; typo "Ivalid" corrected.
                raise RuntimeError(f"Invalid file: {file_path}")

            # Stems look like <action_name>_test_split<id>; dropping the last
            # two "_"-separated tokens recovers the action/class name.
            action_name = "_".join(file_path.stem.split("_")[:-2])
            with g_pathmgr.open(file_path, "r") as f:
                for path_label in f.read().splitlines():
                    line_split = path_label.rsplit(None, 1)

                    # Skip entries that do not belong to the requested split.
                    if int(line_split[1]) != cls._split_type_dict[split_type]:
                        continue

                    # Bug fix: use a distinct name instead of clobbering the
                    # outer `file_path`, so error messages report the fold file.
                    video_path = os.path.join(action_name, line_split[0])
                    # Tags are embedded in the video file name (see class doc).
                    meta_tags = line_split[0].split("_")[-6:-1]
                    video_paths_and_label.append(
                        (video_path, {"label": action_name, "meta_tags": meta_tags})
                    )

        if len(video_paths_and_label) == 0:
            raise RuntimeError(f"Failed to load dataset from {file_paths}.")
        return cls(video_paths_and_label)

    def __init__(
        self, paths_and_labels: List[Tuple[str, Optional[dict]]], path_prefix: str = ""
    ) -> None:
        """
        Args:
            paths_and_labels (List[Tuple[str, Optional[dict]]]): a list of tuples
                containing the relative video path and a label dict holding the
                action ``label`` and the video's ``meta_tags``.
            path_prefix (str): a root path joined to each video path on access.
        """
        self._paths_and_labels = paths_and_labels
        self._path_prefix = path_prefix

    def path_prefix(self, prefix):
        """Setter backing the write-only ``path_prefix`` property below."""
        self._path_prefix = prefix

    # Write-only property: the prefix can be assigned but not read back.
    path_prefix = property(None, path_prefix)

    def __getitem__(self, index: int) -> Tuple[str, dict]:
        """
        Args:
            index (int): the path and label index.

        Returns:
            The (prefix-joined) path and label tuple for the given index.
        """
        path, label = self._paths_and_labels[index]
        return (os.path.join(self._path_prefix, path), label)

    def __len__(self) -> int:
        """
        Returns:
            The number of video paths and label pairs.
        """
        return len(self._paths_and_labels)
def Hmdb51(
    data_path: pathlib.Path,
    clip_sampler: ClipSampler,
    video_sampler: Type[torch.utils.data.Sampler] = torch.utils.data.RandomSampler,
    transform: Optional[Callable[[dict], Any]] = None,
    video_path_prefix: str = "",
    split_id: int = 1,
    split_type: str = "train",
    decode_audio: bool = True,
    decoder: str = "pyav",
) -> LabeledVideoDataset:
    """
    A helper function to create a ``LabeledVideoDataset`` object for the HMDB51
    dataset.

    Args:
        data_path (pathlib.Path): Path to the splits/folds directory of HMDB51
            (passed to ``Hmdb51LabeledVideoPaths.from_dir``, which requires a
            directory and raises otherwise).
        clip_sampler (ClipSampler): Defines how clips should be sampled from each
            video. See the clip sampling documentation for more information.
        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
            video container. This defines the order videos are decoded and,
            if necessary, the distributed split.
        transform (Callable): This callable is evaluated on the clip output before
            the clip is returned. It can be used for user defined preprocessing and
            augmentations to the clips. See the ``LabeledVideoDataset`` class for
            clip output format.
        video_path_prefix (str): Path to root directory with the videos that are
            loaded in LabeledVideoDataset. All the video paths before loading
            are prefixed with this path.
        split_id (int): Fold id to be loaded. Options are 1, 2 or 3.
        split_type (str): Split/Fold type to be loaded. Options are ("train",
            "test" or "unused").
        decode_audio (bool): If True, also decode audio from video (forwarded
            to ``LabeledVideoDataset``).
        decoder (str): Defines which backend should be used to decode videos.

    Returns:
        A ``LabeledVideoDataset`` over the requested HMDB51 fold and split.
    """
    # Internal telemetry hook; records dataset usage.
    torch._C._log_api_usage_once("PYTORCHVIDEO.dataset.Hmdb51")

    labeled_video_paths = Hmdb51LabeledVideoPaths.from_dir(
        data_path, split_id=split_id, split_type=split_type
    )
    # `path_prefix` is a write-only property on Hmdb51LabeledVideoPaths.
    labeled_video_paths.path_prefix = video_path_prefix
    return LabeledVideoDataset(
        labeled_video_paths,
        clip_sampler,
        video_sampler,
        transform,
        decode_audio=decode_audio,
        decoder=decoder,
    )