Source code for pytorchvideo.models.memory_bank

# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.

import math
from typing import Optional

import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchvideo.layers.utils import set_attributes

class MemoryBank(nn.Module):
    """
    Performs Non-Parametric Instance Discrimination for self supervised learning on
    video. A memory bank is built to keep and update the historical feature embedding
    and use them for contrastive learning.

    The original paper is:
    Unsupervised Feature Learning via Non-Parametric Instance Discrimination

    More details can be found from the memory bank part in the following paper:
    Momentum Contrast for Unsupervised Visual Representation Learning
    """

    def __init__(
        self,
        backbone: nn.Module,
        mlp: Optional[nn.Module] = None,
        neg_size: int = 4096,
        temperature: float = 0.07,
        bank_size: int = 1280000,
        dim: int = 2048,
        mmt: float = 0.999,
    ) -> None:
        """
        Args:
            backbone (nn.Module): backbone used to forward the input.
            mlp (nn.Module): optional multi-layer perceptron applied to the backbone
                output in the memory bank instance discrimination model.
            neg_size (int): size of negative samples per instance.
            temperature (float): temperature to use for contrastive learning.
            bank_size (int): size of the memory bank, expected to be the same size as
                the training set.
            dim (int): dimension of the channel.
            mmt (float): momentum to use.
        """
        super().__init__()
        # Assign explicitly rather than forwarding locals(): with zero-argument
        # super() the locals mapping also contains the implicit __class__ cell,
        # which is not a hyper-parameter of this module.
        self.backbone = backbone
        self.mlp = mlp
        self.neg_size = neg_size
        self.temperature = temperature
        self.bank_size = bank_size
        self.dim = dim
        self.mmt = mmt
        self._init_mem_bank(bank_size, dim)

    def _init_mem_bank(self, bank_size: int, dim: int) -> None:
        """
        Given the memory bank size and the channel dimension, initialize the memory
        bank as the ``memory`` buffer.

        Args:
            bank_size (int): size of the memory bank, expected to be the same size as
                the training set.
            dim (int): dimension of the channel.
        """
        stdv = 1.0 / math.sqrt(dim / 3)
        # torch.rand samples from [0, 1); rescale to the interval [-stdv, stdv).
        memory = torch.rand(bank_size, dim).mul_(2 * stdv).add_(-stdv)
        # Place the bank on the device of the backbone's first parameter. A
        # backbone without parameters leaves the bank where it was created
        # instead of raising StopIteration.
        first_param = next(self.backbone.parameters(), None)
        if first_param is not None:
            memory = memory.to(first_param.device)
        self.register_buffer("memory", memory)

    def forward(self, x: torch.Tensor, x_ind: torch.Tensor) -> torch.Tensor:
        """
        Perform contrastive learning with randomly sampled negative instances from
        the memory bank. During training, update the memory bank with the latest
        feature embedding.

        Args:
            x (torch.tensor): a batch of images with augmentation. The input tensor
                shape should be able to be fed into the backbone.
            x_ind (torch.tensor): the index of the image x from the dataset. Expected
                shape is B.

        Returns:
            torch.Tensor: the cross-entropy loss over the positive entry and the
                ``neg_size`` sampled entries of each instance.
        """
        batch_size = x.shape[0]
        x = self.backbone(x)
        if self.mlp is not None:
            x = self.mlp(x)
        # Normalize the output embedding before multiplication.
        x = F.normalize(x, p=2, dim=1)

        # Flatten once so the same 1-D index is used for lookup and write-back.
        x_ind = x_ind.reshape(-1)

        # Random sample negative instances from the memory bank.
        idx = torch.randint(
            0, self.bank_size, size=(batch_size, self.neg_size + 1)
        ).to(x.device)
        # Fill the first column with the positive instances, so the target class
        # of every row is 0 (see ``gt`` below).
        idx.select(1, 0).copy_(x_ind)
        weight = torch.index_select(self.memory, 0, idx.view(-1)).detach()
        weight = weight.view(batch_size, self.neg_size + 1, self.dim)

        # Multiplication for contrastive learning.
        out = torch.einsum("bkc,bc->bk", weight, x)
        out = torch.div(out, self.temperature)
        gt = torch.zeros((batch_size,), device=x.device, dtype=torch.long)
        loss = F.cross_entropy(out, gt)

        # Update memory during training.
        if self.training:
            with torch.no_grad():
                # Momentum update of the stored embeddings, then re-normalize
                # each updated row to unit L2 norm before writing it back.
                pos = torch.index_select(self.memory, 0, x_ind)
                pos.mul_(self.mmt)
                pos.add_(torch.mul(x, 1 - self.mmt))
                norm = pos.pow(2).sum(1, keepdim=True).pow(0.5)
                updated = pos.div(norm)
                self.memory.index_copy_(0, x_ind, updated)
        return loss
Read the Docs v: latest
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.