from abc import ABC, abstractmethod
from typing import Tuple
import torch
class AbsEncoder(torch.nn.Module, ABC):
    """Abstract base class defining the encoder interface.

    Subclasses must implement :meth:`forward` and :attr:`output_dim`.
    :meth:`forward_streaming` and :meth:`streaming_frame` are optional;
    the defaults here raise ``NotImplementedError``.
    """

    @abstractmethod
    def forward(
        self,
        input: torch.Tensor,
        ilens: torch.Tensor,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run the encoder on ``input``.

        Args:
            input: Input tensor.
            ilens: Tensor accompanying ``input``.
                NOTE(review): presumably the per-sample lengths of
                ``input`` -- confirm against the concrete encoders.

        Returns:
            A tuple of two tensors.
            NOTE(review): presumably (encoded output, output lengths) --
            confirm against the concrete encoders.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    @property
    @abstractmethod
    def output_dim(self) -> int:
        """Return the encoder's output dimension as an ``int``.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def forward_streaming(self, input: torch.Tensor):
        """Streaming counterpart of :meth:`forward`.

        Not abstract: subclasses without streaming support may leave it
        unimplemented.

        Args:
            input: Input tensor.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        raise NotImplementedError

    def streaming_frame(self, audio: torch.Tensor):
        """Split continuous audio into frame-level chunks.

        This is used for streaming *simulation*: the function takes the
        entire long audio as input. You may refer to it when managing the
        streaming input buffer in a real streaming application.

        Args:
            audio: (B, T)

        Returns:
            chunked: List [(B, frame_size),]

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        # The exception must be raised: a bare ``NotImplementedError``
        # expression is a no-op that silently returns ``None``.
        raise NotImplementedError