Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
| # ***************************************************************************** | |
| # Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. | |
| # | |
| # Redistribution and use in source and binary forms, with or without | |
| # modification, are permitted provided that the following conditions are met: | |
| # * Redistributions of source code must retain the above copyright | |
| # notice, this list of conditions and the following disclaimer. | |
| # * Redistributions in binary form must reproduce the above copyright | |
| # notice, this list of conditions and the following disclaimer in the | |
| # documentation and/or other materials provided with the distribution. | |
| # * Neither the name of the NVIDIA CORPORATION nor the | |
| # names of its contributors may be used to endorse or promote products | |
| # derived from this software without specific prior written permission. | |
| # | |
| # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
| # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
| # DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY | |
| # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
| # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
| # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND | |
| # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
| # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
| # | |
| # ***************************************************************************** | |
| import torch | |
| import torch.nn.functional as F | |
| from librosa.filters import mel as librosa_mel_fn | |
| from python.common.audio_processing import dynamic_range_compression, dynamic_range_decompression | |
| from python.stft import STFT | |
class LinearNorm(torch.nn.Module):
    """Fully connected layer with Xavier-uniform weight initialisation.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension of the output.
        bias: Whether the underlying ``torch.nn.Linear`` learns a bias.
        w_init_gain: Nonlinearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initial weights.
    """

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super().__init__()
        projection = torch.nn.Linear(in_dim, out_dim, bias=bias)
        # Overwrite the default Linear initialisation of the weight matrix;
        # the bias (if any) keeps the default initialisation.
        init_gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(projection.weight, gain=init_gain)
        self.linear_layer = projection

    def forward(self, x):
        """Apply the linear projection to ``x`` and return the result."""
        projected = self.linear_layer(x)
        return projected
class ConvNorm(torch.nn.Module):
    """1-D convolution with Xavier-uniform weights and optional batch norm.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel width. Must be odd when ``padding``
            is left as ``None``.
        stride: Convolution stride.
        padding: Explicit padding. When ``None`` it is derived from
            ``kernel_size`` and ``dilation`` as ``dilation * (k - 1) / 2``,
            which keeps the sequence length unchanged for ``stride=1``.
        dilation: Convolution dilation.
        bias: Whether the convolution learns a bias.
        w_init_gain: Nonlinearity name passed to
            ``torch.nn.init.calculate_gain`` to scale the initial weights.
        batch_norm: When true, a ``BatchNorm1d`` over ``out_channels`` is
            applied to the convolution output.

    Raises:
        AssertionError: If ``padding`` is ``None`` and ``kernel_size`` is even.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=None, dilation=1, bias=True, w_init_gain='linear',
                 batch_norm=False):
        super(ConvNorm, self).__init__()
        if padding is None:
            # Symmetric padding is only well defined for odd kernels.
            assert(kernel_size % 2 == 1)
            padding = int(dilation * (kernel_size - 1) / 2)

        self.conv = torch.nn.Conv1d(in_channels, out_channels,
                                    kernel_size=kernel_size, stride=stride,
                                    padding=padding, dilation=dilation,
                                    bias=bias)
        # The class is spelled ``BatchNorm1d``; ``BatchNorm1D`` does not exist
        # in torch.nn and raised AttributeError whenever batch_norm=True.
        self.norm = torch.nn.BatchNorm1d(out_channels) if batch_norm else None

        torch.nn.init.xavier_uniform_(
            self.conv.weight,
            gain=torch.nn.init.calculate_gain(w_init_gain))

    def forward(self, signal):
        """Convolve ``signal`` and, if configured, batch-normalise the result."""
        conv_out = self.conv(signal)
        if self.norm is None:
            return conv_out
        return self.norm(conv_out)
class ConvReLUNorm(torch.nn.Module):
    """Conv1d -> ReLU -> LayerNorm over channels -> Dropout.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels (also the LayerNorm size).
        kernel_size: Convolution kernel width; padding is ``kernel_size // 2``.
        dropout: Dropout probability applied to the normalised output.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, dropout=0.0):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv = torch.nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding=half_kernel,
        )
        self.norm = torch.nn.LayerNorm(out_channels)
        self.dropout = torch.nn.Dropout(dropout)

    def forward(self, signal):
        """Run the conv/ReLU/norm/dropout stack on ``signal``."""
        activated = F.relu(self.conv(signal))
        # LayerNorm normalises the last dimension, so move channels there
        # for the normalisation and move them back afterwards.
        channels_last = activated.transpose(1, 2)
        normalized = self.norm(channels_last).transpose(1, 2)
        return self.dropout(normalized)
class TacotronSTFT(torch.nn.Module):
    """Computes log-compressed mel-spectrograms from raw waveforms.

    Wraps the project's ``STFT`` module and a fixed mel filterbank (stored as
    the non-trainable buffer ``mel_basis``).

    Args:
        filter_length: FFT size handed to ``STFT`` and to the mel filterbank.
        hop_length: Hop size handed to ``STFT``.
        win_length: Window size handed to ``STFT``.
        n_mel_channels: Number of mel bands in the filterbank.
        sampling_rate: Sampling rate used to build the filterbank.
        mel_fmin: Lowest filterbank frequency.
        mel_fmax: Highest filterbank frequency.
    """

    def __init__(self, filter_length=1024, hop_length=256, win_length=1024,
                 n_mel_channels=80, sampling_rate=22050, mel_fmin=0.0,
                 mel_fmax=8000.0):
        super(TacotronSTFT, self).__init__()
        self.n_mel_channels = n_mel_channels
        self.sampling_rate = sampling_rate
        self.stft_fn = STFT(filter_length, hop_length, win_length)
        # Pass everything by keyword: librosa >= 0.10 made these arguments
        # keyword-only, while older releases accept the same names.
        mel_basis = librosa_mel_fn(
            sr=sampling_rate, n_fft=filter_length, n_mels=n_mel_channels,
            fmin=mel_fmin, fmax=mel_fmax)
        mel_basis = torch.from_numpy(mel_basis).float()
        # A buffer follows the module across devices and is saved in the
        # state dict, but is not a trainable parameter.
        self.register_buffer('mel_basis', mel_basis)

    def spectral_normalize(self, magnitudes):
        """Apply ``dynamic_range_compression`` to ``magnitudes``."""
        output = dynamic_range_compression(magnitudes)
        return output

    def spectral_de_normalize(self, magnitudes):
        """Apply ``dynamic_range_decompression`` to ``magnitudes``."""
        output = dynamic_range_decompression(magnitudes)
        return output

    def mel_spectrogram(self, y):
        """Computes mel-spectrograms from a batch of waves

        PARAMS
        ------
        y: torch.FloatTensor with shape (B, T) in range [-1, 1]

        RETURNS
        -------
        mel_output: torch.FloatTensor; presumably of shape
            (B, n_mel_channels, frames), assuming ``STFT.transform`` returns
            magnitudes shaped (B, n_freq, frames) -- TODO confirm.

        RAISES
        ------
        AssertionError: if any sample of ``y`` lies outside [-1, 1].
        """
        assert(torch.min(y.data) >= -1)
        assert(torch.max(y.data) <= 1)

        # Only the magnitude part of the transform is needed here.
        magnitudes, _ = self.stft_fn.transform(y)
        # ``.data`` drops autograd tracking for the magnitudes.
        magnitudes = magnitudes.data
        mel_output = torch.matmul(self.mel_basis, magnitudes)
        mel_output = self.spectral_normalize(mel_output)
        return mel_output