Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ coverage.xml
.hypothesis/
.pytest_cache/
cover/
tests/tmp/

# Translations
*.mo
Expand Down Expand Up @@ -117,6 +118,9 @@ ENV/
env.bak/
venv.bak/

# PyCharm project settings
.idea

# Spyder project settings
.spyderproject
.spyproject
Expand Down
3 changes: 2 additions & 1 deletion recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ __set_seed: !apply:torch.manual_seed [!ref <seed>]
data_folder: !PLACEHOLDER # e.g. /localscratch/common_voice_kpd/
output_folder: !ref results/ECAPA-TDNN/<seed>
save_folder: !ref <output_folder>/save
rir_folder: !ref <data_folder>
train_log: !ref <output_folder>/train_log.txt
device: 'cuda:0'
skip_prep: False
Expand Down Expand Up @@ -51,7 +52,7 @@ test_dataloader_options:
# Added noise and reverb come from OpenRIR dataset, automatically
# downloaded and prepared with this Environmental Corruption class.
env_corrupt: !new:speechbrain.lobes.augment.EnvCorrupt
openrir_folder: !ref <data_folder>
openrir_folder: !ref <rir_folder>
openrir_max_noise_len: 3.0 # seconds
babble_prob: 0.0
reverb_prob: 1.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
source: !ref <wav2vec2_hub>
output_norm: True
freeze: !ref <freeze_wav2vec>
save_path: !ref <save_folder>/wav2vec2_checkpoints
save_path: !ref <save_folder>/wav2vec2_checkpoint

#####
# Uncomment this block if you prefer to use a Fairseq pretrained model instead
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
source: !ref <wav2vec2_hub>
output_norm: True
freeze: !ref <freeze_wav2vec>
save_path: !ref <save_folder>/wav2vec2_checkpoints
save_path: !ref <save_folder>/wav2vec2_checkpoint

#####
# Uncomment this block if you prefer to use a Fairseq pretrained model instead
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter

wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2Pretrain
source: !ref <wav2vec2_hub>
save_path: !ref <save_folder>/wav2vec2_checkpoints
save_path: !ref <save_folder>/wav2vec2_checkpoint
mask_prob: !ref <mask_prob>
mask_length: !ref <mask_length>

Expand Down
4 changes: 2 additions & 2 deletions recipes/VoxLingua107/lang_id/hparams/train_ecapa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ output_folder: !ref results/epaca/<seed>
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt
data_folder: ./

rir_folder: !ref <data_folder>

shards_url: /data/voxlingua107_shards
train_meta: !ref <shards_url>/train/meta.json
Expand Down Expand Up @@ -79,7 +79,7 @@ augment_speed: !new:speechbrain.lobes.augment.TimeDomainSpecAugment


add_rev_noise: !new:speechbrain.lobes.augment.EnvCorrupt
openrir_folder: !ref <data_folder>
openrir_folder: !ref <rir_folder>
openrir_max_noise_len: 3.0 # seconds
reverb_prob: 0.5
noise_prob: 0.8
Expand Down
18 changes: 10 additions & 8 deletions speechbrain/dataio/dataio.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def read_audio(waveforms_obj):
-------
>>> dummywav = torch.rand(16000)
>>> import os
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
>>> write_audio(tmpfile, dummywav, 16000)
>>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
>>> loaded = read_audio(asr_example["wav"])
Expand Down Expand Up @@ -257,7 +257,7 @@ def read_audio_multichannel(waveforms_obj):
-------
>>> dummywav = torch.rand(16000, 2)
>>> import os
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
>>> write_audio(tmpfile, dummywav, 16000)
>>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
>>> loaded = read_audio(asr_example["wav"])
Expand Down Expand Up @@ -305,7 +305,7 @@ def write_audio(filepath, audio, samplerate):
Example
-------
>>> import os
>>> tmpfile = os.path.join(str(getfixture('tmpdir')), "wave.wav")
>>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
>>> dummywav = torch.rand(16000, 2)
>>> write_audio(tmpfile, dummywav, 16000)
>>> loaded = read_audio(tmpfile)
Expand Down Expand Up @@ -605,7 +605,7 @@ def write_txt_file(data, filename, sampling_rate=None):
-------
>>> tmpdir = getfixture('tmpdir')
>>> signal=torch.tensor([1,2,3,4])
>>> write_txt_file(signal, os.path.join(tmpdir, 'example.txt'))
>>> write_txt_file(signal, tmpdir / 'example.txt')
"""
del sampling_rate # Not used.
# Check if the path of filename exists
Expand Down Expand Up @@ -642,7 +642,7 @@ def write_stdout(data, filename=None, sampling_rate=None):
-------
>>> tmpdir = getfixture('tmpdir')
>>> signal = torch.tensor([[1,2,3,4]])
>>> write_stdout(signal, tmpdir + '/example.txt')
>>> write_stdout(signal, tmpdir / 'example.txt')
[1, 2, 3, 4]
"""
# Managing Torch.Tensor
Expand Down Expand Up @@ -805,7 +805,7 @@ def save_md5(files, out_file):
Example:
>>> files = ['tests/samples/single-mic/example1.wav']
>>> tmpdir = getfixture('tmpdir')
>>> save_md5(files, os.path.join(tmpdir, "md5.pkl"))
>>> save_md5(files, tmpdir / "md5.pkl")
"""
# Initialization of the dictionary
md5_dict = {}
Expand All @@ -830,7 +830,7 @@ def save_pkl(obj, file):

Example
-------
>>> tmpfile = os.path.join(getfixture('tmpdir'), "example.pkl")
>>> tmpfile = getfixture('tmpdir') / "example.pkl"
>>> save_pkl([1, 2, 3, 4, 5], tmpfile)
>>> load_pkl(tmpfile)
[1, 2, 3, 4, 5]
Expand Down Expand Up @@ -983,7 +983,9 @@ def merge_csvs(data_folder, csv_lst, merged_csv):

Example
-------
>>> merge_csvs("tests/samples/annotation/",
>>> tmpdir = getfixture('tmpdir')
>>> os.symlink(os.path.realpath("tests/samples/annotation/speech.csv"), tmpdir / "speech.csv")
>>> merge_csvs(tmpdir,
... ["speech.csv", "speech.csv"],
... "test_csv_merge.csv")
"""
Expand Down
21 changes: 13 additions & 8 deletions speechbrain/pretrained/interfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,10 +766,10 @@ class EncoderClassifier(Pretrained):

>>> # Compute embeddings
>>> signal, fs = torchaudio.load("tests/samples/single-mic/example1.wav")
>>> embeddings = classifier.encode_batch(signal)
>>> embeddings = classifier.encode_batch(signal)

>>> # Classification
>>> prediction = classifier .classify_batch(signal)
>>> prediction = classifier.classify_batch(signal)
"""

MODULES_NEEDED = [
Expand Down Expand Up @@ -2344,7 +2344,8 @@ class GraphemeToPhoneme(Pretrained, EncodeDecodePipelineMixin):
>>> text = ("English is tough. It can be understood "
... "through thorough thought though")
>>> from speechbrain.pretrained import GraphemeToPhoneme
>>> g2p = GraphemeToPhoneme.from_hparams('path/to/model') # doctest: +SKIP
>>> tmpdir = getfixture('tmpdir')
>>> g2p = GraphemeToPhoneme.from_hparams('path/to/model', savedir=tmpdir) # doctest: +SKIP
>>> phonemes = g2p.g2p(text) # doctest: +SKIP
"""

Expand Down Expand Up @@ -2590,7 +2591,8 @@ class Tacotron2(Pretrained):

Example
-------
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir")
>>> tmpdir_vocoder = getfixture('tmpdir') / "vocoder"
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir=tmpdir_vocoder)
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
>>> items = [
... "A quick brown fox jumped over the lazy dog",
Expand All @@ -2601,7 +2603,8 @@ class Tacotron2(Pretrained):

>>> # One can combine the TTS model with a vocoder (that generates the final waveform)
>>> # Initialize the Vocoder (HiFIGAN)
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
>>> tmpdir_tts = getfixture('tmpdir') / "tts"
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir=tmpdir_tts)
>>> # Running the TTS
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
>>> # Running Vocoder (spectrogram-to-waveform)
Expand Down Expand Up @@ -2679,13 +2682,15 @@ class HIFIGAN(Pretrained):

Example
-------
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir="tmpdir_vocoder")
>>> tmpdir_vocoder = getfixture('tmpdir') / "vocoder"
>>> hifi_gan = HIFIGAN.from_hparams(source="speechbrain/tts-hifigan-ljspeech", savedir=tmpdir_vocoder)
>>> mel_specs = torch.rand(2, 80,298)
>>> waveforms = hifi_gan.decode_batch(mel_specs)

>>> # You can use the vocoder coupled with a TTS system
>>> # Initialize TTS (tacotron2)
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir="tmpdir_tts")
>>> tmpdir_tts = getfixture('tmpdir') / "tts"
>>> tacotron2 = Tacotron2.from_hparams(source="speechbrain/tts-tacotron2-ljspeech", savedir=tmpdir_tts)
>>> # Running the TTS
>>> mel_output, mel_length, alignment = tacotron2.encode_text("Mary had a little lamb")
>>> # Running Vocoder (spectrogram-to-waveform)
Expand Down Expand Up @@ -2737,7 +2742,7 @@ def decode_spectrogram(self, spectrogram):
audio can be saved by:
>>> waveform = torch.rand(1, 666666)
>>> sample_rate = 22050
>>> torchaudio.save("test.wav", waveform, sample_rate)
>>> torchaudio.save(str(getfixture('tmpdir') / "test.wav"), waveform, sample_rate)
"""
if self.first_call:
self.hparams.generator.remove_weight_norm()
Expand Down
19 changes: 11 additions & 8 deletions speechbrain/tokenizers/SentencePiece.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,23 +84,21 @@ class SentencePiece:
-------
>>> import torch
>>> dict_int2lab = {1: "HELLO", 2: "MORNING"}
>>> model_dir = "tests/unittests/tokenizer_data/"
>>> model_dir = getfixture('tmpdir') / "tokenizer_data"
>>> # Example with csv
>>> annotation_train = "tests/unittests/tokenizer_data/dev-clean.csv"
>>> annotation_train = "tests/samples/annotation/dev-clean.csv"
>>> annotation_read = "wrd"
>>> model_type = "bpe"
>>> bpe = SentencePiece(model_dir,100, annotation_train, annotation_read,
... model_type)
>>> bpe = SentencePiece(str(model_dir), 100, annotation_train, annotation_read, model_type)
>>> batch_seq = torch.Tensor([[1, 2, 2, 1],[1, 2, 1, 0]])
>>> batch_lens = torch.Tensor([1.0, 0.75])
>>> encoded_seq_ids, encoded_seq_pieces = bpe(
... batch_seq, batch_lens, dict_int2lab, task="encode"
... )
>>> # Example using JSON
>>> annotation_train = "tests/unittests/tokenizer_data/dev-clean.json"
>>> annotation_train = str(model_dir + "/dev-clean.json")
>>> annotation_read = "wrd"
>>> bpe = SentencePiece(model_dir,100, annotation_train, annotation_read,
... model_type, annotation_format = 'json')
>>> bpe = SentencePiece(model_dir, 100, annotation_train, annotation_read, model_type, annotation_format = 'json')
>>> encoded_seq_ids, encoded_seq_pieces = bpe(
... batch_seq, batch_lens, dict_int2lab, task="encode"
... )
Expand Down Expand Up @@ -142,7 +140,12 @@ def __init__(
if self.annotation_train is not None:
ext = os.path.splitext(self.annotation_train)[1]
if text_file is None:
text_file = self.annotation_train.replace(ext, ".txt")
text_file = os.path.join(
model_dir,
os.path.basename(self.annotation_train).replace(
ext, ".txt"
),
)
self.text_file = text_file

self.prefix_model_file = os.path.join(
Expand Down
2 changes: 1 addition & 1 deletion speechbrain/utils/check_HF_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
def run_HF_check(
recipe_csvfile="tests/recipes.csv",
field="HF_repo",
output_folder="HF_repos",
output_folder="tests/tmp/HF",
):
"""Checks if the code reported in the readme files of the HF repository is
runnable. Note: the tests run the code marked as python in the readme file.
Expand Down
31 changes: 25 additions & 6 deletions speechbrain/utils/recipe_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

Authors
* Mirco Ravanelli 2022
* Andreas Nautsch 2022
"""
import os
import re
Expand Down Expand Up @@ -318,7 +319,7 @@ def run_recipe_tests(
test_field="test_debug_flags",
check_field="test_debug_checks",
run_opts="--device=cpu",
output_folder="tests/recipe_tests/",
output_folder="tests/tmp/recipes/",
filters_fields=[],
filters=[],
do_checks=True,
Expand Down Expand Up @@ -429,9 +430,19 @@ def load_yaml_test(
avoid_list=[
"templates/hyperparameter_optimization_speaker_id/train.yaml",
"templates/speaker_id/train.yaml",
# recipes creating errors if NVIDIA driver is not on one's system
"recipes/timers-and-such/multistage/hparams/train_LS_LM.yaml",
"recipes/timers-and-such/multistage/hparams/train_TAS_LM.yaml",
"recipes/timers-and-such/direct/hparams/train.yaml",
"recipes/timers-and-such/decoupled/hparams/train_LS_LM.yaml",
"recipes/timers-and-such/decoupled/hparams/train_TAS_LM.yaml",
"recipes/fluent-speech-commands/direct/hparams/train.yaml",
"recipes/CommonLanguage/lang_id/hparams/train_ecapa_tdnn.yaml",
"recipes/SLURP/direct/hparams/train.yaml",
],
data_folder="yaml_check_folder",
output_folder="yaml_check_folder",
rir_folder="tests/tmp/rir",
data_folder="tests/tmp/yaml",
output_folder="tests/tmp/yaml",
):
"""Tests if the yaml files can be loaded without errors.

Expand All @@ -453,6 +464,8 @@ def load_yaml_test(
See above.
avoid_list: list
List of hparam file not to check.
rir_folder:
This overrides the RIR-related entries (rir_folder, rir_path, openrir_folder, open_rir_folder, and data_folder_rirs) usually specified in the hparam files.
data_folder:
This overrides the data_folder usually specified in the hparam files.
output_folder:
Expand All @@ -470,19 +483,25 @@ def load_yaml_test(
# Set data_folder, output_folder and rir_folder
data_folder = os.path.join(cwd, data_folder)
output_folder = os.path.join(cwd, output_folder)
rir_folder = os.path.join(cwd, rir_folder)

# Additional overrides
add_overrides = {
"manual_annot_folder": data_folder,
"musan_folder": data_folder,
"tea_models_dir": data_folder,
"rir_path": data_folder,
"wsj_root": data_folder,
"tokenizer_file": data_folder,
"commonlanguage_folder": data_folder,
"tea_infer_dir": data_folder,
"original_data_folder": data_folder,
"pretrain_st_dir": data_folder,
# RIR folder specifications -> all point to the same zip file: one download destination
"rir_path": rir_folder,
"rir_folder": rir_folder,
"openrir_folder": rir_folder,
"open_rir_folder": rir_folder,
"data_folder_rirs": rir_folder,
}

# Read the csv recipe file and detect which tests we have to run
Expand Down Expand Up @@ -523,10 +542,10 @@ def load_yaml_test(
# Append additional overrides when needed
with open(hparam_file) as f:
for line in f:
for key in add_overrides.keys():
for key, value in add_overrides.items():
pattern = key + ":"
if pattern in line and line.find(pattern) == 0:
overrides.update({key: data_folder})
overrides.update({key: value})

with open(hparam_file) as fin:
try:
Expand Down
3 changes: 2 additions & 1 deletion templates/enhancement/train.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ data_folder: ./data
output_folder: !ref ./results/<seed>
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt
rir_folder: !ref <data_folder>

# Path where data manifest files will be stored
# The data manifest files are created by the data preparation script.
Expand Down Expand Up @@ -73,7 +74,7 @@ resynth: !name:speechbrain.processing.signal_processing.resynthesize
# downloaded and prepared with this Environmental Corruption class.
# The babble is generated from other utterances in each batch.
env_corruption: !new:speechbrain.lobes.augment.EnvCorrupt
openrir_folder: !ref <data_folder>
openrir_folder: !ref <rir_folder>
openrir_max_noise_len: 10
noise_snr_low: 0
noise_snr_high: 15
Expand Down
1 change: 0 additions & 1 deletion tests/.run-HF-checks.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,2 @@
#!/bin/bash
scp -r tests HF_repos
python -c 'from speechbrain.utils.check_HF_repo import run_HF_check; print("TEST FAILED!") if not(run_HF_check()) else print("TEST PASSED!")'
2 changes: 2 additions & 0 deletions tests/.run-load-yaml-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
pip install pesq
pip install pystoi
pip install librosa
pip install tensorboard
pip install transformers
python -c 'from speechbrain.utils.recipe_tests import load_yaml_test; print("TEST FAILED!") if not(load_yaml_test()) else print("TEST PASSED")'
Loading