From dc3f7f118197d4f7fc4a4293fdb91159d465a678 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Fri, 7 Oct 2022 22:58:44 -0400 Subject: [PATCH 1/9] post-publish update --- _quarto.yml | 2 -- index.Rmd | 4 +++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/_quarto.yml b/_quarto.yml index 5125e3e..52cdf16 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -1,5 +1,3 @@ project: title: "mfa-tutorial" - - diff --git a/index.Rmd b/index.Rmd index 372e662..a50d852 100644 --- a/index.Rmd +++ b/index.Rmd @@ -12,7 +12,9 @@ author: format: html: default license: "CC-BY-SA 4.0" -citation: true +citation: + container-title: "Linguistics Methods Hub" + collection-title: "Linguistics Methods Hub" --- ## Overview From c891ea5aa7e6088ba6c58f94535b12b5edb108f5 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Fri, 7 Oct 2022 23:13:21 -0400 Subject: [PATCH 2/9] reformat author to get url linking working --- index.Rmd | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/index.Rmd b/index.Rmd index a50d852..92da018 100644 --- a/index.Rmd +++ b/index.Rmd @@ -2,13 +2,13 @@ title: "Montreal Forced Aligner" date: "2022-10-5" author: - name: "Eleanor Chodroff" - url: "https://www.eleanorchodroff.com/" - #orcid: - #email: - #affiliations: - #name: - #department: + - name: "Eleanor Chodroff" + url: "https://www.eleanorchodroff.com/" + #orcid: + #email: + #affiliations: + #name: + #department: format: html: default license: "CC-BY-SA 4.0" From 8a782ed23638db122f79b2b876110b0e61b4733e Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Fri, 7 Oct 2022 23:45:53 -0400 Subject: [PATCH 3/9] doi update --- _metadata.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 _metadata.yml diff --git a/_metadata.yml b/_metadata.yml new file mode 100644 index 0000000..8c8dec3 --- /dev/null +++ b/_metadata.yml @@ -0,0 +1,4 @@ +author: + - name: "Eleanor Chodroff" + url: "https://www.eleanorchodroff.com/" +doi: "10.5281/zenodo.7161015" \ No newline at end of file From 60cb4d7be0cd4f25651d1f2d34776c7230cbc20e Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Sat, 8 Oct 2022 00:16:11 -0400 Subject: [PATCH 4/9] cff file --- CITATION.cff | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 CITATION.cff diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 0000000..6f29a08 --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,19 @@ +cff-version: 1.2.0 +title: Montreal Forced Aligner +message: >- + If you use this tutorial, please cite it using the + metadata from this file. +type: website +authors: + - given-names: Eleanor + family-names: Chodroff +identifiers: + - type: doi + value: 10.5281/zenodo.7161015 + - type: url + value: >- + https://lingmethodshub.github.io/content/tools/mfa/mfa-tutorial +license: CC-BY-SA-4.0 +date-released: '2022-10-07' +year: '2022' +collection-title: Linguistics Methods Hub From c7f65ba12c2d88a173df465af2d34f28fd6bd0ec Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Sat, 8 Oct 2022 00:18:58 -0400 Subject: [PATCH 5/9] doi update --- _metadata.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_metadata.yml b/_metadata.yml index 8c8dec3..b69a29a 100644 --- a/_metadata.yml +++ b/_metadata.yml @@ -1,4 +1,4 @@ author: - name: "Eleanor Chodroff" url: "https://www.eleanorchodroff.com/" -doi: "10.5281/zenodo.7161015" \ No newline at end of file +doi: "10.5281/zenodo.7161317" \ No newline at end of file From 18a94815a1c5d5f6ff0afea05cf32253cd161684 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Tue, 31 Jan 2023 15:36:28 -0500 Subject: [PATCH 6/9] r to bash highlighting --- index.Rmd | 215 +++++++++++++++++++++++---------------------- mfa-tutorial.Rproj | 13 +++ 2 files changed, 124 insertions(+), 104 deletions(-) create mode 100644 mfa-tutorial.Rproj diff --git a/index.Rmd b/index.Rmd index 394a979..15a24fc 100644 --- a/index.Rmd +++ b/index.Rmd @@ -19,37 +19,37 @@ citation: ## Overview -The [Montreal Forced Aligner](https://montreal-forced-aligner.readthedocs.io/en/latest/index.html){target="_blank"} is a forced alignment system with acoustic models built using the [Kaldi ASR toolkit](https://eleanorchodroff.com/tutorial/kaldi/introduction.html){target="_blank"}. A major highlight of this system is the availability of pretrained acoustic models and grapheme-to-phoneme models for a wide variety of languages, as well as the ability to train acoustic and grapheme-to-phoneme models to any new dataset you might have. It also uses advanced techniques for training and aligning speech data (Kaldi) with a full suite of training and speaker adaptation algorithms. The basic acoustic model recipe uses the traditional GMM-HMM framework, starting with monophone models, then triphone models (which allow for context sensitivity, read: coarticulation), and some transformations and speaker adaptation along the way. You can check out more regarding the recipes in the [MFA user guide and reference guide](https://montreal-forced-aligner.readthedocs.io/en/latest/); for an overview to a standard training recipe, check out this [Kaldi tutorial](https://www.eleanorchodroff.com/tutorial/kaldi/training-overview.html). +The [Montreal Forced Aligner](https://montreal-forced-aligner.readthedocs.io/en/latest/index.html){target="_blank"} is a forced alignment system with acoustic models built using the [Kaldi ASR toolkit](https://eleanorchodroff.com/tutorial/kaldi/introduction.html){target="_blank"}. A major highlight of this system is the availability of pretrained acoustic models and grapheme-to-phoneme models for a wide variety of languages, as well as the ability to train acoustic and grapheme-to-phoneme models to any new dataset you might have. It also uses advanced techniques for training and aligning speech data (Kaldi) with a full suite of training and speaker adaptation algorithms. The basic acoustic model recipe uses the traditional GMM-HMM framework, starting with monophone models, then triphone models (which allow for context sensitivity, read: coarticulation), and some transformations and speaker adaptation along the way. You can check out more regarding the recipes in the [MFA user guide and reference guide](https://montreal-forced-aligner.readthedocs.io/en/latest/); for an overview to a standard training recipe, check out this [Kaldi tutorial](https://www.eleanorchodroff.com/tutorial/kaldi/training-overview.html). -As a forced alignment system, the Montreal Forced Aligner will time-align a transcript to a corresponding audio file at the phone and word levels provided there exist a set of pretrained acoustic models and a pronunciation dictionary (a.k.a. lexicon) of the words in the transcript with their canonical phonetic pronunciation(s). +As a forced alignment system, the Montreal Forced Aligner will time-align a transcript to a corresponding audio file at the phone and word levels provided there exist a set of pretrained acoustic models and a pronunciation dictionary (a.k.a. lexicon) of the words in the transcript with their canonical phonetic pronunciation(s). -The current MFA download contains a suite of ``mfa`` commands that allow you to do everything from basic forced alignment to grapheme-to-phoneme conversion to automatic speech recognition. In this tutorial, we'll be focusing mostly on those commands relating to forced alignment and a side of grapheme-to-phoneme conversion. +The current MFA download contains a suite of `mfa` commands that allow you to do everything from basic forced alignment to grapheme-to-phoneme conversion to automatic speech recognition. In this tutorial, we'll be focusing mostly on those commands relating to forced alignment and a side of grapheme-to-phoneme conversion. -Very generally, the procedure for forced alignment is as follows: +Very generally, the procedure for forced alignment is as follows: -* Prep audio file(s) -* Prep transcript(s) (Praat `TextGrids` or `.lab`/`.txt` files) -* Obtain a pronunciation dictionary -* Obtain an acoustic model -* Create an input folder that contains the audio files and transcripts -* Create an empty output folder -* Run the ``mfa align`` command +- Prep audio file(s) +- Prep transcript(s) (Praat `TextGrids` or `.lab`/`.txt` files) +- Obtain a pronunciation dictionary +- Obtain an acoustic model +- Create an input folder that contains the audio files and transcripts +- Create an empty output folder +- Run the `mfa align` command ## Installation -You can find the streamlined installation instructions on the [main MFA installation page](https://montreal-forced-aligner.readthedocs.io/en/latest/installation.html). The majority of users will be following the "All platforms" instructions. +You can find the streamlined installation instructions on the [main MFA installation page](https://montreal-forced-aligner.readthedocs.io/en/latest/installation.html). The majority of users will be following the "All platforms" instructions. -As listed there, you will need to download Miniconda or Anaconda. If you're trying to decide between the two, I'd probably recommend Miniconda. Compared to Anaconda, Miniconda is smaller, and has a slightly higher success rate for installation. Conda is a package and environment management system. If you're familiar with R, the package management system is similar in concept to the R CRAN server. Once installed, you can import sets of packages via the ``conda`` commands, similar to how you might run ``install.packages`` in R. The environment manager aspect of conda is fairly unique, and essentially creates a small environment on your computer for the collection of imported packages (here the MFA suite) to run. The primary advantage is that you don't have to worry about conflicting versions of the same code package on your computer; the environment will be a bubble in your computer with the correct package versions for the MFA suite. +As listed there, you will need to download Miniconda or Anaconda. If you're trying to decide between the two, I'd probably recommend Miniconda. Compared to Anaconda, Miniconda is smaller, and has a slightly higher success rate for installation. Conda is a package and environment management system. If you're familiar with R, the package management system is similar in concept to the R CRAN server. Once installed, you can import sets of packages via the `conda` commands, similar to how you might run `install.packages` in R. The environment manager aspect of conda is fairly unique, and essentially creates a small environment on your computer for the collection of imported packages (here the MFA suite) to run. The primary advantage is that you don't have to worry about conflicting versions of the same code package on your computer; the environment will be a bubble in your computer with the correct package versions for the MFA suite. -Once you've installed Miniconda/Anaconda, you'll run the following line of code in the command line. For Mac users, the command line will be the terminal or a downloaded equivalent. For Windows users, *I believe* you will run this directly in the Miniconda console. +Once you've installed Miniconda/Anaconda, you'll run the following line of code in the command line. For Mac users, the command line will be the terminal or a downloaded equivalent. For Windows users, *I believe* you will run this directly in the Miniconda console. -```{r eval = F} +```{bash eval = F} conda create -n aligner -c conda-forge montreal-forced-aligner ``` -The `-n` flag here refers to the *name* of the environment. If you want to test a new version of the aligner, but aren't ready to overwrite your old working version of the aligner, you can create a new environment using a different name after the `-n` flag. See \@ref(Tips and tricks) for more on conda environments. -If it's worked, you should see something like: -```{r eval = F} +The `-n` flag here refers to the *name* of the environment. If you want to test a new version of the aligner, but aren't ready to overwrite your old working version of the aligner, you can create a new environment using a different name after the `-n` flag. See \@ref(Tips and tricks) for more on conda environments. If it's worked, you should see something like: + +```{bash eval = F} # To activate this environment, use # # $ conda activate aligner @@ -61,86 +61,89 @@ If it's worked, you should see something like: And with that, the final step is **activating** the aligner. You will need to re-activate the aligner every time you open a new shell/command line window. You should be able to see which environment you have open in the parentheses before each shell prompt. For example, mine looks like: -```{r eval = F} +```{bash eval = F} (base) Eleanors-iPro:Documents eleanorchodroff$ conda activate aligner (aligner) Eleanors-iPro:Documents eleanorchodroff$ ``` You can run the aligner from any location on the computer as long as you're in the aligner environment on the command line. -**NB:** The Montreal Forced Aligner is now located in the ``Documents/MFA`` folder. All acoustic models, dictionaries, temporary alignment or training files, etc. will be stored in this folder. If you are ever confused or curious about what is happening with the aligner, just poke around this folder. +**NB:** The Montreal Forced Aligner is now located in the `Documents/MFA` folder. All acoustic models, dictionaries, temporary alignment or training files, etc. will be stored in this folder. If you are ever confused or curious about what is happening with the aligner, just poke around this folder. ## Running the aligner To run the aligner, we'll need to follow this procedure: -* Prep audio file(s) -* Prep transcript(s) (Praat `TextGrids` or `.lab`/`.txt` files) -* Obtain a pronunciation dictionary -* Obtain an acoustic model -* Create an input folder that contains the audio files and transcripts -* Create an empty output folder -* Run the align command +- Prep audio file(s) +- Prep transcript(s) (Praat `TextGrids` or `.lab`/`.txt` files) +- Obtain a pronunciation dictionary +- Obtain an acoustic model +- Create an input folder that contains the audio files and transcripts +- Create an empty output folder +- Run the align command -In this section, I will give a quick example of how this works using a hypothetical example of aligning American English. Through this, you'll see the primary ``mfa`` commands that you'll need for alignment. Then in the following sections, I'll go into more detail about each of these steps. +In this section, I will give a quick example of how this works using a hypothetical example of aligning American English. Through this, you'll see the primary `mfa` commands that you'll need for alignment. Then in the following sections, I'll go into more detail about each of these steps. -For the first steps of prepping the audio files and prepping the transcripts, I'll assume we're working with a `wav` file and a Praat `TextGrid` that has a single interval tier with utterances transcribed in English. We can have multiple intervals per `TextGrid`. The `wav` file and `TextGrid` must have the same name. +For the first steps of prepping the audio files and prepping the transcripts, I'll assume we're working with a `wav` file and a Praat `TextGrid` that has a single interval tier with utterances transcribed in English. We can have multiple intervals per `TextGrid`. The `wav` file and `TextGrid` must have the same name. -After we have the audio files and transcript `TextGrids`, we'll place them in an input folder. For the sake of this tutorial, this input folder will be called ``Documents/input_english/``. +After we have the audio files and transcript `TextGrids`, we'll place them in an input folder. For the sake of this tutorial, this input folder will be called `Documents/input_english/`. -We will also need to create an empty output folder. I will call this output folder ``Documents/output_english``. +We will also need to create an empty output folder. I will call this output folder `Documents/output_english`. We can then obtain the acoustic model from the internet (yes, you'll have to be online the first time you do this) using the following command: -```{r eval = F} +```{bash eval = F} mfa model download acoustic english_us_arpa ``` -(You can also use ``mfa models download acoustic english_us_arpa``.) +(You can also use `mfa models download acoustic english_us_arpa`.) And we can obtain the dictionary from the internet using this command: -```{r eval = F} +```{bash eval = F} mfa model download dictionary english_us_arpa ``` -(You can also use ``mfa models download dictionary english_us_arpa``.) +(You can also use `mfa models download dictionary english_us_arpa`.) -The dictionary and acoustic model can now be found in their respective ``Documents/MFA/pretrained_models/`` folders. +The dictionary and acoustic model can now be found in their respective `Documents/MFA/pretrained_models/` folders. Once we have the dictionary, acoustic model, `wav` files and `TextGrids` in their input folder, and an empty output folder, then we're ready to run the aligner! -The alignment command is called ``align`` and takes 4 arguments: +The alignment command is called `align` and takes 4 arguments: -+ path to the input folder -+ name of the dictionary (in the ``Documents/MFA/pretrained_models/dictionary/`` folder) / path to the acoustic model if it's elsewhere on the computer -+ name of the acoustic model (in the ``Documents/MFA/pretrained_models/acoustic/`` folder) / path to the acoustic model if it's elsewhere on the computer -+ path to the output folder +- path to the input folder +- name of the dictionary (in the `Documents/MFA/pretrained_models/dictionary/` folder) / path to the acoustic model if it's elsewhere on the computer +- name of the acoustic model (in the `Documents/MFA/pretrained_models/acoustic/` folder) / path to the acoustic model if it's elsewhere on the computer +- path to the output folder -I always add the optional ``--clean`` flag as this "cleans" out any old temporary files created in a previous run. (If you're anything like me, you might find yourself re-running the aligner a few times, and with the same input filename. The aligner won't actually re-run properly unless you clear out the old files.) +I always add the optional `--clean` flag as this "cleans" out any old temporary files created in a previous run. (If you're anything like me, you might find yourself re-running the aligner a few times, and with the same input filename. The aligner won't actually re-run properly unless you clear out the old files.) -**Important:** You will need to scroll across to see the whole line of code. +**Important:** You will need to scroll across to see the whole line of code. -```{r eval = F} +```{bash eval = F} mfa align --clean /Users/Eleanor/Documents/input_english/ english_us_arpa english_us_arpa /Users/Eleanor/Documents/output_english/ ``` -And if everything worked appropriately, your aligned `TextGrids` should now be in the ``Documents/output_english/`` folder! +And if everything worked appropriately, your aligned `TextGrids` should now be in the `Documents/output_english/` folder! ## File preparation ### Audio files -The Montreal Forced Aligner is incredibly robust to audio files of differing formats, sampling rates and channels. You should not have to do much prep, but note that whatever you feed the system will be converted to be a `wav` file with a sampling rate of 16 kHz with a single (mono) channel unless otherwise specified (see [Feature Configuration](https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/configuration/global.html##feature-config). For the record, I have not yet tried any other file format except for `wav` files, so I'm not yet aware of potential issues that might arise there. + +The Montreal Forced Aligner is incredibly robust to audio files of differing formats, sampling rates and channels. You should not have to do much prep, but note that whatever you feed the system will be converted to be a `wav` file with a sampling rate of 16 kHz with a single (mono) channel unless otherwise specified (see [Feature Configuration](https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/configuration/global.html##feature-config). For the record, I have not yet tried any other file format except for `wav` files, so I'm not yet aware of potential issues that might arise there. ### Transcripts -The MFA can take as input either a Praat `TextGrid` or a `.lab` or `.txt` file. I have worked most extensively with the `TextGrid` input, so I'll describe those details here. As for `.lab` and `.txt` input, I believe this method only works when the transcript is pasted in as a single line. In other words, I don't think it can handle time stamps for utterance start and end times. + +The MFA can take as input either a Praat `TextGrid` or a `.lab` or `.txt` file. I have worked most extensively with the `TextGrid` input, so I'll describe those details here. As for `.lab` and `.txt` input, I believe this method only works when the transcript is pasted in as a single line. In other words, I don't think it can handle time stamps for utterance start and end times. ### Filenames -The filename of the `wav` file and its corresponding transcript must be identical except for the extension (`.wav` or `.TextGrid`). If you have multiple speakers in the alignment or training, you can actually implement some degree of speaker/channel adaptation. You can do this either by placing speaker-specific files in its own subfolder within the input folder, or by adding an optional argument to the ``mfa align`` command to use the first ``n`` characters of the filename. + +The filename of the `wav` file and its corresponding transcript must be identical except for the extension (`.wav` or `.TextGrid`). If you have multiple speakers in the alignment or training, you can actually implement some degree of speaker/channel adaptation. You can do this either by placing speaker-specific files in its own subfolder within the input folder, or by adding an optional argument to the `mfa align` command to use the first `n` characters of the filename. If you go forward with this, it helps to have the speaker ID as the prefix to the filenamex. For example: -```{r eval = F} +```{bash eval = F} spkr01_utt1.wav, spkr01_utt1.TextGrid spkr01_utt2.wav, spkr01_utt2.TextGrid spkr02_utt1.wav, spkr02_utt1.TextGrid @@ -153,48 +156,51 @@ spkr04_utt2.wav, spkr04_utt2.TextGrid In this case, we can then tell the aligner to use the first 6 characters as the speaker information. The `-s` flag stands for speaker characters. -```{r eval = F} +```{bash eval = F} mfa align -s 6 --clean /Users/Eleanor/input_english/ english_us_arpa english_us_arpa /Users/Eleanor/output_english/ ``` Alternatively: -```{r eval = F} + +```{bash eval = F} mfa align --speaker_characters 6 --clean /Users/Eleanor/input_english/ english_us_arpa english_us_arpa /Users/Eleanor/output_english/ ``` ### Input and output folders -I would recommend creating a special input folder that houses a copy of your audio files and `TextGrid` transcripts. In case something goes wrong, you won't be messing up the raw data. You can place this folder basically anywhere on your computer, and you can call this whatever you want. +I would recommend creating a special input folder that houses a copy of your audio files and `TextGrid` transcripts. In case something goes wrong, you won't be messing up the raw data. You can place this folder basically anywhere on your computer, and you can call this whatever you want. You will also need to create an empty output folder. I recommend making sure this is empty each time you run the aligner as the aligner does not overwrite any existing files. You can place this folder basically anywhere on your computer, and you can call this whatever you want; however, you may not re-use the input folder as the output folder. ## Obtaining acoustic models ### Download an acoustic model -Pretrained acoustic models for several languages can be downloaded directly using the command line interface. This is the [master list](https://mfa-models.readthedocs.io/en/latest/acoustic/index.html) of acoustic models available for download. + +Pretrained acoustic models for several languages can be downloaded directly using the command line interface. This is the [master list](https://mfa-models.readthedocs.io/en/latest/acoustic/index.html) of acoustic models available for download. ### Train an acoustic model -You can also train an acoustic model yourself directly on the data you're working on. You do need a fair amount of data to get reasonable alignments out. If an existing acoustic model already exists, it might be worth simply using that directly on your data or you could try [adapting](https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/workflows/adapt_acoustic_model.html) the model to your data. There are many cases, however, when training a new acoustic model is the best path forward. -The ``mfa train`` command takes 3 arguments: +You can also train an acoustic model yourself directly on the data you're working on. You do need a fair amount of data to get reasonable alignments out. If an existing acoustic model already exists, it might be worth simply using that directly on your data or you could try [adapting](https://montreal-forced-aligner.readthedocs.io/en/latest/user_guide/workflows/adapt_acoustic_model.html) the model to your data. There are many cases, however, when training a new acoustic model is the best path forward. -+ path to the input folder with the audio files and utterance-level transcripts (`TextGrids`) -+ name of the pronunciation dictionary in the ``pretrained_models`` folder or path to the pronunciation dictionary -+ path to the output folder (it still runs an alignment at the end) +The `mfa train` command takes 3 arguments: -In other words, ``mfa train`` has the same syntax and requirements as the ``mfa align`` command, but without the acoustic model specification. +- path to the input folder with the audio files and utterance-level transcripts (`TextGrids`) +- name of the pronunciation dictionary in the `pretrained_models` folder or path to the pronunciation dictionary +- path to the output folder (it still runs an alignment at the end) -```{r eval = F} +In other words, `mfa train` has the same syntax and requirements as the `mfa align` command, but without the acoustic model specification. + +```{bash eval = F} mfa train --clean ~/Documents/input_spanish ~/Documents/talnupf_spanish.txt ~/Documents/output_spanish_textgrids ``` -Once again, I'm using the ``--clean`` flag just in case I need to clean out an old folder in ``Documents/MFA``. +Once again, I'm using the `--clean` flag just in case I need to clean out an old folder in `Documents/MFA`. -If you would like to reuse the acoustic model, you can find all of the trained models (there are several -- one for each layer of training) in the associated folder in ``Documents/MFA/`` and any folder ending in ``_ali``. I would recommend using the acoustic model from the last training and alignment pass, which is the ``sat2_ali`` folder. ``sat2_ali`` stands for the second round of Speaker Adaptive Training --- alignment pass. +If you would like to reuse the acoustic model, you can find all of the trained models (there are several -- one for each layer of training) in the associated folder in `Documents/MFA/` and any folder ending in `_ali`. I would recommend using the acoustic model from the last training and alignment pass, which is the `sat2_ali` folder. `sat2_ali` stands for the second round of Speaker Adaptive Training --- alignment pass. -The ``mfa model save acoustic`` command takes one required argument, which is the path to the model you want to save, and an optional (but highly recommended) argument which is the new name of the acoustic model. The optional argument is specified after the ``--name`` flag: +The `mfa model save acoustic` command takes one required argument, which is the path to the model you want to save, and an optional (but highly recommended) argument which is the new name of the acoustic model. The optional argument is specified after the `--name` flag: -```{r eval = F} +```{bash eval = F} mfa model save acoustic --name guarani_cv ~/Documents/MFA/mfainput4guarani/sat3_ali/acoustic_model.zip ``` @@ -208,9 +214,9 @@ There are a few options for obtaining a pronunciation dictionary: ### Download a dictionary -This is the [master list](https://mfa-models.readthedocs.io/en/latest/dictionary/index.html) of pre-existing pronunciation dictionaries available through the MFA. Click on the dictionary of interest, and if you scroll to the bottom of the page, it will tell you the name to type in the ``mfa model download dictionary`` command. +This is the [master list](https://mfa-models.readthedocs.io/en/latest/dictionary/index.html) of pre-existing pronunciation dictionaries available through the MFA. Click on the dictionary of interest, and if you scroll to the bottom of the page, it will tell you the name to type in the `mfa model download dictionary` command. -```{r eval = F} +```{bash eval = F} mfa model download dictionary spanish_latin_america_mfa ``` @@ -220,101 +226,102 @@ NB: you must add any missing words in your corpus manually, or train a G2P model Grapheme-to-phoneme (G2P) models automatically convert the orthographic words in your corpus to the most likely phonetic pronunciation. How exactly it does this depends a lot on the type of model and its training data. -The MFA has a handful of pretrained G2P models that you can download. This is the [master list](https://mfa-models.readthedocs.io/en/latest/g2p/index.html) of G2P models available for download. +The MFA has a handful of pretrained G2P models that you can download. This is the [master list](https://mfa-models.readthedocs.io/en/latest/g2p/index.html) of G2P models available for download. -```{r eval = F} +```{bash eval = F} mfa model download g2p bulgarian_mfa ``` -You'll then use the ``mfa g2p`` command to generate the phonetic transcriptions from the submitted orthographic forms and the g2p model. The ``mfa g2p`` command takes 3 arguments: +You'll then use the `mfa g2p` command to generate the phonetic transcriptions from the submitted orthographic forms and the g2p model. The `mfa g2p` command takes 3 arguments: -+ name of g2p model (in ``Documents/MFA/pretrained_models/g2p/``) -+ path to the `TextGrids` or transcripts in your corpus -+ path to where the new pronunciation dictionary should go +- name of g2p model (in `Documents/MFA/pretrained_models/g2p/`) +- path to the `TextGrids` or transcripts in your corpus +- path to where the new pronunciation dictionary should go -```{r eval = F} +```{bash eval = F} mfa g2p --clean bulgarian_mfa ~/Desktop/romanian/TextGrids_with_new_words/ ~/Desktop/new_bulgarian_dictionary.txt ``` -You can also use an external resource like [Epitran](https://github.com/dmort27/epitran) or [XPF](https://cohenpr-xpf.github.io/XPF/Convert-to-IPA.html) to generate a dictionary for you. These are both rule-based G2P systems built by linguists; these systems work to varying degrees of success. +You can also use an external resource like [Epitran](https://github.com/dmort27/epitran) or [XPF](https://cohenpr-xpf.github.io/XPF/Convert-to-IPA.html) to generate a dictionary for you. These are both rule-based G2P systems built by linguists; these systems work to varying degrees of success. In all cases, you're best off checking the output. Remember that the phone set in the dictionary must entirely match the phone set in the acoustic model. -### Train a G2P model +### Train a G2P model -You can also train a G2P model on an existing pronunciation dictionary. Once you've trained the G2P model, you'll need to jump back up to the generate dictionary instructions just above. This might be useful in cases when you have many unlisted orthographic forms that are still in need of a phonetic transcription. +You can also train a G2P model on an existing pronunciation dictionary. Once you've trained the G2P model, you'll need to jump back up to the generate dictionary instructions just above. This might be useful in cases when you have many unlisted orthographic forms that are still in need of a phonetic transcription. -The ``mfa train_g2p`` command has 2 arguments: +The `mfa train_g2p` command has 2 arguments: -+ path to the training data pronunciation lexicon -+ path to where the trained G2P model should go +- path to the training data pronunciation lexicon +- path to where the trained G2P model should go -```{r eval = F} +```{bash eval = F} mfa train_g2p ~/Desktop/romanian/romanian_lexicon.txt ~/Desktop/romanian/romanian_g2p ``` -### Create the dictionary by hand +### Create the dictionary by hand This one is self-explanatory, but once again, make sure to use the same phone set as the acoustic models. ## Tips and tricks -+ Add ``-h`` after any command to get a help screen that will also display its argument structure. For example: +- Add `-h` after any command to get a help screen that will also display its argument structure. For example: -```{r eval = F} +```{bash eval = F} mfa align -h mfa train -h ``` -+ Use the `--clean` flag each time you run the ``align`` command -+ Don't forget to activate the aligner -+ Make sure the output folder is empty each time you run the ``align`` command -+ The input and output folders must be different folders -+ Many users have trouble reading/writing files to the Desktop folder. If you're having issues using the Desktop, just switch to a different folder like Documents, or Google how to change the read/write permissions on your Desktop folder -+ And a little more on conda: +- Use the `--clean` flag each time you run the `align` command +- Don't forget to activate the aligner +- Make sure the output folder is empty each time you run the `align` command +- The input and output folders must be different folders +- Many users have trouble reading/writing files to the Desktop folder. If you're having issues using the Desktop, just switch to a different folder like Documents, or Google how to change the read/write permissions on your Desktop folder +- And a little more on conda: If you have created multiple environments, you can list all of your environments using the following command: -```{r eval = F} +```{bash eval = F} conda info --envs ``` -You can delete an environment with the following command, where ENV_NAME is the name of the environment, like ``aligner``. Make sure that you have deactivated the environment before deleting it. +You can delete an environment with the following command, where ENV_NAME is the name of the environment, like `aligner`. Make sure that you have deactivated the environment before deleting it. -```{r eval = F} +```{bash eval = F} conda deactivate conda env remove -n ENV_NAME ``` -+ You can inspect the details of any local acoustic model or dictionary using the ``mfa model inspect`` commands. One of the important outputs of this is the assumed phone set: +- You can inspect the details of any local acoustic model or dictionary using the `mfa model inspect` commands. One of the important outputs of this is the assumed phone set: -```{r eval = F} +```{bash eval = F} mfa model inspect acoustic english_us_arpa mfa model inspect dictionary english_us_arpa ``` -+ You can get a list of the local acoustic models and dictionaries in your ``Documents/MFA/pretrained_models/`` folders using the ``mfa model list`` commands: +- You can get a list of the local acoustic models and dictionaries in your `Documents/MFA/pretrained_models/` folders using the `mfa model list` commands: -```{r eval = F} +```{bash eval = F} mfa model list acoustic mfa model list dictionary ``` -+ You can check for problems with your corpus using the ``validate`` command. Such problems might include words in your TextGrids that are not listed in your dictionary, phones in your dictionary that do not have corresponding acoustic models, as well as general problems with your sound file (like a missing waveform -- this has happened to me when an audio file got corrupted during download). Note that checking for audio file problems takes a lot longer, so I frequently add the optional ``--ignore_acoustics`` flag. +- You can check for problems with your corpus using the `validate` command. Such problems might include words in your TextGrids that are not listed in your dictionary, phones in your dictionary that do not have corresponding acoustic models, as well as general problems with your sound file (like a missing waveform -- this has happened to me when an audio file got corrupted during download). Note that checking for audio file problems takes a lot longer, so I frequently add the optional `--ignore_acoustics` flag. The three arguments required by the validate command are: -+ path to the corpus -+ name of the dictionary -+ name of the acoustic model +- path to the corpus +- name of the dictionary +- name of the acoustic model -If you don't yet have an acoustic model (i.e., you're running the validate command on the data you're using to train an acoustic model), just put in any acoustic model as a placeholder and ignore any corresponding warnings about phones in your dictionary that are not in the acoustic model. +If you don't yet have an acoustic model (i.e., you're running the validate command on the data you're using to train an acoustic model), just put in any acoustic model as a placeholder and ignore any corresponding warnings about phones in your dictionary that are not in the acoustic model. -```{r eval = F} +```{bash eval = F} mfa validate ~/Documents/myCorpus/mfainput4english/ english_us_arpa english_us_arpa mfa validate ~/Documents/myCorpus/mfainput4english/ english_us_arpa english_us_arpa --ignore_acoustics ``` -words that are not in your dictionary and problems with your sounds -+ And finally, there is even more to the MFA than just what I've put here! Definitely poke around the MFA user guide to learn more and find special functions that might make your workflow even easier. +words that are not in your dictionary and problems with your sounds + +- And finally, there is even more to the MFA than just what I've put here! Definitely poke around the MFA user guide to learn more and find special functions that might make your workflow even easier. diff --git a/mfa-tutorial.Rproj b/mfa-tutorial.Rproj new file mode 100644 index 0000000..d063e8b --- /dev/null +++ b/mfa-tutorial.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: pdfLaTeX From 77447871c41f9e1f2d9e1ff3c327a00e78c248cd Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Tue, 31 Jan 2023 15:41:53 -0500 Subject: [PATCH 7/9] citation info --- _metadata.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/_metadata.yml b/_metadata.yml index b69a29a..edc30b9 100644 --- a/_metadata.yml +++ b/_metadata.yml @@ -1,4 +1,8 @@ author: - name: "Eleanor Chodroff" url: "https://www.eleanorchodroff.com/" -doi: "10.5281/zenodo.7161317" \ No newline at end of file +doi: "10.5281/zenodo.7161317" + +citation: + collection-title: "Linguistics Methods Hub" + container-title: "Linguistics Methods Hub" \ No newline at end of file From 4b38e51e5b90f58d128ef0f251ec748d712f6fda Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Tue, 31 Jan 2023 15:44:30 -0500 Subject: [PATCH 8/9] spellcheck --- index.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.Rmd b/index.Rmd index 15a24fc..1d9e0a6 100644 --- a/index.Rmd +++ b/index.Rmd @@ -141,7 +141,7 @@ The MFA can take as input either a Praat `TextGrid` or a `.lab` or `.txt` file. The filename of the `wav` file and its corresponding transcript must be identical except for the extension (`.wav` or `.TextGrid`). If you have multiple speakers in the alignment or training, you can actually implement some degree of speaker/channel adaptation. You can do this either by placing speaker-specific files in its own subfolder within the input folder, or by adding an optional argument to the `mfa align` command to use the first `n` characters of the filename. -If you go forward with this, it helps to have the speaker ID as the prefix to the filenamex. For example: +If you go forward with this, it helps to have the speaker ID as the prefix to the filenames. For example: ```{bash eval = F} spkr01_utt1.wav, spkr01_utt1.TextGrid From 00bef5c3008493f772d0b0e809baca99ca9557f4 Mon Sep 17 00:00:00 2001 From: JoFrhwld Date: Tue, 31 Jan 2023 16:00:00 -0500 Subject: [PATCH 9/9] doi update --- _metadata.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_metadata.yml b/_metadata.yml index edc30b9..6fbdf5f 100644 --- a/_metadata.yml +++ b/_metadata.yml @@ -1,7 +1,7 @@ author: - name: "Eleanor Chodroff" url: "https://www.eleanorchodroff.com/" -doi: "10.5281/zenodo.7161317" +doi: "10.5281/zenodo.7591607" citation: collection-title: "Linguistics Methods Hub"