Spaces:

clr
/

phonalign

App Files Community

cati committed on Feb 7, 2023

Commit

90f256f

•

1 Parent(s): 69d94dc

..

Files changed (3) hide show

.#ctcalign.py +1 -0
app.py +8 -1
ctcalign.py +12 -1

.#ctcalign.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ [email protected]

app.py CHANGED Viewed

@@ -61,10 +61,17 @@ All phoneme durations are measured automatically with no human correction. The p
             gr.Markdown(
             """
             # Forced alignment demo with CTC decoding
-            Choose a language to record or upload a sentence with text. Generate word or letter time-alignments from the language's wav2vec-2.0 model, with output in MFA (Montreal Forced Aligner)-compatible format.
             """
                  )

             gr.Markdown(
             """
             # Forced alignment demo with CTC decoding
+            Choose a language to record or upload a sentence with corresponding text. Generate word and letter time-alignments from the language's wav2vec-2.0 model, with output in MFA (Montreal Forced Aligner)-compatible format.
             """
                  )
+            audio_file = gr.Audio(type="filepath")
+            transcript_boxx = gr.Textbox(label="Transcript",placeholder="Type or paste the transcript here. Capitalisation and punctuation, if any, will be ignored.")
+            alangmenu = gr.Radio(["Icelandic", "Faroese", "Norwegian"],value="Icelandic")
+            al_btn = gr.Button(value="Run forced alignment")
+            output_box = gr.Textbox(label="Forced alignment output")

ctcalign.py CHANGED Viewed

@@ -11,7 +11,7 @@ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 torch.random.manual_seed(0)
 # info: https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h/blob/main/vocab.json
-MODEL_PATH="/work/caitlinr/w2vrec/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
 model_blank_token = '[PAD]' # important to know for CTC decoding
 model_word_separator = '|'
 labels_dict = {"f": 0, "a": 1, "é": 2, "t": 3, "o": 4, "n": 5, "e": 6, "y": 8, "k": 9, "j": 10, "u": 11, "d": 12, "w": 13, "l": 14, "ú": 15, "q": 16, "g": 17, "í": 18, "s": 19, "r": 20, "ý": 21, "i": 22, "z": 23, "m": 24, "h": 25, "ó": 26, "þ": 27, "æ": 28, "c": 29, "á": 30, "v": 31, "b": 32, "ð": 33, "x": 34, "ö": 35, "p": 36, "|": 7, "[UNK]": 37, "[PAD]": 38}
@@ -25,6 +25,17 @@ blank_id = labels_dict[model_blank_token]
 #------------------------------------------
 # forced alignment with ctc decoder
 #   originally based on implementation of

 torch.random.manual_seed(0)
 # info: https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h/blob/main/vocab.json
+MODEL_PATH="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
 model_blank_token = '[PAD]' # important to know for CTC decoding
 model_word_separator = '|'
 labels_dict = {"f": 0, "a": 1, "é": 2, "t": 3, "o": 4, "n": 5, "e": 6, "y": 8, "k": 9, "j": 10, "u": 11, "d": 12, "w": 13, "l": 14, "ú": 15, "q": 16, "g": 17, "í": 18, "s": 19, "r": 20, "ý": 21, "i": 22, "z": 23, "m": 24, "h": 25, "ó": 26, "þ": 27, "æ": 28, "c": 29, "á": 30, "v": 31, "b": 32, "ð": 33, "x": 34, "ö": 35, "p": 36, "|": 7, "[UNK]": 37, "[PAD]": 38}
 #------------------------------------------
 # forced alignment with ctc decoder
 #   originally based on implementation of