# glados-tts/glados_tts/utils/symbols.py
#
# NOTE: This file contains Unicode characters that might be confused with
# other, visually similar characters. Take care not to "normalize" them
# when editing.

""" from https://github.com/keithito/tacotron """
'''Defines the set of symbols used in text input to the model.
The default is a set of ASCII characters that works well for English
or text that has been run through Unidecode. For other data, you can
modify _characters. See TRAINING_DATA.md for details.
'''
# Symbol groups. Each group is a plain string whose individual characters are
# the symbols; the groups are flattened, in the order listed below, into
# `phonemes`.
# NOTE(review): consumers presumably rely on each symbol's position in
# `phonemes` — keep the order stable unless callers are checked.
_pad = "_"
_punctuation = "!'(),.:;? "
_special = "-"

# Phoneme symbols, grouped by category.
_vowels = "iyɨʉɯuɪʏʊeøɘəɵɤoɛœɜɞʌɔæɐaɶɑɒᵻ"
_non_pulmonic_consonants = "ʘɓǀɗǃʄǂɠǁʛ"
_pulmonic_consonants = "pbtdʈɖcɟkɡʔɴŋɲɳnɱmʙrʀⱱɾɽɸβfvθðszʃʒʂʐçʝxɣχʁħʕhɦɬɮʋɹɻjɰlɭʎʟ"
_suprasegmentals = "ˈˌːˑ"
_other_symbols = "ʍwɥʜʢʡɕʑɺɧ"
_diacrilics = "ɚ˞ɫ"

# Additional symbols encountered in Wiktionary IPA annotations. Kept as a list
# of one-character strings because several entries are combining marks.
_extra_phons = ["g", "ɝ", "̃", "̍", "̥", "̩", "̯", "͡"]

# Ordered list of every symbol: one list element per character of each group
# above, followed by the extra symbols.
phonemes = [
    *_pad,
    *_punctuation,
    *_special,
    *_vowels,
    *_non_pulmonic_consonants,
    *_pulmonic_consonants,
    *_suprasegmentals,
    *_other_symbols,
    *_diacrilics,
    *_extra_phons,
]

# Set view of the same symbols, for membership tests.
phonemes_set = {*phonemes}