nerdaxic-glados-tts/utils/tokenizer.py

18 lines
535 B
Python

from typing import Iterable, List, Optional

from utils.symbols import phonemes
class Tokenizer:
    """Convert text to a sequence of integer symbol ids and back again.

    Two lookup tables are built from an ordered symbol inventory: a symbol's
    position in the inventory is its id. Encoding and decoding are both
    lossy by design: anything missing from the tables is skipped silently
    instead of raising.
    """

    def __init__(self, symbols: Optional[Iterable[str]] = None) -> None:
        """Build the symbol <-> id lookup tables.

        Args:
            symbols: Ordered symbol inventory to index. When omitted, the
                ``phonemes`` inventory imported from ``utils.symbols`` is
                used, so ``Tokenizer()`` behaves exactly as before.
        """
        # Materialize once so that a one-shot iterable (e.g. a generator)
        # can still populate both tables.
        inventory = list(phonemes if symbols is None else symbols)
        # If the inventory contains a repeated symbol, the last position wins
        # here, while every position remains decodable below.
        self.symbol_to_id = {s: i for i, s in enumerate(inventory)}
        self.id_to_symbol = {i: s for i, s in enumerate(inventory)}

    def __call__(self, text: str) -> List[int]:
        """Encode ``text`` as a list of symbol ids.

        ``text`` is walked one element at a time (one character at a time
        for a ``str``); elements that are not in the inventory are dropped
        without error.

        Args:
            text: The string to encode.

        Returns:
            The ids of the recognized symbols, in input order.
        """
        return [self.symbol_to_id[t] for t in text if t in self.symbol_to_id]

    def decode(self, sequence: List[int]) -> str:
        """Decode a sequence of symbol ids back into a string.

        Ids that are not in the inventory are dropped without error.

        Args:
            sequence: The symbol ids to decode.

        Returns:
            The concatenation of the symbols for the recognized ids, in
            input order.
        """
        text = [self.id_to_symbol[s] for s in sequence if s in self.id_to_symbol]
        return ''.join(text)