18 lines
535 B
Python
18 lines
535 B
Python
from typing import Iterable, List, Optional

from utils.symbols import phonemes
|
|
|
|
|
|
class Tokenizer:
    """Translate text to integer ids and back through a symbol table.

    A symbol's id is its position in the symbol collection. Encoding walks
    the input one character at a time, so only single-character symbols can
    match while encoding.
    """

    def __init__(self, symbols: Optional[Iterable[str]] = None) -> None:
        """Build the forward and reverse lookup tables.

        Args:
            symbols: Symbols to index, in id order. When omitted, the
                ``phonemes`` collection imported from ``utils.symbols`` is
                used, which is the original behaviour.
        """
        vocab = phonemes if symbols is None else symbols
        # id -> symbol. The input is consumed in one pass, so a generator
        # is as acceptable as a list or a string.
        self.id_to_symbol = dict(enumerate(vocab))
        # symbol -> id. If a symbol occurs more than once the last position
        # wins, exactly as a dict comprehension over the input would do.
        self.symbol_to_id = {s: i for i, s in self.id_to_symbol.items()}

    def __call__(self, text: str) -> List[int]:
        """Encode ``text`` into a list of symbol ids.

        Characters that are not in the symbol table are skipped silently
        instead of raising, so the result may be shorter than ``text``.

        Args:
            text: The string to encode, processed character by character.

        Returns:
            The ids of the known characters, in their original order.
        """
        lookup = self.symbol_to_id
        return [lookup[ch] for ch in text if ch in lookup]

    def decode(self, sequence: List[int]) -> str:
        """Decode a sequence of ids back into a string.

        Ids that are not in the symbol table are skipped silently instead
        of raising.

        Args:
            sequence: The ids to decode.

        Returns:
            The symbols of the known ids, concatenated in order.
        """
        lookup = self.id_to_symbol
        return ''.join(lookup[idx] for idx in sequence if idx in lookup)
|