ls_mlkit.model.decoder_tf.tokenizer module

class ls_mlkit.model.decoder_tf.tokenizer.Tokenizer

Bases: object

add_special_tokens(special_token_list: List[str])
build_vocab(text_list: list, max_vocab_size=10000, min_freq=1)
convert_id_to_token(id_list)
convert_token_to_id(token_list)
get_vocab_size()
load_state_dict(save_directory='model_pretrained/gpt2')
save_state_dict(save_directory='model_pretrained/gpt2')
set_eos_token(val: str = '[EOS]')
set_pad_token(val: str = '[PAD]')
tokenize(text)
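
A minimal end-to-end sketch of the class, assuming from the signatures that tokenize returns a list of token strings and that the convert_* methods map between token lists and id lists; none of this is documented beyond the signatures above:

    from ls_mlkit.model.decoder_tf.tokenizer import Tokenizer

    corpus = [
        "the quick brown fox jumps over the lazy dog",
        "the dog sleeps",
    ]

    tokenizer = Tokenizer()
    # Build the vocabulary from raw text; cap its size and drop rare tokens.
    tokenizer.build_vocab(corpus, max_vocab_size=10000, min_freq=1)
    # Register the special tokens, then designate the pad and EOS tokens.
    tokenizer.add_special_tokens(["[PAD]", "[EOS]"])
    tokenizer.set_pad_token("[PAD]")
    tokenizer.set_eos_token("[EOS]")

    tokens = tokenizer.tokenize("the quick brown fox")  # assumed: list of token strings
    ids = tokenizer.convert_token_to_id(tokens)          # assumed: list of vocabulary ids
    back = tokenizer.convert_id_to_token(ids)            # assumed: inverse of the line above
    print(tokenizer.get_vocab_size(), tokens, ids, back)

    # Persist the vocabulary and special-token state, then restore it.
    tokenizer.save_state_dict(save_directory="model_pretrained/gpt2")
    tokenizer.load_state_dict(save_directory="model_pretrained/gpt2")
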
ls_mlkit.model.decoder_tf.tokenizer.get_collate_fn(tokenizer: Tokenizer, max_len: int = 500, train=True)
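
get_collate_fn presumably returns a collate function that tokenizes, truncates to max_len, and pads a batch of raw texts. A sketch of how it would plug into a torch DataLoader, assuming the dataset yields plain strings; the toy dataset and the batch layout are illustrative, not documented:

    from torch.utils.data import DataLoader

    texts = ["the quick brown fox", "the dog sleeps"]  # toy dataset of raw strings
    collate_fn = get_collate_fn(tokenizer, max_len=500, train=True)
    loader = DataLoader(texts, batch_size=2, collate_fn=collate_fn)
    batch = next(iter(loader))  # assumed: padded id tensor(s) ready for the decoder
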
ls_mlkit.model.decoder_tf.tokenizer.get_masks(data: Tensor, tokenizer)
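
get_masks takes a batched id tensor and the tokenizer, presumably to derive attention masks (e.g. marking pad positions) for the decoder. A sketch under that assumption; the pad id of 0 is illustrative:

    import torch

    # Two right-padded id sequences; 0 stands in for the tokenizer's pad id.
    data = torch.tensor([[5, 9, 2, 0, 0],
                         [7, 3, 0, 0, 0]])
    masks = get_masks(data, tokenizer)  # assumed: mask(s) hiding the padded positions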