ls_mlkit.model.decoder_tf.tokenizer module

class ls_mlkit.model.decoder_tf.tokenizer.Tokenizer
Bases: object

add_special_tokens(special_token_list: List[str])

build_vocab(text_list: list, max_vocab_size=10000, min_freq=1)
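
Example: a minimal sketch of building a vocabulary from raw text. The
no-argument constructor and the toy corpus are assumptions for
illustration, not part of the documented signatures.

   from ls_mlkit.model.decoder_tf.tokenizer import Tokenizer

   tokenizer = Tokenizer()  # assumed: no required constructor arguments
   corpus = ["the quick brown fox", "the lazy dog"]  # toy corpus
   tokenizer.build_vocab(corpus, max_vocab_size=10000, min_freq=1)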

convert_id_to_token(id_list)

convert_token_to_id(token_list)
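
Example: assuming the two methods map between a list of string tokens and
a list of integer ids, a round trip looks like this (continuing with the
tokenizer built above; out-of-vocabulary tokens may not round-trip exactly).

   ids = tokenizer.convert_token_to_id(["the", "quick", "brown", "fox"])
   tokens = tokenizer.convert_id_to_token(ids)  # back to string tokens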

get_vocab_size()

load_state_dict(save_directory='model_pretrained/gpt2')

save_state_dict(save_directory='model_pretrained/gpt2')
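
Example: persisting the tokenizer state and restoring it into a fresh
instance. The directory is the documented default; the no-argument
constructor is an assumption.

   tokenizer.save_state_dict(save_directory="model_pretrained/gpt2")

   restored = Tokenizer()
   restored.load_state_dict(save_directory="model_pretrained/gpt2")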

set_eos_token(val: str = '[EOS]')

set_pad_token(val: str = '[PAD]')
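
Example: configuring the special tokens. Whether this must happen before
or after build_vocab is not documented here, and the extra tokens passed
to add_special_tokens are hypothetical.

   tokenizer.set_pad_token("[PAD]")
   tokenizer.set_eos_token("[EOS]")
   tokenizer.add_special_tokens(["[BOS]", "[UNK]"])  # hypothetical extras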

tokenize(text)
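
Example: assuming tokenize(text) returns a list of string tokens,
encoding raw text to ids is a two-step chain (continuing with the
tokenizer built above).

   tokens = tokenizer.tokenize("the quick brown fox")
   ids = tokenizer.convert_token_to_id(tokens)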

ls_mlkit.model.decoder_tf.tokenizer.get_collate_fn(tokenizer: Tokenizer, max_len: int = 500, train=True)
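
Example: a sketch of plugging the returned collate function into a torch
DataLoader, continuing with the tokenizer built above. That the dataset
yields raw strings and that the collate function pads or truncates each
batch to max_len are assumptions.

   from torch.utils.data import DataLoader

   from ls_mlkit.model.decoder_tf.tokenizer import get_collate_fn

   collate_fn = get_collate_fn(tokenizer, max_len=500, train=True)
   loader = DataLoader(["first sample", "second sample"], batch_size=2,
                       collate_fn=collate_fn)
   batch = next(iter(loader))  # assumed: padded batch of token-id tensors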

ls_mlkit.model.decoder_tf.tokenizer.get_masks(data: Tensor, tokenizer)
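
Example: given the decoder context, the result is presumably a padding
and/or causal attention mask for the batch; the toy id tensor and the
use of 0 as the pad id are assumptions.

   import torch

   from ls_mlkit.model.decoder_tf.tokenizer import get_masks

   ids = torch.tensor([[5, 6, 7, 0, 0]])  # toy batch; 0 stands in for pad
   masks = get_masks(ids, tokenizer)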