ls_mlkit.dataset package

Submodules

Module contents

ls_mlkit.dataset.get_cifar100_dataset(root='data')[source]
ls_mlkit.dataset.get_cifar10_dataset(root='data')[source]
ls_mlkit.dataset.get_fashionmnist_dataset(root='data')[source]
ls_mlkit.dataset.get_iris_dataset(test_ratio=0.2, **kwargs)[source]
ls_mlkit.dataset.get_lda_dataset(seed: int = 31, n_samples: int = 100, n_local_topics: int = 1, n_total_topics: int = 10, n_words_per_topic: int = 7, seq_len: int = 100, eval_ratio: float = 0.1, fix_seq_len: bool = True, fix_local_topics_num: bool = True, per_topic_strategy: str = 'cyclic', topic_distribution: str = 'uniform')[source]
ls_mlkit.dataset.get_minist_dataset(root='data')[source]
ls_mlkit.dataset.get_regular_language_dataset(regex_pattern, max_len=10, data_size=100, limit=100, test_ratio=0.2, **kwargs)[source]
ls_mlkit.dataset.load_alpaca_gpt4(**kwargs)[source]
ls_mlkit.dataset.load_codefeedback(max_tokens=512, num_samples=100000, eval_split_ratio=0.1, seed=31, **kwargs)[source]
ls_mlkit.dataset.load_gsm8k(**kwargs)[source]
ls_mlkit.dataset.load_meta_math(max_tokens=666, num_samples=100000, eval_split_ratio=0.1, seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937(max_seq_len=666, num_samples=100000, eval_split_ratio=0.1, seed=31, fixed_len=False, delimiter=',', num_bits=8)[source]
ls_mlkit.dataset.load_mt19937_12bits(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_12bits_with_eval(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_16bits(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_16bits_with_eval(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_32bits(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_32bits_with_eval(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_8bits(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_mt19937_8bits_with_eval(seed=31, **kwargs)[source]
ls_mlkit.dataset.load_sst2(**kwargs)[source]
ls_mlkit.dataset.load_wizardlm(max_tokens=512, num_samples=70000, eval_split_ratio=0.1, seed=31, **kwargs)[source]