tatm
Introduction:
Getting Started
Examples:
Loading Text Data for LLM Training
Dataset Metadata
API Reference:
tatm
CLI Reference
tatm.data
Data Module API
tatm.config
Config API Reference
tatm.tokenizer
Tokenizer API Reference
tatm
Index
Index
A
|
B
|
C
|
D
|
E
|
F
|
G
|
I
|
L
|
M
|
N
|
P
|
Q
|
R
|
S
|
T
|
V
A
account (tatm.config.SlurmConfig attribute)
as_json() (tatm.data.TatmDataMetadata method)
as_yaml() (tatm.data.TatmDataMetadata method)
B
backend (tatm.config.TatmConfig attribute)
C
conda_env (tatm.config.EnvironmentConfig attribute)
content_field (tatm.data.TatmDataMetadata attribute)
content_field (tatm.tokenizer.engine.ExampleMessage attribute)
corpus_data_dir_parent (tatm.data.TatmDataMetadata attribute)
corpus_separation_strategy (tatm.data.TatmDataMetadata attribute)
corpuses (tatm.data.TatmDataMetadata attribute)
D
data (tatm.tokenizer.engine.ExampleMessage attribute)
data_content (tatm.data.TatmDataMetadata attribute)
dataset_path (tatm.data.TatmDataMetadata attribute)
date_downloaded (tatm.data.TatmDataMetadata attribute)
description (tatm.data.TatmDataMetadata attribute)
download_source (tatm.data.TatmDataMetadata attribute)
E
environment (tatm.config.TatmConfig attribute)
EnvironmentConfig (class in tatm.config)
ExampleMessage (class in tatm.tokenizer.engine)
F
from_directory() (tatm.data.TatmDataMetadata class method)
from_file() (tatm.data.TatmDataMetadata class method)
from_json() (tatm.data.TatmDataMetadata class method)
from_metadata() (tatm.data.TatmData class method)
from_metadata() (tatm.data.TatmTextData class method)
from_yaml() (tatm.data.TatmDataMetadata class method)
G
get_data() (in module tatm.data)
get_dataset() (in module tatm.data)
I
initialize() (tatm.data.TatmData method)
initialize() (tatm.data.TatmTextData method)
L
load_config() (in module tatm.config)
M
module
tatm.config
tatm.data
tatm.tokenizer
tatm.tokenizer.engine
modules (tatm.config.EnvironmentConfig attribute)
N
name (tatm.data.TatmDataMetadata attribute)
num_files() (tatm.data.TatmMemmapDataset method)
num_tokens() (tatm.data.TatmMemmapDataset method)
P
partition (tatm.config.SlurmConfig attribute)
Q
qos (tatm.config.SlurmConfig attribute)
R
run_with_ray() (tatm.tokenizer.engine.TokenizationEngine method)
run_with_ray() (tatm.tokenizer.TokenizationEngine method)
S
singularity_image (tatm.config.EnvironmentConfig attribute)
slurm (tatm.config.TatmConfig attribute)
slurm_bin_dir (tatm.config.SlurmConfig attribute)
SlurmConfig (class in tatm.config)
T
tatm.config
module
tatm.data
module
tatm.tokenizer
module
tatm.tokenizer.engine
module
TatmConfig (class in tatm.config)
TatmData (class in tatm.data)
TatmDataMetadata (class in tatm.data)
TatmMemmapDataset (class in tatm.data)
TatmTextData (class in tatm.data)
to_json() (tatm.data.TatmDataMetadata method)
to_yaml() (tatm.data.TatmDataMetadata method)
TokenizationEngine (class in tatm.tokenizer)
TokenizationEngine (class in tatm.tokenizer.engine)
tokenized_info (tatm.data.TatmDataMetadata attribute)
torch_collate_fn() (in module tatm.data)
V
venv (tatm.config.EnvironmentConfig attribute)