import pathlib | |
import pydantic | |
MAX_DOWNLOAD_TIME = 0.2 | |
IMAGE_DOWNLOAD_PATH = pathlib.Path("/tmp/images") | |
class DataConfig(pydantic.BaseModel): | |
buffer_size: int = 1000 | |
data_len: int = 100 | |
train_len: int = 90 | |
small_dataset: str = "laion/220k-gpt4vision-captions-from-livis" | |
large_dataset: str = "laion/laion400m" | |
dataset: str = small_dataset | |
class ModelConfig(pydantic.BaseModel): | |
text_model: str = "microsoft/xtremedistil-l6-h256-uncased" # 51 mb | |
vision_model: str = "edgenext_small" # 20 mb | |
projection_layers: int = 3 | |
embed_dim: int = 256 | |
transformer_embed_dim: int = 768 | |
max_len: int = 128 # 77 | |
cls_type: bool = True | |
freeze_vision_base: bool = False | |
freeze_text_base: bool = False | |
class TrainerConfig(pydantic.BaseModel): | |
epochs: int = 20 | |
batch_size: int = 256 | |
learning_rate: float = 5e-4 | |
accumulate_grad_batches: int = 1 | |
temperature: float = 1.0 | |
vision_freeze_layers: int = 2 | |
lambda_1: float = 1.0 | |
lambda_2: float = 1.0 | |
val_check_interval: int = 1000 | |
run_openai_clip: bool = False | |
_model_config: ModelConfig = ModelConfig() | |
_data_config: DataConfig = DataConfig() | |