RobertML committed
Commit 224c45d
1 Parent(s): 0a2ceb9

Upload training_config.yml with huggingface_hub

Files changed (1)
  1. training_config.yml +22 -21
training_config.yml CHANGED
@@ -5,30 +5,31 @@ model:
     - v_proj
   apply_lora_to_mlp: false
   apply_lora_to_output: false
-  lora_rank: 8
-  lora_alpha: 16
+  lora_rank: 32
+  lora_alpha: 64
   perception_tokens: 2
   use_clip: false
 tokenizer:
   _component_: models.a2a_tokenizer
-  path: checkpoints/Meta-Llama-3-8B-Instruct/original/tokenizer.model
+  path: models/tokenizer.model
 checkpointer:
   _component_: torchtune.utils.FullModelMetaCheckpointer
-  checkpoint_dir: checkpoints/Meta-Llama-3-8B-Instruct/original
+  checkpoint_dir:
   checkpoint_files:
-  - consolidated.02.pth
+  -
   adapter_checkpoint: null
   recipe_checkpoint: null
-  output_dir: output_checkpoints/experiment_3
+  output_dir: output_checkpoints/experiment_1
   model_type: LLAMA3
 resume_from_checkpoint: false
-interim_checkpoint_steps: 5000
+interim_checkpoint_steps: 15000
 interim_gen_steps: null
-max_new_tokens: 100
-temperature: 0.6
-top_k: 300
+max_new_tokens: 88
+temperature: 0.7
+top_k: 232
 dataset:
   _component_: ds.EvenBatcher
+  buffer_size: 73
   dataset:
     _component_: ds.RoundRobinDataset
     datasets:
@@ -45,21 +46,21 @@ dataset:
       train_on_input: false
 seed: null
 shuffle: true
-batch_size: 8
+batch_size: 6
 optimizer:
   _component_: torch.optim.AdamW
-  weight_decay: 0.01
-  lr: 0.0003
+  weight_decay: 0.99
+  lr: 20.0e-05
 lr_scheduler:
   _component_: torchtune.modules.get_cosine_schedule_with_warmup
-  num_warmup_steps: 100
+  num_warmup_steps: 4
 loss:
   _component_: torch.nn.CrossEntropyLoss
-epochs: 2
-max_steps_per_epoch: 10
-gradient_accumulation_steps: 64
+epochs: 60
+max_steps_per_epoch: null
+gradient_accumulation_steps: 260
 compile: false
-output_dir: /tmp/lora_finetune_output
+output_dir: /workspace/hebbanvogola/lora_finetune_output
 metric_logger:
   _component_: torchtune.utils.metric_logging.DiskLogger
   log_dir: ${output_dir}
@@ -76,8 +77,8 @@ inference:
     {video}
 
    Caption the previous video.'
-  max_new_tokens: 300
-  temperature: 0.6
-  top_k: 300
+  max_new_tokens: 231
+  temperature: 0.8
+  top_k: 231
 quantizer: null
 gradient-accumulation-steps: 32
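The commit bumps lora_rank from 8 to 32 and lora_alpha from 16 to 64, quadrupling adapter capacity while keeping the usual LoRA scaling factor alpha/rank at 2. As a generic illustration of how those two values typically interact (a minimal PyTorch sketch, not torchtune's actual module; the class and attribute names here are made up):

import torch
import torch.nn as nn

class LoRALinear(nn.Module):
    """Generic LoRA sketch: y = W x + (alpha / rank) * B(A(x))."""
    def __init__(self, in_dim: int, out_dim: int, rank: int = 32, alpha: int = 64):
        super().__init__()
        self.base = nn.Linear(in_dim, out_dim, bias=False)
        self.base.weight.requires_grad_(False)              # frozen pretrained weight
        self.lora_a = nn.Linear(in_dim, rank, bias=False)   # A: down-projection
        self.lora_b = nn.Linear(rank, out_dim, bias=False)  # B: up-projection
        nn.init.zeros_(self.lora_b.weight)                  # adapter starts as a no-op
        self.scaling = alpha / rank                         # 64 / 32 = 2.0, same ratio as the old 16 / 8

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base(x) + self.scaling * self.lora_b(self.lora_a(x))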
 
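A side effect of the batching changes worth noting when judging the new lr: the effective batch size implied by batch_size times gradient_accumulation_steps roughly triples. A quick check:

# Effective batch size = per-step batch_size * gradient_accumulation_steps.
old_effective = 8 * 64    # previous config -> 512
new_effective = 6 * 260   # this commit    -> 1560
print(old_effective, new_effective)  # 512 1560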
 
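Configs in this style (with _component_ keys and ${output_dir} interpolation) are OmegaConf-compatible YAML, so the result of this commit can be sanity-checked outside the training recipe. A minimal sketch, assuming the file sits in the working directory and omegaconf is installed:

from omegaconf import OmegaConf

cfg = OmegaConf.load("training_config.yml")
OmegaConf.resolve(cfg)  # expands interpolations such as ${output_dir}

print(cfg.model.lora_rank, cfg.model.lora_alpha)  # 32 64
print(cfg.optimizer.lr)                           # 0.0002 (parsed from 20.0e-05)
print(cfg.metric_logger.log_dir)                  # /workspace/hebbanvogola/lora_finetune_output

Note that this commit leaves checkpointer.checkpoint_dir and the single checkpoint_files entry empty, so YAML loads them as null; they presumably get filled in before the recipe runs.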