# Launch /home/avuhong/AAVesm/run_mlm.py through torch.distributed.launch,
# starting 2 worker processes with only CUDA devices 0 and 1 visible.
#
# Model / data:
#   - model and tokenizer: facebook/esm2_t33_650M_UR50D
#   - train / validation CSVs: ds_seq_train.csv / ds_seq_val.csv
#   - sequences limited via --max_seq_length 800
# Optimisation:
#   - learning rate 1e-05, 36 epochs, fp16, --sharded_ddp simple
#   - per-device batch size 1 with 4 gradient-accumulation steps
#     (presumably an effective batch of 8 across the 2 processes -- TODO confirm)
# Evaluation / checkpoints:
#   - evaluate once per epoch
#   - --save_strategy is passed exactly once (epoch); a second, conflicting
#     "--save_strategy no" would be overridden by the later value anyway and
#     would make --save_total_limit 2 look meaningless.
#   - --overwrite_output_dir: an existing output_AAVESM2_650M_v1 is reused.
#
# NOTE: every continuation backslash must be the very last character on its
# line; anything after it (even a space) breaks the command apart.
CUDA_VISIBLE_DEVICES=0,1 python3 -m torch.distributed.launch --nproc_per_node 2 /home/avuhong/AAVesm/run_mlm.py \
  --model_name_or_path facebook/esm2_t33_650M_UR50D \
  --tokenizer_name facebook/esm2_t33_650M_UR50D \
  --train_file /home/avuhong/AAVesm/ds_seq_train.csv \
  --validation_file /home/avuhong/AAVesm/ds_seq_val.csv \
  --do_train --do_eval --learning_rate 1e-05 \
  --per_device_train_batch_size 1 --gradient_accumulation_steps 4 --num_train_epochs 36 \
  --per_device_eval_batch_size 1 --evaluation_strategy epoch \
  --overwrite_output_dir --output_dir output_AAVESM2_650M_v1 \
  --fp16 --sharded_ddp simple \
  --max_seq_length 800 \
  --save_strategy epoch --save_total_limit 2