I have the error RuntimeError: Only Tensors of floating point and complex dtype can require gradients

#2
by changsu12 - opened

Thank you for your hard work. I encountered the following error while loading the model. How can I solve it?


RuntimeError Traceback (most recent call last)
Cell In[1], line 26
22 model_path = "Rodeszones/CogVLM-grounding-generalist-hf-quant4"
25 tokenizer = LlamaTokenizer.from_pretrained('lmsys/vicuna-7b-v1.5')
---> 26 model = AutoModelForCausalLM.from_pretrained(
27 model_path,
28 torch_dtype=torch.bfloat16,
29 low_cpu_mem_usage=True,
30 trust_remote_code=True
31 ).eval()

File /opt/conda/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:561, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
559 else:
560 cls.register(config.__class__, model_class, exist_ok=True)
--> 561 return model_class.from_pretrained(
562 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
563 )
564 elif type(config) in cls._model_mapping.keys():
565 model_class = _get_model_class(config, cls._model_mapping)

File /opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py:3706, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)
3697 if dtype_orig is not None:
3698 torch.set_default_dtype(dtype_orig)
3699 (
3700 model,
3701 missing_keys,
3702 unexpected_keys,
3703 mismatched_keys,
3704 offload_index,
3705 error_msgs,
-> 3706 ) = cls._load_pretrained_model(
3707 model,
3708 state_dict,
3709 loaded_state_dict_keys, # XXX: rename?
3710 resolved_archive_file,
3711 pretrained_model_name_or_path,
3712 ignore_mismatched_sizes=ignore_mismatched_sizes,
3713 sharded_metadata=sharded_metadata,
3714 _fast_init=_fast_init,
3715 low_cpu_mem_usage=low_cpu_mem_usage,
3716 device_map=device_map,
3717 offload_folder=offload_folder,
3718 offload_state_dict=offload_state_dict,
3719 dtype=torch_dtype,
3720 is_quantized=(getattr(model, "quantization_method", None) == QuantizationMethod.BITS_AND_BYTES),
3721 keep_in_fp32_modules=keep_in_fp32_modules,
3722 )
3724 model.is_loaded_in_4bit = load_in_4bit
3725 model.is_loaded_in_8bit = load_in_8bit

File /opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py:4116, in PreTrainedModel._load_pretrained_model(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, is_quantized, keep_in_fp32_modules)
4112 set_module_quantized_tensor_to_device(
4113 model_to_load, key, "cpu", torch.empty(*param.size(), dtype=dtype)
4114 )
4115 else:
-> 4116 new_error_msgs, offload_index, state_dict_index = _load_state_dict_into_meta_model(
4117 model_to_load,
4118 state_dict,
4119 loaded_keys,
4120 start_prefix,
4121 expected_keys,
4122 device_map=device_map,
4123 offload_folder=offload_folder,
4124 offload_index=offload_index,
4125 state_dict_folder=state_dict_folder,
4126 state_dict_index=state_dict_index,
4127 dtype=dtype,
4128 is_quantized=is_quantized,
4129 is_safetensors=is_safetensors,
4130 keep_in_fp32_modules=keep_in_fp32_modules,
4131 )
4132 error_msgs += new_error_msgs
4133 else:

File /opt/conda/lib/python3.11/site-packages/transformers/modeling_utils.py:786, in _load_state_dict_into_meta_model(model, state_dict, loaded_state_dict_keys, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, is_quantized, is_safetensors, keep_in_fp32_modules)
783 fp16_statistics = None
785 if "SCB" not in param_name:
--> 786 set_module_quantized_tensor_to_device(
787 model, param_name, param_device, value=param, fp16_statistics=fp16_statistics
788 )
790 return error_msgs, offload_index, state_dict_index

File /opt/conda/lib/python3.11/site-packages/transformers/integrations/bitsandbytes.py:115, in set_module_quantized_tensor_to_device(module, tensor_name, device, value, fp16_statistics)
113 module._buffers[tensor_name] = new_value
114 else:
--> 115 new_value = nn.Parameter(new_value, requires_grad=old_value.requires_grad)
116 module._parameters[tensor_name] = new_value

File ~/.local/lib/python3.11/site-packages/torch/nn/parameter.py:40, in Parameter.__new__(cls, data, requires_grad)
36 data = torch.empty(0)
37 if type(data) is torch.Tensor or type(data) is Parameter:
38 # For ease of BC maintenance, keep this path for standard Tensor.
39 # Eventually (tm), we should change the behavior for standard Tensor to match.
---> 40 return torch.Tensor._make_subclass(cls, data, requires_grad)
42 # Path for custom tensors: set a flag on the instance to indicate parameter-ness.
43 t = data.detach().requires_grad_(requires_grad)

RuntimeError: Only Tensors of floating point and complex dtype can require gradients

Sign up or log in to comment