lyulumos committed
Commit c927cf6
1 Parent(s): cd8a4ef

Fix: AttributeError when `input_ids` is None during multimodal LLM training


When training a multimodal language model such as MiniGPT-4, the model passes `inputs_embeds` instead of `input_ids`: the multimodal embeddings are projected into the LLM's text embedding space and concatenated with the text embeddings, so `input_ids` is unnecessary and arrives as `None`.
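For concreteness, here is a minimal sketch of that pattern; the wrapper class, projection layer, and dimensions below are illustrative assumptions, not MiniGPT-4's actual code:

```python
import torch
import torch.nn as nn

class VisionLLMWrapper(nn.Module):
    """Illustrative MiniGPT-4-style wrapper; names and dimensions are assumptions."""

    def __init__(self, language_model, vision_dim=1408, llm_dim=4096):
        super().__init__()
        self.language_model = language_model               # e.g. a ChatGLM backbone
        self.vision_proj = nn.Linear(vision_dim, llm_dim)  # aligns image features to the LLM text space

    def forward(self, image_features, text_input_ids, attention_mask=None):
        # Project visual features into the LLM embedding space: [B, N_img, llm_dim]
        image_embeds = self.vision_proj(image_features)
        # Embed the text tokens with the LLM's own embedding table
        # (assumes the backbone exposes it via get_input_embeddings()): [B, N_txt, llm_dim]
        text_embeds = self.language_model.get_input_embeddings()(text_input_ids)
        # Concatenate along the sequence dimension and hand everything to the LLM
        # as `inputs_embeds`; `input_ids` is deliberately left as None.
        inputs_embeds = torch.cat([image_embeds, text_embeds], dim=1)
        return self.language_model(
            input_ids=None,
            inputs_embeds=inputs_embeds,
            attention_mask=attention_mask,
        )
```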

This leads to the following error:

```
AttributeError: 'NoneType' object has no attribute 'shape'
```

This commit addresses the issue by handling the case where `input_ids` is `None`, so the model can process the provided `inputs_embeds` without relying on `input_ids`.
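Stated on its own, the fix is a simple fallback: derive the batch size, sequence length, and device from whichever tensor is actually present. A standalone sketch of that pattern (the function name is hypothetical; the `[batch, seq, hidden]` layout for `inputs_embeds` is an assumption):

```python
import torch

def infer_batch_seq_device(input_ids=None, inputs_embeds=None, padding_mask=None):
    """Fallback used by the fix: derive (batch, seq, device) when input_ids is None."""
    if input_ids is not None:
        batch_size, seq_length = input_ids.shape
        device = input_ids.device
    elif inputs_embeds is not None:
        batch_size, seq_length = inputs_embeds.shape[:2]  # assumes [batch, seq, hidden]
        device = inputs_embeds.device
    elif padding_mask is not None:
        batch_size, seq_length = padding_mask.shape
        device = padding_mask.device
    else:
        raise ValueError("Expected at least one of input_ids, inputs_embeds, or padding_mask")
    return batch_size, seq_length, device
```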

Files changed (1)

modeling_chatglm.py (+5 -4)
@@ -771,15 +771,16 @@ class ChatGLMPreTrainedModel(PreTrainedModel):
             if padding_mask is not None and not padding_mask.all():
                 return padding_mask
             return None
-        batch_size, seq_length = input_ids.shape
-        full_attention_mask = torch.ones(batch_size, seq_length, seq_length, device=input_ids.device)
+        batch_size, seq_length = input_ids.shape if input_ids is not None else padding_mask.shape
+        device = input_ids.device if input_ids is not None else padding_mask.device
+        full_attention_mask = torch.ones(batch_size, seq_length, seq_length, device=device)
         full_attention_mask.tril_()
         past_length = 0
         if past_key_values:
             past_length = past_key_values[0][0].shape[2]
         if past_length:
             full_attention_mask = torch.cat((torch.ones(batch_size, seq_length, past_length,
-                                                        device=input_ids.device), full_attention_mask), dim=-1)
+                                                        device=device), full_attention_mask), dim=-1)
         if padding_mask is not None:
             full_attention_mask = full_attention_mask * padding_mask.unsqueeze(1)
         if not past_length and padding_mask is not None:
@@ -872,7 +873,7 @@ class ChatGLMModel(ChatGLMPreTrainedModel):
         use_cache = use_cache if use_cache is not None else self.config.use_cache
         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
 
-        batch_size, seq_length = input_ids.shape
+        batch_size, seq_length = (input_ids.shape if input_ids is not None else inputs_embeds.shape[:2] if inputs_embeds is not None else (None, None))
 
         if inputs_embeds is None:
             inputs_embeds = self.embedding(input_ids)
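For context, the first hunk sits in the attention-mask helper, which builds a causal `[batch, seq, seq]` mask, prepends an all-ones block for cached past tokens, and multiplies in the padding mask; the new `device` variable exists so this construction no longer needs `input_ids.device`. A simplified standalone sketch of that construction (the helper name and signature here are illustrative, not the file's actual `get_masks`):

```python
import torch

def build_full_attention_mask(batch_size, seq_length, device, past_length=0, padding_mask=None):
    # Lower-triangular causal mask over the current tokens: [batch, seq, seq].
    full_attention_mask = torch.ones(batch_size, seq_length, seq_length, device=device)
    full_attention_mask.tril_()
    # Cached past tokens are visible to every current token, so prepend a block of ones.
    if past_length:
        past_block = torch.ones(batch_size, seq_length, past_length, device=device)
        full_attention_mask = torch.cat((past_block, full_attention_mask), dim=-1)
    # Zero out key positions marked as padding; padding_mask is expected to
    # cover all key positions (past + current), shape [batch, past + seq].
    if padding_mask is not None:
        full_attention_mask = full_attention_mask * padding_mask.unsqueeze(1)
    return full_attention_mask
```

With the geometry and device supplied explicitly, the same mask can be built whether the caller passed `input_ids` or only `inputs_embeds`.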