[BugFix] gemma loading after quantization or LoRA. (#3553)

IBM · Mar 21, 2024 · cd4af85 · cd4af85
1 parent 443dcbe
commit cd4af85
Showing 1 changed file with 4 additions and 0 deletions.
diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py
@@ -340,6 +340,10 @@ def load_weights(self,
                 weight_loader(param, loaded_weight, shard_id)
                 break
             else:
+                # lm_head is not used in vLLM since it is tied to embed_token.
+                # Skip loading lm_head.weight to avoid a load-time error.
+                if "lm_head.weight" in name:
+                    continue
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue