Make wrapper tensor subclasses work in serialization #2440

Merged Aug 30, 2024 (18 commits). Changes from 8 commits.
1 change: 1 addition & 0 deletions src/huggingface_hub/serialization/__init__.py
@@ -22,4 +22,5 @@
    save_torch_model,
    save_torch_state_dict,
    split_torch_state_dict_into_shards,
    torch_version_at_least,
)
69 changes: 57 additions & 12 deletions src/huggingface_hub/serialization/_torch.py
@@ -20,7 +20,7 @@
from collections import defaultdict
from functools import lru_cache
from pathlib import Path
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union

from .. import constants, logging
from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
@@ -32,6 +32,26 @@
    import torch


def parse_version(version_string):
    # Extract just the X.Y.Z part from the version string
    match = re.match(r"(\d+\.\d+\.\d+)", version_string)
    if match:
        version = match.group(1)
        return [int(x) for x in version.split(".")]
    else:
        raise ValueError(f"Invalid version string format: {version_string}")


def compare_versions(v1, v2):
    v1_parts = parse_version(v1)
    v2_parts = parse_version(v2)
    return (v1_parts > v2_parts) - (v1_parts < v2_parts)


def torch_version_at_least(min_version):
    try:
        # torch is only imported under TYPE_CHECKING at module level, so import it here at runtime.
        import torch

        return compare_versions(torch.__version__, min_version) >= 0
    except Exception:
        return False
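
As a quick illustration (not part of the diff, and assuming re is imported at module scope as in the real file), these helpers only look at the leading X.Y.Z component, so development and CUDA-tagged builds compare as expected:

    # Sketch only: exercising the version helpers above.
    assert parse_version("2.4.0.dev20240601+cu121") == [2, 4, 0]
    assert compare_versions("2.1.0", "2.0.1") == 1  # 1: greater, -1: smaller, 0: equal
    # torch_version_at_least("2.1.0") then reduces to a ">= 0" check against torch.__version__.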

def save_torch_model(
    model: "torch.nn.Module",
    save_directory: Union[str, Path],
@@ -335,18 +355,18 @@ def split_torch_state_dict_into_shards(
        get_storage_id=get_torch_storage_id,
    )


def _get_unique_id(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
    """Returns a unique id for a plain tensor, or a (potentially nested) tuple of unique ids
    for the flattened inner tensors if the input is a wrapper tensor subclass.
    """
    if torch_version_at_least("2.1.0"):
        from torch.utils._python_dispatch import is_traceable_wrapper_subclass

        if is_traceable_wrapper_subclass(tensor):
            attrs, _ = tensor.__tensor_flatten__()
            unique_id = tuple(_get_unique_id(getattr(tensor, attr)) for attr in attrs)
            return unique_id

    if tensor.device.type == "xla" and is_torch_tpu_available():
        # NOTE: xla tensors dont have storage
        # use some other unique id to distinguish.
@@ -358,13 +378,33 @@ def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", int, int]:
    else:
        unique_id = storage_ptr(tensor)

    return unique_id
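
A rough sketch of what this returns (not part of the diff; assumes torch >= 2.1 on CPU and the TwoTensor test subclass used in the tests below):

    # Sketch only: a plain tensor yields a single int, a wrapper subclass yields a tuple.
    import torch
    from torch.testing._internal.two_tensor import TwoTensor

    a = torch.tensor([1.0, 2.0])
    wrapped = TwoTensor(a, a)
    _get_unique_id(a)        # an int: the pointer of the underlying storage
    _get_unique_id(wrapped)  # a tuple with one entry per flattened inner tensor (nested for nested subclasses)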

def get_torch_storage_id(tensor: "torch.Tensor") -> Tuple["torch.device", Union[int, Tuple[Any, ...]], int]:
    """
    Return unique identifier to a tensor storage.

    Multiple different tensors can share the same underlying storage. For example, "meta" tensors all share the same
    storage, and thus their identifier will all be equal. This identifier is guaranteed to be unique and constant for
    this tensor's storage during its lifetime. Two tensor storages with non-overlapping lifetimes may have the same id.

    Taken from https://github.com/huggingface/transformers/blob/1ecf5f7c982d761b4daaa96719d162c324187c64/src/transformers/pytorch_utils.py#L278.
    """
    return tensor.device, _get_unique_id(tensor), get_torch_storage_size(tensor)
Contributor:
If I understand correctly, two "meta" tensors can have the exact same _get_unique_id(tensor) and the exact same tensor.device but still be different, correct? If so, how can we be sure their storage size distinguishes them? Can it happen that they randomly happen to have the same storage size?

Contributor (author):
Yeah, it just means the current approach does not generalize to meta tensors. Did it work previously?

I think we'd need to reimplement the higher-level sharding logic in PyTorch in the end. I added some PoC in Slack, let me make a quick intro there.

Member:
> Yeah, it just means the current approach does not generalize to meta tensors. Did it work previously?

I don't think so, since we never had to serialize meta tensors. The only use case that could benefit from it is in accelerate (finding tied parameters in the meta model). Right now, this is how we handle meta tensors: https://github.com/huggingface/accelerate/blob/726140cad2f2361d79da7786a7b96d0bee591c48/src/accelerate/utils/modeling.py#L677
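
For what it's worth, a minimal sketch of the collision being discussed (not part of the diff; assumes current torch behaviour for meta tensors):

    # Sketch only: meta tensors have no real allocation, so their storage pointer resolves to 0.
    x = torch.empty(5, device="meta")
    y = torch.empty(5, device="meta")  # a different tensor object...
    # ...yet device, unique id and storage size all coincide, so get_torch_storage_id
    # would (incorrectly) report the two as the same storage:
    get_torch_storage_id(x) == get_torch_storage_id(y)  # likely True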



def get_torch_storage_size(tensor: "torch.Tensor") -> int:
    """
    Taken from https://github.com/huggingface/safetensors/blob/08db34094e9e59e2f9218f2df133b7b4aaff5a99/bindings/python/py_src/safetensors/torch.py#L31C1-L41C59
    """
    if torch_version_at_least("2.1.0"):
        from torch.utils._python_dispatch import is_traceable_wrapper_subclass

        if is_traceable_wrapper_subclass(tensor):
            # Sum the storage sizes of the flattened inner tensors (recursing into nested subclasses).
            attrs, _ = tensor.__tensor_flatten__()
            return sum(get_torch_storage_size(getattr(tensor, attr)) for attr in attrs)

    try:
        return tensor.untyped_storage().nbytes()
    except AttributeError:
@@ -398,10 +438,15 @@ def is_torch_tpu_available(check_device=True):
    return False


def storage_ptr(tensor: "torch.Tensor") -> Union[int, Tuple[Any, ...]]:
    """
    Taken from https://github.com/huggingface/safetensors/blob/079781fd0dc455ba0fe851e2b4507c33d0c0d407/bindings/python/py_src/safetensors/torch.py#L11.
    """
    if torch_version_at_least("2.1.0"):
        from torch.utils._python_dispatch import is_traceable_wrapper_subclass

        if is_traceable_wrapper_subclass(tensor):
            return _get_unique_id(tensor)

    try:
        return tensor.untyped_storage().data_ptr()
    except Exception:
105 changes: 105 additions & 0 deletions tests/test_serialization.py
@@ -13,6 +13,7 @@
    save_torch_model,
    save_torch_state_dict,
    split_state_dict_into_shards_factory,
    torch_version_at_least,
)
from huggingface_hub.serialization._base import parse_size_to_int

@@ -58,6 +59,25 @@ def torch_state_dict() -> Dict[str, "torch.Tensor"]:
        pytest.skip("torch is not available")


@pytest.fixture
def torch_state_dict_tensor_subclass() -> Dict[str, "torch.Tensor"]:
    try:
        import torch
        from torch.testing._internal.two_tensor import TwoTensor

        t = torch.tensor([4])
        return {
            "layer_1": torch.tensor([4]),
            "layer_2": torch.tensor([10]),
            "layer_3": torch.tensor([30]),
            "layer_4": torch.tensor([2]),
            "layer_5": torch.tensor([2]),
            "layer_6": TwoTensor(t, t),
        }
    except ImportError:
        pytest.skip("torch is not available")
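
(TwoTensor is the reference traceable wrapper subclass shipped with PyTorch's own test utilities: it wraps two inner tensors, a and b, and implements __tensor_flatten__, which is exactly the hook the serialization changes above rely on.)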


@pytest.fixture
def torch_state_dict_shared_layers() -> Dict[str, "torch.Tensor"]:
    try:
@@ -75,6 +95,52 @@ def torch_state_dict_shared_layers() -> Dict[str, "torch.Tensor"]:
        pytest.skip("torch is not available")


@pytest.fixture
def torch_state_dict_shared_layers_tensor_subclass() -> Dict[str, "torch.Tensor"]:
    try:
        import torch
        from torch.testing._internal.two_tensor import TwoTensor

        t = torch.tensor([4])
        tensor_subclass_tensor = TwoTensor(t, t)

        t = torch.tensor([4])
        shared_tensor_subclass_tensor = TwoTensor(t, t)
        return {
            "layer_1": torch.tensor([4]),
            "layer_2": torch.tensor([10]),
            "layer_3": torch.tensor([30]),
            "layer_4": torch.tensor([2]),
            "layer_5": torch.tensor([2]),
            "layer_6": tensor_subclass_tensor,
            "ts_shared_1": shared_tensor_subclass_tensor,
            "ts_shared_2": shared_tensor_subclass_tensor,
        }
    except ImportError:
        pytest.skip("torch is not available")
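
(Note that "ts_shared_1" and "ts_shared_2" reference the same TwoTensor object, so get_torch_storage_id should return the same (device, unique id, storage size) triple for both, which is what lets the sharding logic treat them as a single shared storage.)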


@pytest.fixture
def torch_state_dict_shared_layers() -> Dict[str, "torch.Tensor"]:
    try:
        import torch
        from torch.testing._internal.two_tensor import TwoTensor

        if torch_version_at_least("2.1.0"):
            shared_layer = TwoTensor(torch.tensor([4]), torch.tensor([4]))
        else:
            shared_layer = torch.tensor([4])

        return {
            "shared_1": shared_layer,
            "unique_1": torch.tensor([10]),
            "unique_2": torch.tensor([30]),
            "shared_2": shared_layer,
        }
    except ImportError:
        pytest.skip("torch is not available")


def test_single_shard(dummy_state_dict):
    state_dict_split = split_state_dict_into_shards_factory(
        dummy_state_dict,
@@ -170,6 +236,17 @@ def test_get_torch_storage_size():
    assert get_torch_storage_size(torch.tensor([1, 2, 3, 4, 5], dtype=torch.float16)) == 5 * 2


@requires("torch")
def test_get_torch_storage_size_wrapper_tensor_subclass():
    import torch

    if torch_version_at_least("2.1.0"):
        from torch.testing._internal.two_tensor import TwoTensor

        t = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float64)
        assert get_torch_storage_size(TwoTensor(t, t)) == 5 * 8 * 2
        t = torch.tensor([1, 2, 3, 4, 5], dtype=torch.float16)
        assert get_torch_storage_size(TwoTensor(t, TwoTensor(t, t))) == 5 * 2 * 3
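
For reference, the expected values follow directly from the recursive sum in get_torch_storage_size: a float64 tensor with 5 elements occupies 5 * 8 = 40 bytes and TwoTensor(t, t) flattens to two such leaves, hence 5 * 8 * 2 = 80 bytes; TwoTensor(t, TwoTensor(t, t)) flattens to three float16 leaves of 5 * 2 = 10 bytes each, hence 5 * 2 * 3 = 30 bytes.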


def test_parse_size_to_int():
    assert parse_size_to_int("1KB") == 1 * 10**3
    assert parse_size_to_int("2MB") == 2 * 10**6
@@ -247,6 +324,34 @@ def test_save_torch_state_dict_unsafe_not_sharded(
    assert not (tmp_path / "pytorch_model.bin.index.json").is_file()


def test_save_torch_state_dict_tensor_subclass_unsafe_not_sharded(
    tmp_path: Path, caplog: pytest.LogCaptureFixture, torch_state_dict_tensor_subclass: Dict[str, "torch.Tensor"]
) -> None:
    """Save a state dict containing wrapper tensor subclasses as pickle, without sharding."""
    if not torch_version_at_least("2.1.0"):
        return
    with caplog.at_level("WARNING"):
        save_torch_state_dict(torch_state_dict_tensor_subclass, tmp_path, max_shard_size="1GB", safe_serialization=False)
        assert "we strongly recommend using safe serialization" in caplog.text

    assert (tmp_path / "pytorch_model.bin").is_file()
    assert not (tmp_path / "pytorch_model.bin.index.json").is_file()


def test_save_torch_state_dict_shared_layers_tensor_subclass_unsafe_not_sharded(
    tmp_path: Path, caplog: pytest.LogCaptureFixture, torch_state_dict_shared_layers_tensor_subclass: Dict[str, "torch.Tensor"]
) -> None:
    """Save a state dict with shared wrapper tensor subclasses as pickle, without sharding."""
    if not torch_version_at_least("2.1.0"):
        return
    with caplog.at_level("WARNING"):
        save_torch_state_dict(torch_state_dict_shared_layers_tensor_subclass, tmp_path, max_shard_size="1GB", safe_serialization=False)
        assert "we strongly recommend using safe serialization" in caplog.text

    assert (tmp_path / "pytorch_model.bin").is_file()
    assert not (tmp_path / "pytorch_model.bin.index.json").is_file()


def test_save_torch_state_dict_unsafe_sharded(
    tmp_path: Path, caplog: pytest.LogCaptureFixture, torch_state_dict: Dict[str, "torch.Tensor"]
) -> None: