Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Converter block storage #1508

Merged
merged 34 commits into from
May 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a206fff
block storage support for converter
braingram Jan 20, 2023
65d7aec
cleanup api
braingram Mar 22, 2023
ab5d19f
add claim_block
braingram Mar 27, 2023
4001700
don't limit IntegerType _value_cache per ctx
braingram Feb 3, 2023
8745ca3
make a new AsdfFile on write_to
braingram Feb 3, 2023
8740a74
new AsdfFile on write_to
braingram Mar 28, 2023
5dd1d48
fix array block assignment if write_to called before update
braingram Mar 28, 2023
38cb99c
update with callback working
braingram Mar 29, 2023
ba9cd5a
cleanup tests for new AsdfFile on write
braingram Mar 30, 2023
538c112
add docstrings to block converter functions
braingram Mar 30, 2023
22d2af3
more code cleanup
braingram Mar 30, 2023
ead0f7c
remove non-array block storage settings
braingram Mar 30, 2023
6cda51b
pass serialization context to select tag
braingram Mar 30, 2023
ad5c9a4
rename claim_block to assign_block_key
braingram Mar 30, 2023
63c28fc
add tests for block management through serialization context
braingram Mar 30, 2023
fc9a8f0
test invalid key with get_block_by_key
braingram Mar 31, 2023
4d8d1e8
initial block converter docs
braingram Mar 31, 2023
f8ac711
refine docs for block converter
braingram Mar 31, 2023
2f5e387
add changelog
braingram Mar 31, 2023
74d1111
remove comment
braingram Mar 31, 2023
fd0e26c
convert load_block to callback generating get_block_data_callback
braingram Mar 31, 2023
264fdf5
replace use of id(obj) for block key
braingram Mar 31, 2023
ee21876
add asdf.util.BlockKey for generating unique keys
braingram Apr 1, 2023
a730090
add BlockKey test
braingram Apr 3, 2023
ce5da7e
add test with external block storage and defined uri
braingram Apr 17, 2023
a7a6418
separate _key_to_block and _data_to_block mappings
braingram Apr 17, 2023
3370627
don't modify history and asdf_library on write_to
braingram Apr 17, 2023
ddffc0f
add note about repeatability of data_callback
braingram Apr 17, 2023
23f9a90
remove unnecessary util.get_array_base
braingram Apr 17, 2023
a2be1e8
switch 0 to False
braingram Apr 17, 2023
c072bf6
implement __eq__ for BlockKey
braingram Apr 17, 2023
68458cd
return NotImplemented for __eq__ with non-BlockKey
braingram Apr 24, 2023
abd6494
remove ctx arg from reserve_blocks
braingram Apr 24, 2023
2c9b8e1
shallow copy tree before write_to
braingram Apr 24, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ The ASDF Standard is at v1.6.0
- Add ``all_array_storage``, ``all_array_compression`` and
``all_array_compression_kwargs`` to ``asdf.config.AsdfConfig`` [#1468]
- Move built-in tags to converters (except ndarray and integer). [#1474]
- Add block storage support to Converter [#1508]

2.15.0 (2023-03-28)
-------------------
Expand Down
3 changes: 2 additions & 1 deletion asdf/_tests/commands/tests/test_exploded.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ def test_explode_then_implode(tmpdir):
# in internal blocks rather than letting some of them be automatically put
# inline.
ff.write_to(path, all_array_storage="internal")
assert len(ff._blocks) == 2
with asdf.open(path) as af:
assert len(af._blocks._internal_blocks) == 2

result = main.main_from_args(["explode", path])

Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/tags/core/tests/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@ def test_integer_storage_duplication(tmpdir):

with asdf.AsdfFile(tree) as af:
af.write_to(tmpfile)
assert len(af._blocks) == 1

with asdf.open(tmpfile, _force_raw_types=True) as rf:
assert len(af._blocks) == 1
assert rf.tree["integer1"]["words"]["source"] == 0
assert rf.tree["integer2"]["words"]["source"] == 0

Expand Down
17 changes: 17 additions & 0 deletions asdf/_tests/test_api.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import copy
import getpass
import io
import os
Expand Down Expand Up @@ -439,6 +440,7 @@ def test_array_inline_threshold_masked_array(array_inline_threshold, inline_bloc

with asdf.AsdfFile(tree) as af:
af.write_to(file_path)
with asdf.open(file_path) as af:
assert len(list(af._blocks.inline_blocks)) == inline_blocks
assert len(list(af._blocks.internal_blocks)) == internal_blocks

Expand Down Expand Up @@ -547,3 +549,18 @@ def test_asdf_standard_version_tag_selection():
content = buff.read()
assert b"!core/asdf-1.0.0" not in content
assert b"!core/asdf-1.1.0" in content


def test_write_to_no_tree_modification(tmp_path):
    """write_to must not modify the in-memory tree of the AsdfFile.

    Covers both a freshly constructed file and a reopened file whose
    history/asdf_library metadata was modified by hand.
    """
    first_path = tmp_path / "test.asdf"
    second_path = tmp_path / "test2.asdf"
    tree = {"foo": None}
    af = asdf.AsdfFile(tree.copy())
    af.write_to(first_path)
    # the user-supplied tree must be unchanged after writing
    assert tree == af.tree
    with asdf.open(first_path) as af:
        # hand-edit metadata that a write might be tempted to regenerate
        af["history"]["extensions"][0]["software"]["version"] = "0.0.0.dev+abcdefg"
        af["asdf_library"]["author"] = "foo"
        snapshot = copy.deepcopy(af.tree)
        af.write_to(second_path)
        # writing again must not clobber the hand-edited metadata
        assert af.tree == snapshot
133 changes: 100 additions & 33 deletions asdf/_tests/test_array_blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import numpy as np
import pytest
import yaml
from numpy.random import random
from numpy.testing import assert_array_equal

Expand All @@ -26,6 +27,15 @@ def test_external_block(tmp_path):
assert "test0000.asdf" in os.listdir(tmp_path)


def test_external_block_url():
    """Constructing an AsdfFile with a uri must not raise when the global
    config requests external block storage.

    Fix over the original: the original set
    ``asdf.get_config().all_array_storage`` and never restored it, leaking
    the "external" setting into every test that runs afterwards. Save and
    restore the previous value in a try/finally.
    """
    uri = "asdf://foo"
    my_array = RNG.normal(size=(8, 8))
    tree = {"my_array": my_array}
    config = asdf.get_config()
    previous_storage = config.all_array_storage
    config.all_array_storage = "external"
    try:
        # this should not raise a ValueError since uri is provided
        asdf.AsdfFile(tree, uri=uri)
    finally:
        # restore global config so this test does not affect other tests
        config.all_array_storage = previous_storage


def test_external_block_non_url():
my_array = RNG.normal(size=(8, 8))
tree = {"my_array": my_array}
Expand Down Expand Up @@ -655,7 +665,8 @@ def test_invalid_block_index_values():
assert len(ff._blocks) == 1


def test_invalid_last_block_index():
@pytest.mark.parametrize("block_index_index", [0, -1])
def test_invalid_block_index_offset(block_index_index):
"""
This adds a value in the block index that points to something
that isn't a block
Expand All @@ -670,13 +681,33 @@ def test_invalid_last_block_index():
tree = {"arrays": arrays}

ff = asdf.AsdfFile(tree)
ff.write_to(buff, include_block_index=False)
ff._blocks._internal_blocks[-1]._offset -= 4
ff._blocks.write_block_index(buff, ff)
ff.write_to(buff)

# now overwrite the block index with the first entry
# incorrectly pointing to a non-block offset
buff.seek(0)
bs = buff.read()
block_index_header_start = bs.index(constants.INDEX_HEADER)
block_index_start = block_index_header_start + len(constants.INDEX_HEADER)
block_index = yaml.load(bs[block_index_start:], yaml.SafeLoader)
block_index[block_index_index] -= 4
yaml_version = tuple(int(x) for x in ff.version_map["YAML_VERSION"].split("."))
buff.seek(block_index_start)
yaml.dump(
block_index,
stream=buff,
explicit_start=True,
explicit_end=True,
version=yaml_version,
allow_unicode=True,
encoding="utf-8",
)

buff.seek(0)
with asdf.open(buff) as ff:
assert len(ff._blocks) == 1
for i, a in enumerate(arrays):
assert_array_equal(ff["arrays"][i], a)


def test_unordered_block_index():
Expand All @@ -702,30 +733,6 @@ def test_unordered_block_index():
assert len(ff._blocks) == 1


def test_invalid_block_index_first_block_value():
    """
    This creates a bogus block index where the offset of the first
    block doesn't match what we already know it to be. In this
    case, we should reject the whole block index.
    """
    buff = io.BytesIO()

    # ten distinct 8x8 arrays of constant values 0..9
    arrays = []
    for i in range(10):
        arrays.append(np.ones((8, 8)) * i)

    tree = {"arrays": arrays}

    ff = asdf.AsdfFile(tree)
    # write without an index, then corrupt the first block's recorded
    # offset before appending the index by hand
    ff.write_to(buff, include_block_index=False)
    ff._blocks._internal_blocks[0]._offset -= 4
    ff._blocks.write_block_index(buff, ff)

    buff.seek(0)
    with asdf.open(buff) as ff:
        # the bad index must be discarded; the file should still open and
        # blocks be discovered by scanning (NOTE(review): assumes duplicate
        # arrays deduplicate to a single block — confirm against writer)
        assert len(ff._blocks) == 1


def test_invalid_block_id():
ff = asdf.AsdfFile()
with pytest.raises(ValueError, match=r"Invalid source id .*"):
Expand Down Expand Up @@ -859,7 +866,11 @@ def test_write_to_update_storage_options(tmp_path, all_array_storage, all_array_
if all_array_compression == "bzp2" and compression_kwargs is not None:
compression_kwargs = {"compresslevel": 1}

def assert_result(ff, arr):
def assert_result(ff):
if "array" not in ff:
# this was called from _write_to while making an external block
# so don't check the result
return
if all_array_storage == "external":
assert "test0000.asdf" in os.listdir(tmp_path)
else:
Expand All @@ -868,10 +879,12 @@ def assert_result(ff, arr):
assert len(ff._blocks._internal_blocks) == 1
else:
assert len(ff._blocks._internal_blocks) == 0
blk = ff._blocks[arr]
blk = ff._blocks[ff["array"]]

target_compression = all_array_compression or None
assert blk._output_compression == target_compression
if target_compression == "input":
target_compression = None
assert blk.output_compression == target_compression

target_compression_kwargs = compression_kwargs or {}
assert blk._output_compression_kwargs == target_compression_kwargs
Expand All @@ -881,14 +894,28 @@ def assert_result(ff, arr):
fn = tmp_path / "test.asdf"

ff1 = asdf.AsdfFile(tree)

# as a new AsdfFile is used for write_to and we want
# to check blocks here, we patch _write_to to allow us
# to inspect the blocks in the new AsdfFile before
# it falls out of scope
original = asdf.AsdfFile._write_to

def patched(self, *args, **kwargs):
original(self, *args, **kwargs)
assert_result(self)

asdf.AsdfFile._write_to = patched

# first check write_to
ff1.write_to(
fn,
all_array_storage=all_array_storage,
all_array_compression=all_array_compression,
compression_kwargs=compression_kwargs,
)
assert_result(ff1, arr1)

asdf.AsdfFile._write_to = original

# then reuse the file to check update
with asdf.open(fn, mode="rw") as ff2:
Expand All @@ -899,7 +926,7 @@ def assert_result(ff, arr):
all_array_compression=all_array_compression,
compression_kwargs=compression_kwargs,
)
assert_result(ff2, arr2)
assert_result(ff2)


def test_block_key():
Expand Down Expand Up @@ -971,3 +998,43 @@ def __call__(self):
mode="r",
) as f:
rb.read(f, past_magic=False)


def test_remove_blocks(tmp_path):
    """Test that writing to a new file (via write_to) or updating in place
    (via update) after removing an array drops the now-unused block."""
    fn1 = tmp_path / "test.asdf"
    fn2 = tmp_path / "test2.asdf"

    # two arrays -> two internal blocks on the initial write
    tree = {"a": np.zeros(3), "b": np.ones(1)}
    af = asdf.AsdfFile(tree)
    af.write_to(fn1)

    # remove one array, write to a second file
    with asdf.open(fn1, mode="rw") as af:
        assert len(af._blocks._internal_blocks) == 2
        af["a"] = None
        af.write_to(fn2)

    # remove one array, update the original file in place
    with asdf.open(fn1, mode="rw") as af:
        assert len(af._blocks._internal_blocks) == 2
        af["a"] = None
        af.update()

    # both paths should leave exactly one block on disk
    for fn in (fn1, fn2):
        with asdf.open(fn) as af:
            assert len(af._blocks._internal_blocks) == 1


def test_write_to_before_update(tmp_path):
    """Calling write_to and then update on the same open file must succeed.

    Regression test for https://github.com/asdf-format/asdf/issues/1505
    """
    original_path = tmp_path / "test1.asdf"
    copy_path = tmp_path / "test2.asdf"

    af = asdf.AsdfFile({"a": np.zeros(3), "b": np.ones(3)})
    af.write_to(original_path)

    with asdf.open(original_path, mode="rw") as handle:
        handle["a"] = None
        # write_to a second file, then update the first in place
        handle.write_to(copy_path)
        handle.update()
4 changes: 2 additions & 2 deletions asdf/_tests/test_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def test_open_asdf_extensions(tmp_path):

def test_serialization_context():
extension_manager = ExtensionManager([])
context = SerializationContext("1.4.0", extension_manager, "file://test.asdf")
context = SerializationContext("1.4.0", extension_manager, "file://test.asdf", None)
assert context.version == "1.4.0"
assert context.extension_manager is extension_manager
assert context._extensions_used == set()
Expand All @@ -215,7 +215,7 @@ def test_serialization_context():
context._mark_extension_used(object())

with pytest.raises(ValueError, match=r"ASDF Standard version .* is not supported by asdf==.*"):
SerializationContext("0.5.4", extension_manager, None)
SerializationContext("0.5.4", extension_manager, None, None)


def test_reading_extension_metadata():
Expand Down
Loading