Skip to content

gh-136170: Use earliest zinfo.header_offset as ZipFile.data_offset #136171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions Lib/test/test_zipfile/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3505,12 +3505,28 @@ def test_data_offset_write_with_prefix(self):
fp.write(b"this is a prefix")
with zipfile.ZipFile(fp, "w") as zipfp:
self.assertEqual(zipfp.data_offset, 16)
fp.seek(0)
with zipfile.ZipFile(fp) as zipfp:
self.assertEqual(zipfp.data_offset, 16)

def test_data_offset_write_with_prefix_and_entry(self):
# Round-trip check of ZipFile.data_offset for an archive that holds one
# entry and is preceded by non-zip bytes in the same stream.
with io.BytesIO() as fp:
# 16 bytes of arbitrary data placed ahead of the archive.
fp.write(b"this is a prefix")
with zipfile.ZipFile(fp, "w") as zipfp:
# While writing, data_offset must equal the length of the prefix.
self.assertEqual(zipfp.data_offset, 16)
zipfp.writestr("test.txt", "content")
# Rewind and reopen the same stream without an explicit mode.
# NOTE(review): presumably this happens after the writer's with-block
# has closed, so that the archive is complete -- the indentation is not
# visible here; confirm against the real file.
fp.seek(0)
with zipfile.ZipFile(fp) as zipfp:
# Reading back must report the same offset: the entry was written
# right after the 16-byte prefix.
self.assertEqual(zipfp.data_offset, 16)

def test_data_offset_append_with_bad_zip(self):
# Checks ZipFile.data_offset when mode "a" is used on a stream whose
# existing content (16 bytes of plain text) is not a zip archive.
with io.BytesIO() as fp:
fp.write(b"this is a prefix")
# NOTE(review): presumably mode "a" treats the non-zip content as a
# prefix and starts a new archive after it -- confirm against
# ZipFile.__init__.
with zipfile.ZipFile(fp, "a") as zipfp:
self.assertEqual(zipfp.data_offset, 16)
# No entry is written, so the archive that gets reopened below has no
# members; the reader is still expected to report the prefix length.
fp.seek(0)
with zipfile.ZipFile(fp) as zipfp:
self.assertEqual(zipfp.data_offset, 16)

def test_data_offset_write_no_tell(self):
# The initializer in ZipFile checks if tell raises AttributeError or
Expand Down
12 changes: 8 additions & 4 deletions Lib/zipfile/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1535,10 +1535,6 @@ def _RealGetContents(self):
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat

# store the offset to the beginning of data for the
# .data_offset property
self._data_offset = concat

if self.start_dir < 0:
raise BadZipFile("Bad offset for central directory")
fp.seek(self.start_dir, 0)
Expand Down Expand Up @@ -1594,11 +1590,19 @@ def _RealGetContents(self):
print("total", total)

end_offset = self.start_dir
zinfo = None
for zinfo in reversed(sorted(self.filelist,
key=lambda zinfo: zinfo.header_offset)):
zinfo._end_offset = end_offset
end_offset = zinfo.header_offset

# store the offset to the beginning of data for the
# .data_offset property
if zinfo is None:
self._data_offset = self.start_dir
else:
self._data_offset = zinfo.header_offset

@property
def data_offset(self):
"""The offset to the start of zip data in the file or None if
Expand Down
Loading