Skip to content

Commit dda808e

Browse files
zeryx and lemonez authored
AML-8: Data API support for automatically zipping/unzipping nested directories (#112)
* Added getAsZip and putAsZip functions for both directories and single files, with tests
* Corrected relative path import issue
* Removed unnecessary imports; Python 2.7+ support
* Good docstring (Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com>)
* Good docstring (Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com>)
* Updated docstrings (Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com>)
1 parent 2915fc8 commit dda808e

File tree

5 files changed

+58
-1
lines changed

5 files changed

+58
-1
lines changed

Algorithmia/datafile.py

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from datetime import datetime
88
import os.path
99
import pkgutil
10+
import zipfile
1011

1112
from Algorithmia.util import getParentAndBase
1213
from Algorithmia.data import DataObject, DataObjectType
@@ -50,6 +51,23 @@ def getFile(self, as_path=False):
5051
else:
5152
return open(f.name)
5253

54+
def getAsZip(self):
    """Download and decompress a zip archive, returning the path to its contents.

    Expects the `DataFile` object to contain a data API path pointing to a
    file/directory compressed with a zip-based compression algorithm.

    Returns:
        The path of a newly created temporary directory holding the
        extracted contents when the archive has zero entries or more than
        one entry; otherwise the path of the single extracted entry inside
        that directory.

    Note:
        An archive with exactly one entry is always reported as a file path,
        so a zipped directory that held a single file yields the path to
        that file rather than to the directory.
    """
    local_file_path = self.getFile(as_path=True)
    directory_path = tempfile.mkdtemp()
    with zipfile.ZipFile(local_file_path, 'r') as ziph:
        ziph.extractall(directory_path)
        entries = ziph.namelist()
    # An empty archive previously raised IndexError on namelist()[0];
    # hand back the (empty) extraction directory instead.
    if len(entries) != 1:
        return directory_path
    return os.path.join(directory_path, entries[0])
70+
5371
def getName(self):
    """Return the second element of `getParentAndBase(self.path)`."""
    _parent, base_name = getParentAndBase(self.path)
    return base_name
@@ -145,6 +163,24 @@ def putNumpy(self, array):
145163
else:
146164
raise DataApiError("Attempted to .putNumpy() a file without numpy available, please install numpy.")
147165

166+
def putAsZip(self, path):
    """Zip file/directory and upload to data API location defined by `DataFile` object.

    Accepts either a single file or a directory containing other files and
    directories. Directory members are stored under paths relative to
    `path`; a single file is stored under its base name so the archive does
    not embed the caller's local directory layout.

    Args:
        path: Local path of the file or directory to compress and upload.

    Returns:
        Whatever `self.putFile` returns for the temporary archive.
    """
    # Only the name is needed; close the handle immediately so the archive
    # can be reopened for writing on platforms that lock open files.
    with tempfile.NamedTemporaryFile(delete=False) as handle:
        temp = handle.name
    try:
        with zipfile.ZipFile(temp, 'w') as ziph:
            if os.path.isdir(path):
                for root, _dirs, files in os.walk(path):
                    for filename in files:
                        f_path = os.path.join(root, filename)
                        ziph.write(f_path, os.path.relpath(f_path, path))
            else:
                ziph.write(path, os.path.basename(path))
        # NOTE(review): assumes putFile finishes reading the archive before
        # it returns, so the file can be removed afterwards - confirm.
        return self.putFile(temp)
    finally:
        # delete=False means nothing else cleans this up; previously the
        # temporary archive was left behind on every call.
        os.remove(temp)
183+
148184
def delete(self):
149185
# Delete from data api
150186
result = self.client.deleteHelper(self.url)
@@ -256,7 +292,7 @@ def __del__(self):
256292
filepath = self.local_file.name
257293
self.local_file.close()
258294
if self.cleanup:
259-
os.remove(filepath)
295+
os.remove(filepath)
260296

261297
def readable(self):
    """Report whether this object can be read from; always True."""
    return True

Test/datafile_test.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,5 +151,21 @@ def test_putJson_getJson(self):
151151
self.assertDictEqual(result, payload)
152152
self.assertEqual(str(result), str(payload))
153153

154+
def test_putZipDir_getZipDir(self):
    """Upload a local directory with putAsZip, fetch it with getAsZip, and count the extracted files."""
    source_dir = os.path.join(os.getcwd(), "Test/resources/zip_directory")
    data_uri = "data://.my/empty/datafile.zip"
    df = AdvancedDataFile(self.client, data_uri, cleanup=True)

    # putAsZip is expected to hand back the same data file object.
    upload_result = df.putAsZip(source_dir)
    self.assertEqual(upload_result, df)

    extracted_root = df.getAsZip()
    self.assertTrue(os.path.isdir(extracted_root))

    # Flatten every file name found anywhere under the extracted tree.
    extracted_names = [
        name
        for _, _, names in os.walk(extracted_root)
        for name in names
    ]
    self.assertEqual(len(extracted_names), 3)
168+
169+
154170
if __name__ == '__main__':
155171
unittest.main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"location": "root"}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .build_wait import get_build
2+
from .publish_algo import publish_algo
3+
from .test_algo import test_algo
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"foo": "bar"}

0 commit comments

Comments
 (0)