Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Exp backoff for downloads. #9671

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/tutorials/basic/data.md
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ Let's download sample images that we can work with.


```python
fname = mx.test_utils.download(url='http://data.mxnet.io/data/test_images.tar.gz', dirname='data', overwrite=False)
fname = mx.utils.download(url='http://data.mxnet.io/data/test_images.tar.gz', dirname='data', overwrite=False)
tar = tarfile.open(fname)
tar.extractall(path='./data')
tar.close()
Expand Down Expand Up @@ -380,7 +380,7 @@ Download and unzip


```python
fname = mx.test_utils.download(url='http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz', dirname='data', overwrite=False)
fname = mx.utils.download(url='http://www.vision.caltech.edu/Image_Datasets/Caltech101/101_ObjectCategories.tar.gz', dirname='data', overwrite=False)
tar = tarfile.open(fname)
tar.extractall(path='./data')
tar.close()
Expand Down
2 changes: 1 addition & 1 deletion docs/tutorials/basic/module.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ logging.getLogger().setLevel(logging.INFO)
import mxnet as mx
import numpy as np

fname = mx.test_utils.download('http://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data')
fname = mx.utils.download('http://archive.ics.uci.edu/ml/machine-learning-databases/letter-recognition/letter-recognition.data')
data = np.genfromtxt(fname, delimiter=',')[:,1:]
label = np.array([ord(l.split(',')[0])-ord('A') for l in open(fname, 'r')])

Expand Down
8 changes: 4 additions & 4 deletions docs/tutorials/python/predict_image.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ a text file for the labels.
```python
import mxnet as mx
path='http://data.mxnet.io/models/imagenet-11k/'
[mx.test_utils.download(path+'resnet-152/resnet-152-symbol.json'),
mx.test_utils.download(path+'resnet-152/resnet-152-0000.params'),
mx.test_utils.download(path+'synset.txt')]
[mx.utils.download(path+'resnet-152/resnet-152-symbol.json'),
mx.utils.download(path+'resnet-152/resnet-152-0000.params'),
mx.utils.download(path+'synset.txt')]
```

Next, we load the downloaded model. *Note:* If GPU is available, we can replace all
Expand Down Expand Up @@ -60,7 +60,7 @@ Batch = namedtuple('Batch', ['data'])

def get_image(url, show=False):
# download and show the image
fname = mx.test_utils.download(url)
fname = mx.utils.download(url)
img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB)
if img is None:
return None
Expand Down
2 changes: 1 addition & 1 deletion example/gluon/style_transfer/dataset/download_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import os, zipfile
import mxnet
from mxnet.test_utils import download
from mxnet.utils import download

def unzip_file(filename, outpath):
fh = open(filename, 'rb')
Expand Down
2 changes: 1 addition & 1 deletion example/gluon/style_transfer/models/download_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.

from mxnet.test_utils import download
from mxnet.utils import download

download('https://apache-mxnet.s3-accelerate.amazonaws.com/gluon/models/21styles-32f7205c.params', 'models/21styles.params')

2 changes: 1 addition & 1 deletion example/gluon/super_resolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from mxnet import gluon
from mxnet.gluon import nn
from mxnet import autograd as ag
from mxnet.test_utils import download
from mxnet.utils import download
from mxnet.image import CenterCropAug, ResizeAug
from mxnet.io import PrefetchingIter

Expand Down
2 changes: 1 addition & 1 deletion example/gluon/tree_lstm/scripts/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import shutil
import zipfile
import gzip
from mxnet.test_utils import download
from mxnet.utils import download

def unzip(filepath):
print("Extracting: " + filepath)
Expand Down
2 changes: 1 addition & 1 deletion example/image-classification/test_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

VAL_DATA='data/val-5k-256.rec'
def download_data():
return mx.test_utils.download(
return mx.utils.download(
'http://data.mxnet.io/data/val-5k-256.rec', VAL_DATA)

def test_imagenet1k_resnet(**kwargs):
Expand Down
4 changes: 2 additions & 2 deletions example/mxnet_adversarial_vae/convert_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import scipy.io
import scipy.misc
import numpy as np
from mxnet.test_utils import *
from mxnet.utils import download

logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)

Expand Down Expand Up @@ -95,7 +95,7 @@ def parse_args():

def main():
args = parse_args()
download(DEFAULT_DATASET_URL, fname=args.dataset, dirname=args.save_path, overwrite=False)
download(DEFAULT_DATASET_URL, path=args.save_path + args.dataset, overwrite=False)
convert_mat_to_images(args)

if __name__ == '__main__':
Expand Down
1 change: 1 addition & 0 deletions python/mxnet/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@
from . import image as img

from . import test_utils
from . import utils

from . import rnn

Expand Down
2 changes: 1 addition & 1 deletion python/mxnet/gluon/contrib/data/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

from . import _constants as C
from ...data import dataset
from ...utils import download, check_sha1
from ....utils import download, check_sha1
from ....contrib import text
from .... import nd

Expand Down
2 changes: 1 addition & 1 deletion python/mxnet/gluon/data/vision/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
import numpy as np

from .. import dataset
from ...utils import download, check_sha1
from .... utils import download, check_sha1
from .... import nd, image, recordio


Expand Down
2 changes: 1 addition & 1 deletion python/mxnet/gluon/model_zoo/model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import os
import zipfile

from ..utils import download, check_sha1
from ... utils import download, check_sha1

_model_sha1 = {name: checksum for checksum, name in [
('44335d1f0046b328243b32a26a4fbd62d9057b45', 'alexnet'),
Expand Down
104 changes: 9 additions & 95 deletions python/mxnet/gluon/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,14 @@
# coding: utf-8
# pylint: disable=
"""Parallelization utility optimizer."""
__all__ = ['split_data', 'split_and_load', 'clip_global_norm',
'check_sha1', 'download']
__all__ = ['split_data', 'split_and_load', 'clip_global_norm']

import os
import hashlib
import warnings
try:
import requests
except ImportError:
class requests_failed_to_import(object):
pass
requests = requests_failed_to_import

import numpy as np

from .. import ndarray


def split_data(data, num_slice, batch_axis=0, even_split=True):
"""Splits an NDArray into `num_slice` slices along `batch_axis`.
Usually used for data parallelism where each slices is sent
Expand All @@ -61,13 +52,13 @@ def split_data(data, num_slice, batch_axis=0, even_split=True):
size = data.shape[batch_axis]
if size < num_slice:
raise ValueError(
"Too many slices for data with shape %s. Arguments are " \
"num_slice=%d and batch_axis=%d."%(str(data.shape), num_slice, batch_axis))
"Too many slices for data with shape %s. Arguments are "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't the backslash required?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aren't triple quotes useful in this case? """blah blah"""

Copy link
Contributor Author

@KellenSunderland KellenSunderland Feb 2, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think triple quotes would be the equivalent of

"Too many slices for data with shape %s. Arguments are \n" \
"num_slice=%d and batch_axis=%d."%(str(data.shape), num_slice, batch_axis))

but I'll try a few options.

"num_slice=%d and batch_axis=%d." % (str(data.shape), num_slice, batch_axis))
if even_split and size % num_slice != 0:
raise ValueError(
"data with shape %s cannot be evenly split into %d slices along axis %d. " \
"Use a batch size that's multiple of %d or set even_split=False to allow " \
"uneven partitioning of data."%(
"data with shape %s cannot be evenly split into %d slices along axis %d. "
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Backslashes

"Use a batch size that's multiple of %d or set even_split=False to allow "
"uneven partitioning of data." % (
str(data.shape), num_slice, batch_axis, num_slice))

step = size // num_slice
Expand Down Expand Up @@ -131,90 +122,13 @@ def clip_global_norm(arrays, max_norm):
return total_norm


def _indent(s_, numSpaces):
def _indent(s_, num_spaces):
"""Indent string
"""
s = s_.split('\n')
if len(s) == 1:
return s_
first = s.pop(0)
s = [first] + [(numSpaces * ' ') + line for line in s]
s = [first] + [(num_spaces * ' ') + line for line in s]
s = '\n'.join(s)
return s


def check_sha1(filename, sha1_hash):
    """Check whether the sha1 hash of the file content matches the expected hash.

    Parameters
    ----------
    filename : str
        Path to the file.
    sha1_hash : str
        Expected sha1 hash in hexadecimal digits. Letter case is ignored, so
        upper-case digests are accepted as well.

    Returns
    -------
    bool
        Whether the file content matches the expected hash.
    """
    sha1 = hashlib.sha1()
    with open(filename, 'rb') as f:
        # Read in 1 MiB chunks so the whole file is never held in memory.
        for data in iter(lambda: f.read(1048576), b''):
            sha1.update(data)

    # hexdigest() is always lower-case; normalise the expected value so an
    # upper-case digest does not produce a spurious mismatch.
    return sha1.hexdigest() == sha1_hash.lower()


def download(url, path=None, overwrite=False, sha1_hash=None):
    """Download a given URL

    Parameters
    ----------
    url : str
        URL to download
    path : str, optional
        Destination path to store downloaded file. By default stores to the
        current directory with same name as in url. A leading ``~`` is
        expanded to the user's home directory.
    overwrite : bool, optional
        Whether to overwrite destination file if already exists.
    sha1_hash : str, optional
        Expected sha1 hash in hexadecimal digits. Will ignore existing file when hash is specified
        but doesn't match.

    Returns
    -------
    str
        The file path of the downloaded file.

    Raises
    ------
    RuntimeError
        If the server does not answer with HTTP status 200.
    UserWarning
        If `sha1_hash` is given and the downloaded content does not match it.
    """
    if path is None:
        fname = url.split('/')[-1]
    else:
        # Expand '~' once, up front, so the directory that gets created and
        # the file that gets opened refer to the same location.
        path = os.path.expanduser(path)
        if os.path.isdir(path):
            fname = os.path.join(path, url.split('/')[-1])
        else:
            fname = path

    if overwrite or not os.path.exists(fname) or (sha1_hash and not check_sha1(fname, sha1_hash)):
        dirname = os.path.dirname(os.path.abspath(fname))
        if not os.path.exists(dirname):
            os.makedirs(dirname)

        print('Downloading %s from %s...'%(fname, url))
        r = requests.get(url, stream=True)
        try:
            if r.status_code != 200:
                raise RuntimeError("Failed downloading url %s"%url)
            try:
                with open(fname, 'wb') as f:
                    for chunk in r.iter_content(chunk_size=1024):
                        if chunk: # filter out keep-alive new chunks
                            f.write(chunk)
            except BaseException:
                # A truncated file would be taken for a complete download by
                # the next call (when no sha1_hash is given), so remove it
                # before propagating the error or interrupt.
                if os.path.exists(fname):
                    os.remove(fname)
                raise
        finally:
            # Release the streamed connection even when the transfer failed.
            r.close()

        if sha1_hash and not check_sha1(fname, sha1_hash):
            raise UserWarning('File {} is downloaded but the content hash does not match. '
                              'The repo may be outdated or download may be incomplete. '
                              'If the "repo_url" is overridden, consider switching to '
                              'the default repo.'.format(fname))

    return fname
64 changes: 2 additions & 62 deletions python/mxnet/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,6 @@
import subprocess
import sys
import os
import errno
import logging
import bz2
import zipfile
from contextlib import contextmanager
Expand All @@ -38,11 +36,6 @@
import scipy.stats as ss
except ImportError:
ss = None
try:
import requests
except ImportError:
# in rare cases requests may be not installed
pass
import mxnet as mx
from .context import Context
from .ndarray.ndarray import _STORAGE_TYPE_STR_TO_ID
Expand Down Expand Up @@ -1364,59 +1357,6 @@ def list_gpus():
pass
return range(len([i for i in re.split('\n') if 'GPU' in i]))

def download(url, fname=None, dirname=None, overwrite=False):
    """Download a given URL

    Parameters
    ----------

    url : str
        URL to download
    fname : str, optional
        filename of the downloaded file. If None, then will guess a filename
        from url.
    dirname : str, optional
        output directory name. If None, then guess from fname or use the current
        directory
    overwrite : bool, optional
        Default is false, which means skipping download if the local file
        exists. If true, then download the url to overwrite the local file if
        exists.

    Returns
    -------
    str
        The filename of the downloaded file

    Raises
    ------
    OSError
        If the output directory cannot be created.
    AssertionError
        If the server does not answer with HTTP status 200.
    """
    if fname is None:
        fname = url.split('/')[-1]

    if dirname is None:
        dirname = os.path.dirname(fname)
    else:
        fname = os.path.join(dirname, fname)
    if dirname != "":
        if not os.path.exists(dirname):
            try:
                logging.info('create directory %s', dirname)
                os.makedirs(dirname)
            except OSError as exc:
                # EEXIST means the directory appeared between the existence
                # check and makedirs; that is not an error.
                if exc.errno != errno.EEXIST:
                    raise OSError('failed to create ' + dirname)

    if not overwrite and os.path.exists(fname):
        logging.info("%s exists, skipping download", fname)
        return fname

    r = requests.get(url, stream=True)
    try:
        # Raised explicitly rather than via `assert` so the check still runs
        # under `python -O`; the exception type seen by callers is unchanged.
        if r.status_code != 200:
            raise AssertionError("failed to open %s" % url)
        try:
            with open(fname, 'wb') as f:
                for chunk in r.iter_content(chunk_size=1024):
                    if chunk: # filter out keep-alive new chunks
                        f.write(chunk)
        except BaseException:
            # A truncated file would make the next call skip the download,
            # so remove it before propagating the error or interrupt.
            if os.path.exists(fname):
                os.remove(fname)
            raise
    finally:
        # Release the streamed connection even when the transfer failed.
        r.close()
    logging.info("downloaded %s into %s successfully", url, fname)
    return fname

def get_mnist():
"""Download and load the MNIST dataset

Expand All @@ -1426,10 +1366,10 @@ def get_mnist():
A dict containing the data
"""
def read_data(label_url, image_url):
with gzip.open(mx.test_utils.download(label_url)) as flbl:
with gzip.open(mx.utils.download(label_url)) as flbl:
struct.unpack(">II", flbl.read(8))
label = np.fromstring(flbl.read(), dtype=np.int8)
with gzip.open(mx.test_utils.download(image_url), 'rb') as fimg:
with gzip.open(mx.utils.download(image_url), 'rb') as fimg:
_, _, rows, cols = struct.unpack(">IIII", fimg.read(16))
image = np.fromstring(fimg.read(), dtype=np.uint8).reshape(len(label), rows, cols)
image = image.reshape(image.shape[0], 1, 28, 28).astype(np.float32)/255
Expand Down
Loading