Download images with correct filename #15

Open
wants to merge 2 commits into master
5 changes: 3 additions & 2 deletions downloadutils.py
@@ -12,6 +12,7 @@
p.add_argument('--downloadImages', help='Should download images', action='store_true', default=False)
p.add_argument('--downloadOriginalImages', help='Should download original images', action='store_true', default=False)
p.add_argument('--downloadBoundingBox', help='Should download bounding box annotation files', action='store_true', default=False)
p.add_argument('--replaceIfExists', help='Download the image even if it already exists in the directory, replacing the existing file', action='store_true', default=False)
# p.add_argument('--jobs', '-j', type=int, default=1, help='Number of parallel threads to download')
# p.add_argument('--timeout', '-t', type=int, default=10, help='Timeout per image in seconds')
# p.add_argument('--retry', '-r', type=int, default=10, help='Max count of retry for each image')
@@ -31,8 +32,8 @@

if args.downloadImages is True:
for id in args.wnid:
list = downloader.getImageURLsOfWnid(id)
downloader.downloadImagesByURLs(id, list)
mapping = downloader.getImageURLsMappingOfWnid(id)
downloader.downloadImagesByURLsMapping(id, mapping, replace_if_exists=args.replaceIfExists)

if args.downloadBoundingBox is True:
for id in args.wnid:
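For context, a minimal usage sketch of the new mapping-based flow introduced above (not part of the diff; the wnid value is illustrative and this assumes libs is importable as a package):

# Hypothetical standalone usage (Python 2, matching the codebase).
from libs.imagedownloader import ImageNetDownloader

downloader = ImageNetDownloader()
wnid = 'n02084071'  # example synset id; any valid wnid works
mapping = downloader.getImageURLsMappingOfWnid(wnid)
# Files already on disk are skipped unless replace_if_exists=True.
downloader.downloadImagesByURLsMapping(wnid, mapping, replace_if_exists=False)
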
53 changes: 50 additions & 3 deletions libs/imagedownloader.py
@@ -15,7 +15,7 @@ class ImageNetDownloader:
def __init__(self):
self.host = 'http://www.image-net.org'

def download_file(self, url, desc=None, renamed_file=None):
def download_file(self, url, desc=None, renamed_file=None, replace_if_exists=False):
u = urllib2.urlopen(url)

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
@@ -29,6 +29,13 @@ def download_file(self, url, desc=None, renamed_file=None):
if desc:
filename = os.path.join(desc, filename)

# If the file should not be replaced when it is already present,
# check whether it already exists; if so, just return the filename
if replace_if_exists is False:
if os.path.isfile(filename):
print("Image already downloaded: {}".format(filename))
return filename

with open(filename, 'wb') as f:
meta = u.info()
meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
@@ -87,24 +94,64 @@ def getImageURLsOfWnid(self, wnid):

return imageUrls

def getImageURLsMappingOfWnid(self, wnid):
url = 'http://www.image-net.org/api/text/imagenet.synset.geturls.getmapping?wnid=' + str(wnid)
f = urllib.urlopen(url)
contents = f.read().split('\n')
imageUrlsMapping = []

for each_line in contents:
# Remove carriage returns and surrounding whitespace
each_line = each_line.replace('\r', '').strip()
if each_line:
# Parse the line into a filename and an image URL
each_line_split = each_line.split(' ')

if len(each_line_split) != 2:
continue

filename = each_line_split[0]
imageUrl = each_line_split[1]

imageUrlsMapping.append({
'filename': filename,
'url': imageUrl
})

return imageUrlsMapping

def mkWnidDir(self, wnid):
if not os.path.exists(wnid):
os.mkdir(wnid)
return os.path.abspath(wnid)

def downloadImagesByURLs(self, wnid, imageUrls):
def downloadImagesByURLs(self, wnid, imageUrls, replace_if_exists=False):
# save to the dir e.g: n005555_urlimages/
wnid_urlimages_dir = os.path.join(self.mkWnidDir(wnid), str(wnid) + '_urlimages')
if not os.path.exists(wnid_urlimages_dir):
os.mkdir(wnid_urlimages_dir)

for url in imageUrls:
try:
self.download_file(url, wnid_urlimages_dir)
self.download_file(url, wnid_urlimages_dir, replace_if_exists=replace_if_exists)
except Exception, error:
print 'Fail to download : ' + url
print str(error)

def downloadImagesByURLsMapping(self, wnid, imageUrlsMapping, replace_if_exists=False):
# save to the dir e.g: n005555_urlimages/
wnid_urlimages_dir = os.path.join(self.mkWnidDir(wnid), str(wnid) + '_urlimages')
if not os.path.exists(wnid_urlimages_dir):
os.mkdir(wnid_urlimages_dir)

for imageInfo in imageUrlsMapping:
try:
self.download_file(imageInfo['url'], wnid_urlimages_dir, imageInfo['filename']+'.JPEG', replace_if_exists=replace_if_exists)
except Exception, error:
print 'Fail to download : ' + imageInfo['url']
print str(error)


def downloadOriginalImages(self, wnid, username, accesskey):
download_url = 'http://www.image-net.org/download/synset?wnid=%s&username=%s&accesskey=%s&release=latest&src=stanford' % (wnid, username, accesskey)
try:
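To illustrate what getImageURLsMappingOfWnid parses, here is a small sketch assuming (as the code above implies, not confirmed elsewhere) that the geturls.getmapping endpoint returns one "<filename> <url>" pair per line:

# Illustrative only; the filenames and URLs below are made up.
sample_response = ('n02084071_1234 http://example.com/dog1.jpg\r\n'
                   'n02084071_5678 http://example.com/dog2.jpg\r\n')

mapping = []
for each_line in sample_response.split('\n'):
    each_line = each_line.replace('\r', '').strip()
    parts = each_line.split(' ')
    if len(parts) == 2:
        # Same parsing as getImageURLsMappingOfWnid: the first token is the
        # ImageNet filename, the second is the image URL.
        mapping.append({'filename': parts[0], 'url': parts[1]})

# mapping == [{'filename': 'n02084071_1234', 'url': 'http://example.com/dog1.jpg'},
#             {'filename': 'n02084071_5678', 'url': 'http://example.com/dog2.jpg'}]

downloadImagesByURLsMapping then saves each entry as <filename>.JPEG inside the <wnid>_urlimages/ directory, which is what gives the downloaded images their correct ImageNet filenames.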