Download images with correct filename #15

Open
wants to merge 2 commits into master
5 changes: 3 additions & 2 deletions downloadutils.py
@@ -12,6 +12,7 @@
p.add_argument('--downloadImages', help='Should download images', action='store_true', default=False)
p.add_argument('--downloadOriginalImages', help='Should download original images', action='store_true', default=False)
p.add_argument('--downloadBoundingBox', help='Should download bounding box annotation files', action='store_true', default=False)
p.add_argument('--replaceIfExists', help='Download the image even if it already exists in the directory, replacing the existing file', action='store_true', default=False)
# p.add_argument('--jobs', '-j', type=int, default=1, help='Number of parallel threads to download')
# p.add_argument('--timeout', '-t', type=int, default=10, help='Timeout per image in seconds')
# p.add_argument('--retry', '-r', type=int, default=10, help='Max count of retry for each image')
@@ -31,8 +32,8 @@

if args.downloadImages is True:
for id in args.wnid:
list = downloader.getImageURLsOfWnid(id)
downloader.downloadImagesByURLs(id, list)
mapping = downloader.getImageURLsMappingOfWnid(id)
downloader.downloadImagesByURLsMapping(id, mapping, replace_if_exists=args.replaceIfExists)

if args.downloadBoundingBox is True:
for id in args.wnid:
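For context, a minimal usage sketch of the new mapping-based flow introduced above (not part of the diff; the wnid value is illustrative and this assumes libs is importable as a package):

# Hypothetical standalone usage (Python 2, matching the codebase).
from libs.imagedownloader import ImageNetDownloader

downloader = ImageNetDownloader()
wnid = 'n02084071'  # example synset id; any valid wnid works
mapping = downloader.getImageURLsMappingOfWnid(wnid)
# Files already on disk are skipped unless replace_if_exists=True.
downloader.downloadImagesByURLsMapping(wnid, mapping, replace_if_exists=False)
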
53 changes: 50 additions & 3 deletions libs/imagedownloader.py
@@ -15,7 +15,7 @@ class ImageNetDownloader:
def __init__(self):
self.host = 'http://www.image-net.org'

def download_file(self, url, desc=None, renamed_file=None):
def download_file(self, url, desc=None, renamed_file=None, replace_if_exists=False):
u = urllib2.urlopen(url)

scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
@@ -29,6 +29,13 @@ def download_file(self, url, desc=None, renamed_file=None):
if desc:
filename = os.path.join(desc, filename)

# If the file should not be replaced when it is already present,
# check whether it already exists; if so, just return the filename
if replace_if_exists is False:
if os.path.isfile(filename):
print("Image already downloaded: {}".format(filename))
return filename

with open(filename, 'wb') as f:
meta = u.info()
meta_func = meta.getheaders if hasattr(meta, 'getheaders') else meta.get_all
@@ -87,24 +94,64 @@ def getImageURLsOfWnid(self, wnid):

return imageUrls

def getImageURLsMappingOfWnid(self, wnid):
url = 'http://www.image-net.org/api/text/imagenet.synset.geturls.getmapping?wnid=' + str(wnid)
f = urllib.urlopen(url)
contents = f.read().split('\n')
imageUrlsMapping = []

for each_line in contents:
# Remove carriage returns and surrounding whitespace
each_line = each_line.replace('\r', '').strip()
if each_line:
# Parse the line into a filename and an image URL
each_line_split = each_line.split(' ')

if len(each_line_split) != 2:
continue

filename = each_line_split[0]
imageUrl = each_line_split[1]

imageUrlsMapping.append({
'filename': filename,
'url': imageUrl
})

return imageUrlsMapping

def mkWnidDir(self, wnid):
if not os.path.exists(wnid):
os.mkdir(wnid)
return os.path.abspath(wnid)

def downloadImagesByURLs(self, wnid, imageUrls):
def downloadImagesByURLs(self, wnid, imageUrls, replace_if_exists=False):
# save to the dir e.g: n005555_urlimages/
wnid_urlimages_dir = os.path.join(self.mkWnidDir(wnid), str(wnid) + '_urlimages')
if not os.path.exists(wnid_urlimages_dir):
os.mkdir(wnid_urlimages_dir)

for url in imageUrls:
try:
self.download_file(url, wnid_urlimages_dir)
self.download_file(url, wnid_urlimages_dir, replace_if_exists=replace_if_exists)
except Exception, error:
print 'Fail to download : ' + url
print str(error)

def downloadImagesByURLsMapping(self, wnid, imageUrlsMapping, replace_if_exists=False):
# save to the dir e.g: n005555_urlimages/
wnid_urlimages_dir = os.path.join(self.mkWnidDir(wnid), str(wnid) + '_urlimages')
if not os.path.exists(wnid_urlimages_dir):
os.mkdir(wnid_urlimages_dir)

for imageInfo in imageUrlsMapping:
try:
self.download_file(imageInfo['url'], wnid_urlimages_dir, imageInfo['filename']+'.JPEG', replace_if_exists=replace_if_exists)
except Exception, error:
print 'Fail to download : ' + imageInfo['url']
print str(error)


def downloadOriginalImages(self, wnid, username, accesskey):
download_url = 'http://www.image-net.org/download/synset?wnid=%s&username=%s&accesskey=%s&release=latest&src=stanford' % (wnid, username, accesskey)
try:
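To illustrate what getImageURLsMappingOfWnid parses, here is a small sketch assuming (as the code above implies, not confirmed elsewhere) that the geturls.getmapping endpoint returns one "<filename> <url>" pair per line:

# Illustrative only; the filenames and URLs below are made up.
sample_response = ('n02084071_1234 http://example.com/dog1.jpg\r\n'
                   'n02084071_5678 http://example.com/dog2.jpg\r\n')

mapping = []
for each_line in sample_response.split('\n'):
    each_line = each_line.replace('\r', '').strip()
    parts = each_line.split(' ')
    if len(parts) == 2:
        # Same parsing as getImageURLsMappingOfWnid: the first token is the
        # ImageNet filename, the second is the image URL.
        mapping.append({'filename': parts[0], 'url': parts[1]})

# mapping == [{'filename': 'n02084071_1234', 'url': 'http://example.com/dog1.jpg'},
#             {'filename': 'n02084071_5678', 'url': 'http://example.com/dog2.jpg'}]

downloadImagesByURLsMapping then saves each entry as <filename>.JPEG inside the <wnid>_urlimages/ directory, which is what gives the downloaded images their correct ImageNet filenames.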