@@ -708,22 +708,47 @@ def download_files(self, files, savedir=None, cache=True,
708
708
if savedir is None :
709
709
savedir = self .cache_location
710
710
for fileLink in unique (files ):
711
+ log .debug ("Downloading {0} to {1}" .format (fileLink , savedir ))
711
712
try :
712
- log .debug ("Downloading {0} to {1}" .format (fileLink , savedir ))
713
713
check_filename = self ._request ('HEAD' , fileLink , auth = auth ,
714
714
stream = True )
715
715
check_filename .raise_for_status ()
716
- if 'text/html' in check_filename .headers ['Content-Type' ]:
717
- raise ValueError ("Bad query. This can happen if you "
718
- "attempt to download proprietary "
719
- "data when not logged in" )
720
-
721
- filename = self ._request ("GET" , fileLink , save = True ,
722
- savedir = savedir ,
723
- timeout = self .TIMEOUT ,
724
- cache = cache ,
725
- auth = auth ,
726
- continuation = continuation )
716
+ except requests .HTTPError as ex :
717
+ if ex .response .status_code == 401 :
718
+ if skip_unauthorized :
719
+ log .info ("Access denied to {url}. Skipping to"
720
+ " next file" .format (url = fileLink ))
721
+ continue
722
+ else :
723
+ raise (ex )
724
+
725
+ if 'text/html' in check_filename .headers ['Content-Type' ]:
726
+ raise ValueError ("Bad query. This can happen if you "
727
+ "attempt to download proprietary "
728
+ "data when not logged in" )
729
+
730
+ try :
731
+ filename = re .search ("filename=(.*)" ,
732
+ check_filename .headers ['Content-Disposition' ]).groups ()[0 ]
733
+ except KeyError :
734
+ log .info (f"Unable to find filename for { fileLink } "
735
+ "(missing Content-Disposition in header). "
736
+ "Skipping to next file." )
737
+
738
+ if savedir is not None :
739
+ filename = os .path .join (savedir ,
740
+ filename )
741
+
742
+ try :
743
+ self ._download_file (fileLink ,
744
+ filename ,
745
+ timeout = self .TIMEOUT ,
746
+ auth = auth ,
747
+ cache = cache ,
748
+ method = 'GET' ,
749
+ head_safe = True ,
750
+ continuation = continuation )
751
+
727
752
downloaded_files .append (filename )
728
753
except requests .HTTPError as ex :
729
754
if ex .response .status_code == 401 :
@@ -744,12 +769,15 @@ def download_files(self, files, savedir=None, cache=True,
744
769
raise ex
745
770
elif ex .response .status_code == 500 :
746
771
# empirically, this works the second time most of the time...
747
- filename = self ._request ("GET" , fileLink , save = True ,
748
- savedir = savedir ,
749
- timeout = self .TIMEOUT ,
750
- cache = cache ,
751
- auth = auth ,
752
- continuation = continuation )
772
+ self ._download_file (fileLink ,
773
+ filename ,
774
+ timeout = self .TIMEOUT ,
775
+ auth = auth ,
776
+ cache = cache ,
777
+ method = 'GET' ,
778
+ head_safe = True ,
779
+ continuation = continuation )
780
+
753
781
downloaded_files .append (filename )
754
782
else :
755
783
raise ex
0 commit comments