Source code for datedown.wget

# The MIT License (MIT)
#
# Copyright (c) 2016,Christoph Paulik
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

'''
Interface to wget command line utility.
'''

import subprocess
import os


[docs]def download(url, target, username=None, password=None, cookie_file=None, recursive=False, filetypes=None): """ Download a url using wget. Retry as often as necessary and store cookies if authentification is necessary. Parameters ---------- url: string URL to download target: string path on local filesystem where to store the downloaded file username: string, optional username password: string, optional password cookie_file: string, optional file where to store cookies recursive: boolean, optional If set then no exact filenames can be given. The data will then be downloaded recursively and stored in the target folder. filetypes: list, optional list of file extension to download, any others will no be downloaded """ cmd_list = ['wget', url, '--retry-connrefused'] if recursive: cmd_list = cmd_list + ['-P', target] cmd_list = cmd_list + ['-nd'] cmd_list = cmd_list + ['-np'] cmd_list = cmd_list + ['-r'] else: cmd_list = cmd_list + ['-O', target] if filetypes is not None: cmd_list = cmd_list + ['-A ' + ','.join(filetypes)] target_path = os.path.split(target)[0] if not os.path.exists(target_path): os.makedirs(target_path) if username is not None: cmd_list.append('--user={}'.format(username)) if password is not None: cmd_list.append('--password={}'.format(password)) if cookie_file is not None: cmd_list = cmd_list + [ '--load-cookies', cookie_file, '--save-cookies', cookie_file, '--keep-session-cookies'] subprocess.call(" ".join(cmd_list), shell=True)
[docs]def map_download(url_target, username=None, password=None, cookie_file=None, recursive=False, filetypes=None): """ variant of the function that only takes one argument. Otherwise map_async of the multiprocessing module can not work with the function. Parameters ---------- url_target: list first element the url, second the target string username: string, optional username password: string, optional password cookie_file: string, optional file where to store cookies recursive: boolean, optional If set then no exact filenames can be given. The data will then be downloaded recursively and stored in the target folder. filetypes: list, optional list of file extension to download, any others will no be downloaded """ download(url_target[0], url_target[1], username=username, password=password, cookie_file=cookie_file, recursive=recursive, filetypes=filetypes)