Source code for materials_commons.cli.subcommands.down

import argparse
import io
import os
import requests
import sys
import time

import materials_commons.api as mcapi
import materials_commons.cli.exceptions as cliexcept
import materials_commons.cli.functions as clifuncs
import materials_commons.cli.globus as cliglobus
import materials_commons.cli.tree_functions as treefuncs
import materials_commons.cli.file_functions as filefuncs
from materials_commons.cli.treedb import LocalTree, RemoteTree

def _get_current_globus_download(pconfig, proj, verbose=True):
    all_downloads = {download.id:download for download in proj.remote.get_all_globus_download_requests(proj.id)}

    globus_download_id = None
    if pconfig.globus_download_id:
        globus_download_id = pconfig.globus_download_id
        if globus_download_id not in all_downloads:
            if verbose:
                print("Current globus download (name=?, id=" + str(globus_download_id) + ") no longer exists.")
            globus_download_id = None
    if globus_download_id is None:
        name = clifuncs.random_name()
        download = proj.remote.create_globus_download_request(proj.id, name)
        if verbose:
            print("Created new globus download (name=" + download.name + ", id=" + str(download.id) + ").")
        pconfig.globus_download_id = download.id
        pconfig.save()
    else:
        download = all_downloads[globus_download_id]
        if verbose:
            print("Using current globus download (name=" + download.name + ", id=" + str(download.id) + ").")

    return download

def _check_download_file(proj_id, file_id, local_path, remote, working_dir,
                         force=False):
    """Prompt user for confirmation before overwriting an existing local file

    Arguments
    ---------
    proj_id: int, Project ID
    file_id: int, ID of file to download
    local_path: str, Location to download file. Intermediate directories are created if necessary.
    remote: mcapi.Client, Materials Commons Client
    working_dir (str): Current working directory, used for finding relative
        paths and printing messages.
    force: bool (optional, default=False) If True, force overwrite existing file without confirmation.

    Returns
    -------
    local_path: str or None, Location of downloaded file or None if not downloaded
    """
    if not os.path.exists(local_path) or force:
        dir = os.path.dirname(local_path)
        if not os.path.isdir(dir):
            os.makedirs(dir)
        remote.download_file(proj_id, file_id, local_path)
        return local_path
    else:
        print("Overwrite '" + os.path.relpath(local_path, working_dir) + "'?")
        while True:
            ans = input('y/n: ')
            if ans == 'y':
                dir = os.path.dirname(local_path)
                if not os.path.exists(dir):
                    os.makedirs(dir)
                remote.download_file(proj_id, file_id, local_path)
                return local_path
            elif ans == 'n':
                break
    return None

[docs]def standard_download(proj, path, working_dir, force=False, output=None, recursive=False, no_compare=False, localtree=None, remotetree=None): """Download files and directories Arguments --------- proj: mcapi.Project, Project to download from path: str, Materials Commons style path of file or directory to download working_dir (str): Current working directory, used for finding relative paths and printing messages. force: bool (optional, default=False) If True, force overwrite existing files without confirmation. output: str (optional, default=None) Specify a different download location. By default, files are downloaded to the matching location in the local project directory. For example remote file at "/A/B/file.txt" is downloaded to "<proj.local_path>/A/B/file.txt" by default. recursive: bool (optional, default=False) Download directory contents recursively. no_compare: bool (optional, default=False) By default, this function checks local and remote file checksum to avoid downloading files that already exist. If no_compare is True, this check is skipped and all specified files are downloaded, even if an equivalent file already exists locally. localtree: LocalTree object (optional, default=None) A LocalTree object stores local file checksums to avoid unnecessary hashing. Will be used and updated if provided and checksum == True. remotetree: RemoteTree object (optional, default=None) A RemoteTree object stores remote file and directory information to minimize API calls and data transfer. Will be used and updated if provided. Returns ------- success: bool, True if download succeeds, False otherwise """ local_abspath = filefuncs.make_local_abspath(proj.local_path, path) printpath = os.path.relpath(local_abspath, start=working_dir) if output is None: output = local_abspath checksum = True if no_compare: checksum = False files_data, dirs_data, child_data, non_existing = treefuncs.treecompare( proj, [path], checksum=checksum, localtree=localtree, remotetree=remotetree) # if remote file: if path in files_data and files_data[path]['r_type'] == 'file': if files_data[path]['l_type'] == 'directory': print(printpath + ": is local directory and remote file") return False elif 'eq' in files_data[path] and files_data[path]['eq'] and output == local_abspath: print(printpath + ": local is equivalent to remote (skipping)") return True else: try: result_path = _check_download_file(proj.id, files_data[path]['id'], output, proj.remote, working_dir, force=force) except Exception as e: print(printpath + ": " + str(e) + " (skipping)") return False if result_path: if output != local_abspath: print("downloaded:", printpath, "as", os.path.relpath(output, start=working_dir)) else: print("downloaded:", printpath) return True else: return False # if directory: elif path in dirs_data and dirs_data[path]['r_type'] == 'directory': if not recursive: print(printpath + ": is a directory") return False if dirs_data[path]['l_type'] == 'file': print(printpath + ": is local file and remote directory") return False success = True for childpath, record in child_data[path].items(): childoutput = os.path.join(output, os.path.basename(childpath)) success &= standard_download(proj, childpath, working_dir, force=force, output=childoutput, recursive=recursive, no_compare=no_compare, localtree=localtree, remotetree=remotetree) return success else: print(printpath + ": does not exist on remote") return False
[docs]def download_file_as_string(client, project_id, file_id): urlpart = "/projects/" + str(project_id) + "/files/" + str(file_id) + "/download" url = client.base_url + urlpart with requests.get(url, stream=True, verify=False, headers=client.headers) as r: client._handle(r) f = io.BytesIO() for block in r.iter_content(chunk_size=8192): f.write(block) return f.getvalue().decode('utf-8')
[docs]def make_parser(): """Make argparse.ArgumentParser for `mc down`""" mc_down_description = "Download files from Materials Commons" mc_down_usage = """ mc down [-r] [-p] [-o] [-f] [--no-compare] <pathspec> [<pathspec> ...] mc down -p <pathspec> mc down -g [-r] [--no-compare] [--label] <pathspec> [<pathspec> ...]""" parser = argparse.ArgumentParser( description=mc_down_description, usage=mc_down_usage, prog='mc down') parser.add_argument('paths', nargs='*', default=None, help='Files or directories') parser.add_argument('-r', '--recursive', action="store_true", default=False, help='Download directory contents recursively') parser.add_argument('-f', '--force', action="store_true", default=False, help='Force overwrite of existing files') parser.add_argument('-p', '--print', action="store_true", default=False, help='Print file, do not write') parser.add_argument('-o', '--output', nargs=1, default=None, help='Download file name') parser.add_argument('-g', '--globus', action="store_true", default=False, help='Use globus to download files.') parser.add_argument('--label', nargs=1, type=str, help='Globus transfer label to make finding tasks simpler.') parser.add_argument('--no-compare', action="store_true", default=False, help='Download remote without checking if local is equivalent.') return parser
[docs]def down_subcommand(argv, working_dir): """ download files from Materials Commons mc down [-r] [<pathspec> ...] """ parser = make_parser() args = parser.parse_args(argv) pconfig = clifuncs.read_project_config(working_dir) proj = clifuncs.make_local_project(working_dir) paths = treefuncs.clipaths_to_mcpaths(proj.local_path, args.paths, working_dir) localtree = None if not args.no_compare: localtree = LocalTree(proj.local_path) remotetree = None if pconfig.remote_updatetime: remotetree = RemoteTree(proj, pconfig.remote_updatetime) # validate if args.print and len(args.paths) != 1: print("--print option acts on 1 file, received", len(args.paths)) raise cliexcept.MCCLIException("Invalid download request") if args.output and len(args.paths) != 1: print("--output option acts on 1 file or directory, received", len(args.paths)) raise cliexcept.MCCLIException("Invalid download request") if args.output and args.globus: print("--output option is not supported with --globus") raise cliexcept.MCCLIException("Invalid upload request") if args.globus: download = _get_current_globus_download(pconfig, proj) if download.status != 0: # TODO clean up status code / message print("Checking if download is ready.", end='', flush=True) count = 0 while download.status != 0 and count < 5: time.sleep(2) print(".", end='', flush=True) download = _get_current_globus_download(pconfig, proj, verbose=False) count += 1 print("", flush=True) if download.status != 0: print("Current Globus download (name=" + download.name + ", id=" + str(download.id) + ")" + " not ready for downloading. Materials Commons is still preparing the project" + " files for download. For large projects this may take some time.") print("Use `mc globus download` to check when it is ready and try again.") raise cliexcept.MCCLIException("Invalid download request") print("Download is ready.") label = proj.name + "-" + download.name if args.label: label = args.label[0] globus_ops = cliglobus.GlobusOperations() task_id = globus_ops.download_v0(proj, paths, download, working_dir, recursive=args.recursive, label=label, localtree=localtree, remotetree=remotetree, force=args.force) if task_id: print("Globus transfer task initiated.") print("Use `globus task list` to monitor task status.") print("Use `mc globus download` to manage Globus downloads.") print("Multiple transfer tasks may be initiated.") print("When all tasks finish downloading, use `mc globus download --id " + str(download.id) + " --delete` " + "to close the download.") elif args.print: print_file(proj, paths[0], working_dir) else: output=None if args.output: output = os.path.abspath(args.output[0]) for path in paths: standard_download(proj, path, working_dir, force=args.force, output=output, recursive=args.recursive, no_compare=args.no_compare, localtree=localtree, remotetree=remotetree) return