import copy
import json
import os
import requests
import shutil
from sortedcontainers import SortedSet
import time
import materials_commons.api as mcapi
import materials_commons.cli.exceptions as cliexcept
import materials_commons.cli.functions as clifuncs
import materials_commons.cli.file_functions as filefuncs
def clipaths_to_local_abspaths(proj_local_path, clipaths, working_dir):
    """Convert CLI paths input to local absolute paths

    Relative entries of ``clipaths`` are joined onto ``working_dir``; entries
    that are already absolute are passed through unchanged. The paths are not
    normalized and are not checked against ``proj_local_path``.

    Args:
        proj_local_path (str): Path to Materials Commons project. Not used by
            this function; kept so the signature parallels
            ``clipaths_to_mcpaths``.
        clipaths (List of str): Indicates files and directories, either
            absolute paths or relative to ``working_dir``.
        working_dir (str): Absolute directory that relative ``clipaths`` are
            relative to.

    Returns:
        List of str: Local absolute paths, in the same order as ``clipaths``.

    Raises:
        MCCLIException: If ``working_dir`` is not an absolute path.
    """
    if not os.path.isabs(working_dir):
        # Name this function (not clipaths_to_mcpaths) so the error points at
        # the real call site.
        raise cliexcept.MCCLIException(
            "Error in clipaths_to_local_abspaths: working_dir is not absolute")
    return [
        p if os.path.isabs(p) else os.path.join(working_dir, p)
        for p in clipaths
    ]
def clipaths_to_mcpaths(proj_local_path, clipaths, working_dir):
    """Translate command-line paths into Materials Commons style paths

    Each entry is first made absolute (relative entries are joined onto
    ``working_dir``) and then handed to ``filefuncs.make_mcpath`` together
    with ``proj_local_path``.

    Args:
        proj_local_path (str): Path to Materials Commons project
        clipaths (List of str): Files and directories, given either as
            absolute paths or relative to ``working_dir``.
        working_dir (str): Absolute directory that relative entries are
            resolved against.

    Returns:
        List of str: One converted path per input entry, in input order.

    Raises:
        MCCLIException: If ``working_dir`` is not absolute.
        NOTE(review): rejection of paths outside the project, if any, would
        come from ``filefuncs.make_mcpath`` - not visible here, confirm.
    """
    if not os.path.isabs(working_dir):
        raise cliexcept.MCCLIException("Error in clipaths_to_mcpaths: working_dir is not absolute")

    def _convert(clipath):
        # resolve against working_dir only when the entry is relative
        absolute = clipath if os.path.isabs(clipath) else os.path.join(working_dir, clipath)
        return filefuncs.make_mcpath(proj_local_path, absolute)

    return [_convert(entry) for entry in clipaths]
def make_local_abspaths_for_upload(proj_local_path, paths):
    """Expand the project top directory into its children for upload

    Any entry that (after ``os.path.normpath``) equals the project directory
    is replaced by the project's direct children, skipping ``.mc``. All other
    entries are kept exactly as given.

    Args:
        proj_local_path (str): Path to project
        paths (iterable of str): Local absolute paths to filter

    Returns:
        List of str: Local paths, with the project directory (if present)
        replaced by its children other than ``.mc``. Child order follows
        ``os.listdir``.
    """
    expanded = []
    for candidate in paths:
        if os.path.normpath(candidate) != os.path.normpath(proj_local_path):
            expanded.append(candidate)
            continue
        # the project root itself: substitute every child except `.mc`
        expanded.extend(
            os.path.join(os.path.normpath(proj_local_path), entry)
            for entry in os.listdir(proj_local_path)
            if entry != ".mc"
        )
    return expanded
def make_mcpaths_for_upload(proj_local_path, paths):
    """Expand the Materials Commons root path into its children for upload

    An entry equal to ``"/"`` is replaced by ``"/<child>"`` for every direct
    child of the local project directory except ``.mc``. Every other entry is
    kept unchanged.

    Args:
        proj_local_path (str): Path to project
        paths (iterable of str): Materials Commons style paths to filter

    Returns:
        List of str: Materials Commons paths, filtered as described above.
        Child order follows ``os.listdir``.
    """
    kept = []
    for mcpath in paths:
        if mcpath != "/":
            kept.append(mcpath)
        else:
            # root requested: list the local project directory, minus `.mc`
            kept += [
                os.path.join("/", entry)
                for entry in os.listdir(proj_local_path)
                if entry != ".mc"
            ]
    return kept
def standard_upload(proj, paths, working_dir, recursive=False, limit=50, no_compare=False, upload_as=None, localtree=None, remotetree=None):
    """Upload files to Materials Commons

    Args:
        proj (:class:`materials_commons.api.Project`): Project instance with
            proj.local_path indicating local project location
        paths (List of str):
            List of paths to upload. Expects local absolute paths, or paths
            relative to working_dir.
        working_dir (str): Current working directory, used for finding relative
            paths and printing messages.
        recursive (bool): If True, upload directories recursively. Otherwise,
            directories are reported as errors and not uploaded.
        limit (int): The limit in MB on the size of the file allowed to be uploaded.
        no_compare (bool): By default, this function compares local and remote
            file checksums to avoid uploading files that already exist on
            remote. If no_compare is True, this check is skipped and all
            specified files are uploaded. Not consulted when `upload_as` is
            given (no comparison is made in that mode).
        upload_as (str): Materials Commons style path specifying where to
            upload. Requires `len(paths) == 1`.
        localtree (LocalTree): A LocalTree object stores local file checksums
            to avoid unnecessary hashing. Optional, passed to `treecompare`.
        remotetree (RemoteTree): A RemoteTree object stores remote file and
            directory information to minimize API calls and data transfer.
            Optional, will be used and updated if provided.

    Returns:
        (file_results, error_results):

            file_results: dict of path: file
                Successfully uploaded files, plus files skipped because the
                local and remote checksums already match. Keys are local
                absolute paths.
            error_results: dict of path: str
                Error messages for unsuccessful file uploads

    Raises:
        MCCLIException: If `upload_as` is given and `len(paths) != 1`, or if
            the remote parent directory returned by `mkdir` does not have the
            requested path.
    """
    file_results = {}
    error_results = {}

    # check for non-existing paths, paths already uploaded, etc.
    paths_to_upload = []
    if upload_as is not None:
        # upload_as only if 1 input path
        if len(paths) != 1:
            msg = "Upload error: to 'upload as', expected len(paths) == 1"
            raise cliexcept.MCCLIException(msg)
        local_abspaths = clipaths_to_local_abspaths(proj.local_path, paths,
                                                    working_dir)
        local_abspaths = make_local_abspaths_for_upload(proj.local_path,
                                                        local_abspaths)
        local_abspath = local_abspaths[0]
        if not os.path.exists(local_abspath):
            printpath = os.path.relpath(local_abspath, start=working_dir)
            print(printpath + ": does not exist")
        else:
            paths_to_upload.append(local_abspath)
    else:
        # get info to compare local and remote files
        checksum = not no_compare
        mcpaths = clipaths_to_mcpaths(proj.local_path, paths, working_dir)
        mcpaths = make_mcpaths_for_upload(proj.local_path, mcpaths)
        # only the per-file comparison records are needed here
        files_data, _, _, _ = treecompare(
            proj, mcpaths, checksum=checksum, localtree=localtree, remotetree=remotetree)

        # check for files that are already uploaded or do not exist
        for mcpath in mcpaths:
            local_abspath = filefuncs.make_local_abspath(proj.local_path, mcpath)
            printpath = os.path.relpath(local_abspath, start=working_dir)
            if os.path.isfile(local_abspath):
                l_checksum = files_data[mcpath]['l_checksum']
                r_checksum = files_data[mcpath]['r_checksum']
                if l_checksum and l_checksum == r_checksum:
                    print(printpath + ": local is equivalent to remote (skipping)")
                    file_results[local_abspath] = files_data[mcpath]['r_obj']
                    continue
            elif not os.path.exists(local_abspath):
                print(printpath + ": does not exist")
                continue
            paths_to_upload.append(local_abspath)

    # do uploads
    for local_abspath in paths_to_upload:
        printpath = os.path.relpath(local_abspath, start=working_dir)

        # Materials Commons style path, where to upload
        if upload_as is None:
            dest_path = filefuncs.make_mcpath(proj.local_path, local_abspath)
        else:
            dest_path = upload_as
        printdestpath = os.path.relpath(
            filefuncs.make_local_abspath(proj.local_path, dest_path),
            start=working_dir)

        # note: remote files are versioned, so we skip overwrite checking / force option

        # create missing remote parent directories
        parent_path = os.path.dirname(dest_path)
        parent = mkdir(proj, parent_path, remote_only=True, create_intermediates=True, remotetree=remotetree)
        # The file branch below treats a falsy `parent` as "not a directory on
        # remote", so only compare paths when a parent was actually returned;
        # otherwise `parent.path` would raise before that message is reached.
        if parent and parent.path != parent_path:
            msg = "Upload error: "
            msg += " expected parent_path=" + parent_path
            msg += " got parent_path=" + parent.path
            raise cliexcept.MCCLIException(msg)

        try:
            if os.path.isfile(local_abspath):
                if not parent:
                    error_msg = printpath + ": parent=" + parent_path + " is not a directory on remote (not uploaded)"
                    error_results[local_abspath] = error_msg
                    print(error_msg)
                    continue

                # size in whole MB (bytes shifted right by 20 bits)
                file_size_mb = os.path.getsize(local_abspath) >> 20
                if file_size_mb > limit:
                    error_msg = printpath + ": file too large (size={1}MB, limit={0}MB) (not uploaded)".format(limit, file_size_mb)
                    error_results[local_abspath] = error_msg
                    print(error_msg)
                    continue

                result = proj.remote.upload_file(proj.id, parent.id, local_abspath)
                if not filefuncs.isfile(result):
                    error_msg = printpath + ": unknown error (not uploaded)"
                    error_results[local_abspath] = error_msg
                    print(error_msg)
                    continue

                if upload_as is None:
                    print("uploaded:", printpath)
                else:
                    print("uploaded:", printpath, "as", printdestpath)
                file_results[local_abspath] = result

            elif os.path.isdir(local_abspath):
                if not recursive:
                    error_msg = printpath + ": is a directory (not uploaded)"
                    error_results[local_abspath] = error_msg
                    print(error_msg)
                    continue

                proj.remote.create_directory(proj.id, os.path.basename(dest_path), parent.id)

                if upload_as is None:
                    child_paths = [os.path.join(local_abspath, name) for name in os.listdir(local_abspath)]
                    # forward no_compare/localtree so children are handled
                    # with the same comparison settings as the top-level call
                    file_results_tmp, error_results_tmp = \
                        standard_upload(proj, child_paths, working_dir,
                                        recursive=recursive, limit=limit,
                                        no_compare=no_compare,
                                        localtree=localtree,
                                        remotetree=remotetree)
                    file_results.update(file_results_tmp)
                    error_results.update(error_results_tmp)
                else:
                    # upload_as accepts exactly one path per call, so each
                    # child is uploaded with its own destination
                    for name in os.listdir(local_abspath):
                        child_path = os.path.join(local_abspath, name)
                        child_upload_as = os.path.join(upload_as, name)
                        file_results_tmp, error_results_tmp = \
                            standard_upload(proj, [child_path], working_dir,
                                            recursive=recursive, limit=limit,
                                            no_compare=no_compare,
                                            upload_as=child_upload_as,
                                            localtree=localtree,
                                            remotetree=remotetree)
                        file_results.update(file_results_tmp)
                        error_results.update(error_results_tmp)

            else:
                # should not happen
                msg = "Upload error: path does not exist"
                msg += " path=" + local_abspath
                raise cliexcept.MCCLIException(msg)

            if remotetree:
                remotetree.connect()
                try:
                    remotetree.update(parent_path, force=True)
                finally:
                    # always release the tree, even if the refresh fails
                    remotetree.close()

        except Exception as e:
            # best-effort: record the failure for this path and keep going
            error_msg = printpath + ": " + str(e) + " (not uploaded)"
            error_results[local_abspath] = error_msg
            print(error_msg)
            continue

    return (file_results, error_results)
class _TreeCompare(object):
"""Helper for the treecompare function.
It was slightly easier to write as a class so that variables could be stored as class attributes and assumed present in each subroutine.
Arguments
---------
proj: mcapi.Project
Project instance with proj.local_path indicating local project location
localtree: LocalTree object (optional, default=None)
A LocalTree object stores local file checksums to avoid unnecessary hashing. Will be used
and updated if provided and checksum == True in the call operator.
remotetree: RemoteTree object (optional, default=None)
A RemoteTree object stores remote file and directory information to minimize API calls and
data transfer. Will be used and updated if provided.
"""
def __init__(self, proj, localtree=None, remotetree=None):
self.proj = proj
self.localtree = localtree
self.remotetree = remotetree
columns = ['l_mtime', 'l_size', 'l_type', 'l_checksum', 'r_mtime', 'r_size', 'r_type', 'r_checksum', 'r_obj', 'path', 'id', 'parent_id']
self.record_init = {k: None for k in columns}
def _update_local_via_tree(self, path):
# update self.localtree for path (and if it is a directory, update the children)
self.localtree.connect()
self.localtree.update(path, get_children=True)
self._update_data_from_tree(path, self.localtree, 'l')
self.localtree.close()
def _update_local_record(self, record, local_abspath, checksum=False):
record['l_mtime'] = clifuncs.epoch_time(os.path.getmtime(local_abspath))
record['l_size'] = os.path.getsize(local_abspath)
if os.path.isfile(local_abspath):
record['l_type'] = 'file'
if checksum:
record['l_checksum'] = clifuncs.checksum(local_abspath)
elif os.path.isdir(local_abspath):
record['l_type'] = 'directory'
def _update_local(self, path, checksum=False):
"""Get local file or directory (and children) information"""
local_abspath = filefuncs.make_local_abspath(self.proj.local_path, path)
if not os.path.exists(local_abspath):
return
if os.path.isfile(local_abspath):
if path not in self.files_data:
self.files_data[path] = copy.deepcopy(self.record_init)
self._update_local_record(self.files_data[path], local_abspath, checksum=checksum)
elif os.path.isdir(local_abspath):
if path not in self.dirs_data:
self.dirs_data[path] = copy.deepcopy(self.record_init)
self._update_local_record(self.dirs_data[path], local_abspath, checksum=checksum)
# children
if path not in self.child_data:
self.child_data[path] = {}
for child in os.listdir(local_abspath):
childpath = os.path.join(path, child)
local_childpath = os.path.join(local_abspath, child)
if childpath not in self.child_data[path]:
self.child_data[path][childpath] = copy.deepcopy(self.record_init)
self._update_local_record(self.child_data[path][childpath], local_childpath, checksum=checksum)
else:
raise cliexcept.MCCLIException("TreeCompare error: os.path type error for '" + local_abspath + "'")
return
def _update_remote_via_tree(self, path):
# update self.remotetree for path (and if it is a directory, update the children)
self.remotetree.connect()
self.remotetree.update(path, get_children=True)
self._update_data_from_tree(path, self.remotetree, 'r')
self.remotetree.close()
def _update_remote_record(self, record, obj):
record['id'] = obj.id
record['parent_id'] = obj.directory_id
record['r_size'] = obj.size
record['r_mtime'] = clifuncs.epoch_time(obj.updated_at)
record['r_obj'] = obj
if filefuncs.isfile(obj):
record['r_type'] = 'file'
record['r_checksum'] = obj.checksum
elif filefuncs.isdir(obj):
record['r_type'] = 'directory'
return
def _update_remote(self, path):
"""Get remote file or directory (and children) information"""
obj = filefuncs.get_by_path_if_exists(self.proj.remote, self.proj.id, path)
if obj is not None:
if filefuncs.isfile(obj):
if path not in self.files_data:
self.files_data[path] = copy.deepcopy(self.record_init)
self._update_remote_record(self.files_data[path], obj)
elif filefuncs.isdir(obj):
if path not in self.dirs_data:
self.dirs_data[path] = copy.deepcopy(self.record_init)
self._update_remote_record(self.dirs_data[path], obj)
# children
if path not in self.child_data:
self.child_data[path] = {}
for child in self.proj.remote.list_directory(self.proj.id, obj.id):
childpath = os.path.join(path, child.name)
if childpath not in self.child_data[path]:
self.child_data[path][childpath] = copy.deepcopy(self.record_init)
self._update_remote_record(self.child_data[path][childpath], child)
else:
raise cliexcept.MCCLIException("TreeCompare error: get_by_path type error for '" + path + "'")
return
def _update_record_from_tree(self, record, file_or_dir, prefix):
record[prefix + '_mtime'] = file_or_dir['mtime']
record[prefix + '_size'] = file_or_dir['size']
record[prefix + '_type'] = file_or_dir['otype']
record[prefix + '_checksum'] = file_or_dir['checksum']
if 'id' in file_or_dir.keys():
record['id'] = file_or_dir['id']
if 'parent_id' in file_or_dir.keys():
record['parent_id'] = file_or_dir['parent_id']
def _update_data_from_tree(self, path, tree, prefix):
"""Common data conversion from tree to output data"""
res = tree.select_by_path(path)
if len(res) > 1:
raise cliexcept.MCCLIException("_update_data_from_tree error: found > 1 entry for '" + path + "'")
if not res:
return
file_or_dir = res[0]
if file_or_dir['otype'] == 'file':
if path not in self.files_data:
self.files_data[path] = copy.deepcopy(self.record_init)
self._update_record_from_tree(self.files_data[path], file_or_dir, prefix)
elif file_or_dir['otype'] == 'directory':
if path not in self.dirs_data:
self.dirs_data[path] = copy.deepcopy(self.record_init)
self._update_record_from_tree(self.dirs_data[path], file_or_dir, prefix)
# children
if path not in self.child_data:
self.child_data[path] = {}
results = tree.select_by_parent_path(path)
for file_or_dir in results:
childpath = file_or_dir['path']
if childpath not in self.child_data[path]:
self.child_data[path][childpath] = copy.deepcopy(self.record_init)
self._update_record_from_tree(self.child_data[path][childpath], file_or_dir, prefix)
elif file_or_dir['otype'] == None:
# this supports adding records for files that we checked for but do not exist
pass
else:
raise cliexcept.MCCLIException("Localtree error: otype error for '" + path + "'")
return
def __call__(self, paths, checksum=False):
"""Compare local and remote tree differences for paths
paths: List of str
List of Materials Commons style paths (absolute path, not including project name directory)
to query.
checksum: bool (optional, default=False)
If True, calculate MD5 checksum of local files and compare to remote. If localtree was
provided to the constructor, the checksums will be saved in the localtree database.
Returns
-------
(files_data, dirs_data, child_data, not_existing):
files_data: dict of filepath: file or directory comparison
Contains file comparisons
dirs_data: dict of dirpath: file or directory comparison
Contains directory comparisons
child_data: dict of dirpath: childpath: file or directory comparison
Contains directory children comparisons
not_existing: list of str
Paths that do not exist locally or remotely
For each file or directory the comparison data is:
'l_mtime': float, local file modify time (seconds since epoch)
'l_size': int, local file size in bytes
'l_type': str, local file type ('file' or 'directory')
'l_checksum': str, local file md5 hash
'r_mtime': float, remote file modify time (seconds since epoch)
'r_size': int, remote file size in bytes
'r_type': remote file type ('file' or 'directory')
'r_checksum': str, remote file md5 hash
'r_obj': File or Directory, remote object
'eq': bool, whether the local and remote files are equivalent
'path': str, path to file or directory (including the project top)
'id': str, Materials Commons ID, if exists
'parent_id': str, Materials Commons ID, if exists
Values are None if the file does not exist in the relevant tree.
Notes
-----
The equivalence check ('eq' in `data`) is only done for files. For directories, it is
always None.
When directories are updated in localtree and remotetree their children are also updated
(not recursively).
Remote objects, 'r_obj', are only returned if remotetree is None.
"""
self.files_data = {}
self.dirs_data = {}
self.child_data = {}
for path in paths:
if self.localtree and checksum:
self._update_local_via_tree(path)
else:
self._update_local(path, checksum=checksum)
if self.remotetree:
self._update_remote_via_tree(path)
else:
self._update_remote(path)
if checksum:
for key, value in self.files_data.items():
if value['l_checksum'] and value['r_checksum']:
value['eq'] = (value['l_checksum'] == value['r_checksum'])
for dir, cdata in self.child_data.items():
for key, value in cdata.items():
if value['l_checksum'] and value['r_checksum']:
value['eq'] = (value['l_checksum'] == value['r_checksum'])
not_existing = []
for path in paths:
if path not in self.files_data and path not in self.dirs_data:
not_existing.append(path)
return (self.files_data, self.dirs_data, self.child_data, not_existing)
def treecompare(proj, paths, checksum=False, localtree=None, remotetree=None):
    """
    Compare files and directories on the local and remote trees.

    Thin functional wrapper: builds a ``_TreeCompare`` for the project and
    trees and calls it once with ``paths``.

    Arguments
    ---------
    proj: mcapi.Project
        Project instance with proj.local_path indicating local project location

    paths: List of str
        List of Materials Commons style paths (absolute path, not including project name directory)
        to query.

    checksum: bool (optional, default=False)
        If True, local file checksums are calculated and compared to remote. The 'eq'
        comparison is only computed when this is True.

    localtree: LocalTree object (optional, default=None)
        A LocalTree object stores local file checksums to avoid unnecessary hashing. Will be used
        and updated if provided and checksum == True.

    remotetree: RemoteTree object (optional, default=None)
        A RemoteTree object stores remote file and directory information to minimize API calls and
        data transfer. Will be used and updated if provided.

    Returns
    -------
    (files_data, dirs_data, child_data, not_existing):

        files_data: dict of filepath: file or directory comparison
            Contains file comparisons

        dirs_data: dict of dirpath: file or directory comparison
            Contains directory comparisons

        child_data: dict of dirpath: childpath: file or directory comparison
            Contains directory children comparisons

        not_existing: list of str
            Paths that do not exist locally or remotely

    Each comparison record holds 'l_mtime', 'l_size', 'l_type', 'l_checksum'
    for the local side; 'r_mtime', 'r_size', 'r_type', 'r_checksum', 'r_obj'
    for the remote side; and 'path', 'id', 'parent_id'. When a checksum
    comparison was made for a file, 'eq' holds its boolean result. Values are
    None if the file does not exist in the relevant tree.

    Notes
    -----
    The equivalence check ('eq') is only done for files.

    When directories are updated in localtree and remotetree their children are also updated
    (not recursively).

    Remote objects, 'r_obj', are only returned if remotetree is None.
    """
    return _TreeCompare(proj, localtree=localtree, remotetree=remotetree)(
        paths, checksum=checksum)
def get_types(path, files_data, dirs_data):
    """Look up the local and remote type of a path in treecompare output

    For each side, ``files_data`` is consulted first and ``dirs_data``
    second; a non-empty type found in ``dirs_data`` takes precedence.

    Args:
        path (str): Path to check for type
        files_data: The "files_data" output from :func:`treecompare`
        dirs_data: The "dirs_data" output from :func:`treecompare`

    Returns:
        Tuple with (local_type, remote_type) of path. An entry is None when
        neither table has a non-empty type for that side.
    """
    def _resolve(field):
        found = None
        # later tables win, so dirs_data overrides files_data
        for table in (files_data, dirs_data):
            if path in table and table[path][field]:
                found = table[path][field]
        return found

    return (_resolve('l_type'), _resolve('r_type'))
def is_type_mismatch(path, files_data, dirs_data):
    """Check treecompare files_data and dirs_data output for a type mismatch

    Returns True only when both a local and a remote type are known for
    ``path`` and they differ; otherwise False.
    """
    local_kind, remote_kind = get_types(path, files_data, dirs_data)
    return bool(local_kind and remote_kind and local_kind != remote_kind)
def is_child_data_mismatch(child_data):
    """Check a treecompare child_data comparison record for a type mismatch

    Returns True only when the record has both a local and a remote type and
    they differ; otherwise False.
    """
    if not child_data['l_type']:
        return False
    if not child_data['r_type']:
        return False
    return bool(child_data['l_type'] != child_data['r_type'])
class _Mover(object):
"""Helper for the move function"""
def __init__(self, proj, remote_only=False, localtree=None, remotetree=None):
self.proj = proj
self.remote_only = remote_only
self.localtree = localtree
self.remotetree = remotetree
def _move_remote_file(self, path, to_directory_path, to_directory_id, name=None):
file_id = self.files_data[path]['id']
if os.path.dirname(path) != to_directory_path:
self.proj.remote.move_file(self.proj.id, file_id, to_directory_id)
if name:
self.proj.remote.rename_file(self.proj.id, file_id, name)
def _move_remote_directory(self, path, to_directory_path, to_directory_id, name=None):
directory_id = self.dirs_data[path]['id']
if os.path.dirname(path) != to_directory_path:
self.proj.remote.move_directory(self.proj.id, directory_id, to_directory_id)
if name:
self.proj.remote.rename_directory(self.proj.id, directory_id, name)
def _move_remote(self, path, to_directory_path, to_directory_id, name=None):
""" Move file or directory on remote
Arguments
---------
path: str, Source file or directory
to_directory_path: str, Destination directory
name: str or None, If name is not None, rename file or directory after moving
"""
if path in self.files_data:
self._move_remote_file(path, to_directory_path, to_directory_id, name=name)
else:
self._move_remote_directory(path, to_directory_path, to_directory_id, name=name)
def _move_local(self, path, to_directory_path, name=None):
if name is None:
name = os.path.basename(path)
src = filefuncs.make_local_abspath(self.proj.local_path, path)
dest = filefuncs.make_local_abspath(self.proj.local_path, os.path.join(to_directory_path, name))
shutil.move(src, dest)
def _validate_destination(self, paths):
dest_path = paths[-1]
dest_local_abspath = filefuncs.make_local_abspath(self.proj.local_path, dest_path)
dest_printpath = os.path.relpath(dest_local_abspath)
# get type of remote destination
self.dest_remote_type = None
if dest_path in self.files_data:
self.dest_remote_type = self.files_data[dest_path]['r_type']
elif dest_path in self.dirs_data:
self.dest_remote_type = self.dirs_data[dest_path]['r_type']
# get type of local destination
self.dest_local_type = None
if dest_path in self.files_data:
self.dest_local_type = self.files_data[dest_path]['l_type']
elif dest_path in self.dirs_data:
self.dest_local_type = self.dirs_data[dest_path]['l_type']
valid_usage = True
# check remote dest type
if self.dest_remote_type == 'file':
print(dest_printpath + ": is an existing file on remote (will not overwrite)")
valid_usage = False
elif self.dest_remote_type is None:
# dest is non-existant on remote
if len(paths) != 2:
print(dest_printpath + ": does not exist on remote (may not rename multiple src)")
valid_usage = False
elif self.dest_remote_type != 'directory':
raise cliexcept.MCCLIException("Error in mv: dest_path='" + dest_path + "', dest_remote_type='" + str(dest_remote_type) + "'")
# check local dest type
if not self.remote_only:
if self.dest_local_type == 'file':
print(dest_printpath + ": is an existing file locally (will not overwrite)")
valid_usage = False
elif self.dest_local_type is None:
# dest is non-existant on remote
if len(paths) != 2:
print(dest_printpath + ": does not exist locally (may not rename multiple src)")
valid_usage = False
elif self.dest_local_type != 'directory':
raise cliexcept.MCCLIException("Error in mv: dest_path='" + dest_path + "', dest_remote_type='" + str(dest_local_type) + "'")
if self.dest_remote_type != self.dest_local_type:
print(dest_printpath + ": local and remote types do not match")
valid_usage = False
return valid_usage
def _validate_source(self, path, to_directory_path, name=None):
if name is None:
name = os.path.basename(path)
local_abspath = filefuncs.make_local_abspath(self.proj.local_path, path)
printpath = os.path.relpath(local_abspath)
dest_path = os.path.join(to_directory_path, name)
dest_local_abspath = filefuncs.make_local_abspath(self.proj.local_path, dest_path)
dest_printpath = os.path.relpath(dest_local_abspath)
# check source exists
if path in self.not_existing:
print(printpath + ": no such file or directory")
return False
# check source exists remotely
if path in self.files_data and not self.files_data[path]['r_type']:
print(printpath + ": does not exist on remote")
return False
if path in self.dirs_data and not self.dirs_data[path]['r_type']:
print(printpath + ": does not exist on remote")
return False
# if not remote_only, check that local and remote types match
if not self.remote_only:
if is_type_mismatch(path, self.files_data, self.dirs_data):
print(printpath + ": local and remote types do not match")
return False
return True
def __call__(self, paths):
dest_path = paths[-1]
if not paths or len(paths) < 2:
print("Expects 2 or more paths: `mc mv <src> <target>` or `mc mv <src> ... <directory>`")
return
self.files_data, self.dirs_data, self.child_data, self.not_existing = treecompare(
self.proj, paths, localtree=self.localtree, remotetree=self.remotetree)
if not self._validate_destination(paths):
return
if self.dest_remote_type == 'directory':
to_directory_path = dest_path
to_directory_id = self.dirs_data[dest_path]['id']
name = None
else:
to_directory_path = os.path.dirname(dest_path)
local_to_directory_abspath = filefuncs.make_local_abspath(self.proj.local_path, to_directory_path)
local_to_directory_printpath = os.path.relpath(local_to_directory_abspath)
# if destination name is different, must move then rename to `name`
name = None
if os.path.basename(paths[0]) != os.path.basename(dest_path):
name = os.path.basename(dest_path)
to_directory = filefuncs.get_by_path_if_exists(self.proj.remote, self.proj.id, to_directory_path)
if not filefuncs.isdir(to_directory):
print(to_directory_path + ": not a directory on remote")
return
if not self.remote_only and not os.path.isdir(local_to_directory_abspath):
print(local_to_directory_printpath + ": not a directory locally")
return
to_directory_id = to_directory.id
# move, and rename if necessary
for p in paths[0:-1]:
if not self._validate_source(p, to_directory_path, name=name):
continue
self._move_remote(p, to_directory_path, to_directory_id, name=name)
if self.remotetree:
self.remotetree.connect()
self.remotetree.update(p, force=True)
self.remotetree.update(to_directory_path, force=True)
self.remotetree.close()
if not self.remote_only:
self._move_local(p, to_directory_path, name=name)
if self.localtree:
self.localtree.connect()
self.localtree.update(p)
self.localtree.update(to_directory_path)
self.localtree.close()
def move(proj, paths, remote_only=False, localtree=None, remotetree=None):
    """Move files and directories

    Thin functional wrapper: builds a ``_Mover`` with the given options and
    calls it once with ``paths``. Nothing is returned.

    Arguments
    ---------
    proj: mcapi.Project
        Project instance with proj.local_path indicating local project location

    paths: List of str
        List of Materials Commons style paths (absolute path, not including project name directory)
        to move. The last entry is the destination; the preceding entries are the sources.

    remote_only: bool (optional, default=False)
        If True, only move files and directories on remote. If False, move on local and remote.

    localtree: LocalTree object (optional, default=None)
        A LocalTree object stores local file checksums to avoid unnecessary hashing. Will be
        updated after local moves if provided.

    remotetree: RemoteTree object (optional, default=None)
        A RemoteTree object stores remote file and directory information to minimize API calls and
        data transfer. Will be used and updated if provided.
    """
    _Mover(
        proj,
        remote_only=remote_only,
        localtree=localtree,
        remotetree=remotetree,
    )(paths)
class _Remover(object):
"""Helper for the remove function"""
def __init__(self, proj, recursive=False, no_compare=False, remote_only=False, localtree=None, remotetree=None):
self.proj = proj
self.recursive = recursive
self.no_compare = no_compare
self.remote_only = remote_only
self.dry_run = False # needs work to support this
self.localtree = localtree
self.remotetree = remotetree
def _update_remote(self, path):
if not self.remotetree:
return
# sets updatetime temporarily, does not save
self.remotetree.connect()
self.remotetree.update(path, force=True)
self.remotetree.close()
def _update_local(self, path):
if not self.localtree:
return
self.localtree.connect()
self.localtree.update(path)
self.localtree.close()
def _remove_remote_file(self, path, record):
if self.dry_run:
print("(dry run) rm remote:", path)
return True
else:
print("rm remote:", path)
try:
self.proj.remote.delete_file(self.proj.id, record['id'])
return True
except requests.exceptions.HTTPError as e:
try:
print(e.response.json()['error'])
except:
print(e)
print(" FAILED, for unknown reason")
return False
def _remove_local_file(self, path):
    """Delete the local file for `path`; in dry-run mode only print what would be removed."""
    target = filefuncs.make_local_abspath(self.proj.local_path, path)
    if not self.dry_run:
        print("rm local:", target)
        os.remove(target)
        return
    print("(dry run) rm local:", target)
def _remove_file(self, path, record, updatetree=False):
"""Remove a file
- Will remove local and remote as specified by constructor options.
- Will update local and remote tree after deletion if updatree==True
"""
parent_path = os.path.dirname(path)
if not record['r_type']:
print(path + ": does not exist on remote")
return
elif not record['l_type']:
self._remove_remote_file(path, record)
if updatetree:
self._update_remote(parent_path)
return
elif self.remote_only:
self._remove_remote_file(path, record)
if updatetree:
self._update_remote(parent_path)
return
elif self.no_compare:
res = self._remove_remote_file(path, record)
if res:
self._remove_local_file(path)
if updatetree:
self._update_remote(parent_path)
self._update_local(parent_path)
return
elif not record['eq']:
print(path + ": local and remote are not equal")
return
else:
res = self._remove_remote_file(path, record)
if res:
self._remove_local_file(path)
if updatetree:
self._update_remote(parent_path)
self._update_local(parent_path)
return
def _remove_remote_directory(self, path, record):
    """Delete a single directory on the remote project.

    Args:
        path (str): Path of the directory; used only in the printed messages.
        record (dict): Record for the directory. ``record['id']`` is the id
            passed to the remote ``delete_directory`` call.

    Returns:
        bool: True if the remote call completed (or this is a dry run, in
        which case nothing is deleted). False if the remote call raised
        ``requests.exceptions.HTTPError``; the error is printed, not raised.
    """
    if self.dry_run:
        print("(dry run) rm remote:", path)
        return True

    print("rm remote:", path)
    print("rm remote directory:", path)
    try:
        self.proj.remote.delete_directory(self.proj.id, record['id'])
    except requests.exceptions.HTTPError as e:
        try:
            print(e)
            print(json.dumps(e.response.json(), indent=2))
        except Exception:
            # The response body could not be decoded or serialized.
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            print(" FAILED, for unknown reason")
        return False
    return True
def _remove_local_directory(self, path):
    """Remove the local directory corresponding to `path`, if it exists.

    Uses ``os.rmdir``, so the directory must already be empty. In a dry run
    the action is only printed.

    Args:
        path (str): Path of the directory within the project.
    """
    target = filefuncs.make_local_abspath(self.proj.local_path, path)
    if os.path.exists(target):
        if self.dry_run:
            print("(dry run) rm local:", target)
        else:
            print("rm local:", target)
            os.rmdir(target)
def _remove_directory(self, path, record, updatetree=False):
    """Remove a directory

    - Before calling, all children ought to be deleted, but this will double check
    - Will remove local and remote as specified by constructor options.
    - Unless remote_only, will check for remaining local children before
      deletion (refreshing the local tree first when one is in use), and if
      updatetree==True will update the local and remote tree after deletion

    Args:
        path (str): Path of the directory to remove.
        record (dict): Record for the directory, passed on to the remote
            removal.
        updatetree (bool): If True, refresh the tree(s) for `path` after the
            removal attempt.
    """
    if self.remote_only:
        self._remove_remote_directory(path, record)
        if updatetree:
            self._update_remote(path)
        return

    local_abspath = filefuncs.make_local_abspath(self.proj.local_path, path)
    if self.localtree:
        self._update_local(path)
        self.localtree.connect()
        local_children = self.localtree.select_by_parent_path(path)
        self.localtree.close()
    elif os.path.exists(local_abspath):
        local_children = os.listdir(local_abspath)
    else:
        # The directory may exist only on the remote; os.listdir would raise
        # FileNotFoundError, so treat a missing local directory as empty.
        local_children = []

    if len(local_children):
        print(local_abspath + ": could not remove all local children (skipping)")
        return

    # Remove the local directory only after the remote removal succeeded.
    if self._remove_remote_directory(path, record):
        self._remove_local_directory(path)
    if updatetree:
        self._update_remote(path)
        self._update_local(path)
def __call__(self, path):
    """Remove the file or directory at `path`.

    Runs ``treecompare`` on `path` and acts on the result:

    - path does not exist: print a message and do nothing
    - path is a file: attempt to remove it
    - path is a directory: requires the recursive option and a remote
      directory; removes children first (recursing into child
      directories), then the directory itself

    Args:
        path (str): Path of the file or directory to remove.

    Raises:
        MCCLIException: if `path` is not reported by ``treecompare`` as
            missing, a file, or a directory.
    """
    # checksums are only needed when local and remote are compared
    checksum = not self.no_compare

    # if remotetree provided, set updatetime to now for the comparison only
    tree = self.remotetree
    if tree:
        orig_remote_updatetime = tree.updatetime
        tree.updatetime = time.time()
    try:
        files_data, dirs_data, child_data, not_existing = treecompare(
            self.proj, [path], checksum=checksum,
            localtree=self.localtree, remotetree=self.remotetree)
    finally:
        # reset remotetree updatetime, even if the comparison failed
        if tree:
            tree.updatetime = orig_remote_updatetime

    # act on treecompare results

    # if path does not exist, do nothing
    if not_existing:
        for missing_path in not_existing:
            local_abspath = filefuncs.make_local_abspath(self.proj.local_path, missing_path)
            print(os.path.relpath(local_abspath) + ": No such file or directory")

    # if path is a file, attempt to remove it
    elif path in files_data:
        self._remove_file(path, files_data[path], updatetree=True)

    # if path is a directory, attempt to remove children and then it
    elif path in dirs_data:
        if not self.recursive:
            print(path + ": is a directory")
            return
        if not dirs_data[path]['r_type']:
            print(path + ": does not exist on remote")
            return

        for childpath, record in child_data[path].items():
            if record['r_type'] == 'file':
                self._remove_file(childpath, record)
            elif record['r_type'] == 'directory':
                self.__call__(childpath)
            else:  # local child without matching remote
                print(childpath + ": does not exist on remote")

        self._remove_directory(path, dirs_data[path])
        self._update_remote(path)
        self._update_local(path)

    else:
        raise cliexcept.MCCLIException("Error in rm_file: unknown error")
def remove(proj, paths, recursive=False, no_compare=False, remote_only=False, localtree=None, remotetree=None):
    """Remove files and directories

    Arguments
    ---------
    proj: mcapi.Project
        Project instance with proj.local_path indicating local project location

    paths: List of str
        List of Materials Commons style paths (absolute path, not including project name
        directory) to remove.

    recursive: bool (optional, default=False)
        If True, remove directories recursively. Otherwise, will not remove directories.

    no_compare: bool (optional, default=False)
        If True, remove files and directories without checking for equality between local and
        remote.

    remote_only: bool (optional, default=False)
        If True, only remove files and directories on remote. If False, remove on local and
        remote.

    localtree: LocalTree object (optional, default=None)
        A LocalTree object stores local file checksums to avoid unnecessary hashing. Will be
        used and updated if provided and checksum == True.

    remotetree: RemoteTree object (optional, default=None)
        A RemoteTree object stores remote file and directory information to minimize API calls
        and data transfer. Will be used and updated if provided.
    """
    # One remover, configured once, is applied to every requested path in order.
    remover = _Remover(
        proj,
        recursive=recursive,
        no_compare=no_compare,
        remote_only=remote_only,
        localtree=localtree,
        remotetree=remotetree,
    )
    for target in paths:
        remover(target)
def mkdir(proj, path, remote_only=False, create_intermediates=False, remotetree=None):
    """Make directories

    Arguments
    ---------
    proj: mcapi.Project
        Project instance with proj.local_path indicating local project location

    path: str
        Materials Commons style path (absolute path, not including project name directory) of
        directory to make.

    create_intermediates: bool (optional, default=False)
        If True, make intermediate directories as necessary when they do not exist.

    remote_only: bool (optional, default=False)
        If True, only make directories on remote. If False, make on local and remote.

    remotetree: RemoteTree object (optional, default=None)
        A RemoteTree object stores remote file and directory information to minimize API calls
        and data transfer. If provided, the parent directory's entry is refreshed after a
        remote directory is created.

    Returns
    -------
    result: mcapi.File or None
        Object representing the remote directory: the existing one if `path` is already a
        remote directory, otherwise the newly created one.

    Raises
    ------
    Raises MCCLIException if unsuccessful with one of following messages:

    - path + ": is a local file":
        If attempting to create "/A/B/C" and any of "/A", "/A/B", or "/A/B/C" is an existing
        file locally and remote_only==False.

    - path + ": is a remote file":
        If attempting to create "/A/B/C" and any of "/A", "/A/B", or "/A/B/C" is an existing
        file on Materials Commons.

    - parent_path + ": parent directory does not exist":
        If attempting to create "/A/B/C" and the parent directory, "/A/B" does not exist
        on Materials Commons and create_intermediates==False.
    """
    local_abspath = filefuncs.make_local_abspath(proj.local_path, path)

    if not remote_only and os.path.isfile(local_abspath):
        raise cliexcept.MCCLIException(path + ": is a local file")

    result = filefuncs.get_by_path_if_exists(proj.remote, proj.id, path)

    if filefuncs.isdir(result):
        # Already exists on remote; just make sure the local directory exists too.
        if not remote_only:
            clifuncs.mkdir_if(local_abspath)
        return result
    if filefuncs.isfile(result):
        raise cliexcept.MCCLIException(path + ": is a remote file")
    if result is not None:
        # Neither a directory, a file, nor missing: nothing is created.
        return None

    parent_path = os.path.dirname(path)
    if parent_path == path:
        # dirname() has reached a fixed point (e.g. "/" or ""), so there is no
        # parent to create; without this guard create_intermediates would
        # recurse without end.
        raise cliexcept.MCCLIException(parent_path + ": parent directory does not exist")

    if create_intermediates:
        parent = mkdir(proj, parent_path, remote_only=remote_only,
                       create_intermediates=create_intermediates, remotetree=remotetree)
    else:
        parent = filefuncs.get_by_path_if_exists(proj.remote, proj.id, parent_path)
        if filefuncs.isfile(parent):
            raise cliexcept.MCCLIException(parent_path + ": is a remote file")
        if parent is None:
            raise cliexcept.MCCLIException(parent_path + ": parent directory does not exist")

    result = proj.remote.create_directory(proj.id, os.path.basename(path), parent.id)

    # Refresh the parent's entry whenever a remote directory was created,
    # regardless of whether intermediates were requested.
    if remotetree:
        remotetree.connect()
        remotetree.update(parent_path, force=True)
        remotetree.close()

    if not remote_only:
        clifuncs.mkdir_if(local_abspath)
    return result