Source code for clusterking.util.metadata

#!/usr/bin/env python3

""" Miscellaneous utilities """

# std
import collections
from collections.abc import Iterable
import json
import pathlib
import time
from typing import Dict

# 3rd party
try:
    import git
except ImportError:
    git = None


def nested_dict():
    """Create a dictionary-like object that adds missing levels on access.

    The returned ``collections.defaultdict`` uses this very function as its
    default factory, so every missing key yields another such object.
    Idea taken from https://stackoverflow.com/questions/16724788/

    Example:

    .. code-block:: python

        a = nested_dict()
        a['test']['this']['is']['working'] = "yaaay"
    """
    auto_levels = collections.defaultdict(nested_dict)
    return auto_levels
def turn_into_nested_dict(nested):
    """Convert a dictionary of dictionaries into a ``nested_dict``.

    Values that are ``dict`` instances are converted recursively; all other
    values are stored unchanged under the same key.

    Args:
        nested: Object with an ``items()`` method (e.g. a dictionary).

    Returns:
        Object created by ``nested_dict()`` holding the converted content.
    """
    converted = nested_dict()
    for name, entry in nested.items():
        converted[name] = (
            turn_into_nested_dict(entry) if isinstance(entry, dict) else entry
        )
    return converted
def version_info(log=None, path=None) -> Dict[str, str]:
    """Return the output of ``get_git_info`` plus a ``"version"`` entry.

    Args:
        log: Passed on to ``get_git_info``
        path: Passed on to ``get_git_info``

    Returns:
        New dictionary with all items of ``get_git_info(log=log, path=path)``
        and the return value of ``get_version()`` under the key ``"version"``.
    """
    return {**get_git_info(log=log, path=path), "version": get_version()}
def get_git_info(log=None, path=None):
    """Return dictionary containing status of the git repository (commit
    hash, date etc.).

    The dictionary always has the keys ``"branch"``, ``"sha"``, ``"msg"`` and
    ``"time"``. All values are the string ``"unknown"`` if the ``git`` module
    is not available or if no git repository is found.

    Args:
        log: logging.Logger object (optional). If given, the hint about the
            missing ``git`` module is logged, otherwise it is printed.
        path: path to .git subfolder or search path (optional). If not
            given, the search starts at the location of this file. Parent
            directories are searched as well.

    Returns:
        dictionary
    """
    # Fill in some dummy values first
    git_config = {
        "branch": "unknown",
        "sha": "unknown",
        "msg": "unknown",
        "time": "unknown",
    }

    if git is None:
        msg_warn = (
            "Module 'git' not found, will not add git version "
            "information to the output files."
        )
        msg_debug = (
            "Install the 'git' module by running "
            "'sudo pip3 install gitpython' or similar. "
        )
        if log:
            log.warning(msg_warn)
            log.debug(msg_debug)
        else:
            print(msg_warn)
            print(msg_debug)
        return git_config

    if not path:
        # Start at this file and let git.Repo search the parent directories
        path = pathlib.Path(__file__)

    try:
        repo = git.Repo(path=path, search_parent_directories=True)
    except git.InvalidGitRepositoryError:
        return git_config

    try:
        # repo.head is the HEAD symbolic reference and its name is always
        # "HEAD"; the branch it points to is available as active_branch.
        git_config["branch"] = repo.active_branch.name
    except TypeError:
        # Detached HEAD: there is no active branch, keep the name of HEAD
        git_config["branch"] = repo.head.name

    hcommit = repo.head.commit
    git_config["sha"] = hcommit.hexsha
    git_config["msg"] = hcommit.message.strip("\n")
    # committed_date is passed to time.gmtime, i.e. the time is given in UTC
    commit_time = hcommit.committed_date
    git_config["time"] = time.strftime(
        "%a %d %b %Y %H:%M", time.gmtime(commit_time)
    )
    # todo: also add a nice string representation of git diff?
    return git_config
def save_git_info(output_path=None, *args, **kwargs) -> Dict[str, str]:
    """Write the output of ``get_git_info`` to a json file.

    Args:
        output_path: Output path. If not given, the file ``git_info.json``
            in the parent of the directory that contains this file is used.
        *args: Passed on to ``get_git_info``
        **kwargs: Passed on to ``get_git_info``

    Returns:
        Output of ``get_git_info``
    """
    if output_path:
        target = pathlib.Path(output_path)
    else:
        package_dir = pathlib.Path(__file__).parent.resolve()
        target = package_dir / ".." / "git_info.json"

    git_info = get_git_info(*args, **kwargs)
    with target.open("w") as output_file:
        json.dump(git_info, output_file, indent=4, sort_keys=True)
    return git_info
def load_git_info(input_path=None) -> Dict[str, str]:
    """Read a json file, e.g. one written by ``save_git_info``.

    Args:
        input_path: Input path to json file. If not given, the file
            ``git_info.json`` in the parent of the directory that contains
            this file is used.

    Returns:
        Parsed content of the json file.
    """
    if input_path:
        source = pathlib.Path(input_path)
    else:
        package_dir = pathlib.Path(__file__).parent.resolve()
        source = package_dir / ".." / "git_info.json"

    with source.open() as input_file:
        return json.load(input_file)
def failsafe_serialize(obj):
    """Recursively convert an object into dictionaries, lists, numbers and
    strings.

    Args:
        obj: Object to convert

    Returns:
        * ``dict`` instances: dictionary with the same keys and converted
          values
        * other iterables except strings: list of the converted items
        * ``int`` and ``float`` instances: the object itself
        * everything else: ``str(obj)``
    """
    if isinstance(obj, dict):
        return {key: failsafe_serialize(item) for key, item in obj.items()}
    if isinstance(obj, str):
        # Strings are iterable but must not be split into characters
        return str(obj)
    if isinstance(obj, Iterable):
        return [failsafe_serialize(item) for item in obj]
    if isinstance(obj, (int, float)):
        return obj
    return str(obj)
def get_version():
    """Return ClusterKinG version.

    The version is the content of the file ``version.txt`` in the parent of
    the directory that contains this file, without surrounding whitespace.
    """
    package_dir = pathlib.Path(__file__).parent.parent
    return (package_dir / "version.txt").read_text().strip()
if __name__ == "__main__":
    # Manual check when run as a script: print the version info, then write
    # the git info to the default location and read it back.
    print("Testing version_info")
    current_info = version_info()
    print(current_info)

    print("Saving git_info")
    save_git_info()

    print("Loading git info again")
    reloaded_info = load_git_info()
    print(reloaded_info)