Source code for qcdb.dbwrap

from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import math

try:
    import cPickle as pickle
except ImportError:
    import pickle
import itertools
# from collections import defaultdict
try:
    from collections import OrderedDict
except ImportError:
    from oldpymodules import OrderedDict
from .exceptions import *
from .molecule import Molecule
from .modelchems import Method, BasisSet, Error, methods, bases, errors, pubs
from . import psiutil
from . import textables


[docs]def initialize_errors(): """Form OrderedDict of all possible statistical measures set to None""" error = OrderedDict() for e in ['e', 'pe', 'pbe', 'pce']: for m in ['pex', 'nex', 'max', 'min', 'm', 'ma', 'rms', 'std']: error[m + e] = None return error
[docs]def initialize_errors_elaborate(e=None, pe=None, pbe=None, pce=None, extrema=True): error = OrderedDict() error['maxe'] = None if (e is None or not extrema) else e # LD_XA error['mine'] = None if (e is None or not extrema) else e # LD_XI error['me'] = None if e is None else 0.0 # LD_MS error['mae'] = None if e is None else 0.0 # LD_MA error['rmse'] = None if e is None else 0.0 # LD_RA error['stde'] = None if e is None else 0.0 error['maxpe'] = None if (pe is None or not extrema) else pe # FD_XA error['minpe'] = None if (pe is None or not extrema) else pe # FD_XI error['mpe'] = None if pe is None else 0.0 # FD_MS error['mape'] = None if pe is None else 0.0 # FD_MA error['rmspe'] = None if pe is None else 0.0 # FD_RA error['stdpe'] = None if pe is None else 0.0 error['maxpbe'] = None if (pbe is None or not extrema) else pbe # BD_XA error['minpbe'] = None if (pbe is None or not extrema) else pbe # BD_XI error['mpbe'] = None if pbe is None else 0.0 # BD_MS error['mapbe'] = None if pbe is None else 0.0 # BD_MA error['rmspbe'] = None if pbe is None else 0.0 # BD_RA error['stdpbe'] = None if pbe is None else 0.0 error['maxpce'] = None if (pce is None or not extrema) else pce # BD_XA error['minpce'] = None if (pce is None or not extrema) else pce # BD_XI error['mpce'] = None if pce is None else 0.0 # BD_MS error['mapce'] = None if pce is None else 0.0 # BD_MA error['rmspce'] = None if pce is None else 0.0 # BD_RA error['stdpce'] = None if pce is None else 0.0 return error
[docs]def average_errors(*args): """Each item in *args* should be an error dictionary. Performs average-like operation over all items, which should be error dictionaries, in *args*. Defined for ME, MAE, STDE, and their relative-error variants. None returned for undefined statistics or when an item is missing. """ Ndb = float(len(args)) avgerror = initialize_errors() try: avgerror['pexe'] = max([x['pexe'] for x in args]) avgerror['nexe'] = min([x['nexe'] for x in args]) avgerror['maxe'] = max([x['maxe'] for x in args], key=lambda x: abs(x)) avgerror['mine'] = min([x['mine'] for x in args], key=lambda x: abs(x)) avgerror['me'] = sum([x['me'] for x in args]) / Ndb avgerror['mae'] = sum([x['mae'] for x in args]) / Ndb avgerror['rmse'] = sum([x['rmse'] for x in args]) / Ndb # TODO: unsure of op validity avgerror['stde'] = math.sqrt(sum([x['stde'] ** 2 for x in args]) / Ndb) avgerror['pexpe'] = max([x['pexpe'] for x in args]) avgerror['nexpe'] = min([x['nexpe'] for x in args]) avgerror['maxpe'] = max([x['maxpe'] for x in args], key=lambda x: abs(x)) avgerror['minpe'] = min([x['minpe'] for x in args], key=lambda x: abs(x)) avgerror['mpe'] = sum([x['mpe'] for x in args]) / Ndb avgerror['mape'] = sum([x['mape'] for x in args]) / Ndb avgerror['rmspe'] = sum([x['rmspe'] for x in args]) / Ndb # TODO: unsure of op validity avgerror['stdpe'] = math.sqrt(sum([x['stdpe'] * x['stdpe'] for x in args]) / Ndb) avgerror['pexpbe'] = max([x['pexpbe'] for x in args]) avgerror['nexpbe'] = min([x['nexpbe'] for x in args]) avgerror['maxpbe'] = max([x['maxpbe'] for x in args], key=lambda x: abs(x)) avgerror['minpbe'] = min([x['minpbe'] for x in args], key=lambda x: abs(x)) avgerror['mpbe'] = sum([x['mpbe'] for x in args]) / Ndb avgerror['mapbe'] = sum([x['mapbe'] for x in args]) / Ndb avgerror['rmspbe'] = sum([x['rmspbe'] for x in args]) / Ndb # TODO: unsure of op validity avgerror['stdpbe'] = math.sqrt(sum([x['stdpbe'] * x['stdpbe'] for x in args]) / Ndb) avgerror['pexpce'] = max([x['pexpce'] for x in args]) avgerror['nexpce'] = min([x['nexpce'] for x in args]) avgerror['maxpce'] = max([x['maxpce'] for x in args], key=lambda x: abs(x)) avgerror['minpce'] = min([x['minpce'] for x in args], key=lambda x: abs(x)) avgerror['mpce'] = sum([x['mpce'] for x in args]) / Ndb avgerror['mapce'] = sum([x['mapce'] for x in args]) / Ndb avgerror['rmspce'] = sum([x['rmspce'] for x in args]) / Ndb # TODO: unsure of op validity avgerror['stdpce'] = math.sqrt(sum([x['stdpce'] * x['stdpce'] for x in args]) / Ndb) except TypeError: pass return avgerror
[docs]def format_errors(err, mode=1): """From error dictionary *err*, returns a LaTeX-formatted string, after handling None entries. """ onedecimal = r"""{0:8.1f}""" twodecimal = r"""{0:8.2f}""" threedecimal = r"""{0:12.3f}""" fourdecimal = r"""{0:12.4f}""" shortblank = r"""{0:8s}""".format('') longblank = r"""{0:12s}""".format('') if mode == 1: me = ' ----' if err['me'] is None else '%+.2f' % (err['me']) stde = '----' if err['stde'] is None else '%.2f' % (err['stde']) mae = ' ----' if err['mae'] is None else '%6.2f' % (err['mae']) mape = ' ---- ' if err['mape'] is None else '%6.1f\%%' % (100 * err['mape']) mapbe = ' ---- ' if err['mapbe'] is None else '%6.1f\%%' % (100 * err['mapbe']) mapce = ' ---- ' if err['mapce'] is None else '%6.1f\%%' % (100 * err['mapce']) text = """$\{%s; %s\}$ %s %s %s""" % \ (me, stde, mae, mape, mapce) return text if mode == 2: sdict = OrderedDict() for lbl in ['pexe', 'nexe', 'maxe', 'mine', 'me', 'mae', 'rmse', 'stde']: sdict[lbl] = ' ----' if err[lbl] is None else fourdecimal.format(err[lbl]) for lbl in ['pexpe', 'nexpe', 'maxpe', 'minpe', 'mpe', 'mape', 'rmspe', 'stdpe', 'pexpbe', 'nexpbe', 'maxpbe', 'minpbe', 'mpbe', 'mapbe', 'rmspbe', 'stdpbe', 'pexpce', 'nexpce', 'maxpce', 'minpce', 'mpce', 'mapce', 'rmspce', 'stdpce']: sdict[lbl] = ' ----' if err[lbl] is None else threedecimal.format(100 * err[lbl]) text = """nex: {nexe}{nexpe}{nexpbe}{nexpce}\n""" \ """pex: {pexe}{pexpe}{pexpbe}{pexpce}\n""" \ """min: {mine}{minpe}{minpbe}{minpce}\n""" \ """max: {maxe}{maxpe}{maxpbe}{maxpce}\n""" \ """m: {me}{mpe}{mpbe}{mpce}\n""" \ """ma: {mae}{mape}{mapbe}{mapce}\n""" \ """rms: {rmse}{rmspe}{rmspbe}{rmspce}\n""" \ """std: {stde}{stdpe}{stdpbe}{stdpce}\n""".format(**sdict) return text if mode == 3: sdict = OrderedDict() # shortblanks changed from empty strings Aug 2015 for lbl in ['pexe', 'nexe', 'maxe', 'mine', 'me', 'mae', 'rmse', 'stde']: sdict[lbl] = shortblank if err[lbl] is None else twodecimal.format(err[lbl]) for lbl in ['pexpe', 'nexpe', 'maxpe', 'minpe', 'mpe', 'mape', 'rmspe', 'stdpe', 'pexpbe', 'nexpbe', 'maxpbe', 'minpbe', 'mpbe', 'mapbe', 'rmspbe', 'stdpbe', 'pexpce', 'nexpce', 'maxpce', 'minpce', 'mpce', 'mapce', 'rmspce', 'stdpce']: sdict[lbl] = shortblank if err[lbl] is None else onedecimal.format(100 * err[lbl]) return sdict
[docs]def string_contrast(ss): """From an array of strings, *ss*, returns maximum common prefix string, maximum common suffix string, and array of middles. """ s = [item + 'q' for item in ss if item is not None] short = min(s, key=len) for ib in range(len(short)): if not all([mc[ib] == short[ib] for mc in s]): preidx = ib break else: preidx = 0 for ib in range(len(short)): ie = -1 * (ib + 1) if not all([mc[ie] == short[ie] for mc in s]): sufidx = ie + 1 break else: sufidx = -1 * (len(short)) miditer = iter([mc[preidx:sufidx] for mc in s]) prefix = short[:preidx] suffix = short[sufidx:-1] middle = ['' if mc is None else next(miditer) for mc in ss] return prefix, suffix, middle
[docs]def oxcom(lst): """Returns gramatical comma separated string of *lst*.""" lst = [str(l) for l in lst] if not lst: return '' elif len(lst) == 1: return lst[0] elif len(lst) == 2: return ' and '.join(lst) else: return ', and '.join([', '.join(lst[:-1]), lst[-1]])
[docs]def cure_weight(refrxn, refeq, rrat, xi=0.2): """ :param refeq: value of benchmark for equilibrium Reaction :param rrat: ratio of intermonomer separation for Reaction to equilibrium Reaction :param xi: parameter :return: weight for CURE """ sigma = xi * abs(refeq) / (rrat ** 3) weight = max(abs(refrxn), sigma) return weight
[docs]def balanced_error(refrxn, refeq, rrat, m=0.03, p=10.0): """ :param refrxn: :param refeq: :param rrat: :param m: minimum permitted weight for a point :param p: multiples of abs(refeq) above refeq to which zero-line in head is displaced :return: """ one = float(1) q = one if rrat >= one else p qm1perat = q - 1 + refrxn / refeq weight = max(m, qm1perat / q) mask = weight * q / abs(qm1perat) return mask, weight
[docs]def fancify_mc_tag(mc, latex=False): """From the usual MTD-opt1_opt2-bas model chemistry identifier, return string based on fullname, if *latex* is False or latex if *latex* is True. """ try: mtd, mod, bas = mc.split('-') except ValueError: text = mc else: if latex: text = r"""%20s / %-20s %s""" % (methods[mtd].latex, bases[bas].latex, mod) else: text = r"""%20s / %s, %s""" % (methods[mtd].fullname, bases[bas].fullname, mod) return text
[docs]class ReactionDatum(object): """Piece of quantum chemical information that describes a qcdb.Reaction object. """ def __init__(self, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, doi=None, comment=None): # geometry self.dbrxn = dbse + '-' + str(rxn) # qcdb.Method self.method = method # mode, e.g., unCP, CP, RLX, etc. self.mode = mode # qcdb.BasisSet self.basis = basis # numerical value for reaction self.value = float(value) # energy unit attached to value, defaults to kcal/mol self.units = units # publication citation of value self.citation = citation # digital object identifier for publication (maybe this should be doi of datum, not of pub?) self.doi = doi # addl comments self.comment = comment @classmethod
[docs] def library_modelchem(cls, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, doi=None, comment=None): """Constructor when method and basis are strings corresponding to qcdb.Method and qcdb.BasisSet already defined in methods and bases. """ # computational method try: tmp_method = methods[method.upper()] except KeyError as e: raise ValidationError("""Invalid ReactionDatum method %s: %s""" % (method, e)) # computational basis set try: tmp_basis = bases[basis.lower()] except KeyError as e: raise ValidationError("""Invalid ReactionDatum basis %s: %s""" % (basis, e)) # publication if citation is None: tmp_pub = citation else: try: tmp_pub = pubs[citation.lower()] except KeyError as e: raise ValidationError("""Invalid ReactionDatum publication %s: %s""" % (citation, e)) return cls(dbse, rxn, tmp_method, mode, tmp_basis, value, units, citation=tmp_pub, doi=doi, comment=comment)
def __str__(self): text = '' text += """ ==> ReactionDatum <==\n\n""" text += """ Database reaction: %s\n""" % (self.dbrxn) text += """ Method: %s\n""" % (self.method.fullname) text += """ Mode: %s\n""" % (self.mode) text += """ Basis: %s\n""" % (self.basis.fullname) text += """ Value: %f [%s]\n""" % (self.value, self.units) text += """ Citation: %s %s\n""" % (self.citation.name, self.citation.doi) text += """ DOI: %s\n""" % (self.doi) text += """ Comment: %s\n""" % (self.comment) text += """\n""" return text
[docs]class Subset(object): """Affiliated qcdb.Reaction-s """ def __init__(self, name, hrxn, tagl=None, axis=None): # identifier self.name = name # array of reactions names self.hrxn = hrxn # description line self.tagl = tagl # mathematical relationships of reactions self.axis = OrderedDict() def __str__(self): text = '' text += """ ==> %s Subset <==\n\n""" % (self.name) text += """ Tagline: %s\n""" % (self.tagl) text += """ %20s""" % ('Reactions') for ax in self.axis.keys(): text += """ %8s""" % (ax) text += """\n""" for ix in range(len(self.hrxn)): text += """ %20s""" % (str(self.hrxn[ix])) for ax in self.axis.values(): text += """ %8.3f""" % (ax[ix]) text += """\n""" text += """\n""" return text
[docs]class Reagent(object): """Chemical entity only slightly dresed up from qcdb.Molecule. """ def __init__(self, name, mol, tagl=None, comment=None): # full name, e.g., 'S22-2-dimer' or 'NBC1-BzMe-8.0-monoA-CP' or 'HTBH-HCl-reagent' self.name = name # qcdb.Molecule try: self.NRE = mol.nuclear_repulsion_energy() except AttributeError: raise ValidationError("""Reagent must be instantiated with qcdb.Molecule object.""") else: self.mol = mol.create_psi4_string_from_molecule() # description line self.tagl = tagl # # addl comments # self.comment = comment # # fragmentation # self.fragments = mol.fragments # # frag activation # self.frtype = mol.fragment_types # # frag charge # self.frchg = mol.fragment_charges # # frag multiplicity # self.frmult = mol.fragment_multiplicities self.charge = mol.molecular_charge() def __str__(self): text = '' text += """ ==> %s Reagent <==\n\n""" % (self.name) text += """ Tagline: %s\n""" % (self.tagl) # text += """ Comment: %s\n""" % (self.comment) text += """ NRE: %f\n""" % (self.NRE) # text += """ Charge: %+d\n""" # text += """ Fragments: %d\n""" % (len(self.fragments)) # text += """ FrgNo Actv Chg Mult AtomRange\n""" # for fr in range(len(self.fragments)): # text += """ %-4d %1s %+2d %2d %s\n""" % (fr + 1, # '*' if self.frtype[fr] == 'Real' else '', # self.frchg[fr], self.frmult[fr], self.fragments[fr]) text += """ Molecule: \n%s""" % (self.mol) text += """\n""" return text
[docs]class Reaction(object): """ """ def __init__(self, name, dbse, indx, tagl=None, latex=None, color='black', comment=None): # name, e.g., '2' or 'BzMe-8.0' self.name = name # database reaction name, e.g., 'S22-2' or 'NBC1-BzMe-8.0' self.dbrxn = dbse + '-' + str(name) # numerical index of reaction self.indx = indx # description line self.tagl = tagl # latex description self.latex = latex # addl comments self.comment = comment # reaction matrices, specifying reagent contributions per reaction self.rxnm = {} # qcdb.ReactionDatum objects of quantum chemical data pertaining to reaction self.data = {} # benchmark qcdb.ReactionDatum self.benchmark = None # color for plotting self.color = color def __str__(self): text = '' text += """ ==> %s Reaction <==\n\n""" % (self.name) text += """ Database reaction: %s\n""" % (self.dbrxn) text += """ Index: %s\n""" % (self.indx) text += """ LaTeX representation: %s\n""" % (self.latex) text += """ Tagline: %s\n""" % (self.tagl) text += """ Comment: %s\n""" % (self.comment) if self.benchmark is None: text += """ Benchmark: %s\n""" % ('UNDEFINED') else: text += """ Benchmark: %f\n""" % (self.data[self.benchmark].value) text += """ Color: %s\n""" % (str(self.color)) text += """ Reaction matrix:\n""" for mode, rxnm in self.rxnm.iteritems(): text += """ %s\n""" % (mode) for rgt, coeff in rxnm.iteritems(): text += """ %3d %s\n""" % (coeff, rgt.name) text += """ Data:\n""" for label, datum in sorted(self.data.iteritems()): text += """ %8.2f %s\n""" % (datum.value, label) text += """\n""" return text
[docs] def compute_errors(self, benchmark='default', mcset='default', failoninc=True, verbose=False): """For all data or modelchem subset *mcset*, computes raw reaction errors between *modelchem* and *benchmark* model chemistries. Returns error if model chemistries are missing for any reaction in subset unless *failoninc* set to False, whereupon returns partial. Returns dictionary of reaction labels and error forms. """ if mcset == 'default': lsslist = self.data.keys() elif callable(mcset): # mcset is function that will generate subset of HRXN from sset(self) lsslist = [mc for mc in self.data.keys() if mc in mcset(self)] # untested else: # mcset is array containing modelchemistries lsslist = [mc for mc in self.data.keys() if mc in mcset] # assemble dict of qcdb.Reaction objects from array of reaction names lsset = OrderedDict() for mc in lsslist: lsset[mc] = self.data[mc] lbench = self.benchmark if benchmark == 'default' else benchmark try: mcGreater = self.data[lbench].value except KeyError as e: raise ValidationError("""Reaction %s missing benchmark datum %s.""" % (self.name, str(e))) err = {} for label, datum in lsset.iteritems(): try: mcLesser = datum.value except KeyError as e: if failoninc: raise ValidationError("""Reaction %s missing datum %s.""" % (label, str(e))) else: continue err[label] = [mcLesser - mcGreater, (mcLesser - mcGreater) / abs(mcGreater), (mcLesser - mcGreater) / abs(mcGreater)] # TODO define BER if verbose: print("""p = %6.2f, pe = %6.1f%%, bpe = %6.1f%% modelchem %s.""" % (err[label][0], 100 * err[label][1], 100 * err[label][2], label)) return err
[docs] def plot(self, benchmark='default', mcset='default', failoninc=True, verbose=False, color='sapt', xlimit=4.0, labeled=True, view=True, mousetext=None, mouselink=None, mouseimag=None, mousetitle=None, mousediv=None, saveas=None, relpath=False, graphicsformat=['pdf']): """Computes individual errors over model chemistries in *mcset* (which may be default or an array or a function generating an array) versus *benchmark*. Thread *color* can be 'rgb' for old coloring, a color name or 'sapt' for spectrum coloring. *saveas* conveys directory ('/') and/or filename for saving the resulting plot. File extension is not accessible, but *graphicsformat* array requests among 'png', 'pdf', and 'eps' formats. *relpath* forces paths to saved files to be relative to current directory, rather than absolute paths for returned code and file dictionary. Prepares thread diagram instructions and either executes them if matplotlib available (Canopy or Anaconda) or prints them. Returns a dictionary of all saved plot filenames. If any of *mousetext*, *mouselink*, or *mouseimag* is specified, htmlcode will be returned with an image map of slats to any of text, link, or image, respectively. """ # compute errors dbse = self.dbrxn.split('-')[0] indiv = self.compute_errors(benchmark=benchmark, mcset=mcset, failoninc=failoninc, verbose=verbose) # repackage dbdat = [] for mc in indiv.keys(): dbdat.append({'db': dbse, 'show': fancify_mc_tag(mc), 'sys': mc, 'color': self.color, 'data': [indiv[mc][0]]}) mae = None # [errors[ix][self.dbse]['mae'] for ix in index] mape = None # [100 * errors[ix][self.dbse]['mape'] for ix in index] # form unique filename # ixpre, ixsuf, ixmid = string_contrast(index) # title = self.dbse + ' ' + ixpre + '[]' + ixsuf title = self.dbrxn labels = [''] # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict, htmlcode = mpl.threads(%s,\n color='%s',\n title='%s',\n labels=%s,\n mae=%s,\n mape=%s\n xlimit=%s\n labeled=%s\n saveas=%s\n mousetext=%s\n mouselink=%s\n mouseimag=%s\n mousetitle=%s,\n mousediv=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, color, title, labels, mae, mape, str(xlimit), repr(labeled), repr(saveas), repr(mousetext), repr(mouselink), repr(mouseimag), repr(mousetitle), repr(mousediv), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict, htmlcode = mpl.threads(dbdat, color=color, title=title, labels=labels, mae=mae, mape=mape, xlimit=xlimit, labeled=labeled, view=view, mousetext=mousetext, mouselink=mouselink, mouseimag=mouseimag, mousetitle=mousetitle, mousediv=mousediv, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict, htmlcode
[docs]class WrappedDatabase(object): """Wrapper class for raw Psi4 database modules that does some validation of contents, creates member data and accessors for database structures, defines error computation, and handles database subsets. Not to be used directly-- see qcdb.Database for handling single or multiple qdcb.WrappedDatabase objects and defining nice statistics, plotting, and table functionalities. >>> asdf = qcdb.WrappedDatabase('Nbc10') """ def __init__(self, dbname, pythonpath=None): """Instantiate class with case insensitive name *dbname*. Module search path can be prepended with *pythonpath*. """ #: internal name of database #: #: >>> print asdf.dbse #: 'NBC1' self.dbse = None #: description line #: #: >>> print asdf.tagl #: 'interaction energies of dissociation curves for non-bonded systems' self.tagl = None #: OrderedDict of reactions/members #: #: >>> print asdf.hrxn.keys() #: ['BzBz_S-3.2', 'BzBz_S-3.3', ... 'BzBz_PD36-2.8', 'BzBz_PD36-3.0'] self.hrxn = None #: dict of reagents/geometries #: #: >>> print asdf.hrgt.keys() #: ['NBC1-BzBz_PD32-0.8-monoA-CP', 'NBC1-BzBz_PD34-0.6-dimer', ... 'NBC1-BzBz_PD34-1.7-dimer'] self.hrgt = None #: dict of defined reaction subsets. #: Note that self.sset['default'] contains all the nonredundant information. #: #: >>> print asdf.sset.keys() #: ['meme', 'mxddpp', '5min', ... 'small'] self.sset = None # Removing hrxn, hrgt etc. do not reduce the size of the object. # These attributes are stored for ease of access for adding qc info, etc. #: object of defined reaction subsets. self.oss = None # load database if pythonpath is not None: sys.path.insert(1, pythonpath) else: sys.path.append(os.path.dirname(__file__) + '/../databases') database = psiutil.import_ignorecase(dbname) if not database: print('\nPython module for database %s failed to load\n\n' % (dbname)) print('\nSearch path that was tried:\n') print(", ".join(map(str, sys.path))) raise ValidationError("Python module loading problem for database " + str(dbname)) # gross validation of database for item in ['dbse', 'GEOS', 'HRXN', 'ACTV', 'RXNM']: try: getattr(database, item) except AttributeError: raise ValidationError("""Database %s severely deformed with %s missing.""" % (database.__name__, item)) for item in ['TAGL', 'BIND']: try: getattr(database, item) except AttributeError: print("""Warning: Database %s possibly deformed with %s missing.\n""" % (database.__name__, item)) # form database name self.dbse = database.dbse try: self.tagl = database.TAGL['dbse'] except KeyError: print("""Warning: TAGL missing for database %s""" % (self.dbse)) # form array of database contents to process through pieces = [] for item in dir(database): if item in ['qcdb', 'rxn', 'dbse', 'TAGL']: pass elif item.startswith('__'): pass else: pieces.append(item) # form qcdb.Reagent objects from all defined geometries, GEOS oHRGT = {} for rgt, mol in database.GEOS.iteritems(): mol.update_geometry() try: tagl = database.TAGL[rgt] except KeyError: tagl = None print("""Warning: TAGL missing for reagent %s""" % (rgt)) oHRGT[rgt] = Reagent(name=rgt, mol=mol, tagl=tagl) pieces.remove('GEOS') self.hrgt = oHRGT # form qcdb.Reaction objects from comprehensive reaction list, HRXN oHRXN = OrderedDict() for rxn in database.HRXN: try: tagl = database.TAGL[database.dbse + '-' + str(rxn)] except KeyError: tagl = None print("""Warning: TAGL missing for reaction %s""" % (rxn)) try: elst = database.DATA['SAPT ELST ENERGY'][database.dbse + '-' + str(rxn)] disp = database.DATA['SAPT DISP ENERGY'][database.dbse + '-' + str(rxn)] color = abs(elst) / (abs(elst) + abs(disp)) except (KeyError, AttributeError): color = 'black' print("""Warning: DATA['SAPT * ENERGY'] missing for reaction %s""" % (rxn)) oHRXN[rxn] = Reaction(name=rxn, dbse=database.dbse, indx=database.HRXN.index(rxn) + 1, color=color, tagl=tagl) pieces.remove('HRXN') self.hrxn = oHRXN # list and align database stoichiometry modes, ACTV* and RXNM* oACTV = {} for modactv in [item for item in pieces if item.startswith('ACTV')]: modrxnm = modactv.replace('ACTV', 'RXNM') mode = 'default' if modactv == 'ACTV' else modactv.replace('ACTV_', '') try: getattr(database, modrxnm) except AttributeError: modrxnm = 'RXNM' oACTV[mode] = [modactv, modrxnm] for item in [tmp for tmp in pieces if tmp.startswith('ACTV') or tmp.startswith('RXNM')]: pieces.remove(item) # populate reaction matrices in qcdb.Reaction objects for rxn in database.HRXN: dbrxn = database.dbse + '-' + str(rxn) for mode, actvrxnm in oACTV.iteritems(): tdict = OrderedDict() for rgt in getattr(database, actvrxnm[0])[dbrxn]: tdict[oHRGT[rgt]] = getattr(database, actvrxnm[1])[dbrxn][rgt] oHRXN[rxn].rxnm[mode] = tdict # list embedded quantum chem info per rxn, incl. BIND* arrsbind = [item for item in pieces if item.startswith('BIND_')] if len(arrsbind) == 0: if 'BIND' in pieces: arrsbind = ['BIND'] else: arrsbind = [] print("""Warning: No BIND array with reference values.""") else: for arrbind in arrsbind: if getattr(database, arrbind) is database.BIND: break else: print("""Warning: No BIND_* array assigned to be master BIND.""") oBIND = {} for arrbind in arrsbind: ref = database.dbse + 'REF' if arrbind == 'BIND' else arrbind.replace('BIND_', '') methods[ref] = Method(name=ref) bases[ref] = BasisSet(name=ref) try: getattr(database, 'BINDINFO_' + ref) except AttributeError: arrbindinfo = None print("""Warning: No BINDINFO dict with BIND attribution and modelchem for %s.""" % (ref)) else: arrbindinfo = 'BINDINFO_' + ref oBIND[ref] = [methods[ref], 'default', bases[ref], arrbind, (getattr(database, arrbind) is database.BIND), arrbindinfo] for item in [tmp for tmp in pieces if tmp.startswith('BIND')]: pieces.remove(item) # populate data with reference values in qcdb.Reaction objects for rxn in database.HRXN: dbrxn = database.dbse + '-' + str(rxn) for ref, info in oBIND.iteritems(): bindval = getattr(database, info[3])[dbrxn] if info[5] is None: methodfeed = info[0] modefeed = info[1] basisfeed = info[2] citationkey = 'anon' else: bindinforxn = getattr(database, info[5])[dbrxn] methodfeed = methods[bindinforxn['method'].upper()] if 'method' in bindinforxn else info[0] modefeed = bindinforxn['mode'] if 'mode' in bindinforxn else info[1] basisfeed = bases[bindinforxn['basis'].lower()] if 'basis' in bindinforxn else info[2] citationkey = bindinforxn['citation'].lower() if 'citation' in bindinforxn else 'anon' citationfeed = pubs[citationkey] if bindval is not None: oHRXN[rxn].data[ref] = ReactionDatum(dbse=database.dbse, rxn=rxn, method=methodfeed, mode=modefeed, basis=basisfeed, citation=citationfeed, value=bindval) # oHRXN[rxn].data[ref] = ReactionDatum(dbse=database.dbse, # rxn=rxn, # method=info[0], # mode=info[1], # basis=info[2], # value=bindval) # #value=getattr(database, info[3])[dbrxn]) if info[4]: oHRXN[rxn].benchmark = ref # Process subsets oSSET = {} fsHRXN = frozenset(database.HRXN) for sset in pieces: if not sset.startswith('AXIS_'): try: fssset = frozenset(getattr(database, sset)) except TypeError: continue if fssset.issubset(fsHRXN): oSSET[sset] = getattr(database, sset) for item in oSSET.keys(): pieces.remove(item) oSSET['HRXN'] = database.HRXN self.sset = OrderedDict() self.oss = OrderedDict() # just in case oss replaces sset someday for item in oSSET.keys(): if item == 'HRXN_SM': label = 'small' elif item == 'HRXN_LG': label = 'large' elif item == 'HRXN_EQ': label = 'equilibrium' elif item == 'HRXN': label = 'default' elif item.startswith('HRXN_'): label = item.replace('HRXN_', '').lower() else: label = item.lower() # subsets may have different ordering from HRXN self.sset[label] = OrderedDict() for rxn in oSSET[item]: self.sset[label][rxn] = oHRXN[rxn] # initialize subset objects with light info try: sstagl = database.TAGL[item] except KeyError: try: sstagl = database.TAGL[label] except KeyError: sstagl = None print("""Warning: TAGL missing for subset %s""" % (label)) self.oss[label] = Subset(name=label, hrxn=self.sset[label].keys(), tagl=sstagl) # Process axes for axis in [item for item in pieces if item.startswith('AXIS_')]: label = axis.replace('AXIS_', '') try: defn = getattr(database, axis) except AttributeError: raise ValidationError("""Axis %s not importable.""" % (label)) axisrxns = frozenset(defn.keys()) attached = False for ss, rxns in self.sset.iteritems(): if frozenset(rxns).issubset(axisrxns): ordered_floats = [] for rx in self.oss[ss].hrxn: ordered_floats.append(defn[rx]) self.oss[ss].axis[label] = ordered_floats attached = True if not attached: print("""Warning: AXIS %s not affiliated with a subset""" % (label)) pieces.remove(axis) print("""WrappedDatabase %s: Unparsed attributes""" % (self.dbse), pieces) def __str__(self): text = '' text += """ ==> %s WrappedDatabase <==\n\n""" % (self.dbse) text += """ Reagents: %s\n""" % (self.hrgt.keys()) text += """ Reactions: %s\n""" % (self.hrxn.keys()) text += """ Subsets: %s\n""" % (self.sset.keys()) text += """ Reference: %s\n""" % (self.benchmark()) text += """\n""" return text
[docs] def add_ReactionDatum(self, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, comment=None, overwrite=False): """Add a new quantum chemical value to *rxn* by creating a qcdb.ReactionDatum from same arguments as that class's object-less constructor. *rxn* may be actual Reaction.name or Reaction.indx. """ if (self.dbse == dbse): if rxn in self.hrxn: rxnname = rxn # rxn is proper reaction name else: try: if (rxn + 1 > 0) and (rxn == self.hrxn.items()[rxn - 1][1].indx): rxnname = self.hrxn.items()[rxn - 1][1].name # rxn is reaction index (maybe dangerous?) except (TypeError, IndexError): raise ValidationError( """Inconsistent to add ReactionDatum for %s to database %s with reactions %s.""" % (dbse + '-' + str(rxn), self.dbse, self.hrxn.keys())) label = '-'.join([method, mode, basis]) if overwrite or (label not in self.hrxn[rxnname].data): self.hrxn[rxnname].data[label] = ReactionDatum.library_modelchem(dbse=dbse, rxn=rxnname, method=method, mode=mode, basis=basis, value=value, units=units, comment=comment, citation=citation) else: raise ValidationError("""ReactionDatum %s already present in Database.""" % (label)) else: raise ValidationError("""Inconsistent to add ReactionDatum for %s to database %s.""" % (dbse + '-' + str(rxn), self.dbse))
[docs] def add_Subset(self, name, func): """Define a new subset labeled *name* by providing a function *func* that filters *self.hrxn*. """ sname = name.lower().split('\n') label = sname.pop(0) tagl = sname[0].strip() if sname else None try: filtered = func(self) lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in filtered] except TypeError as e: raise ValidationError("""Function %s did not return list: %s.""" % (func.__name__, str(e))) if len(lsslist) == 0: print("""WrappedDatabase %s: Subset %s NOT formed: empty""" % (self.dbse, label)) return self.sset[label] = OrderedDict() for rxn in lsslist: self.sset[label][rxn] = self.hrxn[rxn] self.oss[label] = Subset(name=label, hrxn=self.sset[label].keys(), tagl=tagl) print("""WrappedDatabase %s: Subset %s formed: %d""" % (self.dbse, label, len(self.sset[label].keys())))
[docs] def compute_errors(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False): """For full database or subset *sset*, computes raw reaction errors between *modelchem* and *benchmark* model chemistries. Returns error if model chemistries are missing for any reaction in subset unless *failoninc* set to False, whereupon returns partial. Returns dictionary of reaction labels and error forms. """ if isinstance(sset, basestring): # sset is normal subset name 'MX' corresponding to HRXN_MX or MX array in database module try: lsset = self.sset[sset.lower()] except KeyError as e: # raise ValidationError("""Subset named %s not available""" % (str(e))) lsset = OrderedDict() else: if callable(sset): # sset is function that will generate subset of HRXN from sset(self) lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in sset(self)] else: # sset is array containing reactions lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in sset] # assemble dict of qcdb.Reaction objects from array of reaction names lsset = OrderedDict() for rxn in lsslist: lsset[rxn] = self.hrxn[rxn] # cureinfo = self.get_pec_weightinfo() err = {} for rxn, oRxn in lsset.iteritems(): lbench = oRxn.benchmark if benchmark == 'default' else benchmark try: mcLesser = oRxn.data[modelchem].value except KeyError as e: if failoninc: raise ValidationError("""Reaction %s missing datum %s.""" % (str(rxn), str(e))) else: continue try: mcGreater = oRxn.data[lbench].value except KeyError as e: if lbench == 'ZEROS': pass else: print("""Reaction %s missing benchmark""" % (str(rxn))) continue # handle particulars of PEC error measures # rxncureinfo = cureinfo[rxn] # try: # mcGreaterCrvmin = self.hrxn[rxncureinfo['eq']].data[lbench].value # except KeyError as e: # print """Reaction %s missing benchmark""" % (str(eqrxn)) # cure_denom = cure_weight(refrxn=mcGreater, refeq=mcGreaterCrvmin, rrat=rxncureinfo['Rrat']) # balanced_mask, balwt = balanced_error(refrxn=mcGreater, refeq=mcGreaterCrvmin, rrat=rxncureinfo['Rrat']) if lbench == 'ZEROS': err[rxn] = [mcLesser, 0.0, 0.0, 0.0, 1.0] # FAKE else: err[rxn] = [mcLesser - mcGreater, (mcLesser - mcGreater) / abs(mcGreater), (mcLesser - mcGreater) / abs(mcGreater), # FAKE (mcLesser - mcGreater) / abs(mcGreater), # FKAE 1.0 # FAKE ] # (mcLesser - mcGreater) / abs(cure_denom), # (mcLesser - mcGreater) * balanced_mask / abs(mcGreaterCrvmin), # balwt] if verbose: print("""p = %8.4f, pe = %8.3f%%, pbe = %8.3f%% pce = %8.3f%% reaction %s.""" % (err[rxn][0], 100 * err[rxn][1], 100 * err[rxn][3], 100 * err[rxn][2], str(rxn))) return err
[docs] def compute_statistics(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, returnindiv=False): """For full database or subset *sset*, computes many error statistics between single *modelchem* and *benchmark* model chemistries. Returns error if model chemistries are missing for any reaction in subset unless *failoninc* set to False, whereupon returns partial statistics. Returns dictionary of statistics labels and values. """ err = self.compute_errors(modelchem, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose) if len(err) == 0: error = initialize_errors() if verbose: print("""Warning: nothing to compute.""") else: Nrxn = float(len(err)) error = OrderedDict() # linear (absolute) error linear = [val[0] for val in err.values()] error['pexe'] = max(linear) error['nexe'] = min(linear) error['maxe'] = max(linear, key=lambda x: abs(x)) error['mine'] = min(linear, key=lambda x: abs(x)) error['me'] = sum(linear) / Nrxn error['mae'] = sum(map(abs, linear)) / Nrxn error['rmse'] = math.sqrt(sum(map(lambda x: x ** 2, linear)) / Nrxn) error['stde'] = math.sqrt((sum(map(lambda x: x ** 2, linear)) - (sum(linear) ** 2) / Nrxn) / Nrxn) # fractional (relative) error relative = [val[1] for val in err.values()] error['pexpe'] = max(relative) error['nexpe'] = min(relative) error['maxpe'] = max(relative, key=lambda x: abs(x)) error['minpe'] = min(relative, key=lambda x: abs(x)) error['mpe'] = sum(relative) / Nrxn error['mape'] = sum(map(abs, relative)) / Nrxn error['rmspe'] = math.sqrt(sum(map(lambda x: x ** 2, relative)) / Nrxn) error['stdpe'] = math.sqrt((sum(map(lambda x: x ** 2, relative)) - (sum(relative) ** 2) / Nrxn) / Nrxn) # balanced (relative) error balanced = [val[3] for val in err.values()] balwt = sum([val[4] for val in err.values()]) # get the wt fn. highly irregular TODO error['pexpbe'] = max(balanced) error['nexpbe'] = min(balanced) error['maxpbe'] = max(balanced, key=lambda x: abs(x)) error['minpbe'] = min(balanced, key=lambda x: abs(x)) error['mpbe'] = sum(balanced) / balwt #Nrxn error['mapbe'] = sum(map(abs, balanced)) / balwt #Nrxn error['rmspbe'] = math.sqrt(sum(map(lambda x: x ** 2, balanced)) / balwt) #Nrxn) error['stdpbe'] = None # get math domain errors w/wt in denom math.sqrt((sum(map(lambda x: x ** 2, balanced)) - (sum(balanced) ** 2) / balwt) / balwt) #/ Nrxn) / Nrxn) # capped (relative) error capped = [val[2] for val in err.values()] error['pexpce'] = max(capped) error['nexpce'] = min(capped) error['maxpce'] = max(capped, key=lambda x: abs(x)) error['minpce'] = min(capped, key=lambda x: abs(x)) error['mpce'] = sum(capped) / Nrxn error['mapce'] = sum(map(abs, capped)) / Nrxn error['rmspce'] = math.sqrt(sum(map(lambda x: x ** 2, capped)) / Nrxn) error['stdpce'] = math.sqrt((sum(map(lambda x: x ** 2, capped)) - (sum(capped) ** 2) / Nrxn) / Nrxn) if verbose: print("""%d systems in %s for %s vs. %s, subset %s.\n%s""" % (len(err), self.dbse, modelchem, benchmark, sset, format_errors(error, mode=2))) if returnindiv: return error, err else: return error
[docs] def load_qcdata(self, modname, funcname, pythonpath=None, failoninc=True): """Loads qcdb.ReactionDatums from module *modname* function *funcname*. Module search path can be prepended with *pythonpath*. """ if pythonpath is not None: sys.path.insert(1, pythonpath) else: sys.path.append(os.path.dirname(__file__) + '/../data') try: datamodule = __import__(modname) except ImportError: if not failoninc: print("""%s data unavailable for database %s.\n""" % (modname, self.dbse)) return else: print("""\nPython module for database data %s failed to load\n\n""" % (modname)) print("""\nSearch path that was tried:\n""") print(', '.join(map(str, sys.path))) raise ValidationError("""Python module loading problem for database data """ + str(modname)) try: getattr(datamodule, funcname)(self) except AttributeError: if not failoninc: print("""%s %s data unavailable for database %s.\n""" % (modname, funcname, self.dbse)) return else: raise ValidationError("Python module missing function %s for loading data " % (str(funcname))) print("""WrappedDatabase %s: %s %s results loaded""" % (self.dbse, modname, funcname))
[docs] def load_qcdata_byproject(self, project, pythonpath=None): """Loads qcdb.ReactionDatums from standard location for *project* :module dbse_project and function load_project. Module search path can be prepended with *pythonpath*. """ mod = self.dbse + '_' + project func = 'load_' + project self.load_qcdata(modname=mod, funcname=func, pythonpath=pythonpath)
[docs] def load_qcdata_hrxn_byproject(self, project, path=None): """""" if path is None: path = os.path.dirname(__file__) + '/../data' pklfile = os.path.abspath(path) + os.sep + self.dbse + '_hrxn_' + project + '.pickle' if not os.path.isfile(pklfile): raise ValidationError( "Reactions pickle file for loading database data from file %s does not exist" % (pklfile)) with open(pklfile, 'rb') as handle: hrxns = pickle.load(handle) # no error checking for speed for rxn, data in hrxns.iteritems(): self.hrxn[rxn].data.update(data)
[docs] def load_qcdata_hdf5_trusted(self, project, path=None): """Loads qcdb.ReactionDatums from HDF5 file at path/dbse_project.h5 . If path not given, looks in qcdb/data. This file is written by reap-DB and so has been largely validated. """ if path is None: path = os.path.dirname(__file__) + '/../data' hdf5file = os.path.abspath(path) + os.sep + self.dbse + '_' + project + '.h5' if not os.path.isfile(hdf5file): raise ValidationError("HDF5 file for loading database data from file %s does not exist" % (hdf5file)) try: import pandas as pd except ImportError: raise ValidationError("Pandas data managment module must be available for import") try: next(self.hrxn.iterkeys()) + 1 except TypeError: intrxn = False else: intrxn = True with pd.get_store(hdf5file) as handle: for mc in handle['pdie'].keys(): lmc = mc.split('-') # TODO could be done better method = lmc[0] bsse = '_'.join(lmc[1:-1]) basis = lmc[-1] df = handle['pdie'][mc] for dbrxn in df.index[df.notnull()].values: [dbse, rxn] = dbrxn.split('-', 1) if intrxn: rxn = int(rxn) self.hrxn[rxn].data[mc] = ReactionDatum.library_modelchem(dbse=dbse, rxn=rxn, method=method, mode=bsse, basis=basis, value=df[dbrxn])
[docs] def integer_reactions(self): """Returns boolean of whether reaction names need to be cast to integer""" try: next(self.hrxn.iterkeys()) + 1 except TypeError: return False else: return True
@staticmethod
[docs] def load_pickled(dbname, path=None): """ """ if path is None: path = os.path.dirname(__file__) + '/../data' picklefile = psiutil.findfile_ignorecase(dbname, pre=os.path.abspath(path) + os.sep, post='_WDb.pickle') if not picklefile: raise ValidationError("Pickle file for loading database data from file %s does not exist" % ( os.path.abspath(path) + os.sep + dbname + '.pickle')) # with open('/var/www/html/bfdb_devel/bfdb/scratch/ASDFlogfile.txt', 'a') as handle: # handle.write('<!-- PICKLE %s\n' % (picklefile)) with open(picklefile, 'rb') as handle: instance = pickle.load(handle) return instance
[docs] def available_modelchems(self, union=True): """Returns all the labels of model chemistries that have been loaded. Either all modelchems that have data for any reaction if *union* is True or all modelchems that have data for all reactions if *union* is False. """ mcs = [set(v.data) for v in self.hrxn.itervalues()] if union: return sorted(set.union(*mcs)) else: return sorted(set.intersection(*mcs))
[docs] def benchmark(self): """Returns the model chemistry label for the database's benchmark.""" bm = None rxns = self.hrxn.itervalues() while bm is None: try: bm = next(rxns).benchmark except StopIteration: break return bm
# return next(self.hrxn.itervalues()).benchmark # TODO all rxns have same bench in db module so all have same here in obj # but the way things stored in Reactions, this doesn't have to be so
[docs] def load_subsets(self, modname='subsetgenerator', pythonpath=None): """Loads subsets from all functions in module *modname*. """ if pythonpath is not None: sys.path.insert(1, pythonpath) else: sys.path.append(os.path.dirname(__file__)) try: ssmod = __import__(modname) except ImportError: print("""\nPython module for database data %s failed to load\n\n""" % (modname)) print("""\nSearch path that was tried:\n""") print(', '.join(map(str, sys.path))) raise ValidationError("Python module loading problem for database subset generator " + str(modname)) for func in dir(ssmod): if callable(getattr(ssmod, func)): self.add_Subset(getattr(ssmod, func).__doc__, getattr(ssmod, func)) print("""WrappedDatabase %s: Defined subsets loaded""" % (self.dbse))
[docs] def get_pec_weightinfo(self): """ """ def closest(u, options): return max(options, key=lambda v: len(os.path.commonprefix([u, v]))) dbdat = {} oss = self.oss['default'] eqrxns = [rxn for rxn, rr in zip(oss.hrxn, oss.axis['Rrat']) if rr == 1.0] for rxnix, rxn in enumerate(oss.hrxn): dbdat[rxn] = {'eq': closest(rxn, eqrxns), 'Rrat': oss.axis['Rrat'][rxnix]} return dbdat
# def table_simple1(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True, # plotpath='analysis/flats/flat_', theme='smmerge'): # rowplan = ['bas', 'mtd'] # columnplan = [ # ['l', r"""Method \& Basis Set""", '', textables.label, {}], # ['d', r'S22', 'HB', textables.val, {'sset': 'hb'}], # ['d', r'S22', 'MX', textables.val, {'sset': 'mx'}], # ['d', r'S22', 'DD', textables.val, {'sset': 'dd'}], # ['d', r'S22', 'TT', textables.val, {'sset': 'default'}], # ] # # def table_simple2(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True, # plotpath='analysis/flats/flat_', theme='smmerge'): # rowplan = ['bas', 'mtd'] # columnplan = [ # ['l', r"""Method \& Basis Set""", '', textables.label, {}], # ['d', r'MAE', 'HB', textables.val, {'sset': 'hb'}], # ['d', r'MAE', 'MX', textables.val, {'sset': 'mx'}], # ['d', r'MAE', 'DD', textables.val, {'sset': 'dd'}], # ['d', r'MAE', 'TT', textables.val, {'sset': 'default'}], # ['d', r'MA\%E', 'HB', textables.val, {'sset': 'hb', 'err': 'mape'}], # ['d', r'MA\%E', 'MX', textables.val, {'sset': 'mx', 'err': 'mape'}], # ['d', r'MA\%E', 'DD', textables.val, {'sset': 'dd', 'err': 'mape'}], # ['d', r'MA\%E', 'TT', textables.val, {'sset': 'default', 'err': 'mape'}], # ['d', r'maxE', 'TT ', textables.val, {'sset': 'default', 'err': 'maxe'}], # ['d', r'min\%E', ' TT', textables.val, {'sset': 'default', 'err': 'minpe'}], # ['d', r'rmsE', 'TT ', textables.val, {'sset': 'default', 'err': 'rmse'}], # ['d', r'devE', ' TT', textables.val, {'sset': 'default', 'err': 'stde'}], # ] # # def table_simple3(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True, # plotpath='analysis/flats/flat_', theme='smmerge'): # rowplan = ['err', 'bas', 'mtd'] # columnplan = [ # ['l', r"""Method \& Basis Set""", '', textables.label, {}], # ['d', r'MAE', 'HB', textables.val, {'sset': 'hb'}], # ['d', r'MAE', 'MX', textables.val, {'sset': 'mx'}], # ['d', r'MAE', 'DD', textables.val, {'sset': 'dd'}], # ['d', r'MAE', 'TT', textables.val, {'sset': 'default'}], # ] # # def table_simple4(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True, # plotpath='analysis/flats/flat_', theme='smmerge'): # plotpath = 'autogen' # TODO handle better # rowplan = ['bas', 'mtd'] # columnplan = [ # ['l', r"""Method \& Basis Set""", '', textables.label, {}], # ['d', r'S22', 'HB', textables.val, {'sset': 'hb'}], # ['d', r'S22', 'MX', textables.val, {'sset': 'mx'}], # ['d', r'S22', 'DD', textables.val, {'sset': 'dd'}], # ['d', r'S22', 'TT', textables.val, {'sset': 'default'}], # # ['l', r"""Error Distribution\footnotemark[1]""", r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'), textables.graphics, {}], # ['l', r"""Error Distribution\footnotemark[1]""", r"""""", textables.graphics, {}], # ]
[docs]class Database(object): """Collection for handling single or multiple qcdb.WrappedDatabase objects. Particularly, unifying modelchem and subset names that when inconsistent across component databases. Also, defining statistics across databases. >>> asdf = qcdb.Database(['s22', 'Nbc10', 'hbc6', 'HSG'], 'DB4') >>> qwer = qcdb.Database('s22') """ def __init__(self, dbnamelist, dbse=None, pythonpath=None, loadfrompickle=False, path=None): #: internal name of database collection #: #: >>> print asdf.dbse #: 'DB4' self.dbse = None #: ordered component Database objects #: #: >>> print asdf.dbdict #: XXXX self.dbdict = OrderedDict() #: subset assembly pattern #: #: >>> print asdf.sset.keys() #: XXXX self.sset = OrderedDict() #: assembly pattern for transspecies modelchems #: #: >>> print asdf.mcs.keys() #: XXXX self.mcs = {} self.benchmark = None # slight validation, repackaging into dbnamelist if isinstance(dbnamelist, basestring): dbnamelist = [dbnamelist] elif all(isinstance(item, basestring) for item in dbnamelist): pass else: raise ValidationError('Database::constructor: Inappropriate configuration of constructor arguments') # load databases for db in dbnamelist: if loadfrompickle: tmp = WrappedDatabase.load_pickled(db, path=path) else: tmp = WrappedDatabase(db, pythonpath=pythonpath) self.dbdict[tmp.dbse] = tmp # slurp up the obvious overlaps consolidated_bench = [odb.benchmark() for odb in self.dbdict.values()] if len(set(consolidated_bench)) == 1: self.benchmark = consolidated_bench[0] else: self.benchmark = ''.join(consolidated_bench) self.mcs[self.benchmark] = consolidated_bench # methods[ref] = Method(name=ref) # bases[ref] = BasisSet(name=ref) self.mcs['default'] = consolidated_bench # self.mcs['default'] = [odb.benchmark() for odb in self.dbdict.values()] self._intersect_subsets() self._intersect_modelchems() # complex subsets self.load_subsets() # collection name self.dbse = ''.join(self.dbdict.keys()) if dbse is None else dbse # merge Reaction-s self.hrxn = OrderedDict() for db, odb in self.dbdict.iteritems(): for rxn, orxn in odb.hrxn.iteritems(): self.hrxn[orxn.dbrxn] = orxn # merge Reagent-s self.hrgt = OrderedDict() for db, odb in self.dbdict.iteritems(): for rgt, orgt in odb.hrgt.iteritems(): self.hrgt[orgt.name] = orgt print("""Database %s: %s""" % (self.dbse, ', '.join(self.dbdict.keys()))) def __str__(self): text = '' text += """ ===> %s Database <===\n\n""" % (self.dbse) # text += """ Reagents: %s\n""" % (self.hrgt.keys()) # text += """ Reactions: %s\n""" % (self.hrxn.keys()) text += """ Subsets: %s\n""" % (self.sset.keys()) # text += """ Reference: %s\n""" % ('default: ' + ' + '.join(self.mcs['default'])) try: text += """ Reference: %s\n""" % (self.benchmark + ': ' + ' + '.join(self.mcs[self.benchmark])) except TypeError: text += """ Reference: %s\n""" % ('UNDEFINED') text += """ Model Chemistries: %s\n""" % ( ', '.join(sorted([mc for mc in self.mcs.keys() if mc is not None]))) text += """\n""" for db in self.dbdict.keys(): text += self.dbdict[db].__str__() return text # def benchmark(self): # """Returns the model chemistry label for the database's benchmark.""" # return self.benchmark #TODO not sure if right way to go about this self.mcs['default']
[docs] def fancy_mcs(self, latex=False): """ """ fmcs = {} for mc in self.mcs.keys(): try: mtd, mod, bas = mc.split('-') except ValueError: fmcs[mc] = mc else: if latex: tmp = """%s/%s, %s""" % \ (methods[mtd].latex, bases[bas].latex, mod.replace('_', '\\_')) fmcs[mc] = """%45s""" % (tmp) else: fmcs[mc] = """%20s / %-20s, %s""" % \ (methods[mtd].fullname, bases[bas].fullname, mod) return fmcs
# def fancy_mcs_nested(self): # """ # """ # fmcs = defaultdict(lambda: defaultdict(dict)) # for mc in self.mcs.keys(): # try: # mtd, mod, bas = mc.split('-') # except ValueError: # fmcs['All']['All'][mc] = mc # fmcs['Method']['Others'][mc] = mc # fmcs['Options']['Others'][mc] = mc # fmcs['Basis Treatment']['Others'][mc] = mc # else: # fancyrepr = """%20s / %-20s %s""" % (methods[mtd].latex, bases[bas].latex, mod) # fmcs['All']['All'][mc] = fancyrepr # fmcs['Method'][methods[mtd].latex][mc] = fancyrepr # fmcs['Options'][mod][mc] = fancyrepr # fmcs['Basis Treatment'][bases[bas].latex][mc] = fancyrepr # return fmcs
[docs] def integer_reactions(self): """Returns boolean of whether reaction names need to be cast to integer""" return {db: odb.integer_reactions() for db, odb in self.dbdict.items()}
[docs] def load_qcdata_byproject(self, project, pythonpath=None): """For each component database, loads qcdb.ReactionDatums from standard location for *project* :module dbse_project and function load_project. Module search path can be prepended with *pythonpath*. """ for db, odb in self.dbdict.items(): odb.load_qcdata_byproject(project, pythonpath=pythonpath) self._intersect_modelchems()
[docs] def load_qcdata_hdf5_trusted(self, project, path=None): """For each component database, loads qcdb.ReactionDatums from HDF5 file at path/dbse_project.h5 . If path not given, looks in qcdb/data. This file is written by reap-DB and so has been largely validated. """ for db, odb in self.dbdict.items(): odb.load_qcdata_hdf5_trusted(project, path=path) self._intersect_modelchems()
[docs] def load_qcdata_hrxn_byproject(self, project, path=None): for db, odb in self.dbdict.items(): odb.load_qcdata_hrxn_byproject(project, path=path) self._intersect_modelchems()
[docs] def available_projects(self, path=None): """""" import glob if path is None: path = os.path.dirname(__file__) + '/../data' projects = [] for pjfn in glob.glob(path + '/*_hrxn_*.pickle'): pj = pjfn[:-7].split('_')[-1] projects.append(pj) complete_projects = [] for pj in set(projects): if all([os.path.isfile(path + '/' + db + '_hrxn_' + pj + '.pickle') for db in self.dbdict.keys()]): complete_projects.append(pj) return complete_projects
[docs] def load_subsets(self, modname='subsetgenerator', pythonpath=None): """For each component database, loads subsets from all functions in module *modname*. Default *modname* usues standard generators. """ for db, odb in self.dbdict.items(): odb.load_subsets(modname=modname, pythonpath=pythonpath) self._intersect_subsets()
[docs] def add_Subset(self, name, func): """Define a new subset labeled *name* by providing a database *func* whose keys are the keys of dbdict and whose values are a function that filters each WrappedDatabase's *self.hrxn*. """ label = name.lower() merged = [] for db, odb in self.dbdict.iteritems(): if callable(func[db]): ssfunc = func[db] else: ssfunc = lambda x: func[db] odb.add_Subset(name=name, func=ssfunc) if name in odb.sset: merged.append(name) else: merged.append(None) if any(merged): self.sset[label] = merged print("""Database %s: Subset %s formed: %s""" % (self.dbse, label, self.sset[label])) else: print("""Database %s: Subset %s NOT formed: empty""" % (self.dbse, label))
[docs] def add_Subset_union(self, name, sslist): """ Define a new subset labeled *name* (note that there's nothing to prevent overwriting an existing subset name) from the union of existing named subsets in *sslist*. """ funcdb = {} for db, odb in self.dbdict.iteritems(): dbix = self.dbdict.keys().index(db) overlapping_dbrxns = [] for ss in sslist: lss = self.sset[ss][dbix] if lss is not None: overlapping_dbrxns.append(self.dbdict[db].sset[lss].keys()) rxnlist = set().union(*overlapping_dbrxns) funcdb[db] = rxnlist self.add_Subset(name, funcdb)
[docs] def add_sampled_Subset(self, sset='default', number_of_samples=1, sample_size=5, prefix='rand'): """Generate and register *number_of_samples* new subsets of size *sample_size* and name built from *prefix*. Reactions chosen from *sset*. """ import random intrxn = self.integer_reactions() rxns = self.get_hrxn(sset=sset).keys() def random_sample(ssname): """Generate and register a single new subset of size *sample_size* and name *ssname*. """ sample = {db: [] for db in self.dbdict.keys()} for dbrxn in random.sample(rxns, sample_size): db, rxn = dbrxn.split('-', 1) typed_rxn = int(rxn) if intrxn[db] else rxn sample[db].append(typed_rxn) self.add_Subset(ssname, sample) for sidx in range(number_of_samples): if number_of_samples == 1: ssname = prefix else: ssname = prefix + '_' + str(sidx) random_sample(ssname)
[docs] def promote_Subset(self, name=None): """Examine component databases and elevate subset *name* not necessarily present for all component databases to a subset for the *self*. When *name* is None, promotes all subsets found for component databases. Also promotes entirety of each component database as a subset with name of component database dbse in lowercase. """ if name is None: sss = [set(odb.sset.keys()) for db, odb in self.dbdict.items()] new = sorted(set.union(*sss)) else: new = [name] for ss in new: if ss not in self.sset: self.sset[ss] = [ss if ss in odb.sset else None for db, odb in self.dbdict.iteritems()] print("""Database %s: Subset %s promoted: %s""" % (self.dbse, ss, self.sset[ss])) if name is None and len(self.dbdict) > 1: for db, odb in self.dbdict.items(): dbix = self.dbdict.keys().index(db) ss = odb.dbse.lower() if ss not in self.sset: self.sset[ss] = ['default' if ix == dbix else None for ix in range(len(self.dbdict))] print("""Database %s: Subset %s promoted: %s""" % (self.dbse, ss, self.sset[ss]))
def _intersect_subsets(self): """Examine component database subsets and collect common names as Database subset. """ sss = [set(odb.sset.keys()) for db, odb in self.dbdict.items()] new = sorted(set.intersection(*sss)) for ss in new: self.sset[ss] = [ss] * len(self.dbdict.keys()) def _intersect_modelchems(self): """Examine component database qcdata and collect common names as Database modelchem. """ mcs = [set(odb.available_modelchems()) for odb in self.dbdict.itervalues()] new = sorted(set.intersection(*mcs)) for mc in new: self.mcs[mc] = [mc] * len(self.dbdict.keys()) # def reaction_generator(self): # """ # """ # for db, odb in self.dbdict.items(): # for rxn, orxn in odb.hrxn.items(): # yield orxn
[docs] def compute_statistics(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, returnindiv=False): """Computes summary statistics and, if *returnindiv* True, individual errors for single model chemistry *modelchem* versus *benchmark* over subset *sset* over all component databases. Particularly, imposes cross-database definitions for sset and modelchem. #Returns error if model chemistries are missing #for any reaction in subset unless *failoninc* set to False, #whereupon returns partial statistics. Returns dictionary of #statistics labels and values. """ errors = OrderedDict() indiv = OrderedDict() actvdb = [] for db, odb in self.dbdict.items(): dbix = self.dbdict.keys().index(db) if self.sset[sset][dbix] is None: errors[db], indiv[db] = (None, None) else: errors[db], indiv[db] = odb.compute_statistics(self.mcs[modelchem][dbix], sset=self.sset[sset][dbix], benchmark='ZEROS' if benchmark == 'ZEROS' else self.mcs[benchmark][dbix], failoninc=failoninc, verbose=verbose, returnindiv=True) actvdb.append(errors[db]) errors[self.dbse] = average_errors(*actvdb) if returnindiv: return errors, indiv else: return errors
[docs] def analyze_modelchems(self, modelchem, benchmark='default', failoninc=True, verbose=False): """For each component database, compute and print nicely formatted summary error statistics for each model chemistry in array *modelchem* versus *benchmark* for all available subsets. """ # compute errors errors = {} for mc in modelchem: errors[mc] = {} for ss in self.sset.keys(): errors[mc][ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss, failoninc=failoninc, verbose=verbose, returnindiv=False) # present errors pre, suf, mid = string_contrast(modelchem) text = """\n ==> %s %s[]%s Errors <==\n""" % (self.dbse, pre, suf) text += """%20s %44s""" % ('', '==> ' + self.dbse + ' <==') for db, odb in self.dbdict.items(): text += """%44s""" % ('=> ' + odb.dbse + ' <=') text += '\n' collabel = """ {:5} {:4} {:6} {:6} {:6}""".format( 'ME', 'STDE', 'MAE', 'MA%E', 'MA%BE') text += """{:20} """.format('') + collabel for db in self.dbdict.keys(): text += collabel text += '\n' text += """{:20} {}""".format('', '=' * 44) ul = False for db in self.dbdict.keys(): text += """{}""".format('_' * 44 if ul else ' ' * 44) ul = not ul text += '\n' for ss in self.sset.keys(): text += """ => %s <=\n""" % (ss) for mc in modelchem: perr = errors[mc][ss] text += """%20s %44s""" % (mid[modelchem.index(mc)], format_errors(perr[self.dbse])) for db in self.dbdict.keys(): text += """%44s""" % ('' if perr[db] is None else format_errors(perr[db])) text += '\n' print(text)
[docs] def plot_bars(self, modelchem, benchmark='default', sset=['default', 'hb', 'mx', 'dd'], failoninc=True, verbose=False, view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """Prepares 'grey bars' diagram for each model chemistry in array *modelchem* versus *benchmark* over all component databases. A wide bar is plotted with three smaller bars, corresponding to the 'mae' summary statistic of the four subsets in *sset*. *saveas* conveys directory ('/') and/or filename for saving the resulting plot. File extension is not accessible, but *graphicsformat* array requests among 'png', 'pdf', and 'eps' formats. *relpath* forces paths to saved files to be relative to current directory, rather than absolute paths for returned code and file dictionary. Prepares bars diagram instructions and either executes them if matplotlib available (Canopy or Anaconda) or prints them. Returns a dictionary of all saved plot filenames. >>> asdf.plot_bars(['MP2-CP-adz', 'MP2-CP-adtz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']) """ # compute errors errors = {} for mc in modelchem: if mc is not None: errors[mc] = {} for ss in sset: errors[mc][ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss, failoninc=failoninc, verbose=verbose, returnindiv=False) # repackage pre, suf, mid = string_contrast(modelchem) dbdat = [] for mc in modelchem: if mc is None: dbdat.append(None) else: dbdat.append({'mc': mid[modelchem.index(mc)], 'data': [errors[mc][ss][self.dbse]['mae'] for ss in sset]}) title = self.dbse + ' ' + pre + '[]' + suf + ' ' + ','.join(sset) # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict = mpl.bars(%s,\n title='%s'\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, title, repr(saveas), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict = mpl.bars(dbdat, title=title, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
# def get_pec_weightinfo(self): # """ # # """ # def closest(u, options): # return max(options, key=lambda v: len(os.path.commonprefix([u, v]))) # # dbdat = {} # for db, odb in self.dbdict.iteritems(): # #dbix = self.dbdict.keys().index(db) # oss = odb.oss['default'] # eqrxns = [rxn for rxn, rr in zip(oss.hrxn, oss.axis['Rrat']) if rr == 1.0] # for rxnix, rxn in enumerate(oss.hrxn): # dbrxn = '-'.join([db, rxn]) # rrat = oss.axis['Rrat'][rxnix] # eq = closest(rxn, eqrxns) # print rxn, rxnix, eq, rrat, dbrxn # dbdat[dbrxn] = {'eq': eq, 'Rrat': rrat} # return dbdat
[docs] def plot_axis(self, axis, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, color='sapt', view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """ """ dbdatdict = OrderedDict() for mc in modelchem: # compute errors errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose, returnindiv=True) # repackage dbdat = [] for db, odb in self.dbdict.iteritems(): dbix = self.dbdict.keys().index(db) oss = odb.oss[self.sset[sset][dbix]] # TODO may need to make axis name distributable across wrappeddbs # TODO not handling mc present bm absent if indiv[db] is not None: for rxn in oss.hrxn: rxnix = oss.hrxn.index(rxn) bm = self.mcs[benchmark][dbix] bmpresent = False if (bm is None or bm not in odb.hrxn[rxn].data) else True mcpresent = False if (self.mcs[mc][dbix] not in odb.hrxn[rxn].data) else True entry = {'db': db, 'sys': str(rxn), 'color': odb.hrxn[rxn].color, 'axis': oss.axis[axis][rxnix]} if bmpresent: entry['bmdata'] = odb.hrxn[rxn].data[self.mcs[benchmark][dbix]].value else: entry['bmdata'] = None if mcpresent: entry['mcdata'] = odb.hrxn[rxn].data[self.mcs[mc][dbix]].value else: continue if bmpresent and mcpresent: entry['error'] = [indiv[db][rxn][0]] else: entry['error'] = [None] dbdat.append(entry) dbdatdict[fancify_mc_tag(mc).strip()] = dbdat pre, suf, mid = string_contrast(modelchem) title = """%s[%s]%s vs %s axis %s for %s subset %s""" % (pre, str(len(mid)), suf, benchmark, axis, self.dbse, sset) print(title) #for mc, dbdat in dbdatdict.iteritems(): # print mc # for d in dbdat: # print '{:20s} {:8.2f} {:8.2f} {:8.2f}'.format(d['sys'], d['axis'], # 0.0 if d['bmdata'] is None else d['bmdata'], # 0.0 if d['mcdata'] is None else d['mcdata']) # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict = mpl.valerr(%s,\n color='%s',\n title='%s',\n xtitle='%s',\n view=%s\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, color, title, axis, view, repr(saveas), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict = mpl.valerr(dbdatdict, color=color, title=title, xtitle=axis, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def load_saptdata_frombfdb(self, sset='default', pythonpath='/Users/loriab/linux/bfdb/sapt_punt', failoninc=True): # pythonpath=None """This is a stopgap function that loads sapt component data from sapt_punt in bfdb repo. """ saptpackage = OrderedDict() for db, odb in self.dbdict.items(): modname = 'sapt_' + odb.dbse if pythonpath is not None: sys.path.insert(1, pythonpath) else: sys.path.append(os.path.dirname(__file__) + '/../data') try: datamodule = __import__(modname) except ImportError: print("""\nPython module for database data %s failed to load\n\n""" % (modname)) print("""\nSearch path that was tried:\n""") print(', '.join(map(str, sys.path))) raise ValidationError("Python module loading problem for database subset generator " + str(modname)) try: saptdata = getattr(datamodule, 'DATA') except AttributeError: raise ValidationError("SAPT punt module does not contain DATA" + str(modname)) saptmc = saptdata['SAPT MODELCHEM'] dbix = self.dbdict.keys().index(db) for rxn, orxn in odb.hrxn.iteritems(): lss = self.sset[sset][dbix] if lss is not None: if rxn in odb.sset[lss]: dbrxn = orxn.dbrxn try: elst = saptdata['SAPT ELST ENERGY'][dbrxn] exch = saptdata['SAPT EXCH ENERGY'][dbrxn] ind = saptdata['SAPT IND ENERGY'][dbrxn] disp = saptdata['SAPT DISP ENERGY'][dbrxn] except (KeyError, AttributeError): print("""Warning: DATA['SAPT * ENERGY'] missing for reaction %s""" % (dbrxn)) if failoninc: break else: if not all([elst, ind, disp]): # exch sometimes physically zero print("""Warning: DATA['SAPT * ENERGY'] missing piece for reaction %s: %s""" % (dbrxn, [elst, exch, ind, disp])) if failoninc: break saptpackage[dbrxn] = {'mc': saptmc, 'elst': elst, 'exch': exch, 'ind': ind, 'disp': disp} return saptpackage
[docs] def plot_ternary(self, sset='default', labeled=True, pythonpath='/Users/loriab/linux/bfdb/sapt_punt', failoninc=True, # pythonpath=None view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """This is a stopgap function that loads sapt component data from sapt_punt in bfdb repo, then formats it to plot a ternary diagram. """ saptdata = self.load_saptdata_frombfdb(sset=sset, pythonpath=pythonpath, failoninc=failoninc) dbdat = [] mcs = [] for dat in saptdata.values(): dbdat.append([dat['elst'], dat['ind'], dat['disp']]) if dat['mc'] not in mcs: mcs.append(dat['mc']) title = ' '.join([self.dbse, sset, ' '.join(mcs)]) # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: pass # if not running from Canopy, print line to execute from Canopy else: # if running from Canopy, call mpl directly filedict = mpl.ternary(dbdat, title=title, labeled=labeled, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def plot_flat(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, color='sapt', xlimit=4.0, xlines=[0.0, 0.3, 1.0], view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """Computes individual errors and summary statistics for single model chemistry *modelchem* versus *benchmark* over subset *sset* over all component databases. Thread *color* can be 'rgb' for old coloring, a color name or 'sapt' for spectrum coloring. *saveas* conveys directory ('/') and/or filename for saving the resulting plot. File extension is not accessible, but *graphicsformat* array requests among 'png', 'pdf', and 'eps' formats. *relpath* forces paths to saved files to be relative to current directory, rather than absolute paths for returned code and file dictionary. Prepares flat diagram instructions and either executes them if matplotlib available (Canopy or Anaconda) or prints them. Returns a dictionary of all saved plot filenames. asdf.plot_flat('CCSD-CP-atqzadz', failoninc=False) """ # compute errors mc = modelchem errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose, returnindiv=True) # repackage dbdat = [] for db, odb in self.dbdict.items(): if indiv[db] is not None: for rxn in indiv[db].keys(): dbdat.append({'db': db, 'sys': str(rxn), 'color': odb.hrxn[rxn].color, 'data': [indiv[db][rxn][0]]}) pre, suf, mid = string_contrast(mc) title = self.dbse + '-' + sset + ' ' + pre + '[]' + suf mae = errors[self.dbse]['mae'] mape = None # mape = 100 * errors[self.dbse]['mape'] mapbe = None # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict = mpl.flat(%s,\n color='%s',\n title='%s',\n mae=%s,\n mape=%s,\n xlimit=%s,\n xlines=%s,\n view=%s\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, color, mc, mae, mape, xlimit, repr(xlines), view, repr(saveas), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict = mpl.flat(dbdat, color=color, title=mc, mae=mae, mape=mape, xlimit=xlimit, xlines=xlines, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def write_xyz_files(self, path=None): """Writes xyz files for every reagent in the Database to directory in *path* or to directory dbse_xyzfiles that it createsin cwd if *path* is None. Additionally, writes a script to that directory that will generate transparent-background ray-traced png files for every reagent with PyMol. """ if path is None: xyzdir = os.getcwd() + os.sep + self.dbse + '_xyzfiles' + os.sep else: xyzdir = os.path.abspath(path) + os.sep if not os.path.exists(xyzdir): os.mkdir(xyzdir) for rgt, orgt in self.hrgt.iteritems(): omol = Molecule(orgt.mol) omol.update_geometry() omol.save_xyz(xyzdir + rgt + '.xyz') with open(xyzdir + 'pymol_xyz2png_script.pml', 'w') as handle: handle.write(""" # Launch PyMOL and run from its command line: # PyMOL> cd {} # PyMOL> @{} """.format(xyzdir, 'pymol_xyz2png_script.pml')) for rgt in self.hrgt.keys(): handle.write(""" load {xyzfile} hide lines show sticks color grey, name c cmd.set('''opaque_background''','''0''',quiet=0) reset orient cmd.zoom(buffer=0.3, complete=1) ray png {pngfile} reinitialize """.format( xyzfile=xyzdir + rgt + '.xyz', pngfile=xyzdir + rgt + '.png'))
[docs] def plot_all_flats(self, modelchem=None, sset='default', xlimit=4.0, failoninc=True, saveas=None, relpath=False, graphicsformat=['pdf']): """Generate pieces for inclusion into tables. Supply list of modelchemistries to plot from *modelchem*, otherwise defaults to all those available. Can modify subset *sset* and plotting range *xlimit*. >>> asdf.plot_all_flats(sset='tt-5min', xlimit=4.0) """ mcs = self.mcs.keys() if modelchem is None else modelchem filedict = OrderedDict() for mc in sorted(mcs): minifiledict = self.plot_flat(mc, sset=sset, xlimit=xlimit, view=False, failoninc=failoninc, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) filedict[mc] = minifiledict return filedict
[docs] def get_hrxn(self, sset='default'): """ """ rhrxn = OrderedDict() for db, odb in self.dbdict.items(): dbix = self.dbdict.keys().index(db) lss = self.sset[sset][dbix] if lss is not None: for rxn in odb.hrxn: if rxn in odb.sset[lss]: orxn = odb.hrxn[rxn] rhrxn[orxn.dbrxn] = orxn # this is a change and conflict with vergil version return rhrxn
[docs] def get_hrgt(self, sset='default', actv='default'): """ """ rhrxn = self.get_hrxn(sset=sset) rhrgt = OrderedDict() for rxn, orxn in rhrxn.iteritems(): for orgt in orxn.rxnm[actv].keys(): rhrgt[orgt.name] = orgt # TODO prob need to avoid duplicates or pass return rhrgt
[docs] def get_reactions(self, modelchem, sset='default', benchmark='default', failoninc=True): """Collects the reactions present in *sset* from each WrappedDatabase, checks that *modelchem* and *benchmark* ReactionDatum are present (fails if *failoninc* True), then returns in an array a tuple for each reaction containing the modelchem key needed to access *modelchem*, the modelchem key needed to access *benchmark*, and the Reaction object. """ dbdat = [] rhrxn = self.get_hrxn(sset=sset) for orxn in rhrxn.itervalues(): dbix = self.dbdict.keys().index(orxn.dbrxn.split('-')[0]) lmc = self.mcs[modelchem][dbix] lbm = self.mcs[benchmark][dbix] try: orxn.data[lbm] except KeyError as e: # not sure if should treat bm differently lbm = None try: orxn.data[lmc] except KeyError as e: if failoninc: raise e else: lmc = None dbdat.append((lmc, lbm, orxn)) # this is diff in that returning empties not just pass over- may break bfdb # try: # orxn.data[lmc] # orxn.data[lbm] # except KeyError as e: # if failoninc: # raise e # else: # # not sure yet if should return empties or just pass over # pass # else: # dbdat.append((lmc, lbm, orxn)) return dbdat
[docs] def get_missing_reactions(self, modelchem, sset='default'): """Returns a dictionary (keys self.dbse and all component WrappedDatabase.dbse) of two elements, the first being the number of reactions *sset* should contain and the second being a list of the reaction names (dbrxn) not available for *modelchem*. Absence of benchmark not considered. """ counts = OrderedDict() counts[self.dbse] = [0, []] soledb = True if (len(self.dbdict) == 1 and self.dbdict.items()[0][0] == self.dbse) else False if not soledb: for db in self.dbdict.keys(): counts[db] = [0, []] for (lmc, lbm, orxn) in self.get_reactions(modelchem, benchmark='default', sset=sset, failoninc=False): db, rxn = orxn.dbrxn.split('-', 1) mcdatum = orxn.data[lmc].value if lmc else None counts[self.dbse][0] += 1 if not soledb: counts[db][0] += 1 if mcdatum is None: counts[self.dbse][1].append(orxn.dbrxn) if not soledb: counts[db][1].append(orxn.dbrxn) return counts
[docs] def plot_disthist(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, xtitle='', view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """Computes individual errors and summary statistics for single model chemistry *modelchem* versus *benchmark* over subset *sset* over all component databases. Computes histogram of errors and gaussian distribution. *saveas* conveys directory ('/') and/or filename for saving the resulting plot. File extension is not accessible, but *graphicsformat* array requests among 'png', 'pdf', and 'eps' formats. *relpath* forces paths to saved files to be relative to current directory, rather than absolute paths for returned code and file dictionary. Prepares disthist diagram instructions and either executes them if matplotlib available (Canopy or Anaconda) or prints them. Returns a dictionary of all saved plot filenames. >>> """ # compute errors mc = modelchem errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose, returnindiv=True) # repackage dbdat = [] for db in self.dbdict.keys(): if indiv[db] is not None: for rxn in indiv[db].keys(): dbdat.append(indiv[db][rxn][0]) title = """%s vs %s for %s subset %s""" % (mc, benchmark, self.dbse, sset) me = errors[self.dbse]['me'] stde = errors[self.dbse]['stde'] # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict = mpl.disthist(%s,\n title='%s',\n xtitle='%s'\n me=%s,\n stde=%s,\n saveas=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, title, xtitle, me, stde, repr(saveas), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict = mpl.disthist(dbdat, title=title, xtitle=xtitle, me=me, stde=stde, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def plot_modelchems(self, modelchem, benchmark='default', mbenchmark=None, sset='default', msset=None, failoninc=True, verbose=False, color='sapt', xlimit=4.0, labeled=True, view=True, mousetext=None, mouselink=None, mouseimag=None, mousetitle=None, mousediv=None, saveas=None, relpath=False, graphicsformat=['pdf']): """Computes individual errors and summary statistics over all component databases for each model chemistry in array *modelchem* versus *benchmark* over subset *sset*. *mbenchmark* and *msset* are array options (same length as *modelchem*) that override *benchmark* and *sset*, respectively, for non-uniform specification. Thread *color* can be 'rgb' for old coloring, a color name or 'sapt' for spectrum coloring. *saveas* conveys directory ('/') and/or filename for saving the resulting plot. File extension is not accessible, but *graphicsformat* array requests among 'png', 'pdf', and 'eps' formats. *relpath* forces paths to saved files to be relative to current directory, rather than absolute paths for returned code and file dictionary. Prepares thread diagram instructions and either executes them if matplotlib available (Canopy or Anaconda) or prints them. Returns a dictionary of all saved plot filenames. If any of *mousetext*, *mouselink*, or *mouseimag* is specified, htmlcode will be returned with an image map of slats to any of text, link, or image, respectively. """ # distribute benchmark if mbenchmark is None: lbenchmark = [benchmark] * len(modelchem) # normal bm modelchem name else: if isinstance(mbenchmark, basestring) or len(mbenchmark) != len(modelchem): raise ValidationError( """mbenchmark must be array of length distributable among modelchem""" % (str(mbenchmark))) else: lbenchmark = mbenchmark # array of bm for each modelchem # distribute sset if msset is None: lsset = [sset] * len(modelchem) # normal ss name like 'MX' else: if isinstance(msset, basestring) or len(msset) != len(modelchem): raise ValidationError("""msset must be array of length distributable among modelchem""" % (str(msset))) else: lsset = msset # array of ss for each modelchem # compute errors index = [] errors = {} indiv = {} for mc, bm, ss in zip(modelchem, lbenchmark, lsset): ix = '%s_%s_%s' % (ss, mc, bm) index.append(ix) errors[ix], indiv[ix] = self.compute_statistics(mc, benchmark=bm, sset=ss, failoninc=failoninc, verbose=verbose, returnindiv=True) # repackage dbdat = [] for db, odb in self.dbdict.items(): dbix = self.dbdict.keys().index(db) for rxn in odb.hrxn: data = [] for ix in index: if indiv[ix][db] is not None: if rxn in odb.sset[self.sset[lsset[index.index(ix)]][dbix]]: try: data.append(indiv[ix][db][rxn][0]) except KeyError as e: if failoninc: raise e else: data.append(None) else: data.append(None) else: data.append(None) if not data or all(item is None for item in data): pass # filter out empty reactions else: dbdat.append({'db': db, 'sys': str(rxn), 'show': str(rxn), 'color': odb.hrxn[rxn].color, 'data': data}) mae = [errors[ix][self.dbse]['mae'] for ix in index] mape = [100 * errors[ix][self.dbse]['mape'] for ix in index] # form unique filename ixpre, ixsuf, ixmid = string_contrast(index) title = self.dbse + ' ' + ixpre + '[]' + ixsuf # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""filedict, htmlcode = mpl.threads(%s,\n color='%s',\n title='%s',\n labels=%s,\n mae=%s,\n mape=%s\n xlimit=%s\n labeled=%s\n saveas=%s\n mousetext=%s\n mouselink=%s\n mouseimag=%s\n mousetitle=%s,\n mousediv=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, color, title, ixmid, mae, mape, str(xlimit), repr(labeled), repr(saveas), repr(mousetext), repr(mouselink), repr(mouseimag), repr(mousetitle), repr(mousediv), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict, htmlcode = mpl.threads(dbdat, color=color, title=title, labels=ixmid, mae=mae, mape=mape, xlimit=xlimit, labeled=labeled, view=view, mousetext=mousetext, mouselink=mouselink, mouseimag=mouseimag, mousetitle=mousetitle, mousediv=mousediv, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict, htmlcode
[docs] def plot_liliowa(self, modelchem, benchmark='default', failoninc=True, xlimit=2.0, view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """ Note that not possible to access sset of component databases. That is, for Database SSIBBI, SSI-only arylaryl is accessible b/c not defined in BBI, but SSI-only neutral is not accessible. """ # compute errors mc = modelchem errors = {} for ss in self.sset.keys(): errors[ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss, failoninc=failoninc, verbose=False, returnindiv=False) # repackage dbdat = [] ssarray = ['pospos', 'posneg', 'pospolar', 'posaliph', 'posaryl', None, 'negneg', 'negpolar', 'negaliph', 'negaryl', None, None, 'polarpolar', 'polaraliph', 'polararyl', None, None, None, 'aliphaliph', 'alipharyl', None, None, None, None, 'arylaryl'] for ss in ssarray: dbdat.append(0.0 if ss is None else errors[ss][self.dbse]['mae']) # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: print('Matplotlib not avail') else: filedict = mpl.liliowa(dbdat, xlimit=xlimit, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def plot_iowa(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False, title='', xtitle='', xlimit=2.0, view=True, saveas=None, relpath=False, graphicsformat=['pdf']): """Computes individual errors for single *modelchem* versus *benchmark* over subset *sset*. Coloring green-to-purple with maximum intensity at *xlimit*. Prepares Iowa plot instructions and either executes them if matplotlib available (Canopy) or prints them. """ title = self.dbse + ' ' + modelchem # compute errors mc = modelchem errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose, returnindiv=True) # repackage dbdat = [] dblbl = [] for db in self.dbdict.keys(): if indiv[db] is not None: for rxn in indiv[db].keys(): dbdat.append(indiv[db][rxn][0]) dblbl.append(str(rxn)) title = """%s vs %s for %s subset %s""" % (mc, benchmark, self.dbse, sset) me = errors[self.dbse]['me'] # generate matplotlib instructions and call or print try: from . import mpl import matplotlib.pyplot as plt except ImportError: # if not running from Canopy, print line to execute from Canopy print("""mpl.iowa(%s,\n %s,\n title='%s',\n xtitle='%s'\n xlimit=%s,\n saveas=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" % (dbdat, dblbl, title, xtitle, xlimit, repr(saveas), repr(relpath), repr(graphicsformat))) else: # if running from Canopy, call mpl directly filedict = mpl.iowa(dbdat, dblbl, title=title, xtitle=xtitle, xlimit=xlimit, view=view, saveas=saveas, relpath=relpath, graphicsformat=graphicsformat) return filedict
[docs] def export_pandas(self, modelchem=[], benchmark='default', sset='default', modelchemlabels=None, failoninc=True): """ *modelchem* is array of model chemistries, if modelchem is empty, get only benchmark is benchmark needed? """ import pandas as pd import numpy as np if self.dbse not in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']: saptdata = self.load_saptdata_frombfdb(sset=sset, pythonpath='/Users/loriab/linux/bfdb/sapt_punt', failoninc=failoninc) listodicts = [] rhrxn = self.get_hrxn(sset=sset) for dbrxn, orxn in rhrxn.iteritems(): wdb = dbrxn.split('-')[0] dbix = self.dbdict.keys().index(wdb) wbm = self.mcs[benchmark][dbix] wss = self.sset[sset][dbix] woss = self.dbdict[wdb].oss[wss] try: Rrat = woss.axis['Rrat'][woss.hrxn.index(orxn.name)] except KeyError: Rrat = 1.0 # TODO generic soln? dictorxn = {} dictorxn['DB'] = wdb dictorxn['System'] = orxn.tagl dictorxn['Name'] = orxn.name dictorxn['R'] = Rrat dictorxn['System #'] = orxn.indx dictorxn['Benchmark'] = np.NaN if orxn.benchmark is None else orxn.data[ wbm].value # this NaN exception is new and experimental dictorxn['QcdbSys'] = orxn.dbrxn if self.dbse not in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']: dictorxn['SAPT ELST ENERGY'] = saptdata[dbrxn]['elst'] dictorxn['SAPT EXCH ENERGY'] = saptdata[dbrxn]['exch'] dictorxn['SAPT IND ENERGY'] = saptdata[dbrxn]['ind'] dictorxn['SAPT DISP ENERGY'] = saptdata[dbrxn]['disp'] dictorxn['SAPT TOTAL ENERGY'] = dictorxn['SAPT ELST ENERGY'] + dictorxn['SAPT EXCH ENERGY'] + \ dictorxn['SAPT IND ENERGY'] + dictorxn['SAPT DISP ENERGY'] orgts = orxn.rxnm['default'].keys() omolD = Molecule(orgts[0].mol) # TODO this is only going to work with Reaction ~= Reagent databases npmolD = omolD.format_molecule_for_numpy() omolA = Molecule(orgts[1].mol) # TODO this is only going to work with Reaction ~= Reagent databases omolA.update_geometry() dictorxn['MonA'] = omolA.natom() # this whole member fn not well defined for db of varying stoichiometry if self.dbse in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']: npmolD = omolD.format_molecule_for_numpy() npmolA = omolA.format_molecule_for_numpy() dictorxn['Geometry'] = np.vstack([npmolD, npmolA]) else: dictorxn['Geometry'] = omolD.format_molecule_for_numpy() # print '\nD', npmolD.shape[0], npmolA.shape[0], dictorxn['MonA'], npmolD, npmolA, dictorxn['Geometry'] for mc in modelchem: try: wmc = self.mcs[mc][dbix] except KeyError: # modelchem not in Database at all print(mc, 'not found') continue key = mc if modelchemlabels is None else modelchemlabels[modelchem.index(mc)] try: dictorxn[key] = orxn.data[wmc].value except KeyError as e: # reaction not in modelchem if failoninc: raise ValidationError("""Reaction %s missing datum %s.""" % (key, str(e))) else: print(mc, str(e), 'not found') continue listodicts.append(dictorxn) df = pd.DataFrame(listodicts) pd.set_option('display.width', 500) print(df.head(5)) print(df.tail(5)) return df
[docs] def table_reactions(self, modelchem, benchmark='default', sset='default', failoninc=True, columnplan=['indx', 'tagl', 'bm', 'mc', 'e', 'pe'], title="""Reaction energies [kcal/mol] for {sset} $\subset$ {dbse} with {mc}""", indextitle="""Detailed results for {sset} $\subset$ {dbse} with {mc}""", plotpath='analysis/mols/', standalone=True, theme='rxns', filename=None): r"""Prepare single LaTeX table to *filename* or return lines if None showing the per-reaction results for reactions in *sset* for single or array or 'all' *modelchem*, where the last uses self.mcs(), model chemistries versus *benchmark*. Use *failoninc* to toggle between command failing or blank lines in table. Use *standalone* to toggle between full compilable document and suitable for inclusion in another LaTeX document. Use *columnplan* to customize column (from among columnreservoir, below) layout. Use *title* and *indextitle* to customize table caption and table-of-contents caption, respectively; variables in curly braces will be substituted. Use *theme* to customize the \ref{tbl:} code. """ # define eligible columns for inclusion columnreservoir = { 'dbrxn': ['l', r"""\textbf{Reaction}""", """{0:25s}"""], 'indx': ['r', '', """{0:14s}"""], 'tagl': ['l', r"""\textbf{Reaction}""", """{0:50s}"""], 'bm': ['d', r"""\multicolumn{1}{c}{\textbf{Benchmark}}""", """{0:8.2f}"""], 'mc': ['d', r"""\multicolumn{1}{c}{\textbf{ModelChem}}""", """{0:8.2f}"""], 'e': ['d', r"""\multicolumn{1}{c}{\textbf{Error}}""", """{0:8.2f}"""], 'pe': ['d', r"""\multicolumn{1}{c}{\textbf{\% Err.}}""", """{0:8.1f}"""], 'imag': ['l', '', r"""\includegraphics[width=1.0cm,height=3.5mm]{%s%%ss.png}""" % (plotpath)], # untested } for col in columnplan: if col not in columnreservoir.keys(): raise ValidationError('Column {0} not recognized. Register with columnreservoir.'.format(col)) if isinstance(modelchem, basestring): if modelchem.lower() == 'all': mcs = sorted(self.mcs.keys()) else: mcs = [modelchem] else: mcs = modelchem # commence to generate LaTeX code tablelines = [] indexlines = [] if standalone: tablelines += textables.begin_latex_document() # iterate to produce one LaTeX table per modelchem for mc in mcs: # prepare summary statistics perr = self.compute_statistics(mc, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=False, returnindiv=False) serrors = OrderedDict() for db in self.dbdict.keys(): serrors[db] = None if perr[db] is None else format_errors(perr[db], mode=3) serrors[self.dbse] = format_errors(perr[self.dbse], mode=3) # prepare individual reactions and errors terrors = OrderedDict() isComplete = True for (lmc, lbm, orxn) in self.get_reactions(mc, benchmark=benchmark, sset=sset, failoninc=failoninc): tmp = {} dbrxn = orxn.dbrxn tmp['dbrxn'] = dbrxn.replace('_', '\\_') tmp['indx'] = r"""\textit{""" + str(orxn.indx) + """}""" tmp['tagl'] = dbrxn.split('-')[0] + ' ' + \ (orxn.latex if orxn.latex else orxn.tagl.replace('_', '\\_')) tmp['imag'] = None # name of primary rgt bmdatum = orxn.data[lbm].value if lbm else None mcdatum = orxn.data[lmc].value if lmc else None tmp['bm'] = bmdatum tmp['mc'] = mcdatum if lmc and lbm: tmp['e'] = mcdatum - bmdatum tmp['pe'] = 100 * (mcdatum - bmdatum) / abs(bmdatum) # TODO redefining errors not good practice else: isComplete = False tmp['e'] = None tmp['pe'] = None terrors[dbrxn] = {} for c in columnreservoir.keys(): terrors[dbrxn][c] = '' if tmp[c] is None else \ columnreservoir[c][2].format(tmp[c]) fancymodelchem = self.fancy_mcs(latex=True)[mc] thistitle = title.format(dbse=self.dbse, mc=fancymodelchem, sset='All' if sset == 'default' else sset.upper()) lref = [r"""tbl:qcdb"""] if theme: lref.append(theme) lref.append(self.dbse) if sset != 'default': lref.append(sset) lref.append(mc) ref = '-'.join(lref) # table intro tablelines.append(r"""\begingroup""") tablelines.append(r"""\squeezetable""") tablelines.append(r"""\LTcapwidth=\textwidth""") tablelines.append(r"""\begin{longtable}{%s}""" % (''.join([columnreservoir[col][0] for col in columnplan]))) tablelines.append(r"""\caption{%s""" % (thistitle)) tablelines.append(r"""\label{%s}} \\ """ % (ref)) tablelines.append(r"""\hline\hline""") columntitles = [columnreservoir[col][1] for col in columnplan] # initial header tablelines.append(' & '.join(columntitles) + r""" \\ """) tablelines.append(r"""\hline""") tablelines.append(r"""\endfirsthead""") # to be continued header tablelines.append(r"""\multicolumn{%d}{@{}l}{\textit{\ldots continued} %s} \\ """ % (len(columnplan), fancymodelchem)) tablelines.append(r"""\hline\hline""") tablelines.append(' & '.join(columntitles) + r""" \\ """) tablelines.append(r"""\hline""") tablelines.append(r"""\endhead""") # to be continued footer tablelines.append(r"""\hline\hline""") tablelines.append(r"""\multicolumn{%d}{r@{}}{\textit{continued \ldots}} \\ """ % (len(columnplan))) tablelines.append(r"""\endfoot""") # final footer tablelines.append(r"""\hline\hline""") tablelines.append(r"""\endlastfoot""") # table body for dbrxn, stuff in terrors.iteritems(): tablelines.append(' & '.join([stuff[col] for col in columnplan]) + r""" \\ """) # table body summary if any(col in ['e', 'pe'] for col in columnplan): field_to_put_labels = [col for col in ['tagl', 'dbrxn', 'indx'] if col in columnplan] if field_to_put_labels: for block, blkerrors in serrors.iteritems(): if blkerrors: # skip e.g., NBC block in HB of DB4 tablelines.append(r"""\hline""") summlines = [[] for i in range(8)] for col in columnplan: if col == field_to_put_labels[0]: summlines[0].append( r"""\textbf{Summary Statistics: %s%s}%s""" % \ ('' if sset == 'default' else sset + r""" $\subset$ """, block, '' if isComplete else r""", \textit{partial}""")) summlines[1].append(r"""\textit{Minimal Signed Error} """) summlines[2].append(r"""\textit{Minimal Absolute Error} """) summlines[3].append(r"""\textit{Maximal Signed Error} """) summlines[4].append(r"""\textit{Maximal Absolute Error} """) summlines[5].append(r"""\textit{Mean Signed Error} """) summlines[6].append(r"""\textit{Mean Absolute Error} """) summlines[7].append(r"""\textit{Root-Mean-Square Error} """) elif col in ['e', 'pe']: summlines[0].append('') summlines[1].append(blkerrors['nex' + col]) summlines[2].append(blkerrors['min' + col]) summlines[3].append(blkerrors['pex' + col]) summlines[4].append(blkerrors['max' + col]) summlines[5].append(blkerrors['m' + col]) summlines[6].append(blkerrors['ma' + col]) summlines[7].append(blkerrors['rms' + col]) else: for ln in range(len(summlines)): summlines[ln].append('') for ln in range(len(summlines)): tablelines.append(' & '.join(summlines[ln]) + r""" \\ """) # table conclusion tablelines.append(r"""\end{longtable}""") tablelines.append(r"""\endgroup""") tablelines.append(r"""\clearpage""") tablelines.append('\n\n') # form table index thisindextitle = indextitle.format(dbse=self.dbse, mc=fancymodelchem.strip(), sset='All' if sset == 'default' else sset.upper()) indexlines.append(r"""\scriptsize \ref{%s} & \scriptsize %s \\ """ % \ (ref, thisindextitle)) if standalone: tablelines += textables.end_latex_document() # form table and index return structures if filename is None: return tablelines, indexlines else: if filename.endswith('.tex'): filename = filename[:-4] with open(filename + '.tex', 'w') as handle: handle.write('\n'.join(tablelines)) with open(filename + '_index.tex', 'w') as handle: handle.write('\n'.join(indexlines) + '\n') print("""\n LaTeX index written to {filename}_index.tex\n""" """ LaTeX table written to {filename}.tex\n""" """ >>> pdflatex {filename}\n""" """ >>> open /Applications/Preview.app {filename}.pdf\n""".format(filename=filename)) filedict = {'data': os.path.abspath(filename) + '.tex', 'index': os.path.abspath(filename + '_index.tex')} return filedict
[docs] def table_wrapper(self, mtd, bas, tableplan, benchmark='default', opt=['CP'], err=['mae'], sset=['default'], dbse=None, opttarget=None, failoninc=True, xlimit=4.0, xlines=[0.0, 0.3, 1.0], ialimit=2.0, plotpath='autogen', subjoin=True, title=None, indextitle=None, suppressblanks=False, standalone=True, theme=None, filename=None): """Prepares dictionary of errors for all combinations of *mtd*, *opt*, *bas* with respect to model chemistry *benchmark*, mindful of *failoninc*. The general plan for the table, as well as defaults for landscape, footnotes, *title*, *indextitle, and *theme* are got from function *tableplan*. Once error dictionary is ready, it and all other arguments are passed along to textables.table_generic. Two arrays, one of table lines and one of index lines are returned unless *filename* is given, in which case they're written to file and a filedict returned. """ # get plan for table from *tableplan* and some default values kwargs = {'plotpath': plotpath, 'subjoin': subjoin, 'xlines': xlines, 'xlimit': xlimit, 'ialimit': ialimit} rowplan, columnplan, landscape, footnotes, \ suggestedtitle, suggestedtheme = tableplan(**kwargs) #suggestedtitle, suggestedtheme = tableplan(plotpath=plotpath, subjoin=subjoin) # make figure files write themselves autothread = {} autoliliowa = {} if plotpath == 'autogen': for col in columnplan: if col[3].__name__ == 'flat': if col[4] and autothread: print('TODO: merge not handled') elif col[4] or autothread: autothread.update(col[4]) else: autothread = {'dummy': True} elif col[3].__name__ == 'liliowa': autoliliowa = {'dummy': True} # negotiate some defaults dbse = [self.dbse] if dbse is None else dbse theme = suggestedtheme if theme is None else theme title = suggestedtitle if title is None else title indextitle = title if indextitle is None else indextitle opttarget = {'default': ['']} if opttarget is None else opttarget def unify_options(orequired, opossible): """Perform a merge of options tags in *orequired* and *opossible* so that the result is free of duplication and has the mode at the end. """ opt_combos = [] for oreq in orequired: for opos in opossible: pieces = sorted(set(oreq.split('_') + opos.split('_'))) if '' in pieces: pieces.remove('') for mode in ['CP', 'unCP', 'SA']: if mode in pieces: pieces.remove(mode) pieces.append(mode) pieces = '_'.join(pieces) opt_combos.append(pieces) return opt_combos # gather list of model chemistries for table mcs = ['-'.join(prod) for prod in itertools.product(mtd, opt, bas)] mc_translator = {} for m, o, b in itertools.product(mtd, opt, bas): nominal_mc = '-'.join([m, o, b]) for oo in unify_options([o], opttarget['default']): trial_mc = '-'.join([m, oo, b]) try: perr = self.compute_statistics(trial_mc, benchmark=benchmark, sset='default', # prob. too restrictive by choosing subset failoninc=False, verbose=False, returnindiv=False) except KeyError as e: continue else: mc_translator[nominal_mc] = trial_mc break else: mc_translator[nominal_mc] = None # compute errors serrors = {} for mc in mcs: serrors[mc] = {} for ss in self.sset.keys(): serrors[mc][ss] = {} if mc_translator[mc] in self.mcs: # Note: not handling when one component Wdb has one translated pattern and another another perr = self.compute_statistics(mc_translator[mc], benchmark=benchmark, sset=ss, failoninc=failoninc, verbose=False, returnindiv=False) serrors[mc][ss][self.dbse] = format_errors(perr[self.dbse], mode=3) if not failoninc: mcsscounts = self.get_missing_reactions(mc_translator[mc], sset=ss) serrors[mc][ss][self.dbse]['tgtcnt'] = mcsscounts[self.dbse][0] serrors[mc][ss][self.dbse]['misscnt'] = len(mcsscounts[self.dbse][1]) if autothread: if ('sset' in autothread and ss in autothread['sset']) or ('sset' not in autothread): mcssplots = self.plot_flat(mc_translator[mc], benchmark=benchmark, sset=ss, failoninc=failoninc, color='sapt', xlimit=xlimit, xlines=xlines, view=False, saveas='flat_' + '-'.join([self.dbse, ss, mc]), relpath=True, graphicsformat=['pdf']) serrors[mc][ss][self.dbse]['plotflat'] = mcssplots['pdf'] if autoliliowa and ss == 'default': mcssplots = self.plot_liliowa(mc_translator[mc], benchmark=benchmark, failoninc=failoninc, xlimit=ialimit, view=False, saveas='liliowa_' + '-'.join([self.dbse, ss, mc]), relpath=True, graphicsformat=['pdf']) serrors[mc][ss][self.dbse]['plotliliowa'] = mcssplots['pdf'] for db in self.dbdict.keys(): if perr[db] is None: serrors[mc][ss][db] = None else: serrors[mc][ss][db] = format_errors(perr[db], mode=3) if not failoninc: serrors[mc][ss][db]['tgtcnt'] = mcsscounts[db][0] serrors[mc][ss][db]['misscnt'] = len(mcsscounts[db][1]) else: serrors[mc][ss][self.dbse] = format_errors(initialize_errors(), mode=3) for db in self.dbdict.keys(): serrors[mc][ss][db] = format_errors(initialize_errors(), mode=3) for key in serrors.keys(): print("""{:>35}{:>35}{}""".format(key, mc_translator[key], serrors[key]['default'][self.dbse]['mae'])) # find indices that would be neglected in a single sweep over table_generic keysinplan = set(sum([col[-1].keys() for col in columnplan], rowplan)) obvious = {'dbse': dbse, 'sset': sset, 'mtd': mtd, 'opt': opt, 'bas': bas, 'err': err} for key, vari in obvious.items(): if len(vari) == 1 or key in keysinplan: del obvious[key] iteroers = [(prod) for prod in itertools.product(*obvious.values())] # commence to generate LaTeX code tablelines = [] indexlines = [] if standalone: tablelines += textables.begin_latex_document() for io in iteroers: actvargs = dict(zip(obvious.keys(), [[k] for k in io])) nudbse = actvargs['dbse'] if 'dbse' in actvargs else dbse nusset = actvargs['sset'] if 'sset' in actvargs else sset numtd = actvargs['mtd'] if 'mtd' in actvargs else mtd nuopt = actvargs['opt'] if 'opt' in actvargs else opt nubas = actvargs['bas'] if 'bas' in actvargs else bas nuerr = actvargs['err'] if 'err' in actvargs else err table, index = textables.table_generic( mtd=numtd, bas=nubas, opt=nuopt, err=nuerr, sset=nusset, dbse=nudbse, rowplan=rowplan, columnplan=columnplan, serrors=serrors, plotpath='' if plotpath == 'autogen' else plotpath, subjoin=subjoin, title=title, indextitle=indextitle, suppressblanks=suppressblanks, landscape=landscape, footnotes=footnotes, standalone=False, theme=theme) tablelines += table tablelines.append('\n\n') indexlines += index if standalone: tablelines += textables.end_latex_document() # form table and index return structures if filename is None: return tablelines, indexlines else: if filename.endswith('.tex'): filename = filename[:-4] with open(filename + '.tex', 'w') as handle: handle.write('\n'.join(tablelines)) with open(filename + '_index.tex', 'w') as handle: handle.write('\n'.join(indexlines)) print("""\n LaTeX index written to {filename}_index.tex\n""" """ LaTeX table written to {filename}.tex\n""" """ >>> pdflatex {filename}\n""" """ >>> open /Applications/Preview.app {filename}.pdf\n""".format(filename=filename)) filedict = {'data': os.path.abspath(filename) + '.tex', 'index': os.path.abspath(filename + '_index.tex')} return filedict
[docs] def table_scrunch(self, plotpath, subjoin): rowplan = ['mtd'] columnplan = [ ['l', r'Method', '', textables.label, {}], ['c', r'Description', '', textables.empty, {}], ['d', r'aug-cc-pVDZ', 'unCP', textables.val, {'bas': 'adz', 'opt': 'unCP'}], ['d', r'aug-cc-pVDZ', 'CP', textables.val, {'bas': 'adz', 'opt': 'CP'}], ['d', r'aug-cc-pVTZ', 'unCP', textables.val, {'bas': 'atz', 'opt': 'unCP'}], ['d', r'aug-cc-pVTZ', 'CP', textables.val, {'bas': 'atz', 'opt': 'CP'}]] footnotes = [] landscape = False theme = 'summavg' title = r"""Classification and Performance of model chemistries. Interaction energy [kcal/mol] {{err}} statistics.""".format() return rowplan, columnplan, landscape, footnotes, title, theme
[docs] def table_merge_abbr(self, plotpath, subjoin): """Specialization of table_generic into table with minimal statistics (three S22 and three overall) plus embedded slat diagram as suitable for main paper. A single table is formed in sections by *bas* with lines *mtd* within each section. """ rowplan = ['bas', 'mtd'] columnplan = [ ['l', r"""Method \& Basis Set""", '', textables.label, {}], ['d', r'S22', 'HB', textables.val, {'sset': 'hb', 'dbse': 'S22'}], ['d', r'S22', 'MX/DD', textables.val, {'sset': 'mxdd', 'dbse': 'S22'}], ['d', r'S22', 'TT', textables.val, {'sset': 'tt', 'dbse': 'S22'}], ['d', r'Overall', 'HB', textables.val, {'sset': 'hb', 'dbse': 'DB4'}], ['d', r'Overall', 'MX/DD', textables.val, {'sset': 'mxdd', 'dbse': 'DB4'}], ['d', r'Overall', 'TT', textables.val, {'sset': 'tt', 'dbse': 'DB4'}], ['l', r"""Error Distribution\footnotemark[1]""", r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'), textables.graphics, {}], ['d', r'Time', '', textables.empty, {}]] # TODO Time column not right at all footnotes = [fnreservoir['blankslat']] landscape = False theme = 'smmerge' title = r"""Interaction energy [kcal/mol] {{err}} subset statistics with computed with {{opt}}{0}.""".format( '' if subjoin else r""" and {bas}""") return rowplan, columnplan, landscape, footnotes, title, theme
[docs] def table_merge_suppmat(self, plotpath, subjoin): """Specialization of table_generic into table with as many statistics as will fit (mostly fullcurve and a few 5min) plus embedded slat diagram as suitable for supplementary material. Multiple tables are formed, one for each in *bas* with lines *mtd* within each table. """ rowplan = ['bas', 'mtd'] columnplan = [ ['l', r"""Method \& Basis Set""", '', textables.label, {}], ['d', 'S22', 'HB', textables.val, {'sset': 'hb', 'dbse': 'S22'}], ['d', 'S22', 'MX', textables.val, {'sset': 'mx', 'dbse': 'S22'}], ['d', 'S22', 'DD', textables.val, {'sset': 'dd', 'dbse': 'S22'}], ['d', 'S22', 'TT', textables.val, {'sset': 'tt', 'dbse': 'S22'}], ['d', 'NBC10', 'MX', textables.val, {'sset': 'mx', 'dbse': 'NBC1'}], ['d', 'NBC10', 'DD', textables.val, {'sset': 'dd', 'dbse': 'NBC1'}], ['d', 'NBC10', 'TT', textables.val, {'sset': 'tt', 'dbse': 'NBC1'}], ['d', 'HBC6', 'HB/TT', textables.val, {'sset': 'tt', 'dbse': 'HBC1'}], ['d', 'HSG', 'HB', textables.val, {'sset': 'hb', 'dbse': 'HSG'}], ['d', 'HSG', 'MX', textables.val, {'sset': 'mx', 'dbse': 'HSG'}], ['d', 'HSG', 'DD', textables.val, {'sset': 'dd', 'dbse': 'HSG'}], ['d', 'HSG', 'TT', textables.val, {'sset': 'tt', 'dbse': 'HSG'}], ['d', 'Avg', 'TT ', textables.val, {'sset': 'tt', 'dbse': 'DB4'}], ['l', r"""Error Distribution\footnotemark[1]""", r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'), textables.graphics, {}], ['d', 'NBC10', r"""TT\footnotemark[2]""", textables.val, {'sset': 'tt-5min', 'dbse': 'NBC1'}], ['d', 'HBC6', r"""TT\footnotemark[2] """, textables.val, {'sset': 'tt-5min', 'dbse': 'HBC1'}], ['d', 'Avg', r"""TT\footnotemark[2]""", textables.val, {'sset': 'tt-5min', 'dbse': 'DB4'}]] footnotes = [fnreservoir['blankslat'], fnreservoir['5min']] landscape = True theme = 'lgmerge' title = r"""Interaction energy [kcal/mol] {{err}} subset statistics with computed with {{opt}}{0}.""".format( '' if subjoin else r""" and {bas}""") return rowplan, columnplan, landscape, footnotes, title, theme
[docs]class DB4(Database): def __init__(self, pythonpath=None, loadfrompickle=False, path=None): """Initialize FourDatabases object from SuperDatabase""" Database.__init__(self, ['s22', 'nbc10', 'hbc6', 'hsg'], dbse='DB4', pythonpath=pythonpath, loadfrompickle=loadfrompickle, path=path) # # load up data and definitions # self.load_qcdata_byproject('dft') # self.load_qcdata_byproject('pt2') # #self.load_qcdata_byproject('dhdft') # self.load_subsets() self.define_supersubsets() self.define_supermodelchems()
[docs] def define_supersubsets(self): """ """ self.sset['tt'] = ['default', 'default', 'default', 'default'] self.sset['hb'] = ['hb', None, 'default', 'hb'] self.sset['mx'] = ['mx', 'mx', None, 'mx'] self.sset['dd'] = ['dd', 'dd', None, 'dd'] self.sset['mxdd'] = ['mxdd', 'default', None, 'mxdd'] self.sset['pp'] = ['mxddpp', 'mxddpp', None, None] self.sset['np'] = ['mxddnp', 'mxddnp', None, 'mxdd'] self.sset['tt-5min'] = ['default', '5min', '5min', 'default'] self.sset['hb-5min'] = ['hb', None, '5min', 'hb'] self.sset['mx-5min'] = ['mx', 'mx-5min', None, 'mx'] self.sset['dd-5min'] = ['dd', 'dd-5min', None, 'dd'] self.sset['mxdd-5min'] = ['mxdd', '5min', None, 'mxdd'] self.sset['pp-5min'] = ['mxddpp', 'mxddpp-5min', None, None] self.sset['np-5min'] = ['mxddnp', 'mxddnp-5min', None, 'mxdd']
# def benchmark(self): # """Returns the model chemistry label for the database's benchmark.""" # return 'C2001BENCH'
[docs] def define_supermodelchems(self): """ """ self.benchmark = 'C2011BENCH' self.mcs['C2010BENCH'] = ['S22A', 'NBC100', 'HBC60', 'HSG0'] self.mcs['C2011BENCH'] = ['S22B', 'NBC10A', 'HBC6A', 'HSGA'] self.mcs['CCSD-CP-adz'] = ['CCSD-CP-adz', 'CCSD-CP-hadz', 'CCSD-CP-adz', 'CCSD-CP-hadz'] self.mcs['CCSD-CP-atz'] = ['CCSD-CP-atz', 'CCSD-CP-hatz', 'CCSD-CP-atz', 'CCSD-CP-hatz'] self.mcs['CCSD-CP-adtz'] = ['CCSD-CP-adtz', 'CCSD-CP-hadtz', 'CCSD-CP-adtz', 'CCSD-CP-hadtz'] self.mcs['CCSD-CP-adtzadz'] = ['CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz', 'CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz'] self.mcs['CCSD-CP-atzadz'] = ['CCSD-CP-atzadz', 'CCSD-CP-atzhadz', 'CCSD-CP-atzadz', 'CCSD-CP-atzhadz'] self.mcs['CCSD-CP-atqzadz'] = ['CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz', 'CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz'] self.mcs['CCSD-CP-atzadtz'] = ['CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz', 'CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz'] self.mcs['CCSD-CP-atqzadtz'] = ['CCSD-CP-atqzadtz', 'CCSD-CP-atqzhadtz', 'CCSD-CP-atqzadtz', 'CCSD-CP-atqzhadtz'] self.mcs['CCSD-CP-atqzatz'] = ['CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz', 'CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz'] self.mcs['SCSCCSD-CP-adz'] = ['SCSCCSD-CP-adz', 'SCSCCSD-CP-hadz', 'SCSCCSD-CP-adz', 'SCSCCSD-CP-hadz'] self.mcs['SCSCCSD-CP-atz'] = ['SCSCCSD-CP-atz', 'SCSCCSD-CP-hatz', 'SCSCCSD-CP-atz', 'SCSCCSD-CP-hatz'] self.mcs['SCSCCSD-CP-adtz'] = ['SCSCCSD-CP-adtz', 'SCSCCSD-CP-hadtz', 'SCSCCSD-CP-adtz', 'SCSCCSD-CP-hadtz'] self.mcs['SCSCCSD-CP-adtzadz'] = ['SCSCCSD-CP-adtzadz', 'SCSCCSD-CP-adtzhadz', 'SCSCCSD-CP-adtzadz', 'SCSCCSD-CP-adtzhadz'] self.mcs['SCSCCSD-CP-atzadz'] = ['SCSCCSD-CP-atzadz', 'SCSCCSD-CP-atzhadz', 'SCSCCSD-CP-atzadz', 'SCSCCSD-CP-atzhadz'] self.mcs['SCSCCSD-CP-atqzadz'] = ['SCSCCSD-CP-atqzadz', 'SCSCCSD-CP-atqzhadz', 'SCSCCSD-CP-atqzadz', 'SCSCCSD-CP-atqzhadz'] self.mcs['SCSCCSD-CP-atzadtz'] = ['SCSCCSD-CP-atzadtz', 'SCSCCSD-CP-atzhadtz', 'SCSCCSD-CP-atzadtz', 'SCSCCSD-CP-atzhadtz'] self.mcs['SCSCCSD-CP-atqzadtz'] = ['SCSCCSD-CP-atqzadtz', 'SCSCCSD-CP-atqzhadtz', 'SCSCCSD-CP-atqzadtz', 'SCSCCSD-CP-atqzhadtz'] self.mcs['SCSCCSD-CP-atqzatz'] = ['SCSCCSD-CP-atqzatz', 'SCSCCSD-CP-atqzhatz', 'SCSCCSD-CP-atqzatz', 'SCSCCSD-CP-atqzhatz'] self.mcs['SCSMICCSD-CP-adz'] = ['SCSMICCSD-CP-adz', 'SCSMICCSD-CP-hadz', 'SCSMICCSD-CP-adz', 'SCSMICCSD-CP-hadz'] self.mcs['SCSMICCSD-CP-atz'] = ['SCSMICCSD-CP-atz', 'SCSMICCSD-CP-hatz', 'SCSMICCSD-CP-atz', 'SCSMICCSD-CP-hatz'] self.mcs['SCSMICCSD-CP-adtz'] = ['SCSMICCSD-CP-adtz', 'SCSMICCSD-CP-hadtz', 'SCSMICCSD-CP-adtz', 'SCSMICCSD-CP-hadtz'] self.mcs['SCSMICCSD-CP-adtzadz'] = ['SCSMICCSD-CP-adtzadz', 'SCSMICCSD-CP-adtzhadz', 'SCSMICCSD-CP-adtzadz', 'SCSMICCSD-CP-adtzhadz'] self.mcs['SCSMICCSD-CP-atzadz'] = ['SCSMICCSD-CP-atzadz', 'SCSMICCSD-CP-atzhadz', 'SCSMICCSD-CP-atzadz', 'SCSMICCSD-CP-atzhadz'] self.mcs['SCSMICCSD-CP-atqzadz'] = ['SCSMICCSD-CP-atqzadz', 'SCSMICCSD-CP-atqzhadz', 'SCSMICCSD-CP-atqzadz', 'SCSMICCSD-CP-atqzhadz'] self.mcs['SCSMICCSD-CP-atzadtz'] = ['SCSMICCSD-CP-atzadtz', 'SCSMICCSD-CP-atzhadtz', 'SCSMICCSD-CP-atzadtz', 'SCSMICCSD-CP-atzhadtz'] self.mcs['SCSMICCSD-CP-atqzadtz'] = ['SCSMICCSD-CP-atqzadtz', 'SCSMICCSD-CP-atqzhadtz', 'SCSMICCSD-CP-atqzadtz', 'SCSMICCSD-CP-atqzhadtz'] self.mcs['SCSMICCSD-CP-atqzatz'] = ['SCSMICCSD-CP-atqzatz', 'SCSMICCSD-CP-atqzhatz', 'SCSMICCSD-CP-atqzatz', 'SCSMICCSD-CP-atqzhatz'] self.mcs['CCSDT-CP-adz'] = ['CCSDT-CP-adz', 'CCSDT-CP-hadz', 'CCSDT-CP-adz', 'CCSDT-CP-hadz'] self.mcs['CCSDT-CP-atz'] = ['CCSDT-CP-atz', 'CCSDT-CP-hatz', 'CCSDT-CP-atz', 'CCSDT-CP-hatz'] self.mcs['CCSDT-CP-adtz'] = ['CCSDT-CP-adtz', 'CCSDT-CP-hadtz', 'CCSDT-CP-adtz', 'CCSDT-CP-hadtz'] self.mcs['CCSDT-CP-adtzadz'] = ['CCSDT-CP-adtzadz', 'CCSDT-CP-adtzhadz', 'CCSDT-CP-adtzadz', 'CCSDT-CP-adtzhadz'] self.mcs['CCSDT-CP-atzadz'] = ['CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz', 'CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz'] self.mcs['CCSDT-CP-atqzadz'] = ['CCSDT-CP-atqzadz', 'CCSDT-CP-atqzhadz', 'CCSDT-CP-atqzadz', 'CCSDT-CP-atqzhadz'] self.mcs['CCSDT-CP-atzadtz'] = ['CCSDT-CP-atzadtz', 'CCSDT-CP-atzhadtz', 'CCSDT-CP-atzadtz', 'CCSDT-CP-atzhadtz'] self.mcs['CCSDT-CP-atqzadtz'] = ['CCSDT-CP-atqzadtz', 'CCSDT-CP-atqzhadtz', 'CCSDT-CP-atqzadtz', 'CCSDT-CP-atqzhadtz'] self.mcs['CCSDT-CP-atqzatz'] = ['CCSDT-CP-atqzatz', 'CCSDT-CP-atqzhatz', 'CCSDT-CP-atqzatz', 'CCSDT-CP-atqzhatz']
# def make_pt2_flats(self): # def plot_all_flats(self): # """Generate pieces for inclusion into tables for PT2 paper. # Note that DB4 flats use near-equilibrium subset. # # """ # Database.plot_all_flats(self, modelchem=None, sset='tt-5min', xlimit=4.0, # graphicsformat=['pdf'])
[docs] def make_pt2_Figure_3(self): """Plot all the graphics needed for the calendar grey bars plot in Fig. 3 of PT2. Note that in the modern implementation of class DB4, would need to pass ``sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']`` to get published figure. """ # Fig. bars (a) self.plot_bars(['MP2-CP-dz', 'MP2-CP-jadz', 'MP2-CP-hadz', 'MP2-CP-adz', 'MP2-CP-tz', 'MP2-CP-matz', 'MP2-CP-jatz', 'MP2-CP-hatz', 'MP2-CP-atz', 'MP2-CP-dtz', 'MP2-CP-jadtz', 'MP2-CP-hadtz', 'MP2-CP-adtz', 'MP2-CP-qz', 'MP2-CP-aaqz', 'MP2-CP-maqz', 'MP2-CP-jaqz', 'MP2-CP-haqz', 'MP2-CP-aqz', 'MP2-CP-tqz', 'MP2-CP-matqz', 'MP2-CP-jatqz', 'MP2-CP-hatqz', 'MP2-CP-atqz', 'MP2-CP-a5z', 'MP2-CP-aq5z']) self.plot_bars(['SCSMP2-CP-dz', 'SCSMP2-CP-jadz', 'SCSMP2-CP-hadz', 'SCSMP2-CP-adz', 'SCSMP2-CP-tz', 'SCSMP2-CP-matz', 'SCSMP2-CP-jatz', 'SCSMP2-CP-hatz', 'SCSMP2-CP-atz', 'SCSMP2-CP-dtz', 'SCSMP2-CP-jadtz', 'SCSMP2-CP-hadtz', 'SCSMP2-CP-adtz', 'SCSMP2-CP-qz', 'SCSMP2-CP-aaqz', 'SCSMP2-CP-maqz', 'SCSMP2-CP-jaqz', 'SCSMP2-CP-haqz', 'SCSMP2-CP-aqz', 'SCSMP2-CP-tqz', 'SCSMP2-CP-matqz', 'SCSMP2-CP-jatqz', 'SCSMP2-CP-hatqz', 'SCSMP2-CP-atqz', 'SCSMP2-CP-a5z', 'SCSMP2-CP-aq5z']) self.plot_bars(['SCSNMP2-CP-dz', 'SCSNMP2-CP-jadz', 'SCSNMP2-CP-hadz', 'SCSNMP2-CP-adz', 'SCSNMP2-CP-tz', 'SCSNMP2-CP-matz', 'SCSNMP2-CP-jatz', 'SCSNMP2-CP-hatz', 'SCSNMP2-CP-atz', 'SCSNMP2-CP-dtz', 'SCSNMP2-CP-jadtz', 'SCSNMP2-CP-hadtz', 'SCSNMP2-CP-adtz', 'SCSNMP2-CP-qz', 'SCSNMP2-CP-aaqz', 'SCSNMP2-CP-maqz', 'SCSNMP2-CP-jaqz', 'SCSNMP2-CP-haqz', 'SCSNMP2-CP-aqz', 'SCSNMP2-CP-tqz', 'SCSNMP2-CP-matqz', 'SCSNMP2-CP-jatqz', 'SCSNMP2-CP-hatqz', 'SCSNMP2-CP-atqz', 'SCSNMP2-CP-a5z', 'SCSNMP2-CP-aq5z']) self.plot_bars([None, None, None, None, 'SCSMIMP2-CP-tz', 'SCSMIMP2-CP-matz', 'SCSMIMP2-CP-jatz', 'SCSMIMP2-CP-hatz', 'SCSMIMP2-CP-atz', 'SCSMIMP2-CP-dtz', 'SCSMIMP2-CP-jadtz', 'SCSMIMP2-CP-hadtz', 'SCSMIMP2-CP-adtz', 'SCSMIMP2-CP-qz', 'SCSMIMP2-CP-aaqz', 'SCSMIMP2-CP-maqz', 'SCSMIMP2-CP-jaqz', 'SCSMIMP2-CP-haqz', 'SCSMIMP2-CP-aqz', 'SCSMIMP2-CP-tqz', 'SCSMIMP2-CP-matqz', 'SCSMIMP2-CP-jatqz', 'SCSMIMP2-CP-hatqz', 'SCSMIMP2-CP-atqz', None, None]) self.plot_bars(['DWMP2-CP-dz', 'DWMP2-CP-jadz', 'DWMP2-CP-hadz', 'DWMP2-CP-adz', 'DWMP2-CP-tz', 'DWMP2-CP-matz', 'DWMP2-CP-jatz', 'DWMP2-CP-hatz', 'DWMP2-CP-atz', 'DWMP2-CP-dtz', 'DWMP2-CP-jadtz', 'DWMP2-CP-hadtz', 'DWMP2-CP-adtz', 'DWMP2-CP-qz', 'DWMP2-CP-aaqz', 'DWMP2-CP-maqz', 'DWMP2-CP-jaqz', 'DWMP2-CP-haqz', 'DWMP2-CP-aqz', 'DWMP2-CP-tqz', 'DWMP2-CP-matqz', 'DWMP2-CP-jatqz', 'DWMP2-CP-hatqz', 'DWMP2-CP-atqz', 'DWMP2-CP-a5z', 'DWMP2-CP-aq5z']) self.plot_bars(['MP2C-CP-dz', 'MP2C-CP-jadz', 'MP2C-CP-hadz', 'MP2C-CP-adz', 'MP2C-CP-tz', 'MP2C-CP-matz', 'MP2C-CP-jatz', 'MP2C-CP-hatz', 'MP2C-CP-atz', 'MP2C-CP-dtz', 'MP2C-CP-jadtz', 'MP2C-CP-hadtz', 'MP2C-CP-adtz', None, None, None, None, None, 'MP2C-CP-aqz', None, None, None, None, 'MP2C-CP-atqz', None, None]) self.plot_bars(['MP2C-CP-atqzdz', 'MP2C-CP-atqzjadz', 'MP2C-CP-atqzhadz', 'MP2C-CP-atqzadz', 'MP2C-CP-atqztz', 'MP2C-CP-atqzmatz', 'MP2C-CP-atqzjatz', 'MP2C-CP-atqzhatz', 'MP2C-CP-atqzatz', 'MP2C-CP-atqzdtz', 'MP2C-CP-atqzjadtz', 'MP2C-CP-atqzhadtz', 'MP2C-CP-atqzadtz']) # Fig. bars (c) self.plot_bars(['MP2F12-CP-dz', 'MP2F12-CP-jadz', 'MP2F12-CP-hadz', 'MP2F12-CP-adz', 'MP2F12-CP-tz', 'MP2F12-CP-matz', 'MP2F12-CP-jatz', 'MP2F12-CP-hatz', 'MP2F12-CP-atz', 'MP2F12-CP-dtz', 'MP2F12-CP-jadtz', 'MP2F12-CP-hadtz', 'MP2F12-CP-adtz', 'MP2F12-CP-aqz', 'MP2F12-CP-atqz']) self.plot_bars(['SCSMP2F12-CP-dz', 'SCSMP2F12-CP-jadz', 'SCSMP2F12-CP-hadz', 'SCSMP2F12-CP-adz', 'SCSMP2F12-CP-tz', 'SCSMP2F12-CP-matz', 'SCSMP2F12-CP-jatz', 'SCSMP2F12-CP-hatz', 'SCSMP2F12-CP-atz', 'SCSMP2F12-CP-dtz', 'SCSMP2F12-CP-jadtz', 'SCSMP2F12-CP-hadtz', 'SCSMP2F12-CP-adtz', 'SCSMP2F12-CP-aqz', 'SCSMP2F12-CP-atqz']) self.plot_bars(['SCSNMP2F12-CP-dz', 'SCSNMP2F12-CP-jadz', 'SCSNMP2F12-CP-hadz', 'SCSNMP2F12-CP-adz', 'SCSNMP2F12-CP-tz', 'SCSNMP2F12-CP-matz', 'SCSNMP2F12-CP-jatz', 'SCSNMP2F12-CP-hatz', 'SCSNMP2F12-CP-atz', 'SCSNMP2F12-CP-dtz', 'SCSNMP2F12-CP-jadtz', 'SCSNMP2F12-CP-adtz', 'SCSNMP2F12-CP-adtz', 'SCSNMP2F12-CP-aqz', 'SCSNMP2F12-CP-atqz']) self.plot_bars([None, None, None, None, 'SCSMIMP2F12-CP-tz', 'SCSMIMP2F12-CP-matz', 'SCSMIMP2F12-CP-jatz', 'SCSMIMP2F12-CP-hatz', 'SCSMIMP2F12-CP-atz', 'SCSMIMP2F12-CP-dtz', 'SCSMIMP2F12-CP-jadtz', 'SCSMIMP2F12-CP-hadtz', 'SCSMIMP2F12-CP-adtz', 'SCSMIMP2F12-CP-aqz', 'SCSMIMP2F12-CP-atqz']) self.plot_bars(['DWMP2F12-CP-dz', 'DWMP2F12-CP-jadz', 'DWMP2F12-CP-hadz', 'DWMP2F12-CP-adz', 'DWMP2F12-CP-tz', 'DWMP2F12-CP-matz', 'DWMP2F12-CP-jatz', 'DWMP2F12-CP-hatz', 'DWMP2F12-CP-atz', 'DWMP2F12-CP-dtz', 'DWMP2F12-CP-jadtz', 'DWMP2F12-CP-hadtz', 'DWMP2F12-CP-adtz', 'DWMP2F12-CP-aqz', 'DWMP2F12-CP-atqz']) self.plot_bars(['MP2CF12-CP-dz', 'MP2CF12-CP-jadz', 'MP2CF12-CP-hadz', 'MP2CF12-CP-adz', 'MP2CF12-CP-tz', 'MP2CF12-CP-matz', 'MP2CF12-CP-jatz', 'MP2CF12-CP-hatz', 'MP2CF12-CP-atz', 'MP2CF12-CP-dtz', 'MP2CF12-CP-jadtz', 'MP2CF12-CP-hadtz', 'MP2CF12-CP-adtz', 'MP2CF12-CP-aqz', 'MP2CF12-CP-atqz']) self.plot_bars(['MP2CF12-CP-atqzdz', 'MP2CF12-CP-atqzjadz', 'MP2CF12-CP-atqzhadz', 'MP2CF12-CP-atqzadz', 'MP2CF12-CP-atqztz', 'MP2CF12-CP-atqzmatz', 'MP2CF12-CP-atqzjatz', 'MP2CF12-CP-atqzhatz', 'MP2CF12-CP-atqzatz', 'MP2CF12-CP-atqzdtz', 'MP2CF12-CP-atqzjadtz', 'MP2CF12-CP-atqzhadtz', 'MP2CF12-CP-atqzadtz'])
[docs] def make_pt2_Figure_2(self): """Plot all the graphics needed for the diffuse augmented grey bars plot in Fig. 2 of PT2. Note that in the modern implementation of class DB4, would need to pass ``sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']`` to get published figure. """ # Fig. bars (a) self.plot_bars(['MP2-CP-adz', 'MP2-CP-atz', 'MP2-CP-adtz', 'MP2-CP-aqz', 'MP2-CP-atqz', 'MP2-CP-a5z', 'MP2-CP-aq5z']) self.plot_bars(['SCSMP2-CP-adz', 'SCSMP2-CP-atz', 'SCSMP2-CP-adtz', 'SCSMP2-CP-aqz', 'SCSMP2-CP-atqz', 'SCSMP2-CP-a5z', 'SCSMP2-CP-aq5z']) self.plot_bars(['SCSNMP2-CP-adz', 'SCSNMP2-CP-atz', 'SCSNMP2-CP-adtz', 'SCSNMP2-CP-aqz', 'SCSNMP2-CP-atqz', 'SCSNMP2-CP-a5z', 'SCSNMP2-CP-aq5z']) self.plot_bars(['SCSMIMP2-CP-atz', 'SCSMIMP2-CP-atz', 'SCSMIMP2-CP-adtz', 'SCSMIMP2-CP-aqz', 'SCSMIMP2-CP-atqz']) self.plot_bars(['SCSMIMP2-CP-tz', 'SCSMIMP2-CP-tz', 'SCSMIMP2-CP-dtz', 'SCSMIMP2-CP-qz', 'SCSMIMP2-CP-tqz']) self.plot_bars(['DWMP2-CP-adz', 'DWMP2-CP-atz', 'DWMP2-CP-adtz', 'DWMP2-CP-aqz', 'DWMP2-CP-atqz', 'DWMP2-CP-a5z', 'DWMP2-CP-aq5z']) self.plot_bars(['MP2C-CP-adz', 'MP2C-CP-adtzadz', 'MP2C-CP-atqzadz', 'MP2C-CP-aq5zadz', 'MP2C-CP-atz', 'MP2C-CP-atqzatz', 'MP2C-CP-aq5zatz', 'MP2C-CP-adtz', 'MP2C-CP-atqzadtz', 'MP2C-CP-aqz', 'MP2C-CP-atqz']) # Fig. bars (b) self.plot_bars(['MP3-CP-adz', 'MP3-CP-adtzadz', 'MP3-CP-atqzadz', 'MP3-CP-atz', 'MP3-CP-atqzatz', 'MP3-CP-adtz', 'MP3-CP-atqzadtz']) self.plot_bars(['MP25-CP-adz', 'MP25-CP-adtzadz', 'MP25-CP-atqzadz', 'MP25-CP-atz', 'MP25-CP-atqzatz', 'MP25-CP-adtz', 'MP25-CP-atqzadtz']) self.plot_bars(['CCSD-CP-adz', 'CCSD-CP-adtzadz', 'CCSD-CP-atqzadz', 'CCSD-CP-atz', 'CCSD-CP-atqzatz', 'CCSD-CP-adtz', 'CCSD-CP-atqzadtz']) self.plot_bars(['SCSCCSD-CP-adz', 'SCSCCSD-CP-adtzadz', 'SCSCCSD-CP-atqzadz', 'SCSCCSD-CP-atz', 'SCSCCSD-CP-atqzatz', 'SCSCCSD-CP-adtz', 'SCSCCSD-CP-atqzadtz']) self.plot_bars(['SCSMICCSD-CP-adz', 'SCSMICCSD-CP-adtzadz', 'SCSMICCSD-CP-atqzadz', 'SCSMICCSD-CP-atz', 'SCSMICCSD-CP-atqzatz', 'SCSMICCSD-CP-adtz', 'SCSMICCSD-CP-atqzadtz']) self.plot_bars(['CCSDT-CP-adz', 'CCSDT-CP-adtzadz', 'CCSDT-CP-atqzadz', 'CCSDT-CP-atz', 'CCSDT-CP-atqzatz', 'CCSDT-CP-adtz', 'CCSDT-CP-atqzadtz']) # Fig. bars (c) self.plot_bars(['MP2F12-CP-adz', 'MP2F12-CP-atz', 'MP2F12-CP-adtz', 'MP2F12-CP-aqz', 'MP2F12-CP-atqz']) self.plot_bars(['SCSMP2F12-CP-adz', 'SCSMP2F12-CP-atz', 'SCSMP2F12-CP-adtz', 'SCSMP2F12-CP-aqz', 'SCSMP2F12-CP-atqz']) self.plot_bars(['SCSNMP2F12-CP-adz', 'SCSNMP2F12-CP-atz', 'SCSNMP2F12-CP-adtz', 'SCSNMP2F12-CP-aqz', 'SCSNMP2F12-CP-atqz']) self.plot_bars(['SCSMIMP2F12-CP-atz', 'SCSMIMP2F12-CP-atz', 'SCSMIMP2F12-CP-adtz', 'SCSMIMP2F12-CP-aqz', 'SCSMIMP2F12-CP-atqz']) self.plot_bars(['SCSMIMP2F12-CP-tz', 'SCSMIMP2F12-CP-tz', 'SCSMIMP2F12-CP-dtz']) self.plot_bars(['DWMP2F12-CP-adz', 'DWMP2F12-CP-atz', 'DWMP2F12-CP-adtz', 'DWMP2F12-CP-aqz', 'DWMP2F12-CP-atqz']) self.plot_bars(['MP2CF12-CP-adz', 'MP2CF12-CP-adtzadz', 'MP2CF12-CP-atqzadz', 'MP2CF12-CP-atz', 'MP2CF12-CP-atqzatz', 'MP2CF12-CP-adtz', 'MP2CF12-CP-atqzadtz', 'MP2CF12-CP-aqz', 'MP2CF12-CP-atqz']) # Fig. bars (d) self.plot_bars(['CCSDAF12-CP-adz', 'CCSDAF12-CP-adtzadz', 'CCSDAF12-CP-atqzadz']) self.plot_bars(['CCSDBF12-CP-adz', 'CCSDBF12-CP-adtzadz', 'CCSDBF12-CP-atqzadz']) self.plot_bars(['SCSCCSDAF12-CP-adz', 'SCSCCSDAF12-CP-adtzadz', 'SCSCCSDAF12-CP-atqzadz']) self.plot_bars(['SCSCCSDBF12-CP-adz', 'SCSCCSDBF12-CP-adtzadz', 'SCSCCSDBF12-CP-atqzadz']) self.plot_bars(['SCMICCSDAF12-CP-adz', 'SCMICCSDAF12-CP-adtzadz', 'SCMICCSDAF12-CP-atqzadz']) self.plot_bars(['SCMICCSDBF12-CP-adz', 'SCMICCSDBF12-CP-adtzadz', 'SCMICCSDBF12-CP-atqzadz']) self.plot_bars(['CCSDTAF12-CP-adz', 'CCSDTAF12-CP-adtzadz', 'CCSDTAF12-CP-atqzadz']) self.plot_bars(['CCSDTBF12-CP-adz', 'CCSDTBF12-CP-adtzadz', 'CCSDTBF12-CP-atqzadz']) self.plot_bars(['DWCCSDTF12-CP-adz', 'DWCCSDTF12-CP-adtzadz', 'DWCCSDTF12-CP-atqzadz'])
[docs] def plot_dhdft_flats(self): """Generate pieces for grey bars figure for DH-DFT paper.""" self.plot_all_flats( ['B97D3-CP-adz', 'PBED3-CP-adz', 'M11L-CP-adz', 'DLDFD-CP-adz', 'B3LYPD3-CP-adz', 'PBE0D3-CP-adz', 'WB97XD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz', 'VV10-CP-adz', 'LCVV10-CP-adz', 'WB97XV-CP-adz', 'PBE02-CP-adz', 'WB97X2-CP-adz', 'B2PLYPD3-CP-adz', 'DSDPBEP86D2OPT-CP-adz', 'MP2-CP-adz'], sset='tt-5min') self.plot_all_flats(['B97D3-unCP-adz', 'PBED3-unCP-adz', 'M11L-unCP-adz', 'DLDFD-unCP-adz', 'B3LYPD3-unCP-adz', 'PBE0D3-unCP-adz', 'WB97XD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz', 'M11-unCP-adz', 'VV10-unCP-adz', 'LCVV10-unCP-adz', 'WB97XV-unCP-adz', 'PBE02-unCP-adz', 'WB97X2-unCP-adz', 'B2PLYPD3-unCP-adz', 'DSDPBEP86D2OPT-unCP-adz', 'MP2-unCP-adz'], sset='tt-5min') self.plot_all_flats( ['B97D3-CP-atz', 'PBED3-CP-atz', 'M11L-CP-atz', 'DLDFD-CP-atz', 'B3LYPD3-CP-atz', 'PBE0D3-CP-atz', 'WB97XD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz', 'VV10-CP-atz', 'LCVV10-CP-atz', 'WB97XV-CP-atz', 'PBE02-CP-atz', 'WB97X2-CP-atz', 'B2PLYPD3-CP-atz', 'DSDPBEP86D2OPT-CP-atz', 'MP2-CP-atz'], sset='tt-5min') self.plot_all_flats(['B97D3-unCP-atz', 'PBED3-unCP-atz', 'M11L-unCP-atz', 'DLDFD-unCP-atz', 'B3LYPD3-unCP-atz', 'PBE0D3-unCP-atz', 'WB97XD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz', 'M11-unCP-atz', 'VV10-unCP-atz', 'LCVV10-unCP-atz', 'WB97XV-unCP-atz', 'PBE02-unCP-atz', 'WB97X2-unCP-atz', 'B2PLYPD3-unCP-atz', 'DSDPBEP86D2OPT-unCP-atz', 'MP2-unCP-atz'], sset='tt-5min')
[docs] def make_dhdft_Figure_1(self): """Plot all the graphics needed for the grey bars plot in Fig. 1 of DHDFT. """ # Fig. bars (a) self.plot_bars([ 'M052X-unCP-adz', 'M052X-CP-adz', 'M052X-unCP-atz', 'M052X-CP-atz', None, 'M062X-unCP-adz', 'M062X-CP-adz', 'M062X-unCP-atz', 'M062X-CP-atz', None, 'M08SO-unCP-adz', 'M08SO-CP-adz', 'M08SO-unCP-atz', 'M08SO-CP-atz', None, 'M08HX-unCP-adz', 'M08HX-CP-adz', 'M08HX-unCP-atz', 'M08HX-CP-atz', None, 'M11-unCP-adz', 'M11-CP-adz', 'M11-unCP-atz', 'M11-CP-atz', None, 'M11L-unCP-adz', 'M11L-CP-adz', 'M11L-unCP-atz', 'M11L-CP-atz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']) # Fig. bars (b) self.plot_bars([ 'PBED3-unCP-adz', 'PBED3-CP-adz', 'PBED3-unCP-atz', 'PBED3-CP-atz', None, 'B97D3-unCP-adz', 'B97D3-CP-adz', 'B97D3-unCP-atz', 'B97D3-CP-atz', None, 'PBE0D3-unCP-adz', 'PBE0D3-CP-adz', 'PBE0D3-unCP-atz', 'PBE0D3-CP-atz', None, 'B3LYPD3-unCP-adz', 'B3LYPD3-CP-adz', 'B3LYPD3-unCP-atz', 'B3LYPD3-CP-atz', None, 'DLDFD-unCP-adz', 'DLDFD-CP-adz', 'DLDFD-unCP-atz', 'DLDFD-CP-atz', None, 'WB97XD-unCP-adz', 'WB97XD-CP-adz', 'WB97XD-unCP-atz', 'WB97XD-CP-atz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']) # Fig. bars (c) self.plot_bars([ 'VV10-unCP-adz', 'VV10-CP-adz', 'VV10-unCP-atz', 'VV10-CP-atz', None, None, 'LCVV10-unCP-adz', 'LCVV10-CP-adz', 'LCVV10-unCP-atz', 'LCVV10-CP-atz', None, None, 'WB97XV-unCP-adz', 'WB97XV-CP-adz', 'WB97XV-unCP-atz', 'WB97XV-CP-atz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']) # Fig. bars (d) self.plot_bars([ 'PBE02-unCP-adz', 'PBE02-CP-adz', 'PBE02-unCP-atz', 'PBE02-CP-atz', None, 'WB97X2-unCP-adz', 'WB97X2-CP-adz', 'WB97X2-unCP-atz', 'WB97X2-CP-atz', None, 'B2PLYPD3-unCP-adz', 'B2PLYPD3-CP-adz', 'B2PLYPD3-unCP-atz', 'B2PLYPD3-CP-atz', None, 'DSDPBEP86D2OPT-unCP-adz', 'DSDPBEP86D2OPT-CP-adz', 'DSDPBEP86D2OPT-unCP-atz', 'DSDPBEP86D2OPT-CP-atz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min']) # Fig. bars (e) self.plot_bars([ 'MP2-unCP-adz', 'MP2-CP-adz', 'MP2-unCP-atz', 'MP2-CP-atz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min'])
[docs] def make_dhdft_Figure_2(self): """Plot all the graphics needed for the SAPT/DFT/WFN comparison plot in Fig. 2 of DHDFT. Note that benchmark set as reminder, not necessity, since default. """ self.plot_bars([ 'SAPT0S-CP-jadz', 'SAPTDFT-CP-atz', 'SAPT2P-CP-adz', 'SAPT3M-CP-atz', 'SAPT2PCM-CP-atz', None, 'B97D3-unCP-atz', 'B3LYPD3-CP-adz', 'M052X-unCP-adz', 'WB97XD-CP-atz', 'WB97XV-CP-adz', 'WB97X2-CP-atz', 'DSDPBEP86D2OPT-CP-atz', 'B2PLYPD3-CP-atz', None, 'MP2-CP-atz', 'SCSMP2-CP-atz', 'SCSMIMP2-CP-qz', 'MP2C-CP-atqzadz', 'MP2CF12-CP-adz', 'SCMICCSDAF12-CP-adz', 'CCSDT-CP-atz', 'CCSDT-CP-atqzatz', 'DWCCSDTF12-CP-adz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min'], benchmark='C2011BENCH')
[docs] def plot_dhdft_modelchems(self): self.plot_modelchems( ['B97D3-CP-adz', 'PBED3-CP-adz', 'M11L-CP-adz', 'DLDFD-CP-adz', 'B3LYPD3-CP-adz', 'PBE0D3-CP-adz', 'WB97XD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz', 'VV10-CP-adz', 'LCVV10-CP-adz', 'WB97XV-CP-adz', 'PBE02-CP-adz', 'WB97X2-CP-adz', 'B2PLYPD3-CP-adz', 'DSDPBEP86D2OPT-CP-adz', 'MP2-CP-adz'], sset='tt-5min') self.plot_modelchems(['B97D3-unCP-adz', 'PBED3-unCP-adz', 'M11L-unCP-adz', 'DLDFD-unCP-adz', 'B3LYPD3-unCP-adz', 'PBE0D3-unCP-adz', 'WB97XD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz', 'M11-unCP-adz', 'VV10-unCP-adz', 'LCVV10-unCP-adz', 'WB97XV-unCP-adz', 'PBE02-unCP-adz', 'WB97X2-unCP-adz', 'B2PLYPD3-unCP-adz', 'DSDPBEP86D2OPT-unCP-adz', 'MP2-unCP-adz'], sset='tt-5min') self.plot_modelchems( ['B97D3-CP-atz', 'PBED3-CP-atz', 'M11L-CP-atz', 'DLDFD-CP-atz', 'B3LYPD3-CP-atz', 'PBE0D3-CP-atz', 'WB97XD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz', 'VV10-CP-atz', 'LCVV10-CP-atz', 'WB97XV-CP-atz', 'PBE02-CP-atz', 'WB97X2-CP-atz', 'B2PLYPD3-CP-atz', 'DSDPBEP86D2OPT-CP-atz', 'MP2-CP-atz'], sset='tt-5min') self.plot_modelchems(['B97D3-unCP-atz', 'PBED3-unCP-atz', 'M11L-unCP-atz', 'DLDFD-unCP-atz', 'B3LYPD3-unCP-atz', 'PBE0D3-unCP-atz', 'WB97XD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz', 'M11-unCP-atz', 'VV10-unCP-atz', 'LCVV10-unCP-atz', 'WB97XV-unCP-atz', 'PBE02-unCP-atz', 'WB97X2-unCP-atz', 'B2PLYPD3-unCP-atz', 'DSDPBEP86D2OPT-unCP-atz', 'MP2-unCP-atz'], sset='tt-5min')
[docs] def plot_minn_modelchems(self): self.plot_modelchems( ['DLDFD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz', 'M11-unCP-adz', 'M11L-unCP-adz', 'DLDFD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz', 'M11L-CP-adz']) self.plot_modelchems( ['DlDFD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz', 'M11-unCP-atz', 'M11L-unCP-atz', 'DLDFD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz', 'M11L-CP-atz'])
[docs] def make_dhdft_Table_I(self): """Generate the in-manuscript summary slat table for DHDFT. """ self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3', 'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX', 'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV', 'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3', 'MP2', 'SCSNMP2', 'SCSMIMP2', 'MP2CF12', 'SCMICCSDAF12', 'SAPTDFT', 'SAPT0S', 'SAPT2P', 'SAPT3M', 'SAPT2PCM'], bas=['adz', 'atz'], tableplan=self.table_scrunch, opt=['CP', 'unCP'], err=['mae'], subjoin=None, plotpath=None, standalone=False, filename='tblssets_ex1')
[docs] def make_dhdft_Table_II(self): """Generate the in-manuscript CP slat table for DHDFT. """ self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3', 'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX', 'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV', 'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3', 'MP2'], bas=['adz', 'atz'], tableplan=self.table_merge_abbr, opt=['CP'], err=['mae'], subjoin=True, plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix standalone=False, filename='tblssets_ex2')
[docs] def make_dhdft_Table_III(self): """Generate the in-manuscript unCP slat table for DHDFT. """ self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3', 'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX', 'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV', 'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3', 'MP2'], bas=['adz', 'atz'], tableplan=self.table_merge_abbr, opt=['unCP'], err=['mae'], subjoin=True, plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix standalone=False, filename='tblssets_ex3')
[docs] def make_dhdft_Tables_SII(self): """Generate the subset details suppmat Part II tables and their indices for DHDFT. """ self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3', 'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX', 'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV', 'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3'], # 'MP2'] bas=['adz', 'atz'], tableplan=self.table_merge_suppmat, opt=['CP', 'unCP'], err=['mae', 'mape'], subjoin=False, plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix standalone=False, filename='tblssets')
[docs] def make_dhdft_Tables_SIII(self): """Generate the per-reaction suppmat Part III tables and their indices for DHDFT. """ self.table_reactions( ['B97D3-unCP-adz', 'B97D3-CP-adz', 'B97D3-unCP-atz', 'B97D3-CP-atz', 'PBED3-unCP-adz', 'PBED3-CP-adz', 'PBED3-unCP-atz', 'PBED3-CP-atz', 'M11L-unCP-adz', 'M11L-CP-adz', 'M11L-unCP-atz', 'M11L-CP-atz', 'DLDFD-unCP-adz', 'DLDFD-CP-adz', 'DLDFD-unCP-atz', 'DLDFD-CP-atz', 'B3LYPD3-unCP-adz', 'B3LYPD3-CP-adz', 'B3LYPD3-unCP-atz', 'B3LYPD3-CP-atz', 'PBE0D3-unCP-adz', 'PBE0D3-CP-adz', 'PBE0D3-unCP-atz', 'PBE0D3-CP-atz', 'WB97XD-unCP-adz', 'WB97XD-CP-adz', 'WB97XD-unCP-atz', 'WB97XD-CP-atz', 'M052X-unCP-adz', 'M052X-CP-adz', 'M052X-unCP-atz', 'M052X-CP-atz', 'M062X-unCP-adz', 'M062X-CP-adz', 'M062X-unCP-atz', 'M062X-CP-atz', 'M08HX-unCP-adz', 'M08HX-CP-adz', 'M08HX-unCP-atz', 'M08HX-CP-atz', 'M08SO-unCP-adz', 'M08SO-CP-adz', 'M08SO-unCP-atz', 'M08SO-CP-atz', 'M11-unCP-adz', 'M11-CP-adz', 'M11-unCP-atz', 'M11-CP-atz', 'VV10-unCP-adz', 'VV10-CP-adz', 'VV10-unCP-atz', 'VV10-CP-atz', 'LCVV10-unCP-adz', 'LCVV10-CP-adz', 'LCVV10-unCP-atz', 'LCVV10-CP-atz', 'WB97XV-unCP-adz', 'WB97XV-CP-adz', 'WB97XV-unCP-atz', 'WB97XV-CP-atz', 'PBE02-unCP-adz', 'PBE02-CP-adz', 'PBE02-unCP-atz', 'PBE02-CP-atz', 'WB97X2-unCP-adz', 'WB97X2-CP-adz', 'WB97X2-unCP-atz', 'WB97X2-CP-atz', 'DSDPBEP86D2OPT-unCP-adz', 'DSDPBEP86D2OPT-CP-adz', 'DSDPBEP86D2OPT-unCP-atz', 'DSDPBEP86D2OPT-CP-atz', 'B2PLYPD3-unCP-adz', 'B2PLYPD3-CP-adz', 'B2PLYPD3-unCP-atz', 'B2PLYPD3-CP-atz'], # 'MP2-unCP-adz', 'MP2-CP-adz', 'MP2-unCP-atz', 'MP2-CP-atz'], standalone=False, filename='tblrxn_all')
[docs]class ThreeDatabases(Database): """ """ def __init__(self, pythonpath=None): """Initialize ThreeDatabases object from Database""" Database.__init__(self, ['s22', 'a24', 'hsg'], dbse='DB3', pythonpath=None) # load up data and definitions self.load_qcdata_byproject('pt2') self.load_qcdata_byproject('dilabio') self.load_qcdata_byproject('f12dilabio') self.load_subsets() self.define_supersubsets() self.define_supermodelchems()
[docs] def define_supersubsets(self): """ """ self.sset['tt'] = ['default', 'default', 'default'] self.sset['hb'] = ['hb', 'hb', 'hb'] self.sset['mx'] = ['mx', 'mx', 'mx'] self.sset['dd'] = ['dd', 'dd', 'dd'] self.sset['mxdd'] = ['mxdd', 'mxdd', 'mxdd'] self.sset['pp'] = ['mxddpp', 'mxddpp', 'mxddpp'] self.sset['np'] = ['mxddnp', 'mxddnp', 'mxddnp'] self.sset['tt-5min'] = ['default', 'default', 'default'] self.sset['hb-5min'] = ['hb', 'hb', 'hb'] self.sset['mx-5min'] = ['mx', 'mx', 'mx'] self.sset['dd-5min'] = ['dd', 'dd', 'dd'] self.sset['mxdd-5min'] = ['mxdd', 'mxdd', 'mxdd'] self.sset['pp-5min'] = ['mxddpp', 'mxddpp', 'mxddpp'] self.sset['np-5min'] = ['mxddnp', 'mxddnp', 'mxddnp'] self.sset['weak'] = ['weak', 'weak', 'weak'] self.sset['weak_hb'] = ['weak_hb', None, 'weak_hb'] self.sset['weak_mx'] = ['weak_mx', 'weak_mx', 'weak_mx'] self.sset['weak_dd'] = ['weak_dd', 'weak_dd', 'weak_dd']
[docs] def define_supermodelchems(self): """ """ self.mc['CCSD-CP-adz'] = ['CCSD-CP-adz', 'CCSD-CP-hadz', 'CCSD-CP-adz'] self.mc['CCSD-CP-atz'] = ['CCSD-CP-atz', 'CCSD-CP-hatz', 'CCSD-CP-atz'] self.mc['CCSD-CP-adtz'] = ['CCSD-CP-adtz', 'CCSD-CP-hadtz', 'CCSD-CP-adtz'] self.mc['CCSD-CP-adtzadz'] = ['CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz', 'CCSD-CP-adtzadz'] self.mc['CCSD-CP-atzadz'] = ['CCSD-CP-atzadz', 'CCSD-CP-atzhadz', 'CCSD-CP-atzadz'] self.mc['CCSD-CP-atqzadz'] = ['CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz', 'CCSD-CP-atqzadz'] self.mc['CCSD-CP-atzadtz'] = ['CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz', 'CCSD-CP-atzadtz'] self.mc['CCSD-CP-atqzadtz'] = ['CCSD-CP-atqzadtz', 'CCSD-CP-atqzhadtz', 'CCSD-CP-atqzadtz'] self.mc['CCSD-CP-atqzatz'] = ['CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz', 'CCSD-CP-atqzatz'] self.mc['CCSDT-CP-adz'] = ['CCSDT-CP-adz', 'CCSDT-CP-hadz', 'CCSDT-CP-adz'] self.mc['CCSDT-CP-atz'] = ['CCSDT-CP-atz', 'CCSDT-CP-hatz', 'CCSDT-CP-atz'] self.mc['CCSDT-CP-adtz'] = ['CCSDT-CP-adtz', 'CCSDT-CP-hadtz', 'CCSDT-CP-adtz'] self.mc['CCSDT-CP-adtzadz'] = ['CCSDT-CP-adtzadz', 'CCSDT-CP-adtzhadz', 'CCSDT-CP-adtzadz'] self.mc['CCSDT-CP-atzadz'] = ['CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz', 'CCSDT-CP-atzadz'] self.mc['CCSDT-CP-atqzadz'] = ['CCSDT-CP-atqzadz', 'CCSDT-CP-atqzhadz', 'CCSDT-CP-atqzadz'] self.mc['CCSDT-CP-atzadtz'] = ['CCSDT-CP-atzadtz', 'CCSDT-CP-atzhadtz', 'CCSDT-CP-atzadtz'] self.mc['CCSDT-CP-atqzadtz'] = ['CCSDT-CP-atqzadtz', 'CCSDT-CP-atqzhadtz', 'CCSDT-CP-atqzadtz'] self.mc['CCSDT-CP-atqzatz'] = ['CCSDT-CP-atqzatz', 'CCSDT-CP-atqzhatz', 'CCSDT-CP-atqzatz']
# print certain statistic for all 4 db and summary and indiv sys if min or max fnreservoir = {} fnreservoir['blankslat'] = r"""Errors with respect to Benchmark. Guide lines are at 0, 0.3, and 1.0 kcal/mol overbound ($-$) and underbound ($+$).""" fnreservoir['5min'] = r"""Only equilibrium and near-equilibrium systems included. (All S22 and HSG, 50/194 NBC10, 28/118 HBC6.)""" fnreservoir['liliowa'] = r"""{0}MAE (dark by {1} kcal/mol) for subsets in residue classes cation, anion, polar, aliphatic, \& aromatic (L to R).""" fnreservoir['flat'] = r"""{0}Errors with respect to benchmark within $\pm${1} kcal/mol. Guide lines are at {2} overbound ($-$) and underbound ($+$)."""