from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import math
try:
import cPickle as pickle
except ImportError:
import pickle
import itertools
# from collections import defaultdict
try:
from collections import OrderedDict
except ImportError:
from oldpymodules import OrderedDict
from .exceptions import *
from .molecule import Molecule
from .modelchems import Method, BasisSet, Error, methods, bases, errors, pubs
from . import psiutil
from . import textables
[docs]def initialize_errors():
"""Form OrderedDict of all possible statistical measures set to None"""
error = OrderedDict()
for e in ['e', 'pe', 'pbe', 'pce']:
for m in ['pex', 'nex', 'max', 'min', 'm', 'ma', 'rms', 'std']:
error[m + e] = None
return error
[docs]def initialize_errors_elaborate(e=None, pe=None, pbe=None, pce=None, extrema=True):
error = OrderedDict()
error['maxe'] = None if (e is None or not extrema) else e # LD_XA
error['mine'] = None if (e is None or not extrema) else e # LD_XI
error['me'] = None if e is None else 0.0 # LD_MS
error['mae'] = None if e is None else 0.0 # LD_MA
error['rmse'] = None if e is None else 0.0 # LD_RA
error['stde'] = None if e is None else 0.0
error['maxpe'] = None if (pe is None or not extrema) else pe # FD_XA
error['minpe'] = None if (pe is None or not extrema) else pe # FD_XI
error['mpe'] = None if pe is None else 0.0 # FD_MS
error['mape'] = None if pe is None else 0.0 # FD_MA
error['rmspe'] = None if pe is None else 0.0 # FD_RA
error['stdpe'] = None if pe is None else 0.0
error['maxpbe'] = None if (pbe is None or not extrema) else pbe # BD_XA
error['minpbe'] = None if (pbe is None or not extrema) else pbe # BD_XI
error['mpbe'] = None if pbe is None else 0.0 # BD_MS
error['mapbe'] = None if pbe is None else 0.0 # BD_MA
error['rmspbe'] = None if pbe is None else 0.0 # BD_RA
error['stdpbe'] = None if pbe is None else 0.0
error['maxpce'] = None if (pce is None or not extrema) else pce # BD_XA
error['minpce'] = None if (pce is None or not extrema) else pce # BD_XI
error['mpce'] = None if pce is None else 0.0 # BD_MS
error['mapce'] = None if pce is None else 0.0 # BD_MA
error['rmspce'] = None if pce is None else 0.0 # BD_RA
error['stdpce'] = None if pce is None else 0.0
return error
[docs]def average_errors(*args):
"""Each item in *args* should be an error dictionary. Performs
average-like operation over all items, which should be error
dictionaries, in *args*. Defined for ME, MAE, STDE, and their
relative-error variants. None returned for undefined statistics or
when an item is missing.
"""
Ndb = float(len(args))
avgerror = initialize_errors()
try:
avgerror['pexe'] = max([x['pexe'] for x in args])
avgerror['nexe'] = min([x['nexe'] for x in args])
avgerror['maxe'] = max([x['maxe'] for x in args], key=lambda x: abs(x))
avgerror['mine'] = min([x['mine'] for x in args], key=lambda x: abs(x))
avgerror['me'] = sum([x['me'] for x in args]) / Ndb
avgerror['mae'] = sum([x['mae'] for x in args]) / Ndb
avgerror['rmse'] = sum([x['rmse'] for x in args]) / Ndb # TODO: unsure of op validity
avgerror['stde'] = math.sqrt(sum([x['stde'] ** 2 for x in args]) / Ndb)
avgerror['pexpe'] = max([x['pexpe'] for x in args])
avgerror['nexpe'] = min([x['nexpe'] for x in args])
avgerror['maxpe'] = max([x['maxpe'] for x in args], key=lambda x: abs(x))
avgerror['minpe'] = min([x['minpe'] for x in args], key=lambda x: abs(x))
avgerror['mpe'] = sum([x['mpe'] for x in args]) / Ndb
avgerror['mape'] = sum([x['mape'] for x in args]) / Ndb
avgerror['rmspe'] = sum([x['rmspe'] for x in args]) / Ndb # TODO: unsure of op validity
avgerror['stdpe'] = math.sqrt(sum([x['stdpe'] * x['stdpe'] for x in args]) / Ndb)
avgerror['pexpbe'] = max([x['pexpbe'] for x in args])
avgerror['nexpbe'] = min([x['nexpbe'] for x in args])
avgerror['maxpbe'] = max([x['maxpbe'] for x in args], key=lambda x: abs(x))
avgerror['minpbe'] = min([x['minpbe'] for x in args], key=lambda x: abs(x))
avgerror['mpbe'] = sum([x['mpbe'] for x in args]) / Ndb
avgerror['mapbe'] = sum([x['mapbe'] for x in args]) / Ndb
avgerror['rmspbe'] = sum([x['rmspbe'] for x in args]) / Ndb # TODO: unsure of op validity
avgerror['stdpbe'] = math.sqrt(sum([x['stdpbe'] * x['stdpbe'] for x in args]) / Ndb)
avgerror['pexpce'] = max([x['pexpce'] for x in args])
avgerror['nexpce'] = min([x['nexpce'] for x in args])
avgerror['maxpce'] = max([x['maxpce'] for x in args], key=lambda x: abs(x))
avgerror['minpce'] = min([x['minpce'] for x in args], key=lambda x: abs(x))
avgerror['mpce'] = sum([x['mpce'] for x in args]) / Ndb
avgerror['mapce'] = sum([x['mapce'] for x in args]) / Ndb
avgerror['rmspce'] = sum([x['rmspce'] for x in args]) / Ndb # TODO: unsure of op validity
avgerror['stdpce'] = math.sqrt(sum([x['stdpce'] * x['stdpce'] for x in args]) / Ndb)
except TypeError:
pass
return avgerror
[docs]def string_contrast(ss):
"""From an array of strings, *ss*, returns maximum common prefix
string, maximum common suffix string, and array of middles.
"""
s = [item + 'q' for item in ss if item is not None]
short = min(s, key=len)
for ib in range(len(short)):
if not all([mc[ib] == short[ib] for mc in s]):
preidx = ib
break
else:
preidx = 0
for ib in range(len(short)):
ie = -1 * (ib + 1)
if not all([mc[ie] == short[ie] for mc in s]):
sufidx = ie + 1
break
else:
sufidx = -1 * (len(short))
miditer = iter([mc[preidx:sufidx] for mc in s])
prefix = short[:preidx]
suffix = short[sufidx:-1]
middle = ['' if mc is None else next(miditer) for mc in ss]
return prefix, suffix, middle
[docs]def oxcom(lst):
"""Returns gramatical comma separated string of *lst*."""
lst = [str(l) for l in lst]
if not lst:
return ''
elif len(lst) == 1:
return lst[0]
elif len(lst) == 2:
return ' and '.join(lst)
else:
return ', and '.join([', '.join(lst[:-1]), lst[-1]])
[docs]def cure_weight(refrxn, refeq, rrat, xi=0.2):
"""
:param refeq: value of benchmark for equilibrium Reaction
:param rrat: ratio of intermonomer separation for Reaction to equilibrium Reaction
:param xi: parameter
:return: weight for CURE
"""
sigma = xi * abs(refeq) / (rrat ** 3)
weight = max(abs(refrxn), sigma)
return weight
[docs]def balanced_error(refrxn, refeq, rrat, m=0.03, p=10.0):
"""
:param refrxn:
:param refeq:
:param rrat:
:param m: minimum permitted weight for a point
:param p: multiples of abs(refeq) above refeq to which zero-line in head is displaced
:return:
"""
one = float(1)
q = one if rrat >= one else p
qm1perat = q - 1 + refrxn / refeq
weight = max(m, qm1perat / q)
mask = weight * q / abs(qm1perat)
return mask, weight
[docs]def fancify_mc_tag(mc, latex=False):
"""From the usual MTD-opt1_opt2-bas model chemistry identifier, return
string based on fullname, if *latex* is False or latex if *latex* is True.
"""
try:
mtd, mod, bas = mc.split('-')
except ValueError:
text = mc
else:
if latex:
text = r"""%20s / %-20s %s""" % (methods[mtd].latex, bases[bas].latex, mod)
else:
text = r"""%20s / %s, %s""" % (methods[mtd].fullname, bases[bas].fullname, mod)
return text
[docs]class ReactionDatum(object):
"""Piece of quantum chemical information that describes a qcdb.Reaction object.
"""
def __init__(self, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, doi=None, comment=None):
# geometry
self.dbrxn = dbse + '-' + str(rxn)
# qcdb.Method
self.method = method
# mode, e.g., unCP, CP, RLX, etc.
self.mode = mode
# qcdb.BasisSet
self.basis = basis
# numerical value for reaction
self.value = float(value)
# energy unit attached to value, defaults to kcal/mol
self.units = units
# publication citation of value
self.citation = citation
# digital object identifier for publication (maybe this should be doi of datum, not of pub?)
self.doi = doi
# addl comments
self.comment = comment
@classmethod
[docs] def library_modelchem(cls, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, doi=None,
comment=None):
"""Constructor when method and basis are strings corresponding to
qcdb.Method and qcdb.BasisSet already defined in methods and bases.
"""
# computational method
try:
tmp_method = methods[method.upper()]
except KeyError as e:
raise ValidationError("""Invalid ReactionDatum method %s: %s""" % (method, e))
# computational basis set
try:
tmp_basis = bases[basis.lower()]
except KeyError as e:
raise ValidationError("""Invalid ReactionDatum basis %s: %s""" % (basis, e))
# publication
if citation is None:
tmp_pub = citation
else:
try:
tmp_pub = pubs[citation.lower()]
except KeyError as e:
raise ValidationError("""Invalid ReactionDatum publication %s: %s""" % (citation, e))
return cls(dbse, rxn, tmp_method, mode, tmp_basis, value, units, citation=tmp_pub, doi=doi, comment=comment)
def __str__(self):
text = ''
text += """ ==> ReactionDatum <==\n\n"""
text += """ Database reaction: %s\n""" % (self.dbrxn)
text += """ Method: %s\n""" % (self.method.fullname)
text += """ Mode: %s\n""" % (self.mode)
text += """ Basis: %s\n""" % (self.basis.fullname)
text += """ Value: %f [%s]\n""" % (self.value, self.units)
text += """ Citation: %s %s\n""" % (self.citation.name, self.citation.doi)
text += """ DOI: %s\n""" % (self.doi)
text += """ Comment: %s\n""" % (self.comment)
text += """\n"""
return text
[docs]class Subset(object):
"""Affiliated qcdb.Reaction-s
"""
def __init__(self, name, hrxn, tagl=None, axis=None):
# identifier
self.name = name
# array of reactions names
self.hrxn = hrxn
# description line
self.tagl = tagl
# mathematical relationships of reactions
self.axis = OrderedDict()
def __str__(self):
text = ''
text += """ ==> %s Subset <==\n\n""" % (self.name)
text += """ Tagline: %s\n""" % (self.tagl)
text += """ %20s""" % ('Reactions')
for ax in self.axis.keys():
text += """ %8s""" % (ax)
text += """\n"""
for ix in range(len(self.hrxn)):
text += """ %20s""" % (str(self.hrxn[ix]))
for ax in self.axis.values():
text += """ %8.3f""" % (ax[ix])
text += """\n"""
text += """\n"""
return text
[docs]class Reagent(object):
"""Chemical entity only slightly dresed up from qcdb.Molecule.
"""
def __init__(self, name, mol, tagl=None, comment=None):
# full name, e.g., 'S22-2-dimer' or 'NBC1-BzMe-8.0-monoA-CP' or 'HTBH-HCl-reagent'
self.name = name
# qcdb.Molecule
try:
self.NRE = mol.nuclear_repulsion_energy()
except AttributeError:
raise ValidationError("""Reagent must be instantiated with qcdb.Molecule object.""")
else:
self.mol = mol.create_psi4_string_from_molecule()
# description line
self.tagl = tagl
# # addl comments
# self.comment = comment
# # fragmentation
# self.fragments = mol.fragments
# # frag activation
# self.frtype = mol.fragment_types
# # frag charge
# self.frchg = mol.fragment_charges
# # frag multiplicity
# self.frmult = mol.fragment_multiplicities
self.charge = mol.molecular_charge()
def __str__(self):
text = ''
text += """ ==> %s Reagent <==\n\n""" % (self.name)
text += """ Tagline: %s\n""" % (self.tagl)
# text += """ Comment: %s\n""" % (self.comment)
text += """ NRE: %f\n""" % (self.NRE)
# text += """ Charge: %+d\n"""
# text += """ Fragments: %d\n""" % (len(self.fragments))
# text += """ FrgNo Actv Chg Mult AtomRange\n"""
# for fr in range(len(self.fragments)):
# text += """ %-4d %1s %+2d %2d %s\n""" % (fr + 1,
# '*' if self.frtype[fr] == 'Real' else '',
# self.frchg[fr], self.frmult[fr], self.fragments[fr])
text += """ Molecule: \n%s""" % (self.mol)
text += """\n"""
return text
[docs]class Reaction(object):
"""
"""
def __init__(self, name, dbse, indx, tagl=None, latex=None, color='black', comment=None):
# name, e.g., '2' or 'BzMe-8.0'
self.name = name
# database reaction name, e.g., 'S22-2' or 'NBC1-BzMe-8.0'
self.dbrxn = dbse + '-' + str(name)
# numerical index of reaction
self.indx = indx
# description line
self.tagl = tagl
# latex description
self.latex = latex
# addl comments
self.comment = comment
# reaction matrices, specifying reagent contributions per reaction
self.rxnm = {}
# qcdb.ReactionDatum objects of quantum chemical data pertaining to reaction
self.data = {}
# benchmark qcdb.ReactionDatum
self.benchmark = None
# color for plotting
self.color = color
def __str__(self):
text = ''
text += """ ==> %s Reaction <==\n\n""" % (self.name)
text += """ Database reaction: %s\n""" % (self.dbrxn)
text += """ Index: %s\n""" % (self.indx)
text += """ LaTeX representation: %s\n""" % (self.latex)
text += """ Tagline: %s\n""" % (self.tagl)
text += """ Comment: %s\n""" % (self.comment)
if self.benchmark is None:
text += """ Benchmark: %s\n""" % ('UNDEFINED')
else:
text += """ Benchmark: %f\n""" % (self.data[self.benchmark].value)
text += """ Color: %s\n""" % (str(self.color))
text += """ Reaction matrix:\n"""
for mode, rxnm in self.rxnm.iteritems():
text += """ %s\n""" % (mode)
for rgt, coeff in rxnm.iteritems():
text += """ %3d %s\n""" % (coeff, rgt.name)
text += """ Data:\n"""
for label, datum in sorted(self.data.iteritems()):
text += """ %8.2f %s\n""" % (datum.value, label)
text += """\n"""
return text
[docs] def compute_errors(self, benchmark='default', mcset='default', failoninc=True, verbose=False):
"""For all data or modelchem subset *mcset*, computes raw reaction
errors between *modelchem* and *benchmark* model chemistries.
Returns error if model chemistries are missing for any reaction in
subset unless *failoninc* set to False, whereupon returns partial.
Returns dictionary of reaction labels and error forms.
"""
if mcset == 'default':
lsslist = self.data.keys()
elif callable(mcset):
# mcset is function that will generate subset of HRXN from sset(self)
lsslist = [mc for mc in self.data.keys() if mc in mcset(self)] # untested
else:
# mcset is array containing modelchemistries
lsslist = [mc for mc in self.data.keys() if mc in mcset]
# assemble dict of qcdb.Reaction objects from array of reaction names
lsset = OrderedDict()
for mc in lsslist:
lsset[mc] = self.data[mc]
lbench = self.benchmark if benchmark == 'default' else benchmark
try:
mcGreater = self.data[lbench].value
except KeyError as e:
raise ValidationError("""Reaction %s missing benchmark datum %s.""" % (self.name, str(e)))
err = {}
for label, datum in lsset.iteritems():
try:
mcLesser = datum.value
except KeyError as e:
if failoninc:
raise ValidationError("""Reaction %s missing datum %s.""" % (label, str(e)))
else:
continue
err[label] = [mcLesser - mcGreater,
(mcLesser - mcGreater) / abs(mcGreater),
(mcLesser - mcGreater) / abs(mcGreater)] # TODO define BER
if verbose:
print("""p = %6.2f, pe = %6.1f%%, bpe = %6.1f%% modelchem %s.""" %
(err[label][0], 100 * err[label][1], 100 * err[label][2], label))
return err
[docs] def plot(self, benchmark='default', mcset='default',
failoninc=True, verbose=False, color='sapt',
xlimit=4.0, labeled=True, view=True,
mousetext=None, mouselink=None, mouseimag=None, mousetitle=None, mousediv=None,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Computes individual errors over model chemistries in *mcset* (which
may be default or an array or a function generating an array) versus
*benchmark*. Thread *color* can be 'rgb' for old coloring, a color
name or 'sapt' for spectrum coloring.
*saveas* conveys directory ('/') and/or filename for saving the
resulting plot. File extension is not accessible, but *graphicsformat*
array requests among 'png', 'pdf', and 'eps' formats. *relpath*
forces paths to saved files to be relative to current directory,
rather than absolute paths for returned code and file dictionary.
Prepares thread diagram instructions and either executes them if
matplotlib available (Canopy or Anaconda) or prints them. Returns a
dictionary of all saved plot filenames. If any of *mousetext*, *mouselink*,
or *mouseimag* is specified, htmlcode will be returned with an image map of
slats to any of text, link, or image, respectively.
"""
# compute errors
dbse = self.dbrxn.split('-')[0]
indiv = self.compute_errors(benchmark=benchmark, mcset=mcset,
failoninc=failoninc, verbose=verbose)
# repackage
dbdat = []
for mc in indiv.keys():
dbdat.append({'db': dbse,
'show': fancify_mc_tag(mc),
'sys': mc,
'color': self.color,
'data': [indiv[mc][0]]})
mae = None # [errors[ix][self.dbse]['mae'] for ix in index]
mape = None # [100 * errors[ix][self.dbse]['mape'] for ix in index]
# form unique filename
# ixpre, ixsuf, ixmid = string_contrast(index)
# title = self.dbse + ' ' + ixpre + '[]' + ixsuf
title = self.dbrxn
labels = ['']
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict, htmlcode = mpl.threads(%s,\n color='%s',\n title='%s',\n labels=%s,\n mae=%s,\n mape=%s\n xlimit=%s\n labeled=%s\n saveas=%s\n mousetext=%s\n mouselink=%s\n mouseimag=%s\n mousetitle=%s,\n mousediv=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, color, title, labels, mae, mape, str(xlimit),
repr(labeled), repr(saveas), repr(mousetext), repr(mouselink), repr(mouseimag),
repr(mousetitle), repr(mousediv), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict, htmlcode = mpl.threads(dbdat, color=color, title=title, labels=labels, mae=mae, mape=mape,
xlimit=xlimit, labeled=labeled, view=view,
mousetext=mousetext, mouselink=mouselink,
mouseimag=mouseimag, mousetitle=mousetitle, mousediv=mousediv,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict, htmlcode
[docs]class WrappedDatabase(object):
"""Wrapper class for raw Psi4 database modules that does some validation
of contents, creates member data and accessors for database structures,
defines error computation, and handles database subsets. Not to be used
directly-- see qcdb.Database for handling single or multiple
qdcb.WrappedDatabase objects and defining nice statistics, plotting, and
table functionalities.
>>> asdf = qcdb.WrappedDatabase('Nbc10')
"""
def __init__(self, dbname, pythonpath=None):
"""Instantiate class with case insensitive name *dbname*. Module
search path can be prepended with *pythonpath*.
"""
#: internal name of database
#:
#: >>> print asdf.dbse
#: 'NBC1'
self.dbse = None
#: description line
#:
#: >>> print asdf.tagl
#: 'interaction energies of dissociation curves for non-bonded systems'
self.tagl = None
#: OrderedDict of reactions/members
#:
#: >>> print asdf.hrxn.keys()
#: ['BzBz_S-3.2', 'BzBz_S-3.3', ... 'BzBz_PD36-2.8', 'BzBz_PD36-3.0']
self.hrxn = None
#: dict of reagents/geometries
#:
#: >>> print asdf.hrgt.keys()
#: ['NBC1-BzBz_PD32-0.8-monoA-CP', 'NBC1-BzBz_PD34-0.6-dimer', ... 'NBC1-BzBz_PD34-1.7-dimer']
self.hrgt = None
#: dict of defined reaction subsets.
#: Note that self.sset['default'] contains all the nonredundant information.
#:
#: >>> print asdf.sset.keys()
#: ['meme', 'mxddpp', '5min', ... 'small']
self.sset = None
# Removing hrxn, hrgt etc. do not reduce the size of the object.
# These attributes are stored for ease of access for adding qc info, etc.
#: object of defined reaction subsets.
self.oss = None
# load database
if pythonpath is not None:
sys.path.insert(1, pythonpath)
else:
sys.path.append(os.path.dirname(__file__) + '/../databases')
database = psiutil.import_ignorecase(dbname)
if not database:
print('\nPython module for database %s failed to load\n\n' % (dbname))
print('\nSearch path that was tried:\n')
print(", ".join(map(str, sys.path)))
raise ValidationError("Python module loading problem for database " + str(dbname))
# gross validation of database
for item in ['dbse', 'GEOS', 'HRXN', 'ACTV', 'RXNM']:
try:
getattr(database, item)
except AttributeError:
raise ValidationError("""Database %s severely deformed with %s missing.""" % (database.__name__, item))
for item in ['TAGL', 'BIND']:
try:
getattr(database, item)
except AttributeError:
print("""Warning: Database %s possibly deformed with %s missing.\n""" % (database.__name__, item))
# form database name
self.dbse = database.dbse
try:
self.tagl = database.TAGL['dbse']
except KeyError:
print("""Warning: TAGL missing for database %s""" % (self.dbse))
# form array of database contents to process through
pieces = []
for item in dir(database):
if item in ['qcdb', 'rxn', 'dbse', 'TAGL']:
pass
elif item.startswith('__'):
pass
else:
pieces.append(item)
# form qcdb.Reagent objects from all defined geometries, GEOS
oHRGT = {}
for rgt, mol in database.GEOS.iteritems():
mol.update_geometry()
try:
tagl = database.TAGL[rgt]
except KeyError:
tagl = None
print("""Warning: TAGL missing for reagent %s""" % (rgt))
oHRGT[rgt] = Reagent(name=rgt, mol=mol, tagl=tagl)
pieces.remove('GEOS')
self.hrgt = oHRGT
# form qcdb.Reaction objects from comprehensive reaction list, HRXN
oHRXN = OrderedDict()
for rxn in database.HRXN:
try:
tagl = database.TAGL[database.dbse + '-' + str(rxn)]
except KeyError:
tagl = None
print("""Warning: TAGL missing for reaction %s""" % (rxn))
try:
elst = database.DATA['SAPT ELST ENERGY'][database.dbse + '-' + str(rxn)]
disp = database.DATA['SAPT DISP ENERGY'][database.dbse + '-' + str(rxn)]
color = abs(elst) / (abs(elst) + abs(disp))
except (KeyError, AttributeError):
color = 'black'
print("""Warning: DATA['SAPT * ENERGY'] missing for reaction %s""" % (rxn))
oHRXN[rxn] = Reaction(name=rxn,
dbse=database.dbse,
indx=database.HRXN.index(rxn) + 1,
color=color,
tagl=tagl)
pieces.remove('HRXN')
self.hrxn = oHRXN
# list and align database stoichiometry modes, ACTV* and RXNM*
oACTV = {}
for modactv in [item for item in pieces if item.startswith('ACTV')]:
modrxnm = modactv.replace('ACTV', 'RXNM')
mode = 'default' if modactv == 'ACTV' else modactv.replace('ACTV_', '')
try:
getattr(database, modrxnm)
except AttributeError:
modrxnm = 'RXNM'
oACTV[mode] = [modactv, modrxnm]
for item in [tmp for tmp in pieces if tmp.startswith('ACTV') or tmp.startswith('RXNM')]:
pieces.remove(item)
# populate reaction matrices in qcdb.Reaction objects
for rxn in database.HRXN:
dbrxn = database.dbse + '-' + str(rxn)
for mode, actvrxnm in oACTV.iteritems():
tdict = OrderedDict()
for rgt in getattr(database, actvrxnm[0])[dbrxn]:
tdict[oHRGT[rgt]] = getattr(database, actvrxnm[1])[dbrxn][rgt]
oHRXN[rxn].rxnm[mode] = tdict
# list embedded quantum chem info per rxn, incl. BIND*
arrsbind = [item for item in pieces if item.startswith('BIND_')]
if len(arrsbind) == 0:
if 'BIND' in pieces:
arrsbind = ['BIND']
else:
arrsbind = []
print("""Warning: No BIND array with reference values.""")
else:
for arrbind in arrsbind:
if getattr(database, arrbind) is database.BIND:
break
else:
print("""Warning: No BIND_* array assigned to be master BIND.""")
oBIND = {}
for arrbind in arrsbind:
ref = database.dbse + 'REF' if arrbind == 'BIND' else arrbind.replace('BIND_', '')
methods[ref] = Method(name=ref)
bases[ref] = BasisSet(name=ref)
try:
getattr(database, 'BINDINFO_' + ref)
except AttributeError:
arrbindinfo = None
print("""Warning: No BINDINFO dict with BIND attribution and modelchem for %s.""" % (ref))
else:
arrbindinfo = 'BINDINFO_' + ref
oBIND[ref] = [methods[ref], 'default', bases[ref], arrbind,
(getattr(database, arrbind) is database.BIND),
arrbindinfo]
for item in [tmp for tmp in pieces if tmp.startswith('BIND')]:
pieces.remove(item)
# populate data with reference values in qcdb.Reaction objects
for rxn in database.HRXN:
dbrxn = database.dbse + '-' + str(rxn)
for ref, info in oBIND.iteritems():
bindval = getattr(database, info[3])[dbrxn]
if info[5] is None:
methodfeed = info[0]
modefeed = info[1]
basisfeed = info[2]
citationkey = 'anon'
else:
bindinforxn = getattr(database, info[5])[dbrxn]
methodfeed = methods[bindinforxn['method'].upper()] if 'method' in bindinforxn else info[0]
modefeed = bindinforxn['mode'] if 'mode' in bindinforxn else info[1]
basisfeed = bases[bindinforxn['basis'].lower()] if 'basis' in bindinforxn else info[2]
citationkey = bindinforxn['citation'].lower() if 'citation' in bindinforxn else 'anon'
citationfeed = pubs[citationkey]
if bindval is not None:
oHRXN[rxn].data[ref] = ReactionDatum(dbse=database.dbse, rxn=rxn,
method=methodfeed, mode=modefeed,
basis=basisfeed, citation=citationfeed,
value=bindval)
# oHRXN[rxn].data[ref] = ReactionDatum(dbse=database.dbse,
# rxn=rxn,
# method=info[0],
# mode=info[1],
# basis=info[2],
# value=bindval)
# #value=getattr(database, info[3])[dbrxn])
if info[4]:
oHRXN[rxn].benchmark = ref
# Process subsets
oSSET = {}
fsHRXN = frozenset(database.HRXN)
for sset in pieces:
if not sset.startswith('AXIS_'):
try:
fssset = frozenset(getattr(database, sset))
except TypeError:
continue
if fssset.issubset(fsHRXN):
oSSET[sset] = getattr(database, sset)
for item in oSSET.keys():
pieces.remove(item)
oSSET['HRXN'] = database.HRXN
self.sset = OrderedDict()
self.oss = OrderedDict() # just in case oss replaces sset someday
for item in oSSET.keys():
if item == 'HRXN_SM':
label = 'small'
elif item == 'HRXN_LG':
label = 'large'
elif item == 'HRXN_EQ':
label = 'equilibrium'
elif item == 'HRXN':
label = 'default'
elif item.startswith('HRXN_'):
label = item.replace('HRXN_', '').lower()
else:
label = item.lower()
# subsets may have different ordering from HRXN
self.sset[label] = OrderedDict()
for rxn in oSSET[item]:
self.sset[label][rxn] = oHRXN[rxn]
# initialize subset objects with light info
try:
sstagl = database.TAGL[item]
except KeyError:
try:
sstagl = database.TAGL[label]
except KeyError:
sstagl = None
print("""Warning: TAGL missing for subset %s""" % (label))
self.oss[label] = Subset(name=label,
hrxn=self.sset[label].keys(),
tagl=sstagl)
# Process axes
for axis in [item for item in pieces if item.startswith('AXIS_')]:
label = axis.replace('AXIS_', '')
try:
defn = getattr(database, axis)
except AttributeError:
raise ValidationError("""Axis %s not importable.""" % (label))
axisrxns = frozenset(defn.keys())
attached = False
for ss, rxns in self.sset.iteritems():
if frozenset(rxns).issubset(axisrxns):
ordered_floats = []
for rx in self.oss[ss].hrxn:
ordered_floats.append(defn[rx])
self.oss[ss].axis[label] = ordered_floats
attached = True
if not attached:
print("""Warning: AXIS %s not affiliated with a subset""" % (label))
pieces.remove(axis)
print("""WrappedDatabase %s: Unparsed attributes""" % (self.dbse), pieces)
def __str__(self):
text = ''
text += """ ==> %s WrappedDatabase <==\n\n""" % (self.dbse)
text += """ Reagents: %s\n""" % (self.hrgt.keys())
text += """ Reactions: %s\n""" % (self.hrxn.keys())
text += """ Subsets: %s\n""" % (self.sset.keys())
text += """ Reference: %s\n""" % (self.benchmark())
text += """\n"""
return text
[docs] def add_ReactionDatum(self, dbse, rxn, method, mode, basis, value, units='kcal/mol', citation=None, comment=None,
overwrite=False):
"""Add a new quantum chemical value to *rxn* by creating a
qcdb.ReactionDatum from same arguments as that class's
object-less constructor. *rxn* may be actual Reaction.name
or Reaction.indx.
"""
if (self.dbse == dbse):
if rxn in self.hrxn:
rxnname = rxn # rxn is proper reaction name
else:
try:
if (rxn + 1 > 0) and (rxn == self.hrxn.items()[rxn - 1][1].indx):
rxnname = self.hrxn.items()[rxn - 1][1].name # rxn is reaction index (maybe dangerous?)
except (TypeError, IndexError):
raise ValidationError(
"""Inconsistent to add ReactionDatum for %s to database %s with reactions %s.""" %
(dbse + '-' + str(rxn), self.dbse, self.hrxn.keys()))
label = '-'.join([method, mode, basis])
if overwrite or (label not in self.hrxn[rxnname].data):
self.hrxn[rxnname].data[label] = ReactionDatum.library_modelchem(dbse=dbse, rxn=rxnname,
method=method, mode=mode, basis=basis,
value=value, units=units,
comment=comment, citation=citation)
else:
raise ValidationError("""ReactionDatum %s already present in Database.""" % (label))
else:
raise ValidationError("""Inconsistent to add ReactionDatum for %s to database %s.""" %
(dbse + '-' + str(rxn), self.dbse))
[docs] def add_Subset(self, name, func):
"""Define a new subset labeled *name* by providing a function
*func* that filters *self.hrxn*.
"""
sname = name.lower().split('\n')
label = sname.pop(0)
tagl = sname[0].strip() if sname else None
try:
filtered = func(self)
lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in filtered]
except TypeError as e:
raise ValidationError("""Function %s did not return list: %s.""" % (func.__name__, str(e)))
if len(lsslist) == 0:
print("""WrappedDatabase %s: Subset %s NOT formed: empty""" % (self.dbse, label))
return
self.sset[label] = OrderedDict()
for rxn in lsslist:
self.sset[label][rxn] = self.hrxn[rxn]
self.oss[label] = Subset(name=label,
hrxn=self.sset[label].keys(),
tagl=tagl)
print("""WrappedDatabase %s: Subset %s formed: %d""" % (self.dbse, label, len(self.sset[label].keys())))
[docs] def compute_errors(self, modelchem, benchmark='default', sset='default', failoninc=True, verbose=False):
"""For full database or subset *sset*, computes raw reaction
errors between *modelchem* and *benchmark* model chemistries.
Returns error if model chemistries are missing for any reaction in
subset unless *failoninc* set to False, whereupon returns partial.
Returns dictionary of reaction labels and error forms.
"""
if isinstance(sset, basestring):
# sset is normal subset name 'MX' corresponding to HRXN_MX or MX array in database module
try:
lsset = self.sset[sset.lower()]
except KeyError as e:
# raise ValidationError("""Subset named %s not available""" % (str(e)))
lsset = OrderedDict()
else:
if callable(sset):
# sset is function that will generate subset of HRXN from sset(self)
lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in sset(self)]
else:
# sset is array containing reactions
lsslist = [rxn for rxn in self.sset['default'].keys() if rxn in sset]
# assemble dict of qcdb.Reaction objects from array of reaction names
lsset = OrderedDict()
for rxn in lsslist:
lsset[rxn] = self.hrxn[rxn]
# cureinfo = self.get_pec_weightinfo()
err = {}
for rxn, oRxn in lsset.iteritems():
lbench = oRxn.benchmark if benchmark == 'default' else benchmark
try:
mcLesser = oRxn.data[modelchem].value
except KeyError as e:
if failoninc:
raise ValidationError("""Reaction %s missing datum %s.""" % (str(rxn), str(e)))
else:
continue
try:
mcGreater = oRxn.data[lbench].value
except KeyError as e:
if lbench == 'ZEROS':
pass
else:
print("""Reaction %s missing benchmark""" % (str(rxn)))
continue
# handle particulars of PEC error measures
# rxncureinfo = cureinfo[rxn]
# try:
# mcGreaterCrvmin = self.hrxn[rxncureinfo['eq']].data[lbench].value
# except KeyError as e:
# print """Reaction %s missing benchmark""" % (str(eqrxn))
# cure_denom = cure_weight(refrxn=mcGreater, refeq=mcGreaterCrvmin, rrat=rxncureinfo['Rrat'])
# balanced_mask, balwt = balanced_error(refrxn=mcGreater, refeq=mcGreaterCrvmin, rrat=rxncureinfo['Rrat'])
if lbench == 'ZEROS':
err[rxn] = [mcLesser,
0.0, 0.0, 0.0, 1.0] # FAKE
else:
err[rxn] = [mcLesser - mcGreater,
(mcLesser - mcGreater) / abs(mcGreater),
(mcLesser - mcGreater) / abs(mcGreater), # FAKE
(mcLesser - mcGreater) / abs(mcGreater), # FKAE
1.0 # FAKE
]
# (mcLesser - mcGreater) / abs(cure_denom),
# (mcLesser - mcGreater) * balanced_mask / abs(mcGreaterCrvmin),
# balwt]
if verbose:
print("""p = %8.4f, pe = %8.3f%%, pbe = %8.3f%% pce = %8.3f%% reaction %s.""" %
(err[rxn][0], 100 * err[rxn][1], 100 * err[rxn][3], 100 * err[rxn][2], str(rxn)))
return err
[docs] def compute_statistics(self, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False, returnindiv=False):
"""For full database or subset *sset*, computes many error
statistics between single *modelchem* and *benchmark* model
chemistries. Returns error if model chemistries are missing
for any reaction in subset unless *failoninc* set to False,
whereupon returns partial statistics. Returns dictionary of
statistics labels and values.
"""
err = self.compute_errors(modelchem, benchmark=benchmark, sset=sset, failoninc=failoninc, verbose=verbose)
if len(err) == 0:
error = initialize_errors()
if verbose:
print("""Warning: nothing to compute.""")
else:
Nrxn = float(len(err))
error = OrderedDict()
# linear (absolute) error
linear = [val[0] for val in err.values()]
error['pexe'] = max(linear)
error['nexe'] = min(linear)
error['maxe'] = max(linear, key=lambda x: abs(x))
error['mine'] = min(linear, key=lambda x: abs(x))
error['me'] = sum(linear) / Nrxn
error['mae'] = sum(map(abs, linear)) / Nrxn
error['rmse'] = math.sqrt(sum(map(lambda x: x ** 2, linear)) / Nrxn)
error['stde'] = math.sqrt((sum(map(lambda x: x ** 2, linear)) - (sum(linear) ** 2) / Nrxn) / Nrxn)
# fractional (relative) error
relative = [val[1] for val in err.values()]
error['pexpe'] = max(relative)
error['nexpe'] = min(relative)
error['maxpe'] = max(relative, key=lambda x: abs(x))
error['minpe'] = min(relative, key=lambda x: abs(x))
error['mpe'] = sum(relative) / Nrxn
error['mape'] = sum(map(abs, relative)) / Nrxn
error['rmspe'] = math.sqrt(sum(map(lambda x: x ** 2, relative)) / Nrxn)
error['stdpe'] = math.sqrt((sum(map(lambda x: x ** 2, relative)) - (sum(relative) ** 2) / Nrxn) / Nrxn)
# balanced (relative) error
balanced = [val[3] for val in err.values()]
balwt = sum([val[4] for val in err.values()]) # get the wt fn. highly irregular TODO
error['pexpbe'] = max(balanced)
error['nexpbe'] = min(balanced)
error['maxpbe'] = max(balanced, key=lambda x: abs(x))
error['minpbe'] = min(balanced, key=lambda x: abs(x))
error['mpbe'] = sum(balanced) / balwt #Nrxn
error['mapbe'] = sum(map(abs, balanced)) / balwt #Nrxn
error['rmspbe'] = math.sqrt(sum(map(lambda x: x ** 2, balanced)) / balwt) #Nrxn)
error['stdpbe'] = None # get math domain errors w/wt in denom math.sqrt((sum(map(lambda x: x ** 2, balanced)) - (sum(balanced) ** 2) / balwt) / balwt) #/ Nrxn) / Nrxn)
# capped (relative) error
capped = [val[2] for val in err.values()]
error['pexpce'] = max(capped)
error['nexpce'] = min(capped)
error['maxpce'] = max(capped, key=lambda x: abs(x))
error['minpce'] = min(capped, key=lambda x: abs(x))
error['mpce'] = sum(capped) / Nrxn
error['mapce'] = sum(map(abs, capped)) / Nrxn
error['rmspce'] = math.sqrt(sum(map(lambda x: x ** 2, capped)) / Nrxn)
error['stdpce'] = math.sqrt((sum(map(lambda x: x ** 2, capped)) - (sum(capped) ** 2) / Nrxn) / Nrxn)
if verbose:
print("""%d systems in %s for %s vs. %s, subset %s.\n%s""" %
(len(err), self.dbse, modelchem, benchmark, sset, format_errors(error, mode=2)))
if returnindiv:
return error, err
else:
return error
[docs] def load_qcdata(self, modname, funcname, pythonpath=None, failoninc=True):
"""Loads qcdb.ReactionDatums from module *modname* function
*funcname*. Module search path can be prepended with *pythonpath*.
"""
if pythonpath is not None:
sys.path.insert(1, pythonpath)
else:
sys.path.append(os.path.dirname(__file__) + '/../data')
try:
datamodule = __import__(modname)
except ImportError:
if not failoninc:
print("""%s data unavailable for database %s.\n""" % (modname, self.dbse))
return
else:
print("""\nPython module for database data %s failed to load\n\n""" % (modname))
print("""\nSearch path that was tried:\n""")
print(', '.join(map(str, sys.path)))
raise ValidationError("""Python module loading problem for database data """ + str(modname))
try:
getattr(datamodule, funcname)(self)
except AttributeError:
if not failoninc:
print("""%s %s data unavailable for database %s.\n""" % (modname, funcname, self.dbse))
return
else:
raise ValidationError("Python module missing function %s for loading data " % (str(funcname)))
print("""WrappedDatabase %s: %s %s results loaded""" % (self.dbse, modname, funcname))
[docs] def load_qcdata_byproject(self, project, pythonpath=None):
"""Loads qcdb.ReactionDatums from standard location for *project*
:module dbse_project and function load_project. Module search path
can be prepended with *pythonpath*.
"""
mod = self.dbse + '_' + project
func = 'load_' + project
self.load_qcdata(modname=mod, funcname=func, pythonpath=pythonpath)
[docs] def load_qcdata_hrxn_byproject(self, project, path=None):
""""""
if path is None:
path = os.path.dirname(__file__) + '/../data'
pklfile = os.path.abspath(path) + os.sep + self.dbse + '_hrxn_' + project + '.pickle'
if not os.path.isfile(pklfile):
raise ValidationError(
"Reactions pickle file for loading database data from file %s does not exist" % (pklfile))
with open(pklfile, 'rb') as handle:
hrxns = pickle.load(handle)
# no error checking for speed
for rxn, data in hrxns.iteritems():
self.hrxn[rxn].data.update(data)
[docs] def load_qcdata_hdf5_trusted(self, project, path=None):
"""Loads qcdb.ReactionDatums from HDF5 file at path/dbse_project.h5 .
If path not given, looks in qcdb/data. This file is written by
reap-DB and so has been largely validated.
"""
if path is None:
path = os.path.dirname(__file__) + '/../data'
hdf5file = os.path.abspath(path) + os.sep + self.dbse + '_' + project + '.h5'
if not os.path.isfile(hdf5file):
raise ValidationError("HDF5 file for loading database data from file %s does not exist" % (hdf5file))
try:
import pandas as pd
except ImportError:
raise ValidationError("Pandas data managment module must be available for import")
try:
next(self.hrxn.iterkeys()) + 1
except TypeError:
intrxn = False
else:
intrxn = True
with pd.get_store(hdf5file) as handle:
for mc in handle['pdie'].keys():
lmc = mc.split('-') # TODO could be done better
method = lmc[0]
bsse = '_'.join(lmc[1:-1])
basis = lmc[-1]
df = handle['pdie'][mc]
for dbrxn in df.index[df.notnull()].values:
[dbse, rxn] = dbrxn.split('-', 1)
if intrxn:
rxn = int(rxn)
self.hrxn[rxn].data[mc] = ReactionDatum.library_modelchem(dbse=dbse, rxn=rxn,
method=method, mode=bsse, basis=basis,
value=df[dbrxn])
[docs] def integer_reactions(self):
"""Returns boolean of whether reaction names need to be cast to integer"""
try:
next(self.hrxn.iterkeys()) + 1
except TypeError:
return False
else:
return True
@staticmethod
[docs] def load_pickled(dbname, path=None):
"""
"""
if path is None:
path = os.path.dirname(__file__) + '/../data'
picklefile = psiutil.findfile_ignorecase(dbname,
pre=os.path.abspath(path) + os.sep, post='_WDb.pickle')
if not picklefile:
raise ValidationError("Pickle file for loading database data from file %s does not exist" % (
os.path.abspath(path) + os.sep + dbname + '.pickle'))
# with open('/var/www/html/bfdb_devel/bfdb/scratch/ASDFlogfile.txt', 'a') as handle:
# handle.write('<!-- PICKLE %s\n' % (picklefile))
with open(picklefile, 'rb') as handle:
instance = pickle.load(handle)
return instance
[docs] def available_modelchems(self, union=True):
"""Returns all the labels of model chemistries that have been
loaded. Either all modelchems that have data for any reaction if
*union* is True or all modelchems that have data for all reactions
if *union* is False.
"""
mcs = [set(v.data) for v in self.hrxn.itervalues()]
if union:
return sorted(set.union(*mcs))
else:
return sorted(set.intersection(*mcs))
[docs] def benchmark(self):
"""Returns the model chemistry label for the database's benchmark."""
bm = None
rxns = self.hrxn.itervalues()
while bm is None:
try:
bm = next(rxns).benchmark
except StopIteration:
break
return bm
# return next(self.hrxn.itervalues()).benchmark
# TODO all rxns have same bench in db module so all have same here in obj
# but the way things stored in Reactions, this doesn't have to be so
[docs] def load_subsets(self, modname='subsetgenerator', pythonpath=None):
"""Loads subsets from all functions in module *modname*.
"""
if pythonpath is not None:
sys.path.insert(1, pythonpath)
else:
sys.path.append(os.path.dirname(__file__))
try:
ssmod = __import__(modname)
except ImportError:
print("""\nPython module for database data %s failed to load\n\n""" % (modname))
print("""\nSearch path that was tried:\n""")
print(', '.join(map(str, sys.path)))
raise ValidationError("Python module loading problem for database subset generator " + str(modname))
for func in dir(ssmod):
if callable(getattr(ssmod, func)):
self.add_Subset(getattr(ssmod, func).__doc__, getattr(ssmod, func))
print("""WrappedDatabase %s: Defined subsets loaded""" % (self.dbse))
[docs] def get_pec_weightinfo(self):
"""
"""
def closest(u, options):
return max(options, key=lambda v: len(os.path.commonprefix([u, v])))
dbdat = {}
oss = self.oss['default']
eqrxns = [rxn for rxn, rr in zip(oss.hrxn, oss.axis['Rrat']) if rr == 1.0]
for rxnix, rxn in enumerate(oss.hrxn):
dbdat[rxn] = {'eq': closest(rxn, eqrxns),
'Rrat': oss.axis['Rrat'][rxnix]}
return dbdat
# def table_simple1(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True,
# plotpath='analysis/flats/flat_', theme='smmerge'):
# rowplan = ['bas', 'mtd']
# columnplan = [
# ['l', r"""Method \& Basis Set""", '', textables.label, {}],
# ['d', r'S22', 'HB', textables.val, {'sset': 'hb'}],
# ['d', r'S22', 'MX', textables.val, {'sset': 'mx'}],
# ['d', r'S22', 'DD', textables.val, {'sset': 'dd'}],
# ['d', r'S22', 'TT', textables.val, {'sset': 'default'}],
# ]
#
# def table_simple2(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True,
# plotpath='analysis/flats/flat_', theme='smmerge'):
# rowplan = ['bas', 'mtd']
# columnplan = [
# ['l', r"""Method \& Basis Set""", '', textables.label, {}],
# ['d', r'MAE', 'HB', textables.val, {'sset': 'hb'}],
# ['d', r'MAE', 'MX', textables.val, {'sset': 'mx'}],
# ['d', r'MAE', 'DD', textables.val, {'sset': 'dd'}],
# ['d', r'MAE', 'TT', textables.val, {'sset': 'default'}],
# ['d', r'MA\%E', 'HB', textables.val, {'sset': 'hb', 'err': 'mape'}],
# ['d', r'MA\%E', 'MX', textables.val, {'sset': 'mx', 'err': 'mape'}],
# ['d', r'MA\%E', 'DD', textables.val, {'sset': 'dd', 'err': 'mape'}],
# ['d', r'MA\%E', 'TT', textables.val, {'sset': 'default', 'err': 'mape'}],
# ['d', r'maxE', 'TT ', textables.val, {'sset': 'default', 'err': 'maxe'}],
# ['d', r'min\%E', ' TT', textables.val, {'sset': 'default', 'err': 'minpe'}],
# ['d', r'rmsE', 'TT ', textables.val, {'sset': 'default', 'err': 'rmse'}],
# ['d', r'devE', ' TT', textables.val, {'sset': 'default', 'err': 'stde'}],
# ]
#
# def table_simple3(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True,
# plotpath='analysis/flats/flat_', theme='smmerge'):
# rowplan = ['err', 'bas', 'mtd']
# columnplan = [
# ['l', r"""Method \& Basis Set""", '', textables.label, {}],
# ['d', r'MAE', 'HB', textables.val, {'sset': 'hb'}],
# ['d', r'MAE', 'MX', textables.val, {'sset': 'mx'}],
# ['d', r'MAE', 'DD', textables.val, {'sset': 'dd'}],
# ['d', r'MAE', 'TT', textables.val, {'sset': 'default'}],
# ]
#
# def table_simple4(self, mtd, bas, opt=['CP'], err=['mae'], benchmark='default', failoninc=True,
# plotpath='analysis/flats/flat_', theme='smmerge'):
# plotpath = 'autogen' # TODO handle better
# rowplan = ['bas', 'mtd']
# columnplan = [
# ['l', r"""Method \& Basis Set""", '', textables.label, {}],
# ['d', r'S22', 'HB', textables.val, {'sset': 'hb'}],
# ['d', r'S22', 'MX', textables.val, {'sset': 'mx'}],
# ['d', r'S22', 'DD', textables.val, {'sset': 'dd'}],
# ['d', r'S22', 'TT', textables.val, {'sset': 'default'}],
# # ['l', r"""Error Distribution\footnotemark[1]""", r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'), textables.graphics, {}],
# ['l', r"""Error Distribution\footnotemark[1]""", r"""""", textables.graphics, {}],
# ]
[docs]class Database(object):
"""Collection for handling single or multiple qcdb.WrappedDatabase objects.
Particularly, unifying modelchem and subset names that when inconsistent
across component databases. Also, defining statistics across databases.
>>> asdf = qcdb.Database(['s22', 'Nbc10', 'hbc6', 'HSG'], 'DB4')
>>> qwer = qcdb.Database('s22')
"""
def __init__(self, dbnamelist, dbse=None, pythonpath=None, loadfrompickle=False, path=None):
#: internal name of database collection
#:
#: >>> print asdf.dbse
#: 'DB4'
self.dbse = None
#: ordered component Database objects
#:
#: >>> print asdf.dbdict
#: XXXX
self.dbdict = OrderedDict()
#: subset assembly pattern
#:
#: >>> print asdf.sset.keys()
#: XXXX
self.sset = OrderedDict()
#: assembly pattern for transspecies modelchems
#:
#: >>> print asdf.mcs.keys()
#: XXXX
self.mcs = {}
self.benchmark = None
# slight validation, repackaging into dbnamelist
if isinstance(dbnamelist, basestring):
dbnamelist = [dbnamelist]
elif all(isinstance(item, basestring) for item in dbnamelist):
pass
else:
raise ValidationError('Database::constructor: Inappropriate configuration of constructor arguments')
# load databases
for db in dbnamelist:
if loadfrompickle:
tmp = WrappedDatabase.load_pickled(db, path=path)
else:
tmp = WrappedDatabase(db, pythonpath=pythonpath)
self.dbdict[tmp.dbse] = tmp
# slurp up the obvious overlaps
consolidated_bench = [odb.benchmark() for odb in self.dbdict.values()]
if len(set(consolidated_bench)) == 1:
self.benchmark = consolidated_bench[0]
else:
self.benchmark = ''.join(consolidated_bench)
self.mcs[self.benchmark] = consolidated_bench
# methods[ref] = Method(name=ref)
# bases[ref] = BasisSet(name=ref)
self.mcs['default'] = consolidated_bench
# self.mcs['default'] = [odb.benchmark() for odb in self.dbdict.values()]
self._intersect_subsets()
self._intersect_modelchems()
# complex subsets
self.load_subsets()
# collection name
self.dbse = ''.join(self.dbdict.keys()) if dbse is None else dbse
# merge Reaction-s
self.hrxn = OrderedDict()
for db, odb in self.dbdict.iteritems():
for rxn, orxn in odb.hrxn.iteritems():
self.hrxn[orxn.dbrxn] = orxn
# merge Reagent-s
self.hrgt = OrderedDict()
for db, odb in self.dbdict.iteritems():
for rgt, orgt in odb.hrgt.iteritems():
self.hrgt[orgt.name] = orgt
print("""Database %s: %s""" % (self.dbse, ', '.join(self.dbdict.keys())))
def __str__(self):
text = ''
text += """ ===> %s Database <===\n\n""" % (self.dbse)
# text += """ Reagents: %s\n""" % (self.hrgt.keys())
# text += """ Reactions: %s\n""" % (self.hrxn.keys())
text += """ Subsets: %s\n""" % (self.sset.keys())
# text += """ Reference: %s\n""" % ('default: ' + ' + '.join(self.mcs['default']))
try:
text += """ Reference: %s\n""" % (self.benchmark + ': ' + ' + '.join(self.mcs[self.benchmark]))
except TypeError:
text += """ Reference: %s\n""" % ('UNDEFINED')
text += """ Model Chemistries: %s\n""" % (
', '.join(sorted([mc for mc in self.mcs.keys() if mc is not None])))
text += """\n"""
for db in self.dbdict.keys():
text += self.dbdict[db].__str__()
return text
# def benchmark(self):
# """Returns the model chemistry label for the database's benchmark."""
# return self.benchmark #TODO not sure if right way to go about this self.mcs['default']
[docs] def fancy_mcs(self, latex=False):
"""
"""
fmcs = {}
for mc in self.mcs.keys():
try:
mtd, mod, bas = mc.split('-')
except ValueError:
fmcs[mc] = mc
else:
if latex:
tmp = """%s/%s, %s""" % \
(methods[mtd].latex, bases[bas].latex, mod.replace('_', '\\_'))
fmcs[mc] = """%45s""" % (tmp)
else:
fmcs[mc] = """%20s / %-20s, %s""" % \
(methods[mtd].fullname, bases[bas].fullname, mod)
return fmcs
# def fancy_mcs_nested(self):
# """
# """
# fmcs = defaultdict(lambda: defaultdict(dict))
# for mc in self.mcs.keys():
# try:
# mtd, mod, bas = mc.split('-')
# except ValueError:
# fmcs['All']['All'][mc] = mc
# fmcs['Method']['Others'][mc] = mc
# fmcs['Options']['Others'][mc] = mc
# fmcs['Basis Treatment']['Others'][mc] = mc
# else:
# fancyrepr = """%20s / %-20s %s""" % (methods[mtd].latex, bases[bas].latex, mod)
# fmcs['All']['All'][mc] = fancyrepr
# fmcs['Method'][methods[mtd].latex][mc] = fancyrepr
# fmcs['Options'][mod][mc] = fancyrepr
# fmcs['Basis Treatment'][bases[bas].latex][mc] = fancyrepr
# return fmcs
[docs] def integer_reactions(self):
"""Returns boolean of whether reaction names need to be cast to integer"""
return {db: odb.integer_reactions() for db, odb in self.dbdict.items()}
[docs] def load_qcdata_byproject(self, project, pythonpath=None):
"""For each component database, loads qcdb.ReactionDatums from
standard location for *project* :module dbse_project and function
load_project. Module search path can be prepended with *pythonpath*.
"""
for db, odb in self.dbdict.items():
odb.load_qcdata_byproject(project, pythonpath=pythonpath)
self._intersect_modelchems()
[docs] def load_qcdata_hdf5_trusted(self, project, path=None):
"""For each component database, loads qcdb.ReactionDatums from
HDF5 file at path/dbse_project.h5 . If path not given, looks in
qcdb/data. This file is written by reap-DB and so has been largely
validated.
"""
for db, odb in self.dbdict.items():
odb.load_qcdata_hdf5_trusted(project, path=path)
self._intersect_modelchems()
[docs] def load_qcdata_hrxn_byproject(self, project, path=None):
for db, odb in self.dbdict.items():
odb.load_qcdata_hrxn_byproject(project, path=path)
self._intersect_modelchems()
[docs] def available_projects(self, path=None):
""""""
import glob
if path is None:
path = os.path.dirname(__file__) + '/../data'
projects = []
for pjfn in glob.glob(path + '/*_hrxn_*.pickle'):
pj = pjfn[:-7].split('_')[-1]
projects.append(pj)
complete_projects = []
for pj in set(projects):
if all([os.path.isfile(path + '/' + db + '_hrxn_' + pj + '.pickle') for db in self.dbdict.keys()]):
complete_projects.append(pj)
return complete_projects
[docs] def load_subsets(self, modname='subsetgenerator', pythonpath=None):
"""For each component database, loads subsets from all functions
in module *modname*. Default *modname* usues standard generators.
"""
for db, odb in self.dbdict.items():
odb.load_subsets(modname=modname, pythonpath=pythonpath)
self._intersect_subsets()
[docs] def add_Subset(self, name, func):
"""Define a new subset labeled *name* by providing a database
*func* whose keys are the keys of dbdict and whose values are a
function that filters each WrappedDatabase's *self.hrxn*.
"""
label = name.lower()
merged = []
for db, odb in self.dbdict.iteritems():
if callable(func[db]):
ssfunc = func[db]
else:
ssfunc = lambda x: func[db]
odb.add_Subset(name=name, func=ssfunc)
if name in odb.sset:
merged.append(name)
else:
merged.append(None)
if any(merged):
self.sset[label] = merged
print("""Database %s: Subset %s formed: %s""" % (self.dbse, label, self.sset[label]))
else:
print("""Database %s: Subset %s NOT formed: empty""" % (self.dbse, label))
[docs] def add_Subset_union(self, name, sslist):
"""
Define a new subset labeled *name* (note that there's nothing to
prevent overwriting an existing subset name) from the union of
existing named subsets in *sslist*.
"""
funcdb = {}
for db, odb in self.dbdict.iteritems():
dbix = self.dbdict.keys().index(db)
overlapping_dbrxns = []
for ss in sslist:
lss = self.sset[ss][dbix]
if lss is not None:
overlapping_dbrxns.append(self.dbdict[db].sset[lss].keys())
rxnlist = set().union(*overlapping_dbrxns)
funcdb[db] = rxnlist
self.add_Subset(name, funcdb)
[docs] def add_sampled_Subset(self, sset='default', number_of_samples=1, sample_size=5, prefix='rand'):
"""Generate and register *number_of_samples* new subsets of size
*sample_size* and name built from *prefix*. Reactions chosen from *sset*.
"""
import random
intrxn = self.integer_reactions()
rxns = self.get_hrxn(sset=sset).keys()
def random_sample(ssname):
"""Generate and register a single new subset of size *sample_size* and
name *ssname*.
"""
sample = {db: [] for db in self.dbdict.keys()}
for dbrxn in random.sample(rxns, sample_size):
db, rxn = dbrxn.split('-', 1)
typed_rxn = int(rxn) if intrxn[db] else rxn
sample[db].append(typed_rxn)
self.add_Subset(ssname, sample)
for sidx in range(number_of_samples):
if number_of_samples == 1:
ssname = prefix
else:
ssname = prefix + '_' + str(sidx)
random_sample(ssname)
def _intersect_subsets(self):
"""Examine component database subsets and collect common names as
Database subset.
"""
sss = [set(odb.sset.keys()) for db, odb in self.dbdict.items()]
new = sorted(set.intersection(*sss))
for ss in new:
self.sset[ss] = [ss] * len(self.dbdict.keys())
def _intersect_modelchems(self):
"""Examine component database qcdata and collect common names as
Database modelchem.
"""
mcs = [set(odb.available_modelchems()) for odb in self.dbdict.itervalues()]
new = sorted(set.intersection(*mcs))
for mc in new:
self.mcs[mc] = [mc] * len(self.dbdict.keys())
# def reaction_generator(self):
# """
# """
# for db, odb in self.dbdict.items():
# for rxn, orxn in odb.hrxn.items():
# yield orxn
[docs] def compute_statistics(self, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False, returnindiv=False):
"""Computes summary statistics and, if *returnindiv* True,
individual errors for single model chemistry *modelchem* versus
*benchmark* over subset *sset* over all component databases.
Particularly, imposes cross-database definitions for sset and
modelchem.
#Returns error if model chemistries are missing
#for any reaction in subset unless *failoninc* set to False,
#whereupon returns partial statistics. Returns dictionary of
#statistics labels and values.
"""
errors = OrderedDict()
indiv = OrderedDict()
actvdb = []
for db, odb in self.dbdict.items():
dbix = self.dbdict.keys().index(db)
if self.sset[sset][dbix] is None:
errors[db], indiv[db] = (None, None)
else:
errors[db], indiv[db] = odb.compute_statistics(self.mcs[modelchem][dbix],
sset=self.sset[sset][dbix],
benchmark='ZEROS' if benchmark == 'ZEROS' else self.mcs[benchmark][dbix],
failoninc=failoninc, verbose=verbose, returnindiv=True)
actvdb.append(errors[db])
errors[self.dbse] = average_errors(*actvdb)
if returnindiv:
return errors, indiv
else:
return errors
[docs] def analyze_modelchems(self, modelchem, benchmark='default', failoninc=True, verbose=False):
"""For each component database, compute and print nicely formatted
summary error statistics for each model chemistry in array
*modelchem* versus *benchmark* for all available subsets.
"""
# compute errors
errors = {}
for mc in modelchem:
errors[mc] = {}
for ss in self.sset.keys():
errors[mc][ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss,
failoninc=failoninc, verbose=verbose, returnindiv=False)
# present errors
pre, suf, mid = string_contrast(modelchem)
text = """\n ==> %s %s[]%s Errors <==\n""" % (self.dbse, pre, suf)
text += """%20s %44s""" % ('', '==> ' + self.dbse + ' <==')
for db, odb in self.dbdict.items():
text += """%44s""" % ('=> ' + odb.dbse + ' <=')
text += '\n'
collabel = """ {:5} {:4} {:6} {:6} {:6}""".format(
'ME', 'STDE', 'MAE', 'MA%E', 'MA%BE')
text += """{:20} """.format('') + collabel
for db in self.dbdict.keys():
text += collabel
text += '\n'
text += """{:20} {}""".format('', '=' * 44)
ul = False
for db in self.dbdict.keys():
text += """{}""".format('_' * 44 if ul else ' ' * 44)
ul = not ul
text += '\n'
for ss in self.sset.keys():
text += """ => %s <=\n""" % (ss)
for mc in modelchem:
perr = errors[mc][ss]
text += """%20s %44s""" % (mid[modelchem.index(mc)],
format_errors(perr[self.dbse]))
for db in self.dbdict.keys():
text += """%44s""" % ('' if perr[db] is None else format_errors(perr[db]))
text += '\n'
print(text)
[docs] def plot_bars(self, modelchem, benchmark='default', sset=['default', 'hb', 'mx', 'dd'],
failoninc=True, verbose=False, view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Prepares 'grey bars' diagram for each model chemistry in array
*modelchem* versus *benchmark* over all component databases. A wide bar
is plotted with three smaller bars, corresponding to the 'mae'
summary statistic of the four subsets in *sset*.
*saveas* conveys directory ('/') and/or filename for saving the
resulting plot. File extension is not accessible, but *graphicsformat*
array requests among 'png', 'pdf', and 'eps' formats. *relpath*
forces paths to saved files to be relative to current directory,
rather than absolute paths for returned code and file dictionary.
Prepares bars diagram instructions and either executes them if
matplotlib available (Canopy or Anaconda) or prints them. Returns a
dictionary of all saved plot filenames.
>>> asdf.plot_bars(['MP2-CP-adz', 'MP2-CP-adtz'], sset=['tt-5min', 'hb-5min', 'mx-5min', 'dd-5min'])
"""
# compute errors
errors = {}
for mc in modelchem:
if mc is not None:
errors[mc] = {}
for ss in sset:
errors[mc][ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss,
failoninc=failoninc, verbose=verbose, returnindiv=False)
# repackage
pre, suf, mid = string_contrast(modelchem)
dbdat = []
for mc in modelchem:
if mc is None:
dbdat.append(None)
else:
dbdat.append({'mc': mid[modelchem.index(mc)],
'data': [errors[mc][ss][self.dbse]['mae'] for ss in sset]})
title = self.dbse + ' ' + pre + '[]' + suf + ' ' + ','.join(sset)
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict = mpl.bars(%s,\n title='%s'\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, title, repr(saveas), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict = mpl.bars(dbdat, title=title,
view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
# def get_pec_weightinfo(self):
# """
#
# """
# def closest(u, options):
# return max(options, key=lambda v: len(os.path.commonprefix([u, v])))
#
# dbdat = {}
# for db, odb in self.dbdict.iteritems():
# #dbix = self.dbdict.keys().index(db)
# oss = odb.oss['default']
# eqrxns = [rxn for rxn, rr in zip(oss.hrxn, oss.axis['Rrat']) if rr == 1.0]
# for rxnix, rxn in enumerate(oss.hrxn):
# dbrxn = '-'.join([db, rxn])
# rrat = oss.axis['Rrat'][rxnix]
# eq = closest(rxn, eqrxns)
# print rxn, rxnix, eq, rrat, dbrxn
# dbdat[dbrxn] = {'eq': eq, 'Rrat': rrat}
# return dbdat
[docs] def plot_axis(self, axis, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False, color='sapt', view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""
"""
dbdatdict = OrderedDict()
for mc in modelchem:
# compute errors
errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset,
failoninc=failoninc, verbose=verbose, returnindiv=True)
# repackage
dbdat = []
for db, odb in self.dbdict.iteritems():
dbix = self.dbdict.keys().index(db)
oss = odb.oss[self.sset[sset][dbix]]
# TODO may need to make axis name distributable across wrappeddbs
# TODO not handling mc present bm absent
if indiv[db] is not None:
for rxn in oss.hrxn:
rxnix = oss.hrxn.index(rxn)
bm = self.mcs[benchmark][dbix]
bmpresent = False if (bm is None or bm not in odb.hrxn[rxn].data) else True
mcpresent = False if (self.mcs[mc][dbix] not in odb.hrxn[rxn].data) else True
entry = {'db': db,
'sys': str(rxn),
'color': odb.hrxn[rxn].color,
'axis': oss.axis[axis][rxnix]}
if bmpresent:
entry['bmdata'] = odb.hrxn[rxn].data[self.mcs[benchmark][dbix]].value
else:
entry['bmdata'] = None
if mcpresent:
entry['mcdata'] = odb.hrxn[rxn].data[self.mcs[mc][dbix]].value
else:
continue
if bmpresent and mcpresent:
entry['error'] = [indiv[db][rxn][0]]
else:
entry['error'] = [None]
dbdat.append(entry)
dbdatdict[fancify_mc_tag(mc).strip()] = dbdat
pre, suf, mid = string_contrast(modelchem)
title = """%s[%s]%s vs %s axis %s for %s subset %s""" % (pre, str(len(mid)), suf, benchmark, axis, self.dbse, sset)
print(title)
#for mc, dbdat in dbdatdict.iteritems():
# print mc
# for d in dbdat:
# print '{:20s} {:8.2f} {:8.2f} {:8.2f}'.format(d['sys'], d['axis'],
# 0.0 if d['bmdata'] is None else d['bmdata'],
# 0.0 if d['mcdata'] is None else d['mcdata'])
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict = mpl.valerr(%s,\n color='%s',\n title='%s',\n xtitle='%s',\n view=%s\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, color, title, axis, view, repr(saveas), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict = mpl.valerr(dbdatdict, color=color, title=title, xtitle=axis,
view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def load_saptdata_frombfdb(self, sset='default',
pythonpath='/Users/loriab/linux/bfdb/sapt_punt', failoninc=True): # pythonpath=None
"""This is a stopgap function that loads sapt component data from
sapt_punt in bfdb repo.
"""
saptpackage = OrderedDict()
for db, odb in self.dbdict.items():
modname = 'sapt_' + odb.dbse
if pythonpath is not None:
sys.path.insert(1, pythonpath)
else:
sys.path.append(os.path.dirname(__file__) + '/../data')
try:
datamodule = __import__(modname)
except ImportError:
print("""\nPython module for database data %s failed to load\n\n""" % (modname))
print("""\nSearch path that was tried:\n""")
print(', '.join(map(str, sys.path)))
raise ValidationError("Python module loading problem for database subset generator " + str(modname))
try:
saptdata = getattr(datamodule, 'DATA')
except AttributeError:
raise ValidationError("SAPT punt module does not contain DATA" + str(modname))
saptmc = saptdata['SAPT MODELCHEM']
dbix = self.dbdict.keys().index(db)
for rxn, orxn in odb.hrxn.iteritems():
lss = self.sset[sset][dbix]
if lss is not None:
if rxn in odb.sset[lss]:
dbrxn = orxn.dbrxn
try:
elst = saptdata['SAPT ELST ENERGY'][dbrxn]
exch = saptdata['SAPT EXCH ENERGY'][dbrxn]
ind = saptdata['SAPT IND ENERGY'][dbrxn]
disp = saptdata['SAPT DISP ENERGY'][dbrxn]
except (KeyError, AttributeError):
print("""Warning: DATA['SAPT * ENERGY'] missing for reaction %s""" % (dbrxn))
if failoninc:
break
else:
if not all([elst, ind, disp]): # exch sometimes physically zero
print("""Warning: DATA['SAPT * ENERGY'] missing piece for reaction %s: %s""" % (dbrxn, [elst, exch, ind, disp]))
if failoninc:
break
saptpackage[dbrxn] = {'mc': saptmc,
'elst': elst,
'exch': exch,
'ind': ind,
'disp': disp}
return saptpackage
[docs] def plot_ternary(self, sset='default', labeled=True,
pythonpath='/Users/loriab/linux/bfdb/sapt_punt', failoninc=True, # pythonpath=None
view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""This is a stopgap function that loads sapt component data from
sapt_punt in bfdb repo, then formats it to plot a ternary diagram.
"""
saptdata = self.load_saptdata_frombfdb(sset=sset, pythonpath=pythonpath,
failoninc=failoninc)
dbdat = []
mcs = []
for dat in saptdata.values():
dbdat.append([dat['elst'], dat['ind'], dat['disp']])
if dat['mc'] not in mcs:
mcs.append(dat['mc'])
title = ' '.join([self.dbse, sset, ' '.join(mcs)])
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
pass
# if not running from Canopy, print line to execute from Canopy
else:
# if running from Canopy, call mpl directly
filedict = mpl.ternary(dbdat, title=title, labeled=labeled,
view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def plot_flat(self, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False, color='sapt', xlimit=4.0, xlines=[0.0, 0.3, 1.0],
view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Computes individual errors and summary statistics for single
model chemistry *modelchem* versus *benchmark* over
subset *sset* over all component databases. Thread *color* can be
'rgb' for old coloring, a color name or 'sapt' for spectrum coloring.
*saveas* conveys directory ('/') and/or filename for saving the
resulting plot. File extension is not accessible, but *graphicsformat*
array requests among 'png', 'pdf', and 'eps' formats. *relpath*
forces paths to saved files to be relative to current directory,
rather than absolute paths for returned code and file dictionary.
Prepares flat diagram instructions and either executes them if
matplotlib available (Canopy or Anaconda) or prints them. Returns a
dictionary of all saved plot filenames.
asdf.plot_flat('CCSD-CP-atqzadz', failoninc=False)
"""
# compute errors
mc = modelchem
errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset,
failoninc=failoninc, verbose=verbose, returnindiv=True)
# repackage
dbdat = []
for db, odb in self.dbdict.items():
if indiv[db] is not None:
for rxn in indiv[db].keys():
dbdat.append({'db': db,
'sys': str(rxn),
'color': odb.hrxn[rxn].color,
'data': [indiv[db][rxn][0]]})
pre, suf, mid = string_contrast(mc)
title = self.dbse + '-' + sset + ' ' + pre + '[]' + suf
mae = errors[self.dbse]['mae']
mape = None
# mape = 100 * errors[self.dbse]['mape']
mapbe = None
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict = mpl.flat(%s,\n color='%s',\n title='%s',\n mae=%s,\n mape=%s,\n xlimit=%s,\n xlines=%s,\n view=%s\n saveas=%s\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, color, mc, mae, mape, xlimit, repr(xlines), view, repr(saveas), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict = mpl.flat(dbdat, color=color, title=mc, mae=mae, mape=mape,
xlimit=xlimit, xlines=xlines, view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def write_xyz_files(self, path=None):
"""Writes xyz files for every reagent in the Database to directory
in *path* or to directory dbse_xyzfiles that it createsin cwd if
*path* is None. Additionally, writes a script to that directory
that will generate transparent-background ray-traced png files for
every reagent with PyMol.
"""
if path is None:
xyzdir = os.getcwd() + os.sep + self.dbse + '_xyzfiles' + os.sep
else:
xyzdir = os.path.abspath(path) + os.sep
if not os.path.exists(xyzdir):
os.mkdir(xyzdir)
for rgt, orgt in self.hrgt.iteritems():
omol = Molecule(orgt.mol)
omol.update_geometry()
omol.save_xyz(xyzdir + rgt + '.xyz')
with open(xyzdir + 'pymol_xyz2png_script.pml', 'w') as handle:
handle.write("""
# Launch PyMOL and run from its command line:
# PyMOL> cd {}
# PyMOL> @{}
""".format(xyzdir, 'pymol_xyz2png_script.pml'))
for rgt in self.hrgt.keys():
handle.write("""
load {xyzfile}
hide lines
show sticks
color grey, name c
cmd.set('''opaque_background''','''0''',quiet=0)
reset
orient
cmd.zoom(buffer=0.3, complete=1)
ray
png {pngfile}
reinitialize
""".format(
xyzfile=xyzdir + rgt + '.xyz',
pngfile=xyzdir + rgt + '.png'))
[docs] def plot_all_flats(self, modelchem=None, sset='default', xlimit=4.0,
failoninc=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Generate pieces for inclusion into tables. Supply list of
modelchemistries to plot from *modelchem*, otherwise defaults to
all those available. Can modify subset *sset* and plotting
range *xlimit*.
>>> asdf.plot_all_flats(sset='tt-5min', xlimit=4.0)
"""
mcs = self.mcs.keys() if modelchem is None else modelchem
filedict = OrderedDict()
for mc in sorted(mcs):
minifiledict = self.plot_flat(mc, sset=sset, xlimit=xlimit, view=False,
failoninc=failoninc,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
filedict[mc] = minifiledict
return filedict
[docs] def get_hrxn(self, sset='default'):
"""
"""
rhrxn = OrderedDict()
for db, odb in self.dbdict.items():
dbix = self.dbdict.keys().index(db)
lss = self.sset[sset][dbix]
if lss is not None:
for rxn in odb.hrxn:
if rxn in odb.sset[lss]:
orxn = odb.hrxn[rxn]
rhrxn[orxn.dbrxn] = orxn # this is a change and conflict with vergil version
return rhrxn
[docs] def get_hrgt(self, sset='default', actv='default'):
"""
"""
rhrxn = self.get_hrxn(sset=sset)
rhrgt = OrderedDict()
for rxn, orxn in rhrxn.iteritems():
for orgt in orxn.rxnm[actv].keys():
rhrgt[orgt.name] = orgt
# TODO prob need to avoid duplicates or pass
return rhrgt
[docs] def get_reactions(self, modelchem, sset='default', benchmark='default',
failoninc=True):
"""Collects the reactions present in *sset* from each WrappedDatabase,
checks that *modelchem* and *benchmark* ReactionDatum are present
(fails if *failoninc* True), then returns in an array a tuple for
each reaction containing the modelchem key needed to access
*modelchem*, the modelchem key needed to access *benchmark*, and
the Reaction object.
"""
dbdat = []
rhrxn = self.get_hrxn(sset=sset)
for orxn in rhrxn.itervalues():
dbix = self.dbdict.keys().index(orxn.dbrxn.split('-')[0])
lmc = self.mcs[modelchem][dbix]
lbm = self.mcs[benchmark][dbix]
try:
orxn.data[lbm]
except KeyError as e:
# not sure if should treat bm differently
lbm = None
try:
orxn.data[lmc]
except KeyError as e:
if failoninc:
raise e
else:
lmc = None
dbdat.append((lmc, lbm, orxn))
# this is diff in that returning empties not just pass over- may break bfdb
# try:
# orxn.data[lmc]
# orxn.data[lbm]
# except KeyError as e:
# if failoninc:
# raise e
# else:
# # not sure yet if should return empties or just pass over
# pass
# else:
# dbdat.append((lmc, lbm, orxn))
return dbdat
[docs] def get_missing_reactions(self, modelchem, sset='default'):
"""Returns a dictionary (keys self.dbse and all component
WrappedDatabase.dbse) of two elements, the first being the number
of reactions *sset* should contain and the second being a list of
the reaction names (dbrxn) not available for *modelchem*. Absence
of benchmark not considered.
"""
counts = OrderedDict()
counts[self.dbse] = [0, []]
soledb = True if (len(self.dbdict) == 1 and self.dbdict.items()[0][0] == self.dbse) else False
if not soledb:
for db in self.dbdict.keys():
counts[db] = [0, []]
for (lmc, lbm, orxn) in self.get_reactions(modelchem, benchmark='default',
sset=sset, failoninc=False):
db, rxn = orxn.dbrxn.split('-', 1)
mcdatum = orxn.data[lmc].value if lmc else None
counts[self.dbse][0] += 1
if not soledb:
counts[db][0] += 1
if mcdatum is None:
counts[self.dbse][1].append(orxn.dbrxn)
if not soledb:
counts[db][1].append(orxn.dbrxn)
return counts
[docs] def plot_disthist(self, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False, xtitle='', view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Computes individual errors and summary statistics for single
model chemistry *modelchem* versus *benchmark* over
subset *sset* over all component databases. Computes histogram
of errors and gaussian distribution.
*saveas* conveys directory ('/') and/or filename for saving the
resulting plot. File extension is not accessible, but *graphicsformat*
array requests among 'png', 'pdf', and 'eps' formats. *relpath*
forces paths to saved files to be relative to current directory,
rather than absolute paths for returned code and file dictionary.
Prepares disthist diagram instructions and either executes them if
matplotlib available (Canopy or Anaconda) or prints them. Returns a
dictionary of all saved plot filenames.
>>>
"""
# compute errors
mc = modelchem
errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset,
failoninc=failoninc, verbose=verbose, returnindiv=True)
# repackage
dbdat = []
for db in self.dbdict.keys():
if indiv[db] is not None:
for rxn in indiv[db].keys():
dbdat.append(indiv[db][rxn][0])
title = """%s vs %s for %s subset %s""" % (mc, benchmark, self.dbse, sset)
me = errors[self.dbse]['me']
stde = errors[self.dbse]['stde']
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict = mpl.disthist(%s,\n title='%s',\n xtitle='%s'\n me=%s,\n stde=%s,\n saveas=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, title, xtitle, me, stde, repr(saveas), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict = mpl.disthist(dbdat, title=title, xtitle=xtitle, me=me, stde=stde,
view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def plot_modelchems(self, modelchem, benchmark='default', mbenchmark=None,
sset='default', msset=None, failoninc=True, verbose=False, color='sapt',
xlimit=4.0, labeled=True, view=True,
mousetext=None, mouselink=None, mouseimag=None, mousetitle=None, mousediv=None,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Computes individual errors and summary statistics over all component
databases for each model chemistry in array *modelchem* versus *benchmark*
over subset *sset*. *mbenchmark* and *msset* are array options (same
length as *modelchem*) that override *benchmark* and *sset*, respectively,
for non-uniform specification. Thread *color* can be 'rgb' for old
coloring, a color name or 'sapt' for spectrum coloring.
*saveas* conveys directory ('/') and/or filename for saving the
resulting plot. File extension is not accessible, but *graphicsformat*
array requests among 'png', 'pdf', and 'eps' formats. *relpath*
forces paths to saved files to be relative to current directory,
rather than absolute paths for returned code and file dictionary.
Prepares thread diagram instructions and either executes them if
matplotlib available (Canopy or Anaconda) or prints them. Returns a
dictionary of all saved plot filenames. If any of *mousetext*, *mouselink*,
or *mouseimag* is specified, htmlcode will be returned with an image map of
slats to any of text, link, or image, respectively.
"""
# distribute benchmark
if mbenchmark is None:
lbenchmark = [benchmark] * len(modelchem) # normal bm modelchem name
else:
if isinstance(mbenchmark, basestring) or len(mbenchmark) != len(modelchem):
raise ValidationError(
"""mbenchmark must be array of length distributable among modelchem""" % (str(mbenchmark)))
else:
lbenchmark = mbenchmark # array of bm for each modelchem
# distribute sset
if msset is None:
lsset = [sset] * len(modelchem) # normal ss name like 'MX'
else:
if isinstance(msset, basestring) or len(msset) != len(modelchem):
raise ValidationError("""msset must be array of length distributable among modelchem""" % (str(msset)))
else:
lsset = msset # array of ss for each modelchem
# compute errors
index = []
errors = {}
indiv = {}
for mc, bm, ss in zip(modelchem, lbenchmark, lsset):
ix = '%s_%s_%s' % (ss, mc, bm)
index.append(ix)
errors[ix], indiv[ix] = self.compute_statistics(mc, benchmark=bm, sset=ss,
failoninc=failoninc, verbose=verbose, returnindiv=True)
# repackage
dbdat = []
for db, odb in self.dbdict.items():
dbix = self.dbdict.keys().index(db)
for rxn in odb.hrxn:
data = []
for ix in index:
if indiv[ix][db] is not None:
if rxn in odb.sset[self.sset[lsset[index.index(ix)]][dbix]]:
try:
data.append(indiv[ix][db][rxn][0])
except KeyError as e:
if failoninc:
raise e
else:
data.append(None)
else:
data.append(None)
else:
data.append(None)
if not data or all(item is None for item in data):
pass # filter out empty reactions
else:
dbdat.append({'db': db,
'sys': str(rxn),
'show': str(rxn),
'color': odb.hrxn[rxn].color,
'data': data})
mae = [errors[ix][self.dbse]['mae'] for ix in index]
mape = [100 * errors[ix][self.dbse]['mape'] for ix in index]
# form unique filename
ixpre, ixsuf, ixmid = string_contrast(index)
title = self.dbse + ' ' + ixpre + '[]' + ixsuf
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""filedict, htmlcode = mpl.threads(%s,\n color='%s',\n title='%s',\n labels=%s,\n mae=%s,\n mape=%s\n xlimit=%s\n labeled=%s\n saveas=%s\n mousetext=%s\n mouselink=%s\n mouseimag=%s\n mousetitle=%s,\n mousediv=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, color, title, ixmid, mae, mape, str(xlimit),
repr(labeled), repr(saveas), repr(mousetext), repr(mouselink), repr(mouseimag),
repr(mousetitle), repr(mousediv), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict, htmlcode = mpl.threads(dbdat, color=color, title=title, labels=ixmid, mae=mae, mape=mape,
xlimit=xlimit, labeled=labeled, view=view,
mousetext=mousetext, mouselink=mouselink,
mouseimag=mouseimag, mousetitle=mousetitle, mousediv=mousediv,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict, htmlcode
[docs] def plot_liliowa(self, modelchem, benchmark='default',
failoninc=True, xlimit=2.0, view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""
Note that not possible to access sset of component databases. That is, for Database SSIBBI, SSI-only arylaryl is accessible b/c not defined in BBI, but SSI-only neutral is not accessible.
"""
# compute errors
mc = modelchem
errors = {}
for ss in self.sset.keys():
errors[ss] = self.compute_statistics(mc, benchmark=benchmark, sset=ss,
failoninc=failoninc, verbose=False, returnindiv=False)
# repackage
dbdat = []
ssarray = ['pospos', 'posneg', 'pospolar', 'posaliph', 'posaryl',
None, 'negneg', 'negpolar', 'negaliph', 'negaryl',
None, None, 'polarpolar', 'polaraliph', 'polararyl',
None, None, None, 'aliphaliph', 'alipharyl',
None, None, None, None, 'arylaryl']
for ss in ssarray:
dbdat.append(0.0 if ss is None else errors[ss][self.dbse]['mae'])
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
print('Matplotlib not avail')
else:
filedict = mpl.liliowa(dbdat, xlimit=xlimit, view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def plot_iowa(self, modelchem, benchmark='default', sset='default',
failoninc=True, verbose=False,
title='', xtitle='', xlimit=2.0,
view=True,
saveas=None, relpath=False, graphicsformat=['pdf']):
"""Computes individual errors for single *modelchem* versus
*benchmark* over subset *sset*. Coloring green-to-purple with
maximum intensity at *xlimit*. Prepares Iowa plot instructions and
either executes them if matplotlib available (Canopy) or prints them.
"""
title = self.dbse + ' ' + modelchem
# compute errors
mc = modelchem
errors, indiv = self.compute_statistics(mc, benchmark=benchmark, sset=sset,
failoninc=failoninc, verbose=verbose, returnindiv=True)
# repackage
dbdat = []
dblbl = []
for db in self.dbdict.keys():
if indiv[db] is not None:
for rxn in indiv[db].keys():
dbdat.append(indiv[db][rxn][0])
dblbl.append(str(rxn))
title = """%s vs %s for %s subset %s""" % (mc, benchmark, self.dbse, sset)
me = errors[self.dbse]['me']
# generate matplotlib instructions and call or print
try:
from . import mpl
import matplotlib.pyplot as plt
except ImportError:
# if not running from Canopy, print line to execute from Canopy
print("""mpl.iowa(%s,\n %s,\n title='%s',\n xtitle='%s'\n xlimit=%s,\n saveas=%s,\n relpath=%s\n graphicsformat=%s)\n\n""" %
(dbdat, dblbl, title, xtitle, xlimit, repr(saveas), repr(relpath), repr(graphicsformat)))
else:
# if running from Canopy, call mpl directly
filedict = mpl.iowa(dbdat, dblbl, title=title, xtitle=xtitle, xlimit=xlimit,
view=view,
saveas=saveas, relpath=relpath, graphicsformat=graphicsformat)
return filedict
[docs] def export_pandas(self, modelchem=[], benchmark='default', sset='default', modelchemlabels=None,
failoninc=True):
"""
*modelchem* is array of model chemistries, if modelchem is empty, get only benchmark
is benchmark needed?
"""
import pandas as pd
import numpy as np
if self.dbse not in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']:
saptdata = self.load_saptdata_frombfdb(sset=sset, pythonpath='/Users/loriab/linux/bfdb/sapt_punt',
failoninc=failoninc)
listodicts = []
rhrxn = self.get_hrxn(sset=sset)
for dbrxn, orxn in rhrxn.iteritems():
wdb = dbrxn.split('-')[0]
dbix = self.dbdict.keys().index(wdb)
wbm = self.mcs[benchmark][dbix]
wss = self.sset[sset][dbix]
woss = self.dbdict[wdb].oss[wss]
try:
Rrat = woss.axis['Rrat'][woss.hrxn.index(orxn.name)]
except KeyError:
Rrat = 1.0 # TODO generic soln?
dictorxn = {}
dictorxn['DB'] = wdb
dictorxn['System'] = orxn.tagl
dictorxn['Name'] = orxn.name
dictorxn['R'] = Rrat
dictorxn['System #'] = orxn.indx
dictorxn['Benchmark'] = np.NaN if orxn.benchmark is None else orxn.data[
wbm].value # this NaN exception is new and experimental
dictorxn['QcdbSys'] = orxn.dbrxn
if self.dbse not in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']:
dictorxn['SAPT ELST ENERGY'] = saptdata[dbrxn]['elst']
dictorxn['SAPT EXCH ENERGY'] = saptdata[dbrxn]['exch']
dictorxn['SAPT IND ENERGY'] = saptdata[dbrxn]['ind']
dictorxn['SAPT DISP ENERGY'] = saptdata[dbrxn]['disp']
dictorxn['SAPT TOTAL ENERGY'] = dictorxn['SAPT ELST ENERGY'] + dictorxn['SAPT EXCH ENERGY'] + \
dictorxn['SAPT IND ENERGY'] + dictorxn['SAPT DISP ENERGY']
orgts = orxn.rxnm['default'].keys()
omolD = Molecule(orgts[0].mol) # TODO this is only going to work with Reaction ~= Reagent databases
npmolD = omolD.format_molecule_for_numpy()
omolA = Molecule(orgts[1].mol) # TODO this is only going to work with Reaction ~= Reagent databases
omolA.update_geometry()
dictorxn['MonA'] = omolA.natom()
# this whole member fn not well defined for db of varying stoichiometry
if self.dbse in ['ACONF', 'SCONF', 'PCONF', 'CYCONF']:
npmolD = omolD.format_molecule_for_numpy()
npmolA = omolA.format_molecule_for_numpy()
dictorxn['Geometry'] = np.vstack([npmolD, npmolA])
else:
dictorxn['Geometry'] = omolD.format_molecule_for_numpy()
# print '\nD', npmolD.shape[0], npmolA.shape[0], dictorxn['MonA'], npmolD, npmolA, dictorxn['Geometry']
for mc in modelchem:
try:
wmc = self.mcs[mc][dbix]
except KeyError:
# modelchem not in Database at all
print(mc, 'not found')
continue
key = mc if modelchemlabels is None else modelchemlabels[modelchem.index(mc)]
try:
dictorxn[key] = orxn.data[wmc].value
except KeyError as e:
# reaction not in modelchem
if failoninc:
raise ValidationError("""Reaction %s missing datum %s.""" % (key, str(e)))
else:
print(mc, str(e), 'not found')
continue
listodicts.append(dictorxn)
df = pd.DataFrame(listodicts)
pd.set_option('display.width', 500)
print(df.head(5))
print(df.tail(5))
return df
[docs] def table_reactions(self, modelchem, benchmark='default', sset='default',
failoninc=True,
columnplan=['indx', 'tagl', 'bm', 'mc', 'e', 'pe'],
title="""Reaction energies [kcal/mol] for {sset} $\subset$ {dbse} with {mc}""",
indextitle="""Detailed results for {sset} $\subset$ {dbse} with {mc}""",
plotpath='analysis/mols/',
standalone=True, theme='rxns', filename=None):
r"""Prepare single LaTeX table to *filename* or return lines if None showing
the per-reaction results for reactions in *sset* for single or array
or 'all' *modelchem*, where the last uses self.mcs(), model chemistries
versus *benchmark*. Use *failoninc* to toggle between command failing
or blank lines in table. Use *standalone* to toggle between full
compilable document and suitable for inclusion in another LaTeX document.
Use *columnplan* to customize column (from among columnreservoir, below)
layout. Use *title* and *indextitle* to customize table caption and
table-of-contents caption, respectively; variables in curly braces will
be substituted. Use *theme* to customize the \ref{tbl:} code.
"""
# define eligible columns for inclusion
columnreservoir = {
'dbrxn': ['l', r"""\textbf{Reaction}""", """{0:25s}"""],
'indx': ['r', '', """{0:14s}"""],
'tagl': ['l', r"""\textbf{Reaction}""", """{0:50s}"""],
'bm': ['d', r"""\multicolumn{1}{c}{\textbf{Benchmark}}""", """{0:8.2f}"""],
'mc': ['d', r"""\multicolumn{1}{c}{\textbf{ModelChem}}""", """{0:8.2f}"""],
'e': ['d', r"""\multicolumn{1}{c}{\textbf{Error}}""", """{0:8.2f}"""],
'pe': ['d', r"""\multicolumn{1}{c}{\textbf{\% Err.}}""", """{0:8.1f}"""],
'imag': ['l', '', r"""\includegraphics[width=1.0cm,height=3.5mm]{%s%%ss.png}""" % (plotpath)], # untested
}
for col in columnplan:
if col not in columnreservoir.keys():
raise ValidationError('Column {0} not recognized. Register with columnreservoir.'.format(col))
if isinstance(modelchem, basestring):
if modelchem.lower() == 'all':
mcs = sorted(self.mcs.keys())
else:
mcs = [modelchem]
else:
mcs = modelchem
# commence to generate LaTeX code
tablelines = []
indexlines = []
if standalone:
tablelines += textables.begin_latex_document()
# iterate to produce one LaTeX table per modelchem
for mc in mcs:
# prepare summary statistics
perr = self.compute_statistics(mc, benchmark=benchmark, sset=sset,
failoninc=failoninc, verbose=False,
returnindiv=False)
serrors = OrderedDict()
for db in self.dbdict.keys():
serrors[db] = None if perr[db] is None else format_errors(perr[db], mode=3)
serrors[self.dbse] = format_errors(perr[self.dbse], mode=3)
# prepare individual reactions and errors
terrors = OrderedDict()
isComplete = True
for (lmc, lbm, orxn) in self.get_reactions(mc, benchmark=benchmark,
sset=sset, failoninc=failoninc):
tmp = {}
dbrxn = orxn.dbrxn
tmp['dbrxn'] = dbrxn.replace('_', '\\_')
tmp['indx'] = r"""\textit{""" + str(orxn.indx) + """}"""
tmp['tagl'] = dbrxn.split('-')[0] + ' ' + \
(orxn.latex if orxn.latex else orxn.tagl.replace('_', '\\_'))
tmp['imag'] = None # name of primary rgt
bmdatum = orxn.data[lbm].value if lbm else None
mcdatum = orxn.data[lmc].value if lmc else None
tmp['bm'] = bmdatum
tmp['mc'] = mcdatum
if lmc and lbm:
tmp['e'] = mcdatum - bmdatum
tmp['pe'] = 100 * (mcdatum - bmdatum) / abs(bmdatum)
# TODO redefining errors not good practice
else:
isComplete = False
tmp['e'] = None
tmp['pe'] = None
terrors[dbrxn] = {}
for c in columnreservoir.keys():
terrors[dbrxn][c] = '' if tmp[c] is None else \
columnreservoir[c][2].format(tmp[c])
fancymodelchem = self.fancy_mcs(latex=True)[mc]
thistitle = title.format(dbse=self.dbse, mc=fancymodelchem,
sset='All' if sset == 'default' else sset.upper())
lref = [r"""tbl:qcdb"""]
if theme:
lref.append(theme)
lref.append(self.dbse)
if sset != 'default':
lref.append(sset)
lref.append(mc)
ref = '-'.join(lref)
# table intro
tablelines.append(r"""\begingroup""")
tablelines.append(r"""\squeezetable""")
tablelines.append(r"""\LTcapwidth=\textwidth""")
tablelines.append(r"""\begin{longtable}{%s}""" % (''.join([columnreservoir[col][0] for col in columnplan])))
tablelines.append(r"""\caption{%s""" % (thistitle))
tablelines.append(r"""\label{%s}} \\ """ % (ref))
tablelines.append(r"""\hline\hline""")
columntitles = [columnreservoir[col][1] for col in columnplan]
# initial header
tablelines.append(' & '.join(columntitles) + r""" \\ """)
tablelines.append(r"""\hline""")
tablelines.append(r"""\endfirsthead""")
# to be continued header
tablelines.append(r"""\multicolumn{%d}{@{}l}{\textit{\ldots continued} %s} \\ """ %
(len(columnplan), fancymodelchem))
tablelines.append(r"""\hline\hline""")
tablelines.append(' & '.join(columntitles) + r""" \\ """)
tablelines.append(r"""\hline""")
tablelines.append(r"""\endhead""")
# to be continued footer
tablelines.append(r"""\hline\hline""")
tablelines.append(r"""\multicolumn{%d}{r@{}}{\textit{continued \ldots}} \\ """ %
(len(columnplan)))
tablelines.append(r"""\endfoot""")
# final footer
tablelines.append(r"""\hline\hline""")
tablelines.append(r"""\endlastfoot""")
# table body
for dbrxn, stuff in terrors.iteritems():
tablelines.append(' & '.join([stuff[col] for col in columnplan]) + r""" \\ """)
# table body summary
if any(col in ['e', 'pe'] for col in columnplan):
field_to_put_labels = [col for col in ['tagl', 'dbrxn', 'indx'] if col in columnplan]
if field_to_put_labels:
for block, blkerrors in serrors.iteritems():
if blkerrors: # skip e.g., NBC block in HB of DB4
tablelines.append(r"""\hline""")
summlines = [[] for i in range(8)]
for col in columnplan:
if col == field_to_put_labels[0]:
summlines[0].append(
r"""\textbf{Summary Statistics: %s%s}%s""" % \
('' if sset == 'default' else sset + r""" $\subset$ """,
block,
'' if isComplete else r""", \textit{partial}"""))
summlines[1].append(r"""\textit{Minimal Signed Error} """)
summlines[2].append(r"""\textit{Minimal Absolute Error} """)
summlines[3].append(r"""\textit{Maximal Signed Error} """)
summlines[4].append(r"""\textit{Maximal Absolute Error} """)
summlines[5].append(r"""\textit{Mean Signed Error} """)
summlines[6].append(r"""\textit{Mean Absolute Error} """)
summlines[7].append(r"""\textit{Root-Mean-Square Error} """)
elif col in ['e', 'pe']:
summlines[0].append('')
summlines[1].append(blkerrors['nex' + col])
summlines[2].append(blkerrors['min' + col])
summlines[3].append(blkerrors['pex' + col])
summlines[4].append(blkerrors['max' + col])
summlines[5].append(blkerrors['m' + col])
summlines[6].append(blkerrors['ma' + col])
summlines[7].append(blkerrors['rms' + col])
else:
for ln in range(len(summlines)):
summlines[ln].append('')
for ln in range(len(summlines)):
tablelines.append(' & '.join(summlines[ln]) + r""" \\ """)
# table conclusion
tablelines.append(r"""\end{longtable}""")
tablelines.append(r"""\endgroup""")
tablelines.append(r"""\clearpage""")
tablelines.append('\n\n')
# form table index
thisindextitle = indextitle.format(dbse=self.dbse, mc=fancymodelchem.strip(),
sset='All' if sset == 'default' else sset.upper())
indexlines.append(r"""\scriptsize \ref{%s} & \scriptsize %s \\ """ % \
(ref, thisindextitle))
if standalone:
tablelines += textables.end_latex_document()
# form table and index return structures
if filename is None:
return tablelines, indexlines
else:
if filename.endswith('.tex'):
filename = filename[:-4]
with open(filename + '.tex', 'w') as handle:
handle.write('\n'.join(tablelines))
with open(filename + '_index.tex', 'w') as handle:
handle.write('\n'.join(indexlines) + '\n')
print("""\n LaTeX index written to {filename}_index.tex\n"""
""" LaTeX table written to {filename}.tex\n"""
""" >>> pdflatex {filename}\n"""
""" >>> open /Applications/Preview.app {filename}.pdf\n""".format(filename=filename))
filedict = {'data': os.path.abspath(filename) + '.tex',
'index': os.path.abspath(filename + '_index.tex')}
return filedict
[docs] def table_wrapper(self, mtd, bas, tableplan, benchmark='default',
opt=['CP'], err=['mae'], sset=['default'], dbse=None,
opttarget=None,
failoninc=True,
xlimit=4.0, xlines=[0.0, 0.3, 1.0],
ialimit=2.0,
plotpath='autogen',
subjoin=True,
title=None, indextitle=None,
suppressblanks=False,
standalone=True, theme=None, filename=None):
"""Prepares dictionary of errors for all combinations of *mtd*, *opt*,
*bas* with respect to model chemistry *benchmark*, mindful of *failoninc*.
The general plan for the table, as well as defaults for landscape,
footnotes, *title*, *indextitle, and *theme* are got from function
*tableplan*. Once error dictionary is ready, it and all other arguments
are passed along to textables.table_generic. Two arrays, one of table
lines and one of index lines are returned unless *filename* is given,
in which case they're written to file and a filedict returned.
"""
# get plan for table from *tableplan* and some default values
kwargs = {'plotpath': plotpath,
'subjoin': subjoin,
'xlines': xlines,
'xlimit': xlimit,
'ialimit': ialimit}
rowplan, columnplan, landscape, footnotes, \
suggestedtitle, suggestedtheme = tableplan(**kwargs)
#suggestedtitle, suggestedtheme = tableplan(plotpath=plotpath, subjoin=subjoin)
# make figure files write themselves
autothread = {}
autoliliowa = {}
if plotpath == 'autogen':
for col in columnplan:
if col[3].__name__ == 'flat':
if col[4] and autothread:
print('TODO: merge not handled')
elif col[4] or autothread:
autothread.update(col[4])
else:
autothread = {'dummy': True}
elif col[3].__name__ == 'liliowa':
autoliliowa = {'dummy': True}
# negotiate some defaults
dbse = [self.dbse] if dbse is None else dbse
theme = suggestedtheme if theme is None else theme
title = suggestedtitle if title is None else title
indextitle = title if indextitle is None else indextitle
opttarget = {'default': ['']} if opttarget is None else opttarget
def unify_options(orequired, opossible):
"""Perform a merge of options tags in *orequired* and *opossible* so
that the result is free of duplication and has the mode at the end.
"""
opt_combos = []
for oreq in orequired:
for opos in opossible:
pieces = sorted(set(oreq.split('_') + opos.split('_')))
if '' in pieces:
pieces.remove('')
for mode in ['CP', 'unCP', 'SA']:
if mode in pieces:
pieces.remove(mode)
pieces.append(mode)
pieces = '_'.join(pieces)
opt_combos.append(pieces)
return opt_combos
# gather list of model chemistries for table
mcs = ['-'.join(prod) for prod in itertools.product(mtd, opt, bas)]
mc_translator = {}
for m, o, b in itertools.product(mtd, opt, bas):
nominal_mc = '-'.join([m, o, b])
for oo in unify_options([o], opttarget['default']):
trial_mc = '-'.join([m, oo, b])
try:
perr = self.compute_statistics(trial_mc, benchmark=benchmark, sset='default', # prob. too restrictive by choosing subset
failoninc=False, verbose=False, returnindiv=False)
except KeyError as e:
continue
else:
mc_translator[nominal_mc] = trial_mc
break
else:
mc_translator[nominal_mc] = None
# compute errors
serrors = {}
for mc in mcs:
serrors[mc] = {}
for ss in self.sset.keys():
serrors[mc][ss] = {}
if mc_translator[mc] in self.mcs:
# Note: not handling when one component Wdb has one translated pattern and another another
perr = self.compute_statistics(mc_translator[mc], benchmark=benchmark, sset=ss,
failoninc=failoninc, verbose=False, returnindiv=False)
serrors[mc][ss][self.dbse] = format_errors(perr[self.dbse], mode=3)
if not failoninc:
mcsscounts = self.get_missing_reactions(mc_translator[mc], sset=ss)
serrors[mc][ss][self.dbse]['tgtcnt'] = mcsscounts[self.dbse][0]
serrors[mc][ss][self.dbse]['misscnt'] = len(mcsscounts[self.dbse][1])
if autothread:
if ('sset' in autothread and ss in autothread['sset']) or ('sset' not in autothread):
mcssplots = self.plot_flat(mc_translator[mc], benchmark=benchmark, sset=ss,
failoninc=failoninc, color='sapt', xlimit=xlimit, xlines=xlines, view=False,
saveas='flat_' + '-'.join([self.dbse, ss, mc]), relpath=True, graphicsformat=['pdf'])
serrors[mc][ss][self.dbse]['plotflat'] = mcssplots['pdf']
if autoliliowa and ss == 'default':
mcssplots = self.plot_liliowa(mc_translator[mc], benchmark=benchmark,
failoninc=failoninc, xlimit=ialimit, view=False,
saveas='liliowa_' + '-'.join([self.dbse, ss, mc]), relpath=True, graphicsformat=['pdf'])
serrors[mc][ss][self.dbse]['plotliliowa'] = mcssplots['pdf']
for db in self.dbdict.keys():
if perr[db] is None:
serrors[mc][ss][db] = None
else:
serrors[mc][ss][db] = format_errors(perr[db], mode=3)
if not failoninc:
serrors[mc][ss][db]['tgtcnt'] = mcsscounts[db][0]
serrors[mc][ss][db]['misscnt'] = len(mcsscounts[db][1])
else:
serrors[mc][ss][self.dbse] = format_errors(initialize_errors(), mode=3)
for db in self.dbdict.keys():
serrors[mc][ss][db] = format_errors(initialize_errors(), mode=3)
for key in serrors.keys():
print("""{:>35}{:>35}{}""".format(key, mc_translator[key], serrors[key]['default'][self.dbse]['mae']))
# find indices that would be neglected in a single sweep over table_generic
keysinplan = set(sum([col[-1].keys() for col in columnplan], rowplan))
obvious = {'dbse': dbse, 'sset': sset, 'mtd': mtd, 'opt': opt, 'bas': bas, 'err': err}
for key, vari in obvious.items():
if len(vari) == 1 or key in keysinplan:
del obvious[key]
iteroers = [(prod) for prod in itertools.product(*obvious.values())]
# commence to generate LaTeX code
tablelines = []
indexlines = []
if standalone:
tablelines += textables.begin_latex_document()
for io in iteroers:
actvargs = dict(zip(obvious.keys(), [[k] for k in io]))
nudbse = actvargs['dbse'] if 'dbse' in actvargs else dbse
nusset = actvargs['sset'] if 'sset' in actvargs else sset
numtd = actvargs['mtd'] if 'mtd' in actvargs else mtd
nuopt = actvargs['opt'] if 'opt' in actvargs else opt
nubas = actvargs['bas'] if 'bas' in actvargs else bas
nuerr = actvargs['err'] if 'err' in actvargs else err
table, index = textables.table_generic(
mtd=numtd, bas=nubas, opt=nuopt, err=nuerr, sset=nusset, dbse=nudbse,
rowplan=rowplan, columnplan=columnplan, serrors=serrors,
plotpath='' if plotpath == 'autogen' else plotpath,
subjoin=subjoin,
title=title, indextitle=indextitle,
suppressblanks=suppressblanks,
landscape=landscape, footnotes=footnotes,
standalone=False, theme=theme)
tablelines += table
tablelines.append('\n\n')
indexlines += index
if standalone:
tablelines += textables.end_latex_document()
# form table and index return structures
if filename is None:
return tablelines, indexlines
else:
if filename.endswith('.tex'):
filename = filename[:-4]
with open(filename + '.tex', 'w') as handle:
handle.write('\n'.join(tablelines))
with open(filename + '_index.tex', 'w') as handle:
handle.write('\n'.join(indexlines))
print("""\n LaTeX index written to {filename}_index.tex\n"""
""" LaTeX table written to {filename}.tex\n"""
""" >>> pdflatex {filename}\n"""
""" >>> open /Applications/Preview.app {filename}.pdf\n""".format(filename=filename))
filedict = {'data': os.path.abspath(filename) + '.tex',
'index': os.path.abspath(filename + '_index.tex')}
return filedict
[docs] def table_scrunch(self, plotpath, subjoin):
rowplan = ['mtd']
columnplan = [
['l', r'Method', '', textables.label, {}],
['c', r'Description', '', textables.empty, {}],
['d', r'aug-cc-pVDZ', 'unCP', textables.val, {'bas': 'adz', 'opt': 'unCP'}],
['d', r'aug-cc-pVDZ', 'CP', textables.val, {'bas': 'adz', 'opt': 'CP'}],
['d', r'aug-cc-pVTZ', 'unCP', textables.val, {'bas': 'atz', 'opt': 'unCP'}],
['d', r'aug-cc-pVTZ', 'CP', textables.val, {'bas': 'atz', 'opt': 'CP'}]]
footnotes = []
landscape = False
theme = 'summavg'
title = r"""Classification and Performance of model chemistries. Interaction energy [kcal/mol] {{err}} statistics.""".format()
return rowplan, columnplan, landscape, footnotes, title, theme
[docs] def table_merge_abbr(self, plotpath, subjoin):
"""Specialization of table_generic into table with minimal statistics
(three S22 and three overall) plus embedded slat diagram as suitable
for main paper. A single table is formed in sections by *bas* with
lines *mtd* within each section.
"""
rowplan = ['bas', 'mtd']
columnplan = [
['l', r"""Method \& Basis Set""", '', textables.label, {}],
['d', r'S22', 'HB', textables.val, {'sset': 'hb', 'dbse': 'S22'}],
['d', r'S22', 'MX/DD', textables.val, {'sset': 'mxdd', 'dbse': 'S22'}],
['d', r'S22', 'TT', textables.val, {'sset': 'tt', 'dbse': 'S22'}],
['d', r'Overall', 'HB', textables.val, {'sset': 'hb', 'dbse': 'DB4'}],
['d', r'Overall', 'MX/DD', textables.val, {'sset': 'mxdd', 'dbse': 'DB4'}],
['d', r'Overall', 'TT', textables.val, {'sset': 'tt', 'dbse': 'DB4'}],
['l', r"""Error Distribution\footnotemark[1]""",
r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'),
textables.graphics, {}],
['d', r'Time', '', textables.empty, {}]]
# TODO Time column not right at all
footnotes = [fnreservoir['blankslat']]
landscape = False
theme = 'smmerge'
title = r"""Interaction energy [kcal/mol] {{err}} subset statistics with computed with {{opt}}{0}.""".format(
'' if subjoin else r""" and {bas}""")
return rowplan, columnplan, landscape, footnotes, title, theme
[docs] def table_merge_suppmat(self, plotpath, subjoin):
"""Specialization of table_generic into table with as many statistics
as will fit (mostly fullcurve and a few 5min) plus embedded slat
diagram as suitable for supplementary material. Multiple tables are
formed, one for each in *bas* with lines *mtd* within each table.
"""
rowplan = ['bas', 'mtd']
columnplan = [
['l', r"""Method \& Basis Set""", '', textables.label, {}],
['d', 'S22', 'HB', textables.val, {'sset': 'hb', 'dbse': 'S22'}],
['d', 'S22', 'MX', textables.val, {'sset': 'mx', 'dbse': 'S22'}],
['d', 'S22', 'DD', textables.val, {'sset': 'dd', 'dbse': 'S22'}],
['d', 'S22', 'TT', textables.val, {'sset': 'tt', 'dbse': 'S22'}],
['d', 'NBC10', 'MX', textables.val, {'sset': 'mx', 'dbse': 'NBC1'}],
['d', 'NBC10', 'DD', textables.val, {'sset': 'dd', 'dbse': 'NBC1'}],
['d', 'NBC10', 'TT', textables.val, {'sset': 'tt', 'dbse': 'NBC1'}],
['d', 'HBC6', 'HB/TT', textables.val, {'sset': 'tt', 'dbse': 'HBC1'}],
['d', 'HSG', 'HB', textables.val, {'sset': 'hb', 'dbse': 'HSG'}],
['d', 'HSG', 'MX', textables.val, {'sset': 'mx', 'dbse': 'HSG'}],
['d', 'HSG', 'DD', textables.val, {'sset': 'dd', 'dbse': 'HSG'}],
['d', 'HSG', 'TT', textables.val, {'sset': 'tt', 'dbse': 'HSG'}],
['d', 'Avg', 'TT ', textables.val, {'sset': 'tt', 'dbse': 'DB4'}],
['l', r"""Error Distribution\footnotemark[1]""",
r"""\includegraphics[width=6.67cm,height=3.5mm]{%s%s.pdf}""" % (plotpath, 'blank'),
textables.graphics, {}],
['d', 'NBC10', r"""TT\footnotemark[2]""", textables.val, {'sset': 'tt-5min', 'dbse': 'NBC1'}],
['d', 'HBC6', r"""TT\footnotemark[2] """, textables.val, {'sset': 'tt-5min', 'dbse': 'HBC1'}],
['d', 'Avg', r"""TT\footnotemark[2]""", textables.val, {'sset': 'tt-5min', 'dbse': 'DB4'}]]
footnotes = [fnreservoir['blankslat'], fnreservoir['5min']]
landscape = True
theme = 'lgmerge'
title = r"""Interaction energy [kcal/mol] {{err}} subset statistics with computed with {{opt}}{0}.""".format(
'' if subjoin else r""" and {bas}""")
return rowplan, columnplan, landscape, footnotes, title, theme
[docs]class DB4(Database):
def __init__(self, pythonpath=None, loadfrompickle=False, path=None):
"""Initialize FourDatabases object from SuperDatabase"""
Database.__init__(self, ['s22', 'nbc10', 'hbc6', 'hsg'], dbse='DB4',
pythonpath=pythonpath, loadfrompickle=loadfrompickle, path=path)
# # load up data and definitions
# self.load_qcdata_byproject('dft')
# self.load_qcdata_byproject('pt2')
# #self.load_qcdata_byproject('dhdft')
# self.load_subsets()
self.define_supersubsets()
self.define_supermodelchems()
[docs] def define_supersubsets(self):
"""
"""
self.sset['tt'] = ['default', 'default', 'default', 'default']
self.sset['hb'] = ['hb', None, 'default', 'hb']
self.sset['mx'] = ['mx', 'mx', None, 'mx']
self.sset['dd'] = ['dd', 'dd', None, 'dd']
self.sset['mxdd'] = ['mxdd', 'default', None, 'mxdd']
self.sset['pp'] = ['mxddpp', 'mxddpp', None, None]
self.sset['np'] = ['mxddnp', 'mxddnp', None, 'mxdd']
self.sset['tt-5min'] = ['default', '5min', '5min', 'default']
self.sset['hb-5min'] = ['hb', None, '5min', 'hb']
self.sset['mx-5min'] = ['mx', 'mx-5min', None, 'mx']
self.sset['dd-5min'] = ['dd', 'dd-5min', None, 'dd']
self.sset['mxdd-5min'] = ['mxdd', '5min', None, 'mxdd']
self.sset['pp-5min'] = ['mxddpp', 'mxddpp-5min', None, None]
self.sset['np-5min'] = ['mxddnp', 'mxddnp-5min', None, 'mxdd']
# def benchmark(self):
# """Returns the model chemistry label for the database's benchmark."""
# return 'C2001BENCH'
[docs] def define_supermodelchems(self):
"""
"""
self.benchmark = 'C2011BENCH'
self.mcs['C2010BENCH'] = ['S22A', 'NBC100', 'HBC60', 'HSG0']
self.mcs['C2011BENCH'] = ['S22B', 'NBC10A', 'HBC6A', 'HSGA']
self.mcs['CCSD-CP-adz'] = ['CCSD-CP-adz', 'CCSD-CP-hadz', 'CCSD-CP-adz', 'CCSD-CP-hadz']
self.mcs['CCSD-CP-atz'] = ['CCSD-CP-atz', 'CCSD-CP-hatz', 'CCSD-CP-atz', 'CCSD-CP-hatz']
self.mcs['CCSD-CP-adtz'] = ['CCSD-CP-adtz', 'CCSD-CP-hadtz', 'CCSD-CP-adtz', 'CCSD-CP-hadtz']
self.mcs['CCSD-CP-adtzadz'] = ['CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz', 'CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz']
self.mcs['CCSD-CP-atzadz'] = ['CCSD-CP-atzadz', 'CCSD-CP-atzhadz', 'CCSD-CP-atzadz', 'CCSD-CP-atzhadz']
self.mcs['CCSD-CP-atqzadz'] = ['CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz', 'CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz']
self.mcs['CCSD-CP-atzadtz'] = ['CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz', 'CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz']
self.mcs['CCSD-CP-atqzadtz'] = ['CCSD-CP-atqzadtz', 'CCSD-CP-atqzhadtz', 'CCSD-CP-atqzadtz',
'CCSD-CP-atqzhadtz']
self.mcs['CCSD-CP-atqzatz'] = ['CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz', 'CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz']
self.mcs['SCSCCSD-CP-adz'] = ['SCSCCSD-CP-adz', 'SCSCCSD-CP-hadz', 'SCSCCSD-CP-adz', 'SCSCCSD-CP-hadz']
self.mcs['SCSCCSD-CP-atz'] = ['SCSCCSD-CP-atz', 'SCSCCSD-CP-hatz', 'SCSCCSD-CP-atz', 'SCSCCSD-CP-hatz']
self.mcs['SCSCCSD-CP-adtz'] = ['SCSCCSD-CP-adtz', 'SCSCCSD-CP-hadtz', 'SCSCCSD-CP-adtz', 'SCSCCSD-CP-hadtz']
self.mcs['SCSCCSD-CP-adtzadz'] = ['SCSCCSD-CP-adtzadz', 'SCSCCSD-CP-adtzhadz', 'SCSCCSD-CP-adtzadz',
'SCSCCSD-CP-adtzhadz']
self.mcs['SCSCCSD-CP-atzadz'] = ['SCSCCSD-CP-atzadz', 'SCSCCSD-CP-atzhadz', 'SCSCCSD-CP-atzadz',
'SCSCCSD-CP-atzhadz']
self.mcs['SCSCCSD-CP-atqzadz'] = ['SCSCCSD-CP-atqzadz', 'SCSCCSD-CP-atqzhadz', 'SCSCCSD-CP-atqzadz',
'SCSCCSD-CP-atqzhadz']
self.mcs['SCSCCSD-CP-atzadtz'] = ['SCSCCSD-CP-atzadtz', 'SCSCCSD-CP-atzhadtz', 'SCSCCSD-CP-atzadtz',
'SCSCCSD-CP-atzhadtz']
self.mcs['SCSCCSD-CP-atqzadtz'] = ['SCSCCSD-CP-atqzadtz', 'SCSCCSD-CP-atqzhadtz', 'SCSCCSD-CP-atqzadtz',
'SCSCCSD-CP-atqzhadtz']
self.mcs['SCSCCSD-CP-atqzatz'] = ['SCSCCSD-CP-atqzatz', 'SCSCCSD-CP-atqzhatz', 'SCSCCSD-CP-atqzatz',
'SCSCCSD-CP-atqzhatz']
self.mcs['SCSMICCSD-CP-adz'] = ['SCSMICCSD-CP-adz', 'SCSMICCSD-CP-hadz', 'SCSMICCSD-CP-adz',
'SCSMICCSD-CP-hadz']
self.mcs['SCSMICCSD-CP-atz'] = ['SCSMICCSD-CP-atz', 'SCSMICCSD-CP-hatz', 'SCSMICCSD-CP-atz',
'SCSMICCSD-CP-hatz']
self.mcs['SCSMICCSD-CP-adtz'] = ['SCSMICCSD-CP-adtz', 'SCSMICCSD-CP-hadtz', 'SCSMICCSD-CP-adtz',
'SCSMICCSD-CP-hadtz']
self.mcs['SCSMICCSD-CP-adtzadz'] = ['SCSMICCSD-CP-adtzadz', 'SCSMICCSD-CP-adtzhadz', 'SCSMICCSD-CP-adtzadz',
'SCSMICCSD-CP-adtzhadz']
self.mcs['SCSMICCSD-CP-atzadz'] = ['SCSMICCSD-CP-atzadz', 'SCSMICCSD-CP-atzhadz', 'SCSMICCSD-CP-atzadz',
'SCSMICCSD-CP-atzhadz']
self.mcs['SCSMICCSD-CP-atqzadz'] = ['SCSMICCSD-CP-atqzadz', 'SCSMICCSD-CP-atqzhadz', 'SCSMICCSD-CP-atqzadz',
'SCSMICCSD-CP-atqzhadz']
self.mcs['SCSMICCSD-CP-atzadtz'] = ['SCSMICCSD-CP-atzadtz', 'SCSMICCSD-CP-atzhadtz', 'SCSMICCSD-CP-atzadtz',
'SCSMICCSD-CP-atzhadtz']
self.mcs['SCSMICCSD-CP-atqzadtz'] = ['SCSMICCSD-CP-atqzadtz', 'SCSMICCSD-CP-atqzhadtz', 'SCSMICCSD-CP-atqzadtz',
'SCSMICCSD-CP-atqzhadtz']
self.mcs['SCSMICCSD-CP-atqzatz'] = ['SCSMICCSD-CP-atqzatz', 'SCSMICCSD-CP-atqzhatz', 'SCSMICCSD-CP-atqzatz',
'SCSMICCSD-CP-atqzhatz']
self.mcs['CCSDT-CP-adz'] = ['CCSDT-CP-adz', 'CCSDT-CP-hadz', 'CCSDT-CP-adz', 'CCSDT-CP-hadz']
self.mcs['CCSDT-CP-atz'] = ['CCSDT-CP-atz', 'CCSDT-CP-hatz', 'CCSDT-CP-atz', 'CCSDT-CP-hatz']
self.mcs['CCSDT-CP-adtz'] = ['CCSDT-CP-adtz', 'CCSDT-CP-hadtz', 'CCSDT-CP-adtz', 'CCSDT-CP-hadtz']
self.mcs['CCSDT-CP-adtzadz'] = ['CCSDT-CP-adtzadz', 'CCSDT-CP-adtzhadz', 'CCSDT-CP-adtzadz',
'CCSDT-CP-adtzhadz']
self.mcs['CCSDT-CP-atzadz'] = ['CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz', 'CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz']
self.mcs['CCSDT-CP-atqzadz'] = ['CCSDT-CP-atqzadz', 'CCSDT-CP-atqzhadz', 'CCSDT-CP-atqzadz',
'CCSDT-CP-atqzhadz']
self.mcs['CCSDT-CP-atzadtz'] = ['CCSDT-CP-atzadtz', 'CCSDT-CP-atzhadtz', 'CCSDT-CP-atzadtz',
'CCSDT-CP-atzhadtz']
self.mcs['CCSDT-CP-atqzadtz'] = ['CCSDT-CP-atqzadtz', 'CCSDT-CP-atqzhadtz', 'CCSDT-CP-atqzadtz',
'CCSDT-CP-atqzhadtz']
self.mcs['CCSDT-CP-atqzatz'] = ['CCSDT-CP-atqzatz', 'CCSDT-CP-atqzhatz', 'CCSDT-CP-atqzatz',
'CCSDT-CP-atqzhatz']
# def make_pt2_flats(self):
# def plot_all_flats(self):
# """Generate pieces for inclusion into tables for PT2 paper.
# Note that DB4 flats use near-equilibrium subset.
#
# """
# Database.plot_all_flats(self, modelchem=None, sset='tt-5min', xlimit=4.0,
# graphicsformat=['pdf'])
[docs] def plot_dhdft_flats(self):
"""Generate pieces for grey bars figure for DH-DFT paper."""
self.plot_all_flats(
['B97D3-CP-adz', 'PBED3-CP-adz', 'M11L-CP-adz', 'DLDFD-CP-adz', 'B3LYPD3-CP-adz', 'PBE0D3-CP-adz',
'WB97XD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz',
'VV10-CP-adz',
'LCVV10-CP-adz', 'WB97XV-CP-adz', 'PBE02-CP-adz', 'WB97X2-CP-adz', 'B2PLYPD3-CP-adz',
'DSDPBEP86D2OPT-CP-adz', 'MP2-CP-adz'], sset='tt-5min')
self.plot_all_flats(['B97D3-unCP-adz', 'PBED3-unCP-adz', 'M11L-unCP-adz', 'DLDFD-unCP-adz', 'B3LYPD3-unCP-adz',
'PBE0D3-unCP-adz',
'WB97XD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz',
'M11-unCP-adz', 'VV10-unCP-adz',
'LCVV10-unCP-adz', 'WB97XV-unCP-adz', 'PBE02-unCP-adz', 'WB97X2-unCP-adz',
'B2PLYPD3-unCP-adz', 'DSDPBEP86D2OPT-unCP-adz', 'MP2-unCP-adz'], sset='tt-5min')
self.plot_all_flats(
['B97D3-CP-atz', 'PBED3-CP-atz', 'M11L-CP-atz', 'DLDFD-CP-atz', 'B3LYPD3-CP-atz', 'PBE0D3-CP-atz',
'WB97XD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz',
'VV10-CP-atz',
'LCVV10-CP-atz', 'WB97XV-CP-atz', 'PBE02-CP-atz', 'WB97X2-CP-atz', 'B2PLYPD3-CP-atz',
'DSDPBEP86D2OPT-CP-atz', 'MP2-CP-atz'], sset='tt-5min')
self.plot_all_flats(['B97D3-unCP-atz', 'PBED3-unCP-atz', 'M11L-unCP-atz', 'DLDFD-unCP-atz', 'B3LYPD3-unCP-atz',
'PBE0D3-unCP-atz',
'WB97XD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz',
'M11-unCP-atz', 'VV10-unCP-atz',
'LCVV10-unCP-atz', 'WB97XV-unCP-atz', 'PBE02-unCP-atz', 'WB97X2-unCP-atz',
'B2PLYPD3-unCP-atz', 'DSDPBEP86D2OPT-unCP-atz', 'MP2-unCP-atz'], sset='tt-5min')
[docs] def plot_dhdft_modelchems(self):
self.plot_modelchems(
['B97D3-CP-adz', 'PBED3-CP-adz', 'M11L-CP-adz', 'DLDFD-CP-adz', 'B3LYPD3-CP-adz', 'PBE0D3-CP-adz',
'WB97XD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz',
'VV10-CP-adz',
'LCVV10-CP-adz', 'WB97XV-CP-adz', 'PBE02-CP-adz', 'WB97X2-CP-adz', 'B2PLYPD3-CP-adz',
'DSDPBEP86D2OPT-CP-adz', 'MP2-CP-adz'], sset='tt-5min')
self.plot_modelchems(['B97D3-unCP-adz', 'PBED3-unCP-adz', 'M11L-unCP-adz', 'DLDFD-unCP-adz', 'B3LYPD3-unCP-adz',
'PBE0D3-unCP-adz',
'WB97XD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz',
'M11-unCP-adz', 'VV10-unCP-adz',
'LCVV10-unCP-adz', 'WB97XV-unCP-adz', 'PBE02-unCP-adz', 'WB97X2-unCP-adz',
'B2PLYPD3-unCP-adz', 'DSDPBEP86D2OPT-unCP-adz', 'MP2-unCP-adz'], sset='tt-5min')
self.plot_modelchems(
['B97D3-CP-atz', 'PBED3-CP-atz', 'M11L-CP-atz', 'DLDFD-CP-atz', 'B3LYPD3-CP-atz', 'PBE0D3-CP-atz',
'WB97XD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz',
'VV10-CP-atz',
'LCVV10-CP-atz', 'WB97XV-CP-atz', 'PBE02-CP-atz', 'WB97X2-CP-atz', 'B2PLYPD3-CP-atz',
'DSDPBEP86D2OPT-CP-atz', 'MP2-CP-atz'], sset='tt-5min')
self.plot_modelchems(['B97D3-unCP-atz', 'PBED3-unCP-atz', 'M11L-unCP-atz', 'DLDFD-unCP-atz', 'B3LYPD3-unCP-atz',
'PBE0D3-unCP-atz',
'WB97XD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz',
'M11-unCP-atz', 'VV10-unCP-atz',
'LCVV10-unCP-atz', 'WB97XV-unCP-atz', 'PBE02-unCP-atz', 'WB97X2-unCP-atz',
'B2PLYPD3-unCP-atz', 'DSDPBEP86D2OPT-unCP-atz', 'MP2-unCP-atz'], sset='tt-5min')
[docs] def plot_minn_modelchems(self):
self.plot_modelchems(
['DLDFD-unCP-adz', 'M052X-unCP-adz', 'M062X-unCP-adz', 'M08HX-unCP-adz', 'M08SO-unCP-adz', 'M11-unCP-adz',
'M11L-unCP-adz',
'DLDFD-CP-adz', 'M052X-CP-adz', 'M062X-CP-adz', 'M08HX-CP-adz', 'M08SO-CP-adz', 'M11-CP-adz',
'M11L-CP-adz'])
self.plot_modelchems(
['DlDFD-unCP-atz', 'M052X-unCP-atz', 'M062X-unCP-atz', 'M08HX-unCP-atz', 'M08SO-unCP-atz', 'M11-unCP-atz',
'M11L-unCP-atz',
'DLDFD-CP-atz', 'M052X-CP-atz', 'M062X-CP-atz', 'M08HX-CP-atz', 'M08SO-CP-atz', 'M11-CP-atz',
'M11L-CP-atz'])
[docs] def make_dhdft_Table_I(self):
"""Generate the in-manuscript summary slat table for DHDFT.
"""
self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3',
'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX',
'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV',
'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3',
'MP2', 'SCSNMP2', 'SCSMIMP2', 'MP2CF12', 'SCMICCSDAF12',
'SAPTDFT', 'SAPT0S', 'SAPT2P', 'SAPT3M', 'SAPT2PCM'],
bas=['adz', 'atz'],
tableplan=self.table_scrunch,
opt=['CP', 'unCP'], err=['mae'],
subjoin=None,
plotpath=None,
standalone=False, filename='tblssets_ex1')
[docs] def make_dhdft_Table_II(self):
"""Generate the in-manuscript CP slat table for DHDFT.
"""
self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3',
'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX',
'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV',
'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3', 'MP2'],
bas=['adz', 'atz'],
tableplan=self.table_merge_abbr,
opt=['CP'], err=['mae'],
subjoin=True,
plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix
standalone=False, filename='tblssets_ex2')
[docs] def make_dhdft_Table_III(self):
"""Generate the in-manuscript unCP slat table for DHDFT.
"""
self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3',
'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX',
'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV',
'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3', 'MP2'],
bas=['adz', 'atz'],
tableplan=self.table_merge_abbr,
opt=['unCP'], err=['mae'],
subjoin=True,
plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix
standalone=False, filename='tblssets_ex3')
[docs] def make_dhdft_Tables_SII(self):
"""Generate the subset details suppmat Part II tables and their indices for DHDFT.
"""
self.table_wrapper(mtd=['B97D3', 'PBED3', 'M11L', 'DLDFD', 'B3LYPD3',
'PBE0D3', 'WB97XD', 'M052X', 'M062X', 'M08HX',
'M08SO', 'M11', 'VV10', 'LCVV10', 'WB97XV',
'PBE02', 'WB97X2', 'DSDPBEP86D2OPT', 'B2PLYPD3'], # 'MP2']
bas=['adz', 'atz'],
tableplan=self.table_merge_suppmat,
opt=['CP', 'unCP'], err=['mae', 'mape'],
subjoin=False,
plotpath='analysis/flats/mplflat_', # proj still has 'mpl' prefix
standalone=False, filename='tblssets')
[docs] def make_dhdft_Tables_SIII(self):
"""Generate the per-reaction suppmat Part III tables and their indices for DHDFT.
"""
self.table_reactions(
['B97D3-unCP-adz', 'B97D3-CP-adz', 'B97D3-unCP-atz', 'B97D3-CP-atz',
'PBED3-unCP-adz', 'PBED3-CP-adz', 'PBED3-unCP-atz', 'PBED3-CP-atz',
'M11L-unCP-adz', 'M11L-CP-adz', 'M11L-unCP-atz', 'M11L-CP-atz',
'DLDFD-unCP-adz', 'DLDFD-CP-adz', 'DLDFD-unCP-atz', 'DLDFD-CP-atz',
'B3LYPD3-unCP-adz', 'B3LYPD3-CP-adz', 'B3LYPD3-unCP-atz', 'B3LYPD3-CP-atz',
'PBE0D3-unCP-adz', 'PBE0D3-CP-adz', 'PBE0D3-unCP-atz', 'PBE0D3-CP-atz',
'WB97XD-unCP-adz', 'WB97XD-CP-adz', 'WB97XD-unCP-atz', 'WB97XD-CP-atz',
'M052X-unCP-adz', 'M052X-CP-adz', 'M052X-unCP-atz', 'M052X-CP-atz',
'M062X-unCP-adz', 'M062X-CP-adz', 'M062X-unCP-atz', 'M062X-CP-atz',
'M08HX-unCP-adz', 'M08HX-CP-adz', 'M08HX-unCP-atz', 'M08HX-CP-atz',
'M08SO-unCP-adz', 'M08SO-CP-adz', 'M08SO-unCP-atz', 'M08SO-CP-atz',
'M11-unCP-adz', 'M11-CP-adz', 'M11-unCP-atz', 'M11-CP-atz',
'VV10-unCP-adz', 'VV10-CP-adz', 'VV10-unCP-atz', 'VV10-CP-atz',
'LCVV10-unCP-adz', 'LCVV10-CP-adz', 'LCVV10-unCP-atz', 'LCVV10-CP-atz',
'WB97XV-unCP-adz', 'WB97XV-CP-adz', 'WB97XV-unCP-atz', 'WB97XV-CP-atz',
'PBE02-unCP-adz', 'PBE02-CP-adz', 'PBE02-unCP-atz', 'PBE02-CP-atz',
'WB97X2-unCP-adz', 'WB97X2-CP-adz', 'WB97X2-unCP-atz', 'WB97X2-CP-atz',
'DSDPBEP86D2OPT-unCP-adz', 'DSDPBEP86D2OPT-CP-adz', 'DSDPBEP86D2OPT-unCP-atz', 'DSDPBEP86D2OPT-CP-atz',
'B2PLYPD3-unCP-adz', 'B2PLYPD3-CP-adz', 'B2PLYPD3-unCP-atz', 'B2PLYPD3-CP-atz'],
# 'MP2-unCP-adz', 'MP2-CP-adz', 'MP2-unCP-atz', 'MP2-CP-atz'],
standalone=False, filename='tblrxn_all')
[docs]class ThreeDatabases(Database):
"""
"""
def __init__(self, pythonpath=None):
"""Initialize ThreeDatabases object from Database"""
Database.__init__(self, ['s22', 'a24', 'hsg'], dbse='DB3', pythonpath=None)
# load up data and definitions
self.load_qcdata_byproject('pt2')
self.load_qcdata_byproject('dilabio')
self.load_qcdata_byproject('f12dilabio')
self.load_subsets()
self.define_supersubsets()
self.define_supermodelchems()
[docs] def define_supersubsets(self):
"""
"""
self.sset['tt'] = ['default', 'default', 'default']
self.sset['hb'] = ['hb', 'hb', 'hb']
self.sset['mx'] = ['mx', 'mx', 'mx']
self.sset['dd'] = ['dd', 'dd', 'dd']
self.sset['mxdd'] = ['mxdd', 'mxdd', 'mxdd']
self.sset['pp'] = ['mxddpp', 'mxddpp', 'mxddpp']
self.sset['np'] = ['mxddnp', 'mxddnp', 'mxddnp']
self.sset['tt-5min'] = ['default', 'default', 'default']
self.sset['hb-5min'] = ['hb', 'hb', 'hb']
self.sset['mx-5min'] = ['mx', 'mx', 'mx']
self.sset['dd-5min'] = ['dd', 'dd', 'dd']
self.sset['mxdd-5min'] = ['mxdd', 'mxdd', 'mxdd']
self.sset['pp-5min'] = ['mxddpp', 'mxddpp', 'mxddpp']
self.sset['np-5min'] = ['mxddnp', 'mxddnp', 'mxddnp']
self.sset['weak'] = ['weak', 'weak', 'weak']
self.sset['weak_hb'] = ['weak_hb', None, 'weak_hb']
self.sset['weak_mx'] = ['weak_mx', 'weak_mx', 'weak_mx']
self.sset['weak_dd'] = ['weak_dd', 'weak_dd', 'weak_dd']
[docs] def define_supermodelchems(self):
"""
"""
self.mc['CCSD-CP-adz'] = ['CCSD-CP-adz', 'CCSD-CP-hadz', 'CCSD-CP-adz']
self.mc['CCSD-CP-atz'] = ['CCSD-CP-atz', 'CCSD-CP-hatz', 'CCSD-CP-atz']
self.mc['CCSD-CP-adtz'] = ['CCSD-CP-adtz', 'CCSD-CP-hadtz', 'CCSD-CP-adtz']
self.mc['CCSD-CP-adtzadz'] = ['CCSD-CP-adtzadz', 'CCSD-CP-adtzhadz', 'CCSD-CP-adtzadz']
self.mc['CCSD-CP-atzadz'] = ['CCSD-CP-atzadz', 'CCSD-CP-atzhadz', 'CCSD-CP-atzadz']
self.mc['CCSD-CP-atqzadz'] = ['CCSD-CP-atqzadz', 'CCSD-CP-atqzhadz', 'CCSD-CP-atqzadz']
self.mc['CCSD-CP-atzadtz'] = ['CCSD-CP-atzadtz', 'CCSD-CP-atzhadtz', 'CCSD-CP-atzadtz']
self.mc['CCSD-CP-atqzadtz'] = ['CCSD-CP-atqzadtz', 'CCSD-CP-atqzhadtz', 'CCSD-CP-atqzadtz']
self.mc['CCSD-CP-atqzatz'] = ['CCSD-CP-atqzatz', 'CCSD-CP-atqzhatz', 'CCSD-CP-atqzatz']
self.mc['CCSDT-CP-adz'] = ['CCSDT-CP-adz', 'CCSDT-CP-hadz', 'CCSDT-CP-adz']
self.mc['CCSDT-CP-atz'] = ['CCSDT-CP-atz', 'CCSDT-CP-hatz', 'CCSDT-CP-atz']
self.mc['CCSDT-CP-adtz'] = ['CCSDT-CP-adtz', 'CCSDT-CP-hadtz', 'CCSDT-CP-adtz']
self.mc['CCSDT-CP-adtzadz'] = ['CCSDT-CP-adtzadz', 'CCSDT-CP-adtzhadz', 'CCSDT-CP-adtzadz']
self.mc['CCSDT-CP-atzadz'] = ['CCSDT-CP-atzadz', 'CCSDT-CP-atzhadz', 'CCSDT-CP-atzadz']
self.mc['CCSDT-CP-atqzadz'] = ['CCSDT-CP-atqzadz', 'CCSDT-CP-atqzhadz', 'CCSDT-CP-atqzadz']
self.mc['CCSDT-CP-atzadtz'] = ['CCSDT-CP-atzadtz', 'CCSDT-CP-atzhadtz', 'CCSDT-CP-atzadtz']
self.mc['CCSDT-CP-atqzadtz'] = ['CCSDT-CP-atqzadtz', 'CCSDT-CP-atqzhadtz', 'CCSDT-CP-atqzadtz']
self.mc['CCSDT-CP-atqzatz'] = ['CCSDT-CP-atqzatz', 'CCSDT-CP-atqzhatz', 'CCSDT-CP-atqzatz']
# print certain statistic for all 4 db and summary and indiv sys if min or max
fnreservoir = {}
fnreservoir['blankslat'] = r"""Errors with respect to Benchmark. Guide lines are at 0, 0.3, and 1.0 kcal/mol overbound ($-$) and underbound ($+$)."""
fnreservoir['5min'] = r"""Only equilibrium and near-equilibrium systems included. (All S22 and HSG, 50/194 NBC10, 28/118 HBC6.)"""
fnreservoir['liliowa'] = r"""{0}MAE (dark by {1} kcal/mol) for subsets in residue classes cation, anion, polar, aliphatic, \& aromatic (L to R)."""
fnreservoir['flat'] = r"""{0}Errors with respect to benchmark within $\pm${1} kcal/mol. Guide lines are at {2} overbound ($-$) and underbound ($+$)."""