#
# @BEGIN LICENSE
#
# Psi4: an open-source quantum chemistry software package
#
# Copyright (c) 2007-2018 The Psi4 Developers.
#
# The copyrights for code used from other parties are included in
# the corresponding files.
#
# This file is part of Psi4.
#
# Psi4 is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, version 3.
#
# Psi4 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with Psi4; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# @END LICENSE
#
"""Module with functions to parse the input file and convert
Psithon into standard Python. Particularly, forms psi4
module calls that access the C++ side of Psi4.
"""
## Force Python 3 print syntax, if this is python 2.X
#if sys.hexversion < 0x03000000:
from __future__ import print_function
from __future__ import absolute_import
import re
import os
import sys
import uuid
from psi4 import core
from psi4.driver.qcdb.molparse import pubchem
from psi4.driver.p4util.util import set_memory
from psi4.driver.p4util.exceptions import *
# Globally available regex: matches one whole ``pubchem: <text>`` line.
# Group 1 is the full line including its newline; group 2 is the text after
# the colon. Case-insensitive, anchored per line via MULTILINE.
pubchemre = re.compile(r'^(\s*pubchem\s*:\s*(.*)\n)$', re.MULTILINE | re.IGNORECASE)
# Input-file contents to be preserved from the processor. Keys are short
# generated ids (written by process_cfour_command, read back by
# process_literal_blocks); values are the raw block text.
literals = {}
# Experimental - whether to also run py statements as they're parsed from psithon
# (process_option applies options through ``core`` immediately when this is True).
runalso = False
def bad_option_syntax(line):
    """Abort parsing because *line* uses syntax the preprocessor cannot handle.

    Always raises ``TestComparisonError`` with a message that embeds the
    offending text; this function never returns normally.
    """
    raise TestComparisonError('Unsupported syntax:\n\n%s\n\n' % (line))
def process_word_quotes(matchobj):
    """Return the replacement text for one word matched by ``quotify``.

    Group 2 of *matchobj* is the optional leading ``$`` and group 3 is the
    word itself. The word is returned

    * bare (dollar sign dropped) when it was flagged as a Python variable,
    * unchanged when it looks like a number or is already quoted,
    * wrapped in double quotes otherwise.

    Raises ``TestComparisonError`` if a ``$``-prefixed word does not start
    with a letter.
    """
    has_dollar = matchobj.group(2)
    word = matchobj.group(3)

    if has_dollar:
        # A python variable: it has to begin with a letter
        if not re.match(r'^[A-Za-z][\w]*', word):
            raise TestComparisonError("Invalid Python variable: %s" % (word))
        return word

    looks_numeric = re.match(r'^-?\d+\.?\d*(?:[Ee]-?\d+)?$', word)
    if looks_numeric:
        return word

    already_quoted = re.match(r'^\'.*\'$', word) or re.match(r'^\".*\"$', word)
    if already_quoted:
        return word

    # Anything else is treated as a string literal
    return "\"%s\"" % (word)
def quotify(string, isbasis=False):
"""Function to wrap anything that looks like a string in quotes
and to remove leading dollar signs from python variables. When *basis*
is True, allows commas, since basis sets may have commas and are assured to
not involve arrays.
"""
# This wraps anything that looks like a string in quotes, and removes leading
# dollar signs from python variables
if isbasis:
wordre = re.compile(r'(([$]?)([-+()*.,\w\"\'/\\]+))')
else:
wordre = re.compile(r'(([$]?)([-+()*.\w\"\'/\\]+))')
string = wordre.sub(process_word_quotes, string)
return string
def dequotify(string):
    """Strip one pair of enclosing double quotes from *string*, if present.

    Returns the text between the quotes when *string* both starts and ends
    with ``"`` and is at least two characters long; otherwise returns
    *string* unchanged. The length guard means an empty string is returned
    as-is instead of raising ``IndexError``, and a lone ``"`` is not
    mistaken for a quoted pair.
    """
    if len(string) >= 2 and string[0] == '"' and string[-1] == '"':
        return string[1:-1]
    return string
def process_option(spaces, module, key, value, line):
    """Translate one ``set`` keyword/value pair into a ``core`` call string.

    *module* and *key* are upper-cased and *value* is passed through
    ``quotify`` (commas are kept inside words when the key contains
    ``BASIS``). An empty, blank, ``GLOBAL`` or ``GLOBALS`` module yields a
    ``core.set_global_option`` line; anything else yields a
    ``core.set_local_option`` line. *spaces* is prepended as indentation.
    *line* is accepted but not used here.

    When the module-level ``runalso`` flag is set, the option is also
    applied immediately through ``core``.
    """
    module = module.upper()
    key = key.upper()
    value = quotify(value.strip(), isbasis=('BASIS' in key))

    targets_globals = module in ("GLOBALS", "GLOBAL", "") or module.isspace()
    if targets_globals:
        # If it's really a global, we need slightly different syntax
        if runalso:
            core.set_global_option(key, dequotify(value))
        return "%score.set_global_option(\"%s\", %s)\n" % (spaces, key, value)

    # A local option carries the module name as well
    if runalso:
        core.set_local_option(module, key, dequotify(value))
    return "%score.set_local_option(\"%s\", \"%s\", %s)\n" % (spaces, module, key, value)
def process_set_command(matchobj):
    """Process a match of a single-line ``set (module_list) key value``.

    Group 1 is the indentation, group 2 the optional comma-separated module
    list, group 3 the key and group 4 the value (``[value_list]``,
    ``$value`` or ``value``). One option line is produced per listed
    module; a missing module list produces a single line with an empty
    module name.
    """
    module_string = matchobj.group(2) or ""
    return "".join(
        process_option(matchobj.group(1), module, matchobj.group(3), matchobj.group(4), matchobj.group(0))
        for module in module_string.split(","))
def process_set_commands(matchobj):
    """Process a match of a ``set name? { ... }`` block.

    Group 1 is the indentation, group 2 the optional comma-separated module
    list and group 3 the block body. The body is walked line by line; lines
    are concatenated until parentheses/brackets balance, so a value may
    span several lines. Each completed command is split into key and value
    and handed to ``process_option`` once per listed module.

    Returns the concatenated option lines. A completed command that does
    not look like ``key value`` / ``key = value`` is reported through
    ``bad_option_syntax``, which raises.
    """
    spaces = matchobj.group(1)
    module_string = matchobj.group(2) or ""
    # Lines are deliberately left unstripped: the option regex tolerates
    # leading whitespace and process_option strips the value itself.
    command_lines = matchobj.group(3).split('\n')
    option_re = re.compile(r'^\s*(\w+)[\s=]+(.*?)$')

    result = ""
    command = ""
    for module in module_string.split(","):
        for line in command_lines:
            # Accumulate; a command may span several physical lines
            command += line
            if not check_parentheses_and_brackets(command, 0):
                # Brackets still open: keep appending lines until they close
                continue
            # Ignore blank/empty lines
            if not line or line.isspace():
                continue
            option = option_re.match(command)
            if option:
                result += process_option(spaces, module, option.group(1), option.group(2), command)
                # Start collecting the next command
                command = ""
            else:
                bad_option_syntax(command)
    return result
def process_from_file_command(matchobj):
    """Process a match of ``from_file: <path>`` inside a molecule block.

    Group 2 of *matchobj* is passed to ``core.mol_from_file`` and the
    returned text is cleaned up: blank lines are dropped and repeated lines
    are kept only once. The surviving lines are returned newline-terminated
    in their original order.

    The order is preserved on purpose. Collecting the lines in a plain
    ``set`` makes the output order depend on string hashing, so the same
    input could produce differently ordered geometries between runs.
    """
    filename = matchobj.group(2)
    contents = core.mol_from_file(filename, 1)

    seen = set()
    kept = []
    for line in contents.split('\n'):
        if line.strip() == '' or line in seen:
            continue
        seen.add(line)
        kept.append(line)
    return "".join(line + "\n" for line in kept)
def process_pubchem_command(matchobj):
    """Process a match of ``pubchem: <name or CID>`` inside a molecule block.

    Group 2 of *matchobj* is the query. A purely numeric query is treated
    as a Chemical ID and looked up directly; anything else is searched for
    by name. Returns the molecule string on success. On failure it returns
    a message string rather than raising: either the text of the exception
    raised by the lookup, or a ``PubchemError`` message when there are no
    results or several results without an exact name match.
    """

    def _error_text(exc):
        # Prefer a ``message`` attribute when the exception provides one;
        # standard Python 3 exceptions do not, so fall back to str().
        return getattr(exc, 'message', None) or str(exc)

    query = matchobj.group(2)
    if re.match(r'^\s*[0-9]+\s*$', query):
        # This is just a number - must be a CID
        pcobj = pubchem.PubChemObj(int(query), '', '')
        try:
            return pcobj.getMoleculeString()
        except Exception as e:
            return _error_text(e)

    # Search pubchem for the provided string
    try:
        results = pubchem.getPubChemResults(query)
    except Exception as e:
        return _error_text(e)

    # N.B. Anything starting with PubchemError will be handled correctly by the molecule parser
    # in libmints, which will just print the rest of the string and exit gracefully.
    if not results:
        # Nothing!
        return "PubchemError\n\tNo results were found when searching PubChem for %s.\n" % (query)
    if len(results) == 1:
        # There's only 1 result - use it
        return results[0].getMoleculeString()

    # There are multiple results. Use an exact name match if there is one,
    # otherwise return the list of candidates so the user can choose.
    msg = "\tPubchemError\n"
    msg += "\tMultiple pubchem results were found. Replace\n\n\t\tpubchem:%s\n\n" % (query)
    msg += "\twith the Chemical ID number or exact name from one of the following and re-run.\n\n"
    msg += "\t Chemical ID IUPAC Name\n\n"
    for result in results:
        msg += "%s" % (result)
        if result.name().lower() == query.lower():
            # We've found an exact match!
            return result.getMoleculeString()
    return msg
def process_molecule_command(matchobj):
    """Process a match of a ``molecule name? { ... }`` block.

    Group 1 is the indentation, group 2 the (possibly empty) molecule name
    and group 3 the geometry text. ``pubchem:`` and ``from_file:`` lines in
    the geometry are expanded first. The returned code calls
    ``core.efp_init()``, builds the molecule with ``geometry(...)``
    (assigned to *name* when one was given) and sets the default IO
    namespace to the name.

    Raises ``ValidationError`` if a non-empty name is not a valid Python
    identifier.
    """
    spaces = matchobj.group(1)
    name = matchobj.group(2)

    geometry = pubchemre.sub(process_pubchem_command, matchobj.group(3))
    from_filere = re.compile(r'^(\s*from_file\s*:\s*(.*)\n)$', re.MULTILINE | re.IGNORECASE)
    geometry = from_filere.sub(process_from_file_command, geometry)

    is_named = name != ""
    if is_named:
        if sys.version_info >= (3, 0):
            valid = name.isidentifier()
        else:
            valid = re.match(r'^[^\d\W]\w*\Z', name)
        if not valid:
            raise ValidationError('Molecule name not valid Python identifier: ' + name)

    pieces = [
        spaces,
        'core.efp_init()\n',  # clear EFP object before Molecule read in
        spaces,
    ]
    if is_named:
        pieces.append('%s = ' % (name))
    pieces.append('geometry("""%s"""' % (geometry))
    if is_named:
        pieces.append(',"%s"' % (name))
    pieces.append(")\n")
    pieces.append('%score.IO.set_default_namespace("%s")' % (spaces, name))
    return "".join(pieces)
def process_literal_blocks(matchobj):
    """Process a match of a ``literals_psi4_yo-...`` placeholder.

    Group 1 of *matchobj* is the key under which the original block text
    was stored in the module-level ``literals`` dict; that text is
    returned. Raises ``KeyError`` if the key is unknown.
    """
    literal_id = matchobj.group(1)
    return literals[literal_id]
def process_cfour_command(matchobj):
    """Process a match of a ``cfour name? { ... }`` block.

    The block body (group 3) is stored verbatim in the module-level
    ``literals`` dict under a fresh 8-character key. The returned line sets
    the ``LITERAL_CFOUR`` global option to a ``literals_psi4_yo-<key>``
    placeholder, indented by group 1.
    """
    spaces = matchobj.group(1)
    name = matchobj.group(2)  # optional block name; captured but not used

    literalkey = str(uuid.uuid4())[:8]
    literals[literalkey] = matchobj.group(3)

    placeholder = 'literals_psi4_yo-' + literalkey
    return "%score.set_global_option(\"%s\", \"\"\"%s\n\"\"\")\n" % \
        (spaces, 'LITERAL_CFOUR', placeholder)
def process_extract_command(matchobj):
    """Process a match of an ``extract_subsets`` assignment.

    The matched text is kept as is and three statements are appended, each
    prefixed with group 1: one names the new molecule after its variable
    (group 2), one makes it the active molecule and one sets the default
    IO namespace to its name.
    """
    spaces = matchobj.group(1)
    name = matchobj.group(2)
    appended = [
        '%s%s.set_name("%s")' % (spaces, name, name),
        "\n%score.set_active_molecule(%s)" % (spaces, name),
        '\n%score.IO.set_default_namespace("%s")' % (spaces, name),
    ]
    return matchobj.group(0) + "".join(appended)
def process_print_command(matchobj):
    """Process a match of ``print`` and rewrite it as ``core.print_out()``.

    Group 1 is the indentation and group 2 the expression to print; the
    expression is wrapped in ``str(...)`` in the generated line.
    """
    indent, expression = matchobj.group(1), matchobj.group(2)
    return "%score.print_out(str(%s))\n" % (indent, str(expression))
def process_memory_command(matchobj):
    """Process a match of ``memory ...``.

    Group 1 is the indentation, group 2 the numeric part and group 3 the
    units. Number and units are concatenated and given to ``set_memory``
    with ``execute=False``; the value it returns is emitted in a
    ``core.set_memory_bytes`` call.
    """
    spaces, sig, units = (str(matchobj.group(idx)) for idx in (1, 2, 3))
    mem_in_bytes = set_memory(sig + units, execute=False)
    return "%score.set_memory_bytes(%d)\n" % (spaces, mem_in_bytes)
def basname(name):
    """Imitates BasisSet.make_filename() without the gbs extension.

    Lower-cases *name* and replaces ``+`` with ``p``, ``*`` with ``s`` and
    each of ``(``, ``)`` and ``,`` with ``_``.
    """
    cleaned = name.lower()
    for old, new in (('+', 'p'), ('*', 's'), ('(', '_'), (')', '_'), (',', '_')):
        cleaned = cleaned.replace(old, new)
    return cleaned
def process_basis_block(matchobj):
    """Process a match of a ``basis name? { ... }`` block.

    Group 1 is the indentation, group 2 the option being set (upper-cased),
    group 3 the optional basis name (an ``anonymous<id>`` name is generated
    when empty) and group 4 the block body.

    The returned code defines ``basisspec_psi4_yo__<name>(mol, role)``,
    registers it in ``qcdb.libmintsbasisset.basishorde`` and sets the
    global option to the basis name. Inside that function:

    * ``assign`` lines become ``set_basis_by_symbol``,
      ``set_basis_by_label`` or ``set_basis_all_atoms`` calls;
    * the remaining text is split on ``[basname]`` markers and stored in
      ``basstrings``; a body with no markers and no assign lines is stored
      whole under the block's own name.

    Raises ``TestComparisonError`` when assign lines are combined with
    shell text that carries no ``[basname]`` marker.
    """
    spaces = matchobj.group(1)
    basistype = matchobj.group(2).upper()
    name = matchobj.group(3)
    if name == '':
        name = 'anonymous' + str(uuid.uuid4())[:8]
    cleanbas = basname(name).replace('-', '')  # further remove hyphens so can be function name
    command_lines = matchobj.group(4).split('\n')

    symbol_re = re.compile(r'^\s*assign\s+(?P<symbol>[A-Z]{1,3})\s+(?P<basis>[-+*\(\)\w]+)\s*$', re.IGNORECASE)
    label_re = re.compile(
        r'^\s*assign\s+(?P<label>(?P<symbol>[A-Z]{1,3})(?:(_\w+)|(\d+))?)\s+(?P<basis>[-+*\(\)\w]+)\s*$',
        re.IGNORECASE)
    all_re = re.compile(r'^\s*assign\s+(?P<basis>[-+*\(\)\w]+)\s*$', re.IGNORECASE)
    basislabel = re.compile(r'\s*\[\s*([-*\(\)\w]+)\s*\]\s*')

    result = """%sdef basisspec_psi4_yo__%s(mol, role):\n""" % (spaces, cleanbas)
    result += """%s basstrings = {}\n""" % (spaces)

    # Peel off the assign lines; everything else is kept for the shell parser
    leftover_lines = []
    for line in command_lines:
        by_symbol = symbol_re.match(line)
        if by_symbol:
            result += """%s mol.set_basis_by_symbol("%s", "%s", role=role)\n""" % \
                (spaces, by_symbol.group('symbol'), by_symbol.group('basis'))
            continue

        by_label = label_re.match(line)
        if by_label:
            result += """%s mol.set_basis_by_label("%s", "%s", role=role)\n""" % \
                (spaces, by_label.group('label'), by_label.group('basis'))
            continue

        for_all = all_re.match(line)
        if for_all:
            result += """%s mol.set_basis_all_atoms("%s", role=role)\n""" % \
                (spaces, for_all.group('basis'))
            continue

        # Ignore blank lines and accumulate remainder
        if line and not line.isspace():
            leftover_lines.append(line.strip())

    # Now look for regular basis set definitions
    basblock = [piece for piece in basislabel.split('\n'.join(leftover_lines)) if piece]
    if len(basblock) == 1:
        # Exactly three pieces means nothing was added after the two header
        # lines, i.e. no assign line was seen.
        if len(result.split('\n')) != 3:
            message = ("Conflicting basis set specification: assign lines present but shells have no [basname] label.")
            raise TestComparisonError(message)
        # case with no [basname] markers where whole block is contents of gbs file
        result += """%s mol.set_basis_all_atoms("%s", role=role)\n""" % \
            (spaces, name)
        result += """%s basstrings['%s'] = \"\"\"\n%s\n\"\"\"\n""" % \
            (spaces, basname(name), basblock[0])
    else:
        # case with specs separated by [basname] markers: entries alternate
        # between a marker name and the shell text that follows it
        for idx in range(0, len(basblock), 2):
            marker = basblock[idx]
            shells = basblock[idx + 1]
            result += """%s basstrings['%s'] = \"\"\"\n%s\n\"\"\"\n""" % \
                (spaces, basname(marker), shells)

    result += """%s return basstrings\n""" % (spaces)
    result += """{}qcdb.libmintsbasisset.basishorde['{}'] = {}\n""" \
        .format(spaces, name.upper(), 'basisspec_psi4_yo__' + cleanbas)
    result += """%score.set_global_option(\"%s\", \"%s\")""" % (spaces, basistype, name)
    return result
def process_pcm_command(matchobj):
    """Process a match of a ``pcm name? { ... }`` block.

    The block body (group 3) is written to a scratch file named
    ``pcmsolver.<pid>.<id>.inp``, parsed with
    ``pcmsolver.parse_pcm_input`` and the scratch file is removed again.
    The returned code writes the parsed lines to ``@pcmsolver.<pid>.<id>``
    in the working directory at run time and points the
    ``PCMSOLVER_PARSED_FNAME`` global option at that file.

    Groups 1 (indentation) and 2 (name) of the match are ignored. Errors
    from importing or running ``pcmsolver`` propagate to the caller.
    """
    block = str(matchobj.group(3))  # Get input to PCMSolver

    # Import before touching the filesystem, so a missing pcmsolver does
    # not leave a scratch file behind.
    import pcmsolver

    suffix = str(os.getpid()) + '.' + str(uuid.uuid4())[:8]
    pcmsolver_fname = 'pcmsolver.' + suffix + '.inp'
    try:
        with open(pcmsolver_fname, 'w') as handle:
            handle.write(block)
        parsed_pcm = pcmsolver.parse_pcm_input(pcmsolver_fname).splitlines()
    finally:
        # Always clean up the scratch input, including when parsing fails
        if os.path.exists(pcmsolver_fname):
            os.remove(pcmsolver_fname)

    pcmsolver_parsed_fname = '@pcmsolver.' + suffix
    write_input_for_pcm = "parsedFile = os.path.join(os.getcwd(), '{}')\n".format(pcmsolver_parsed_fname)
    write_input_for_pcm += "with open(parsedFile, 'w') as tmp:\n"
    write_input_for_pcm += " tmp.write('\\n'.join({}))\n\n".format(parsed_pcm)
    write_input_for_pcm += "core.set_global_option(\'PCMSOLVER_PARSED_FNAME\', \'{}\')\n\n".format(
        pcmsolver_parsed_fname)
    return write_input_for_pcm
def process_external_command(matchobj):
    """Process a match of an ``external name? { ... }`` block.

    Group 1 is the indentation, group 2 the optional variable name
    (``extern`` when empty or blank) and group 3 the block body. The body
    is consumed in stages, each stage removing the lines it understands:

    0. blank lines are dropped;
    1. ``units`` lines select Angstrom (default) or Bohr, case-insensitively;
    2. ``basis`` lines set the basis for diffuse regions (default ``cc-pvdz``);
    3. ``df_basis_scf`` lines set the fitting basis (default ``cc-pvdz-jkfit``);
    4. ``Z x y z`` lines become point charges;
    5. the rest is read as XYZ fragments separated by ``--`` lines, each
       turned into a fitted diffuse region.

    Returns the generated code, which builds a ``QMMM`` object, stores its
    external potential in the named variable and sets the ``EXTERN``
    option to it. Raises ``TestComparisonError`` listing every line that no
    stage recognised.
    """
    spaces = str(matchobj.group(1))
    name = str(matchobj.group(2))
    if not name or name.isspace():
        name = "extern"
    block = str(matchobj.group(3))

    extern = "%sqmmm = QMMM()\n" % (spaces)

    NUMBER = "((?:[-+]?\\d*\\.\\d+(?:[DdEe][-+]?\\d+)?)|(?:[-+]?\\d+\\.\\d*(?:[DdEe][-+]?\\d+)?))"

    # Comments are all removed by this point
    # 0. Remove blank lines
    re_blank = re.compile(r'^\s*$')
    lines = [line for line in block.split('\n') if not re_blank.match(line)]

    # 1. Look for units [ang|bohr|au|a.u.] defaults to ang
    re_units = re.compile(r'^\s*units?[\s=]+((ang)|(angstrom)|(bohr)|(au)|(a\.u\.))$\s*', re.IGNORECASE)
    units = 'ang'
    remaining = []
    for line in lines:
        mobj = re_units.match(line)
        if mobj:
            # The regex ignores case, so normalise before comparing
            if mobj.group(1).lower() in ('bohr', 'au', 'a.u.'):
                units = 'bohr'
            else:
                units = 'ang'
        else:
            remaining.append(line)
    lines = remaining

    # 2. Look for basis basisname, defaults to cc-pvdz
    # 3. Look for df_basis_scf basisname, defaults to cc-pvdz-jkfit
    re_basis = re.compile(r'\s*basis[\s=]+(\S+)\s*$', re.IGNORECASE)
    re_df_basis = re.compile(r'\s*df_basis_scf[\s=]+(\S+)\s*$', re.IGNORECASE)
    basis = 'cc-pvdz'
    df_basis_scf = 'cc-pvdz-jkfit'
    remaining = []
    for line in lines:
        mobj = re_basis.match(line)
        if mobj:
            basis = mobj.group(1)
            continue
        mobj = re_df_basis.match(line)
        if mobj:
            df_basis_scf = mobj.group(1)
            continue
        remaining.append(line)
    lines = remaining

    # 4. Look for charge lines Z x y z, convert according to unit convention
    charge_re = re.compile(r'^\s*' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    add_charge = 'addChargeBohr' if units == 'bohr' else 'addChargeAngstrom'
    remaining = []
    for line in lines:
        mobj = charge_re.match(line)
        if mobj:
            extern += '%sqmmm.%s(%s,%s,%s,%s)\n' % (spaces, add_charge, mobj.group(1), mobj.group(2),
                                                   mobj.group(3), mobj.group(4))
        else:
            remaining.append(line)
    lines = remaining

    # 5. Look for diffuse regions, which are XYZ molecules separated by the usual -- lines
    spacer_re = re.compile(r'^\s*--\s*$')
    frags = [[]]
    for line in lines:
        if spacer_re.match(line):
            # Only open a new fragment if the current one has content
            if frags[-1]:
                frags.append([])
        else:
            frags[-1].append(line)

    extern += '%sextern_mol_temp = core.get_active_molecule()\n' % (spaces)

    mol_re = re.compile(r'\s*\S+\s+' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'\s*$')
    lines = []  # from here on: lines nobody could interpret
    for frag in frags:
        if not frag:
            continue
        extern += '%sexternal_diffuse = geometry("""\n' % (spaces)
        extern += '%s0 1\n' % (spaces)
        for line in frag:
            if mol_re.match(line):
                extern += '%s%s\n' % (spaces, line)
            else:
                lines.append(line)
        extern += '%sunits %s\n' % (spaces, units)
        extern += '%ssymmetry c1\n' % (spaces)
        extern += '%sno_reorient\n' % (spaces)
        extern += '%sno_com\n' % (spaces)
        extern += '%s""")\n' % (spaces)
        extern += "%sdiffuse = Diffuse(external_diffuse,'%s','%s')\n" % (spaces, basis, df_basis_scf)
        extern += '%sdiffuse.fitScf()\n' % (spaces)
        extern += '%sqmmm.addDiffuse(diffuse)\n' % (spaces)
        extern += '\n'

    extern += '%score.set_active_molecule(extern_mol_temp)\n' % (spaces)

    # 6. If there is anything left, the user messed up
    if lines:
        print('Input parsing for external {}: Extra line(s) present:'.format(name))
        raise TestComparisonError('\n'.join(lines))

    # Return is actually an ExternalPotential, not a QMMM
    extern += '%sqmmm.populateExtern()\n' % (spaces)
    extern += '%s%s = qmmm.extern\n' % (spaces, name)
    # Refer to the variable just assigned, whatever the block was named
    extern += '%score.set_global_option_python("EXTERN", %s)\n' % (spaces, name)

    return extern
def check_parentheses_and_brackets(input_string, exit_on_error):
    """Check that all ``()``, ``[]`` and ``{}`` in *input_string* are paired.

    Returns 1 when every opener is closed by the matching closer in the
    right order, and 0 otherwise. When *exit_on_error* is true, the first
    problem found raises ``TestComparisonError`` instead of returning 0.
    The message names the extra closer, the mismatched pair or the
    unmatched opener.
    """
    # opener -> the closer that must terminate it
    closer_for = {"(": ")", "[": "]", "{": "}"}
    closers = set(closer_for.values())

    open_stack = []
    for ch in input_string:
        if ch in closer_for:
            open_stack.append(ch)
        elif ch in closers:
            if not open_stack:
                # A closer with nothing open before it
                if exit_on_error:
                    raise TestComparisonError("Input error: extra %s" % (ch))
                return 0
            opench = open_stack.pop()
            if closer_for[opench] != ch:
                # Wrong type of bracket closes the most recent opener
                if exit_on_error:
                    raise TestComparisonError("Input error: %s closed with a %s" % (opench, ch))
                return 0

    if open_stack:
        if exit_on_error:
            raise TestComparisonError("Input error: Unmatched %s" % (open_stack.pop()))
        return 0
    return 1
def parse_multiline_array(input_list):
    """Squash a multiline array into a single newline-terminated line.

    Pops the first entry of *input_list* and keeps popping and appending
    following entries (stripped of surrounding whitespace) until all
    parentheses and brackets are paired. *input_list* is modified in place:
    the consumed lines are removed from its front.

    If the input runs out before the brackets balance, the bracket checker
    is called in its raising mode so that the user gets an error naming the
    bracket problem rather than a bare ``IndexError`` from the empty list.
    """
    line = input_list.pop(0)
    # Keep adding lines to the current one, until all parens match up
    while not check_parentheses_and_brackets(line, 0):
        if not input_list:
            # Nothing left to append: report the bracket problem itself
            check_parentheses_and_brackets(line, 1)
            break
        line += input_list.pop(0).strip()
    return "%s\n" % (line)
def process_multiline_arrays(inputfile):
    """Find array inputs spread across several lines and squash each into one.

    *inputfile* is split at newlines. A line that starts a
    ``set ... key [`` assignment is handed to ``parse_multiline_array``,
    which consumes as many following lines as the array needs; every other
    line is copied through. Each output line is newline-terminated.
    """
    pending = inputfile.split("\n")
    set_re = re.compile(r'^(\s*?)set\s+(?:([-,\w]+)\s+)?(\w+)[\s=]+\[.*', re.IGNORECASE)

    pieces = []
    while pending:
        if set_re.match(pending[0]):
            # Start of a set matrix [ .... line - the helper pops what it uses
            pieces.append(parse_multiline_array(pending))
        else:
            # Nothing to do - pass the line through
            pieces.append("%s\n" % (pending.pop(0)))
    return "".join(pieces)
if __name__ == "__main__":
    # Manual smoke test: run a tiny Psithon input through process_input and
    # show the generated Python.
    # NOTE(review): process_input is not defined in the visible part of this
    # file - confirm it is available when the module is run as a script.
    sample_input = """
molecule h2 {
H
H 1 R
R = .9
}
set basis 6-31G**
"""
    result = process_input(sample_input)
    print("Result\n==========================")
    print(result)