Source code for qcdb.libmintsbasissetparser

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import re
import sys
from .exceptions import *
from .libmintsgshell import *
if sys.version_info >= (3,0):
    basestring = str

class Gaussian94BasisSetParser(object):
    """Class for parsing basis sets from a text file in Gaussian 94 format.

    Translated directly from the Psi4 libmints class written by
    Justin M. Turney and Andrew C. Simmonett.

    """

    # Regular expressions used by parse(). All literals are raw strings so
    # that regex escapes such as \s and \d are not treated as (invalid)
    # string escapes by the Python compiler.
    _CARTESIAN = re.compile(r'^\s*cartesian\s*', re.IGNORECASE)
    _SPHERICAL = re.compile(r'^\s*spherical\s*', re.IGNORECASE)
    _COMMENT = re.compile(r'^\s*\!.*')  # line starts with !
    _SEPARATOR = re.compile(r'^\s*\*\*\*\*')  # line starts with ****
    # match 'C 0', 'Al c 0', 'P p88 p_pass 0' not 'Ofail 0', 'h99_text 0'
    _ATOM = r'(([A-Z]{1,3}\d*)|([A-Z]{1,3}_\w+))'
    # array of atomic symbols terminated by 0
    _ATOM_ARRAY = re.compile(r'^\s*((' + _ATOM + r'\s+)+)0\s*$', re.IGNORECASE)
    _SHELL = re.compile(r'^\s*(\w+)\s*(\d+)\s*(-?\d+\.\d+)')  # Match beginning of contraction
    _BLANK = re.compile(r'^\s*$')
    _NUMBER = r'((?:[-+]?\d*\.\d+(?:[DdEe][-+]?\d+)?)|(?:[-+]?\d+\.\d*(?:[DdEe][-+]?\d+)?))'
    # Match s, p, d, f, g, ... functions
    _PRIMITIVES1 = re.compile(r'^\s*' + _NUMBER + r'\s+' + _NUMBER + r'.*')
    # match sp functions
    _PRIMITIVES2 = re.compile(r'^\s*' + _NUMBER + r'\s+' + _NUMBER + r'\s+' + _NUMBER + r'.*')

    # Shell letter -> angular momentum; -1 marks letters with no assignment.
    #             a   b   c  d   e  f  g  h  i   j  k  l  m   n   o  p   q   r  s   t   u   v   w   x   y   z
    _SHELL_TO_AM = dict(zip(
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
        [-1, -1, -1, 2, -1, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 1, 12, 13, 0, 14, 15, 16, 17, 18, 19, 20]))

    def __init__(self, forced_puream=None):
        """Constructor.

        *forced_puream* of None lets the basis set data choose between
        spherical and Cartesian functions; any other value forces the
        choice (true for spherical/pure, false for Cartesian).

        """
        # If the parser needs to force spherical or cartesian (e.g., loading old guess)
        self.force_puream_or_cartesian = forced_puream is not None
        # Is the forced value to use puream?  (Otherwise force Cartesian).
        self.forced_is_puream = False if forced_puream is None else forced_puream
        # string filename
        self.filename = None

    def load_file(self, filename, basisname=None):
        """Load and return the file to be used by parse.

        Return only the portion of *filename* pertaining to *basisname* if
        specified (for multi-basisset files), otherwise the entire file, as
        a list of whitespace-stripped strings. A section starts after a
        ``[basisname]`` line and runs to the next ``[...]`` line or the end
        of the file; the result is empty if no such section exists.

        Raises BasisSetFileNotFound if the file cannot be opened and
        ValidationError if the file is empty.

        """
        # string filename
        self.filename = filename
        basis_separator = re.compile(r'^\s*\[\s*(.*?)\s*\]\s*$')

        # Loads an entire file.
        try:
            infile = open(filename, 'r')
        except IOError:
            raise BasisSetFileNotFound("""BasisSetParser::parse: Unable to open basis set file: %s""" % (filename))
        # The context manager guarantees the handle is closed, including
        # when reading fails.
        with infile:
            contents = infile.readlines()
        if not contents:
            raise ValidationError("""BasisSetParser::parse: given filename '%s' is blank.""" % (filename))

        # If no basisname was given always save every line.
        if basisname is None:
            return [text.strip() for text in contents]

        lines = []
        found_basisname = False
        for text in contents:
            text = text.strip()
            header = basis_separator.match(text)
            if found_basisname:
                # If we find another [*] we're done.
                if header:
                    break
                lines.append(text)
            elif header and header.group(1) == basisname:
                # Header of the requested basis: start retaining lines.
                found_basisname = True
        return lines

    def parse(self, symbol, dataset):
        """Given a string, parse for the basis set needed for atom.

        * @param symbol atom symbol to look for in dataset (compared
          against the upper-cased symbols found in the data)
        * @param dataset data set to look through

        dataset can be list of lines or a single string which will be
        converted to list of lines.

        Returns ``(shell_list, msg)`` for the first entry whose atom list
        contains *symbol*, where ``shell_list`` holds one ShellInfo per
        parsed shell and ``msg`` records the line number of the atom line.
        Returns ``(None, None)`` if no entry matches. Raises
        ValidationError on malformed or truncated shell data.

        """
        lines = self._as_lines(dataset)

        # Basis type.
        gaussian_type = 'Pure'
        if self.force_puream_or_cartesian and not self.forced_is_puream:
            gaussian_type = 'Cartesian'

        # Need a dummy center for the shell.
        center = [0.0, 0.0, 0.0]

        lineno = 0
        while lineno < len(lines):
            line = lines[lineno]
            lineno += 1

            # Ignore blank lines
            if self._BLANK.match(line):
                continue

            # Look for Cartesian or Spherical, unless the caller forced it.
            # TODO: honor a changed psi4 global PUREAM option here.
            if not self.force_puream_or_cartesian:
                if self._CARTESIAN.match(line):
                    gaussian_type = 'Cartesian'
                    continue
                if self._SPHERICAL.match(line):
                    gaussian_type = 'Pure'
                    continue

            if self._COMMENT.match(line) or self._SEPARATOR.match(line):
                continue

            # Match: H    0
            # or:    H    O...     0
            atoms = self._ATOM_ARRAY.match(line)
            if not atoms:
                continue

            # Check the captures and see if this basis set is for the atom we need.
            if symbol not in [x.upper() for x in atoms.group(1).split()]:
                continue

            msg = """line %5d""" % (lineno)
            return self._read_shells(lines, lineno, gaussian_type, center), msg

        return None, None

    @staticmethod
    def _as_lines(dataset):
        """Split a string dataset on newlines; return any other dataset unchanged."""
        try:
            string_types = basestring  # Python 2 builtin or module-level alias
        except NameError:
            string_types = str
        if isinstance(dataset, string_types):
            return dataset.split('\n')
        return dataset

    @staticmethod
    def _line_at(lines, lineno):
        """Return lines[lineno], raising ValidationError if the data ends first."""
        if lineno >= len(lines):
            raise ValidationError("""Gaussian94BasisSetParser::parse: Unexpected end of basis set data after line %d""" % (lineno))
        return lines[lineno]

    @staticmethod
    def _to_float(token):
        """Convert a number that may use a Fortran-style D/d exponent marker."""
        return float(token.replace('D', 'e').replace('d', 'e'))

    def _read_shells(self, lines, lineno, gaussian_type, center):
        """Read shells starting at lines[lineno] up to the closing '****' line."""
        shell_list = []

        # Read in the next line
        line = self._line_at(lines, lineno)
        lineno += 1

        # Need to do the following until we match a "****" which is the end of the basis set
        while not self._SEPARATOR.match(line):
            # Match shell information
            header = self._SHELL.match(line)
            if not header:
                raise ValidationError("""Gaussian94BasisSetParser::parse: Expected shell information, but got: line %d: %s""" % (lineno, line))
            shell_type = str(header.group(1)).upper()
            nprimitive = int(header.group(2))
            scale = float(header.group(3))

            # One letter is a plain shell; two letters (SP, PD, DF, FG, ...)
            # share exponents and carry one contraction column per letter.
            if len(shell_type) not in (1, 2):
                raise ValidationError("""Gaussian94BasisSetParser::parse: Unable to parse basis sets with spd, or higher grouping""")
            ams = [self._SHELL_TO_AM[letter] for letter in shell_type]

            exponents, contractions, lineno = self._read_primitives(
                lines, lineno, nprimitive, scale, len(shell_type))

            # We have a full shell, push it to the basis set
            for am, coefficients in zip(ams, contractions):
                shell_list.append(ShellInfo(am, coefficients, exponents,
                                            gaussian_type, 0, center, 0, 'Unnormalized'))

            line = self._line_at(lines, lineno)
            lineno += 1

        return shell_list

    def _read_primitives(self, lines, lineno, nprimitive, scale, ncontraction):
        """Read nprimitive lines; return (exponents, scaled contraction columns, next lineno)."""
        if ncontraction == 1:
            pattern = self._PRIMITIVES1
            template = """Gaussian94BasisSetParser::parse: Unable to match an exponent with one contraction: line %d: %s"""
        else:
            pattern = self._PRIMITIVES2
            template = "Gaussian94BasisSetParser::parse: Unable to match an exponent with two contractions: line %d: %s"

        exponents = []
        contractions = [[] for _ in range(ncontraction)]
        for _ in range(nprimitive):
            line = self._line_at(lines, lineno)
            lineno += 1

            what = pattern.match(line)
            if not what:
                raise ValidationError(template % (lineno, line))

            # Every captured number is converted individually so that all
            # D-exponent columns are handled, however many the line has.
            exponents.append(self._to_float(what.group(1)))
            for column, coefficients in enumerate(contractions):
                # Scale the contraction and save the information
                coefficients.append(self._to_float(what.group(column + 2)) * scale)

        return exponents, contractions, lineno