# Source code for qcdb.libmintsbasissetparser

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import os
import re
import sys
from .exceptions import *
from .libmintsgshell import *
if sys.version_info >= (3,0):
    basestring = str

class Gaussian94BasisSetParser(object):
    """Class for parsing basis sets from a text file in Gaussian 94
    format. Translated directly from the Psi4 libmints class written
    by Justin M. Turney and Andrew C. Simmonett.

    """

    def __init__(self, forced_puream=None):
        """Constructor.

        *forced_puream* left as None lets ``cartesian``/``spherical``
        lines in the parsed data select the Gaussian type. Any other
        value forces the type (e.g., loading old guess): truthy forces
        pure (spherical) functions, falsy forces Cartesian.

        """
        # If the parser needs to force spherical or cartesian (e.g., loading old guess)
        self.force_puream_or_cartesian = forced_puream is not None
        # Is the forced value to use puream?  (Otherwise force Cartesian).
        self.forced_is_puream = False if forced_puream is None else forced_puream
        # string filename; set by load_file
        self.filename = None

    def load_file(self, filename, basisname=None):
        """Load and return the file to be used by parse.  Return only
        portion of *filename* pertaining to *basisname* if specified (for
        multi-basisset files) otherwise entire file as list of strings.

        Every returned line has leading/trailing whitespace stripped.
        When *basisname* is given, the result holds the lines between
        the ``[basisname]`` header and the next ``[...]`` header (both
        excluded); it is an empty list if no such header exists.

        Raises BasisSetFileNotFound if *filename* cannot be opened and
        ValidationError if the file is empty.

        """
        # string filename
        self.filename = filename

        basis_separator = re.compile(r'^\s*\[\s*(.*?)\s*\]\s*$')

        # Loads an entire file.
        try:
            infile = open(filename, 'r')
        except IOError:
            raise BasisSetFileNotFound("""BasisSetParser::parse: Unable to open basis set file: %s""" % (filename))
        # The context manager guarantees the handle is closed, including
        # on the blank-file error path.
        with infile:
            if os.stat(filename).st_size == 0:
                raise ValidationError("""BasisSetParser::parse: given filename '%s' is blank.""" % (filename))
            contents = infile.readlines()

        # If no basisname was given always save every line.
        if basisname is None:
            return [text.strip() for text in contents]

        lines = []
        found_basisname = False
        for text in contents:
            text = text.strip()
            header = basis_separator.match(text)
            if found_basisname:
                # If we find another [*] we're done.
                if header:
                    break
                lines.append(text)
            elif header and header.group(1) == basisname:
                # Header of the requested basis: start retaining lines.
                found_basisname = True

        return lines

    @staticmethod
    def _fortran_float(text):
        """Convert *text* to float, accepting Fortran-style ``D``/``d``
        exponent markers (``1.0D+02``) as well as ``E``/``e``."""
        return float(text.replace('D', 'e').replace('d', 'e'))

    @staticmethod
    def _line_at(lines, lineno, symbol):
        """Return ``lines[lineno]``, raising ValidationError (rather than
        a bare IndexError) when the data ends inside the block being
        read for *symbol*."""
        try:
            return lines[lineno]
        except IndexError:
            raise ValidationError("""Gaussian94BasisSetParser::parse: Unexpected end of data while reading basis set for %s (missing '****' terminator?): line %d""" % (symbol, lineno))

    def parse(self, symbol, dataset):
        """Given a string, parse for the basis set needed for atom.
        * @param symbol atom symbol to look for in dataset; it is compared
          against the upper-cased labels of each atom line, so it should
          be upper case
        * @param dataset data set to look through
        dataset can be list of lines or a single string which will be converted to list of lines

        Returns ``(shell_list, msg)`` for the first block whose atom line
        lists *symbol*: *shell_list* holds one ShellInfo per contraction
        (two for a two-letter shell such as SP) and *msg* is
        ``"line %5d"`` with the 1-based number of that atom line.
        Returns ``(None, None)`` if no block lists *symbol*.

        Raises ValidationError on malformed shell or primitive lines, on
        shells grouping more than two angular momenta, and when the data
        ends before the block's ``****`` terminator.

        """
        # Accept text on both Python 2 (str/unicode) and Python 3 (str).
        if isinstance(dataset, (str, type(u''))):
            lines = dataset.split('\n')
        else:
            lines = dataset

        # Regular expressions that we'll be checking for.
        cartesian = re.compile(r'^\s*cartesian\s*', re.IGNORECASE)
        spherical = re.compile(r'^\s*spherical\s*', re.IGNORECASE)
        comment = re.compile(r'^\s*\!.*')  # line starts with !
        separator = re.compile(r'^\s*\*\*\*\*')  # line starts with ****
        ATOM = r'(([A-Z]{1,3}\d*)|([A-Z]{1,3}_\w+))'  # match 'C 0', 'Al c 0', 'P p88 p_pass 0' not 'Ofail 0', 'h99_text 0'
        atom_array = re.compile(r'^\s*((' + ATOM + r'\s+)+)0\s*$', re.IGNORECASE)  # array of atomic symbols terminated by 0
        shell = re.compile(r'^\s*(\w+)\s*(\d+)\s*(-?\d+\.\d+)')  # Match beginning of contraction
        blank = re.compile(r'^\s*$')
        NUMBER = r"((?:[-+]?\d*\.\d+(?:[DdEe][-+]?\d+)?)|(?:[-+]?\d+\.\d*(?:[DdEe][-+]?\d+)?))"
        primitives1 = re.compile(r'^\s*' + NUMBER + r'\s+' + NUMBER + r'.*')  # Match s, p, d, f, g, ... functions
        primitives2 = re.compile(r'^\s*' + NUMBER + r'\s+' + NUMBER + r'\s+' + NUMBER + r'.*')  # match sp functions

        # Shell letter -> angular momentum; -1 marks letters with no
        # assigned angular momentum in this table.
        #        a   b   c  d   e  f  g  h  i   j  k  l  m   n   o  p   q   r  s   t   u   v   w   x   y   z
        angmo = [-1, -1, -1, 2, -1, 3, 4, 5, 6, -1, 7, 8, 9, 10, 11, 1, 12, 13, 0, 14, 15, 16, 17, 18, 19, 20]
        shell_to_am = dict(zip('ABCDEFGHIJKLMNOPQRSTUVWXYZ', angmo))

        # Basis type: pure unless the caller forced Cartesian; may still
        # be changed by the data when nothing was forced.
        gaussian_type = 'Pure'
        if self.force_puream_or_cartesian and not self.forced_is_puream:
            gaussian_type = 'Cartesian'

        # Need a dummy center for the shell.
        center = [0.0, 0.0, 0.0]

        shell_list = []
        lineno = 0

        while lineno < len(lines):
            line = lines[lineno]
            lineno += 1

            # Ignore blank lines
            if blank.match(line):
                continue

            # Look for Cartesian or Spherical (only when the puream
            # setting wasn't forced by the caller).
            if not self.force_puream_or_cartesian:
                # TODO: let a user-changed psi4 global option PUREAM
                # override the value read from the data in both branches.
                if cartesian.match(line):
                    gaussian_type = 'Cartesian'
                    continue
                if spherical.match(line):
                    gaussian_type = 'Pure'
                    continue

            if comment.match(line) or separator.match(line):
                continue

            # Match: H    0
            # or:    H    O...     0
            atoms = atom_array.match(line)
            if not atoms:
                continue
            # Check the captures and see if this basis set is for the atom we need.
            if symbol not in [x.upper() for x in atoms.group(1).split()]:
                continue

            msg = """line %5d""" % (lineno)

            # Read in the next line
            line = self._line_at(lines, lineno, symbol)
            lineno += 1

            # Need to do the following until we match a "****" which is the end of the basis set
            while not separator.match(line):
                # Match shell information
                header = shell.match(line)
                if not header:
                    raise ValidationError("""Gaussian94BasisSetParser::parse: Expected shell information, but got: line %d: %s""" % (lineno, line))
                shell_type = str(header.group(1)).upper()
                nprimitive = int(header.group(2))
                scale = float(header.group(3))

                if len(shell_type) > 2:
                    raise ValidationError("""Gaussian94BasisSetParser::parse: Unable to parse basis sets with spd, or higher grouping""")

                # One angular momentum per letter; two letters handle
                # instances of SP, PD, DF, FG, ...
                momenta = [shell_to_am[letter] for letter in shell_type]
                if len(momenta) == 1:
                    primitive = primitives1
                    mismatch = """Gaussian94BasisSetParser::parse: Unable to match an exponent with one contraction: line %d: %s"""
                else:
                    primitive = primitives2
                    mismatch = "Gaussian94BasisSetParser::parse: Unable to match an exponent with two contractions: line %d: %s"

                exponents = []
                contractions = [[] for _ in momenta]
                for _ in range(nprimitive):
                    line = self._line_at(lines, lineno, symbol)
                    lineno += 1

                    values = primitive.match(line)
                    if not values:
                        raise ValidationError(mismatch % (lineno, line))
                    # Convert each captured number on its own so that
                    # every column may use a Fortran D exponent.
                    exponents.append(self._fortran_float(values.group(1)))
                    for column, coefficients in enumerate(contractions):
                        # Scale the contraction and save the information
                        coefficients.append(self._fortran_float(values.group(column + 2)) * scale)

                # We have a full shell, push it to the basis set. Shells
                # of a two-letter group share the same exponent list.
                for am, coefficients in zip(momenta, contractions):
                    shell_list.append(ShellInfo(am, coefficients, exponents,
                        gaussian_type, 0, center, 0, 'Unnormalized'))

                line = self._line_at(lines, lineno, symbol)
                lineno += 1

            # Only the first block listing the symbol is used.
            return shell_list, msg

        # No block in the data lists the requested symbol.
        return None, None