"""
This module contains the basic functions to read the user provided files and
populate a ChemicalSystem class from them. The core function of this module is
'populate_chemicalsystem_fromfiles'.
"""
import re
from itertools import chain
from .classes import Compound,Energy,Reaction,TransitionState,DiffusionTS
# Custom errors
[docs]class MissingCompounds(ValueError):
pass
[docs]class MissingTransitionStates(ValueError):
pass
# GLOBALS
DIFFUSION = '<d>'
BACKWARDS = '<='
FORWARD = '=>'
EQUILIBRIUM = '<=>'
MARKERS = {EQUILIBRIUM: {'TS':TransitionState,}, # Reversible
FORWARD : {'TS':TransitionState,}, # Direct reaction
BACKWARDS : {'TS':TransitionState,}, # Backwards reaction
DIFFUSION: {'TS':DiffusionTS,}} # Reversible diffusion
REVERSIBLE = [EQUILIBRIUM,DIFFUSION] # reactions modeled as 2 elemental steps
for mark in MARKERS:
MARKERS[mark]['matcher'] = re.compile(f'(.*)\s{mark}\s(.*)\s!(.*)')
re_is_energy = re.compile('(-?[0-9]*\.+[0-9]*)\s*?([^\s]*?)')
[docs]def is_energy(text):
"""
Returns if a given string corresponds to an energy or not. If the string is
a
Parameters
----------
text : str
A string that may be an energy
Returns
-------
bool or truth-like value
If True or True-like then the evaluated string is an energy.
"""
test = re_is_energy.findall(text)
if test:
return test[0]
else:
return False
[docs]def populate_chemicalsystem_fromfiles(chemicalsystem,file_c,file_r,
energy_unit='J/mol',relativeE=False):
"""
Fills a chemicalsystem with the compounds and reactions specified in the
compunds file and the reactions file.
Parameters
----------
chemicalsystem : pykinetic.classes.ChemicalSystem
chemical system that will be populated
file_c : str or Path
path to the correctly formatted compounds file
file_r : str or Path
path to the correctly formatted reactions file
energy_unit : str, optional
default unit to assume when none is found, by default 'J/mol'
relativeE : bool, optional
If true the energies of the reactions are assumed to be the barriers
relative to reactants instead of assumed to be the energy of the
transition state connecting reactants and products, by default False
"""
# Process the Compound File
raw_compounds = read_compounds(file_c)
compounds = create_compounds(raw_compounds,energy_unit)
chemicalsystem.cextend(compounds)
# Process the reaction File
raw_reactions, TS_lines = read_reactions(file_r)
if TS_lines:
TS_dict = create_TS_dict(TS_lines,energy_unit)
else:
TS_dict = dict()
check_missing_compounds(chemicalsystem,raw_reactions)
check_missing_TS(raw_reactions,TS_dict)
# Parse the reactions and create the reaction and ts instances
for reactants,mark,products,TS_text in raw_reactions:
reactants = [chemicalsystem.Name2Compound[r] for r in reactants]
products = [chemicalsystem.Name2Compound[p] for p in products]
reactants_energy = sum([r.energy for r in reactants])
if mark == BACKWARDS:
reactants, products = products, reactants
label,energy,scannable = prepare_inline_TS(TS_text,TS_dict,energy_unit)
# Create the reaction/s
reactions = [Reaction(reactants,products),]
if mark in REVERSIBLE:
reactions.append(Reaction(products,reactants))
# Handle relative energy specification
if mark != DIFFUSION and relativeE and label not in TS_dict:
energy = energy + reactants_energy
# Create the TS
ts_cls = MARKERS[mark]['TS']
if label not in chemicalsystem.Name2TS:
TS = ts_cls(energy,reactions=reactions,label=label,
scannable=scannable)
else:
TS = chemicalsystem.Name2TS[label]
TS.reactions.extend(reactions)
# Add the reactions to the chemical system
for reaction in reactions:
reaction.TS = TS
chemicalsystem.radd(reaction,False)
chemicalsystem.rupdate()
[docs]def read_compounds(file):
"""
To-document
"""
raw_compounds = []
with open(file,"r") as F:
for line in F:
line = line.strip()
if not line or line.startswith("#"):
continue
raw_compounds.append(line.split())
return raw_compounds
[docs]def create_compounds(raw_compounds,energy_unit='J/mol'):
"""
To-document
"""
err_msg = "Unexpected number of items in \n '{}' "
compounds = []
for items in raw_compounds:
scannable = 'scan' in items[-1]
if scannable:
_ = items.pop(-1)
if len(items) == 3:
label,energy,unit = items
elif len(items) == 2:
label,energy = items
unit = energy_unit
else:
raise RuntimeError(err_msg.format(' '.join(items)))
compound = Compound(label,Energy(energy,unit),scannable=scannable)
compounds.append(compound)
try: # Check if there is any duplicates
assert len(set(compounds)) == len(compounds)
except AssertionError as e:
msg = 'Inconsistent number of compounds. Check for duplicates'
raise ValueError(msg)
return compounds
[docs]def read_reactions(file):
"""
To-document
"""
reaction_lines = []
TS_lines = []
with open(file,'r') as F:
lines = F.readlines()
for line in lines:
line = line.strip()
if not line or line.startswith('#'):
pass
elif '!' in line: # [A-] + [B+] => C !TS1
reaction_lines.append(line)
else: # TSNAME Number (Unit)
TS_lines.append(line)
raw_reactions = [split_reaction_line(line) for line in reaction_lines]
return raw_reactions,TS_lines
[docs]def create_TS_dict(TS_lines,energy_unit='J/mol'):
"""
To-document
"""
TS_dict = dict()
for line in TS_lines:
scannable = False
items = line.strip().split()
# Guess if it is a scannable TS
if items[-1] == 'scan':
scannable = True
_ = items.pop(-1)
# now only two cases are possible
if len(items) == 3: # Energy unit is specified
label,energy,unit = items
elif len(items) == 2: # energy unit is not specified
label, energy = items
unit = energy_unit
else:
raise RuntimeError(f"Unexpected number of items in \n '{line}' ")
TS_dict[label] = (Energy(energy,unit),scannable)
try: # Check if there is any duplicates
assert len(TS_dict) == len(TS_lines)
except AssertionError as e:
raise ValueError("Inconsistent number of TSs at the end of the file. " \
"Check for duplicates")
return TS_dict
[docs]def prepare_inline_TS(TS_text,TS_dict,energy_unit='J/mol'):
"""
To-document
"""
scannable = False
# Prepare the TS data
TS_items = TS_text.split()
if len(TS_items) == 1 and not is_energy(TS_text):
label = TS_items[0]
energy,scannable = TS_dict[label]
elif len(TS_items) == 1:
label = None
energy = Energy(TS_items[0],energy_unit)
elif len(TS_items) == 2:
label = None
number, unit = TS_items
energy = Energy(number,unit)
elif len(TS_items) == 3 and TS_items[-1] == 'scan':
label = None
number, unit = TS_items[:2]
energy = Energy(number,unit)
scannable = True
else:
ts = ' '.join(TS_items)
raise ValueError(f'Wrong TS definition at "{ts}"')
return label,energy,scannable
[docs]def split_reaction_line(reaction_text):
"""
To-document
"""
for mark in MARKERS:
match = MARKERS[mark]['matcher'].findall(reaction_text)
if match:
break
else:
raise ValueError('the reaction does not contain a known marker')
reactants_text, products_text, TS_text = match[0]
reactants = [r.strip() for r in reactants_text.split(' + ')]
products = [p.strip() for p in products_text.split(' + ')]
return reactants, mark, products, TS_text.strip()
[docs]def check_missing_compounds(chemicalsystem,raw_reactions):
"""
Checks if all the compounds in the chemicalsystem are 'defined'
within the ChemicalSystem instance.
Raises
------
MissingCompounds
"""
defined_compounds = set(chemicalsystem.Name2Compound)
reactants,mark,products,TS_text = zip(*raw_reactions)
reaction_compounds = set(list(chain(*reactants)) + list(chain(*products)))
missing_compounds = reaction_compounds - defined_compounds
if missing_compounds:
msg = 'The following compounds are not defined: \n{}\n'
items = [f'{item}\tNUMBER\tUNIT?' for item in sorted(missing_compounds)]
raise MissingCompounds(msg.format('\n'.join(items)))
[docs]def check_missing_TS(raw_reactions,TS_dict):
"""
Checks if all the labeled transition states of the reactions are 'defined'
are defined.
Raises
------
MissingTransitionStates
"""
defined_TS = set(TS_dict)
reactants,mark,products,TS_text = zip(*raw_reactions)
reaction_TS = []
for item in TS_text :
if not is_energy(item):
reaction_TS.append(item.split()[0])
missing_TS = set(reaction_TS) - defined_TS
if missing_TS:
msg = 'The following labeled TS are not defined: \n{}\n'
items = [f'{item}\tNUMBER\tUNIT?\tSCAN?'
for item in sorted(missing_TS)]
raise MissingTransitionStates(msg.format('\n'.join(items)))