"""
QCPortal Database ODM
"""
import itertools as it
from enum import Enum
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Tuple, Union
import numpy as np
import pandas as pd
from qcelemental import constants
from ..models import Molecule, ProtoModel
from ..util import replace_dict_keys
from .collection_utils import nCr, register_collection
from .dataset import Dataset
if TYPE_CHECKING: # pragma: no cover
from .. import FractalClient
from ..models import ComputeResponse
class _ReactionTypeEnum(str, Enum):
"""Helper class for locking the reaction type into one or the other"""
rxn = "rxn"
ie = "ie"
class ReactionEntry(ProtoModel):
"""Data model for the `reactions` list in Dataset"""
attributes: Dict[str, Union[int, float, str]] # Might be overloaded key types
reaction_results: Dict[str, dict]
name: str
stoichiometry: Dict[str, Dict[str, float]]
extras: Dict[str, Any] = {}
[docs]class ReactionDataset(Dataset):
"""
The ReactionDataset class for homogeneous computations on many reactions.
Attributes
----------
client : client.FractalClient
A FractalClient connected to a server
data : ReactionDataset.DataModel
A Model representation of the database backbone
df : pd.DataFrame
The underlying dataframe for the Dataset object
rxn_index : pd.Index
The unrolled reaction index for all reactions in the Dataset
"""
def __init__(self, name: str, client: Optional["FractalClient"] = None, ds_type: str = "rxn", **kwargs) -> None:
"""
Initializer for the Dataset object. If no Portal is supplied or the database name
is not present on the server that the Portal is connected to a blank database will be
created.
Parameters
----------
name : str
The name of the Dataset
client : client.FractalClient, optional
A FractalClient connected to a server
ds_type : str, optional
The type of Dataset involved
"""
ds_type = ds_type.lower()
super().__init__(name, client=client, ds_type=ds_type, **kwargs)
[docs] class DataModel(Dataset.DataModel):
ds_type: _ReactionTypeEnum = _ReactionTypeEnum.rxn
records: Optional[List[ReactionEntry]] = None
history: Set[Tuple[str, str, str, Optional[str], Optional[str], str]] = set()
history_keys: Tuple[str, str, str, str, str, str] = (
"driver",
"program",
"method",
"basis",
"keywords",
"stoichiometry",
)
def _entry_index(self, subset: Optional[List[str]] = None) -> None:
if self.data.records is None:
self._get_data_records_from_db()
# Unroll the index
tmp_index = []
for rxn in self.data.records:
name = rxn.name
for stoich_name in list(rxn.stoichiometry):
for mol_hash, coef in rxn.stoichiometry[stoich_name].items():
tmp_index.append([name, stoich_name, mol_hash, coef])
ret = pd.DataFrame(tmp_index, columns=["name", "stoichiometry", "molecule", "coefficient"])
if subset is None:
return ret
else:
return ret.reset_index().set_index("name").loc[subset].reset_index().set_index("index")
def _molecule_indexer(
self,
stoich: Union[str, List[str]],
subset: Optional[Union[str, Set[str]]] = None,
coefficients: bool = False,
force: bool = False,
) -> Tuple[Dict[Tuple[str, ...], "ObjectId"], Tuple[str]]:
"""Provides a {index: molecule_id} mapping for a given subset.
Parameters
----------
stoich : Union[str, List[str]]
The stoichiometries, or list of stoichiometries to return
subset : Optional[Union[str, Set[str]]], optional
The indices of the desired subset. Return all indices if subset is None.
coefficients : bool, optional
Returns the coefficients if as part of the index if True
No Longer Returned
------------------
Dict[str, 'ObjectId']
Molecule index to molecule ObjectId map
Returns
-------
Tuple[Dict[Tuple[str, ...], 'ObjectId'], Tuple[str]]
Molecule index to molecule ObjectId map, and index names
"""
if isinstance(stoich, str):
stoich = [stoich]
index = self.get_entries(subset=subset, force=force)
matched_rows = index[np.in1d(index["stoichiometry"], stoich)]
if subset:
matched_rows = matched_rows[np.in1d(matched_rows["name"], subset)]
names = ("name", "stoichiometry", "idx")
if coefficients:
names = names + ("coefficient",)
ret = {}
for gb_idx, group in matched_rows.groupby(["name", "stoichiometry"]):
for cnt, (idx, row) in enumerate(group.iterrows()):
if coefficients:
ret[gb_idx + (cnt, row["coefficient"])] = row["molecule"]
else:
ret[gb_idx + (cnt,)] = row["molecule"]
return ret, names
def valid_stoich(self, subset=None, force: bool = False) -> Set[str]:
entries = self.get_entries(subset=subset, force=force)
return set(entries["stoichiometry"].unique())
def _validate_stoich(self, stoich: Union[List[str], str], subset=None, force: bool = False) -> None:
if isinstance(stoich, str):
stoich = [stoich]
if isinstance(subset, str):
subset = [subset]
valid_stoich = self.valid_stoich(subset=subset, force=force)
for s in stoich:
if s.lower() not in valid_stoich:
raise KeyError("Stoichiometry not understood, valid keys are {}.".format(valid_stoich))
def _pre_save_prep(self, client: "FractalClient") -> None:
self._canonical_pre_save(client)
mol_ret = self._add_molecules_by_dict(client, self._new_molecules)
# Update internal molecule UUID's to servers UUID's
for record in self._new_records:
stoichiometry = replace_dict_keys(record.stoichiometry, mol_ret)
new_record = record.copy(update={"stoichiometry": stoichiometry})
self.data.records.append(new_record)
self._new_records: List[ReactionEntry] = []
self._new_molecules = {}
self._entry_index()
[docs] def get_values(
self,
method: Optional[Union[str, List[str]]] = None,
basis: Optional[Union[str, List[str]]] = None,
keywords: Optional[str] = None,
program: Optional[str] = None,
driver: Optional[str] = None,
stoich: str = "default",
name: Optional[Union[str, List[str]]] = None,
native: Optional[bool] = None,
subset: Optional[Union[str, List[str]]] = None,
force: bool = False,
) -> pd.DataFrame:
"""
Obtains values from the known history from the search paramaters provided for the expected `return_result` values.
Defaults to the standard programs and keywords if not provided.
Note that unlike `get_records`, `get_values` will automatically expand searches and return multiple method
and basis combinations simultaneously.
`None` is a wildcard selector. To search for `None`, use `"None"`.
Parameters
----------
method : Optional[Union[str, List[str]]], optional
The computational method (B3LYP)
basis : Optional[Union[str, List[str]]], optional
The computational basis (6-31G)
keywords : Optional[str], optional
The keyword alias
program : Optional[str], optional
The underlying QC program
driver : Optional[str], optional
The type of calculation (e.g. energy, gradient, hessian, dipole...)
stoich : str, optional
Stoichiometry of the reaction.
name : Optional[Union[str, List[str]]], optional
Canonical name of the record. Overrides the above selectors.
native: Optional[bool], optional
True: only include data computed with QCFractal
False: only include data contributed from outside sources
None: include both
subset: Optional[List[str]], optional
The indices of the desired subset. Return all indices if subset is None.
force : bool, optional
Data is typically cached, forces a new query if True
Returns
------
DataFrame
A DataFrame of values with columns corresponding to methods and rows corresponding to reaction entries.
Contributed (native=False) columns are marked with "(contributed)" and may include units in square brackets
if their units differ in dimensionality from the ReactionDataset's default units.
"""
return self._get_values(
method=method,
basis=basis,
keywords=keywords,
program=program,
driver=driver,
stoich=stoich,
name=name,
native=native,
subset=subset,
force=force,
)
def _get_native_values(
self,
subset: Set[str],
method: Optional[str] = None,
basis: Optional[str] = None,
keywords: Optional[str] = None,
program: Optional[str] = None,
stoich: Optional[str] = None,
name: Optional[str] = None,
force: bool = False,
) -> pd.DataFrame:
self._validate_stoich(stoich, subset=subset, force=force)
# So that datasets with no records do not require a default program and default keywords
if len(self.list_records()) == 0:
return pd.DataFrame(index=self.get_index(subset))
queries = self._form_queries(
method=method, basis=basis, keywords=keywords, program=program, stoich=stoich, name=name
)
if len(queries) == 0:
return pd.DataFrame(index=self.get_index(subset))
stoich_complex = queries.pop("stoichiometry").values[0]
stoich_monomer = "".join([x for x in stoich_complex if not x.isdigit()]) + "1"
def _query_apply_coeffients(stoich, query):
# Build the starting table
indexer, names = self._molecule_indexer(stoich=stoich, coefficients=True, force=force)
df = self._get_records(indexer, query, include=["return_result"], merge=True)
df.index = pd.MultiIndex.from_tuples(df.index, names=names)
df.reset_index(inplace=True)
# Block out null values `groupby.sum()` will return 0 rather than NaN in all cases
null_mask = df[["name", "return_result"]].copy()
null_mask["return_result"] = null_mask["return_result"].isnull()
null_mask = null_mask.groupby(["name"])["return_result"].sum() != False
# Multiply by coefficients and sum
df["return_result"] *= df["coefficient"]
df = df.groupby(["name"])["return_result"].sum()
df[null_mask] = np.nan
return df
names = []
new_queries = []
new_data = pd.DataFrame(index=subset)
for _, query in queries.iterrows():
query = query.replace({np.nan: None}).to_dict()
qname = query["name"]
names.append(qname)
if force or not self._subset_in_cache(qname, subset):
self._column_metadata[qname] = query
new_queries.append(query)
if not self._use_view(force):
units: Dict[str, str] = {}
for query in new_queries:
qname = query.pop("name")
if self.data.ds_type == _ReactionTypeEnum.ie:
# This implements 1-body counterpoise correction
# TODO: this will need to contain the logic for VMFC or other method-of-increments strategies
data_complex = _query_apply_coeffients(stoich_complex, query)
data_monomer = _query_apply_coeffients(stoich_monomer, query)
data = data_complex - data_monomer
elif self.data.ds_type == _ReactionTypeEnum.rxn:
data = _query_apply_coeffients(stoich_complex, query)
else:
raise ValueError(
f"ReactionDataset ds_type is not a member of _ReactionTypeEnum. (Got {self.data.ds_type}.)"
)
new_data[qname] = data * constants.conversion_factor("hartree", self.units)
query["name"] = qname
units[qname] = self.units
else:
for query in new_queries:
query["native"] = True
new_data, units = self._view.get_values(new_queries)
for query in new_queries:
qname = query["name"]
new_data[qname] = new_data[qname] * constants.conversion_factor(units[qname], self.units)
for query in new_queries:
qname = query["name"]
self._column_metadata[qname].update({"native": True, "units": units[qname]})
self._update_cache(new_data)
return self.df.loc[subset, names]
[docs] def visualize(
self,
method: Optional[str] = None,
basis: Optional[str] = None,
keywords: Optional[str] = None,
program: Optional[str] = None,
stoich: str = "default",
groupby: Optional[str] = None,
metric: str = "UE",
bench: Optional[str] = None,
kind: str = "bar",
return_figure: Optional[bool] = None,
show_incomplete: bool = False,
) -> "plotly.Figure":
"""
Parameters
----------
method : Optional[str], optional
Methods to query
basis : Optional[str], optional
Bases to query
keywords : Optional[str], optional
Keyword aliases to query
program : Optional[str], optional
Programs aliases to query
stoich : str, optional
Stoichiometry to query
groupby : Optional[str], optional
Groups the plot by this index.
metric : str, optional
The metric to use either UE (unsigned error) or URE (unsigned relative error)
bench : Optional[str], optional
The benchmark level of theory to use
kind : str, optional
The kind of chart to produce, either 'bar' or 'violin'
return_figure : Optional[bool], optional
If True, return the raw plotly figure. If False, returns a hosted iPlot. If None, return a iPlot display in Jupyter notebook and a raw plotly figure in all other circumstances.
show_incomplete: bool, optional
Display statistics method/basis set combinations where results are incomplete
Returns
-------
plotly.Figure
The requested figure.
"""
query = {"method": method, "basis": basis, "keywords": keywords, "program": program, "stoichiometry": stoich}
query = {k: v for k, v in query.items() if v is not None}
return self._visualize(
metric,
bench,
query=query,
groupby=groupby,
return_figure=return_figure,
kind=kind,
show_incomplete=show_incomplete,
)
[docs] def get_molecules(
self,
subset: Optional[Union[str, Set[str]]] = None,
stoich: Union[str, List[str]] = "default",
force: bool = False,
) -> pd.DataFrame:
"""Queries full Molecules from the database.
Parameters
----------
subset : Optional[Union[str, Set[str]]], optional
The index subset to query on
stoich : Union[str, List[str]], optional
The stoichiometries to pull from, either a single or multiple stoichiometries
force : bool, optional
Force pull of molecules from server
Return
------
pd.DataFrame
Indexed Molecules which match the stoich and subset string.
"""
self._check_client()
self._check_state()
if isinstance(subset, str):
subset = [subset]
self._validate_stoich(stoich, subset=subset, force=force)
indexer, names = self._molecule_indexer(stoich=stoich, subset=subset, force=force)
df = self._get_molecules(indexer, force=force)
df.index = pd.MultiIndex.from_tuples(df.index, names=names)
return df
[docs] def get_records(
self,
method: str,
basis: Optional[str] = None,
*,
keywords: Optional[str] = None,
program: Optional[str] = None,
stoich: Union[str, List[str]] = "default",
include: Optional[List[str]] = None,
subset: Optional[Union[str, Set[str]]] = None,
) -> Union[pd.DataFrame, "ResultRecord"]:
"""
Queries the local Portal for the requested keys and stoichiometry.
Parameters
----------
method : str
The computational method to query on (B3LYP)
basis : Optional[str], optional
The computational basis to query on (6-31G)
keywords : Optional[str], optional
The option token desired
program : Optional[str], optional
The program to query on
stoich : Union[str, List[str]], optional
The given stoichiometry to compute.
include : Optional[Dict[str, bool]], optional
The attribute project to perform on the query, otherwise returns ResultRecord objects.
subset : Optional[Union[str, Set[str]]], optional
The index subset to query on
Returns
-------
Union[pd.DataFrame, 'ResultRecord']
The name of the queried column
"""
self._check_client()
self._check_state()
method = method.upper()
if isinstance(stoich, str):
stoich = [stoich]
ret = []
for s in stoich:
name, _, history = self._default_parameters(program, method, basis, keywords, stoich=s)
history.pop("stoichiometry")
indexer, names = self._molecule_indexer(stoich=s, subset=subset, force=True)
df = self._get_records(
indexer,
history,
include=include,
merge=False,
raise_on_plan="`get_records` can only be used for non-composite quantities. You likely queried a DFT+D method or similar that requires a combination of DFT and -D. Please query each piece separately.",
)
df = df[0]
df.index = pd.MultiIndex.from_tuples(df.index, names=names)
ret.append(df)
ret = pd.concat(ret)
ret.sort_index(inplace=True)
return ret
[docs] def compute(
self,
method: str,
basis: Optional[str] = None,
*,
keywords: Optional[str] = None,
program: Optional[str] = None,
stoich: str = "default",
ignore_ds_type: bool = False,
tag: Optional[str] = None,
priority: Optional[str] = None,
) -> "ComputeResponse":
"""Executes a computational method for all reactions in the Dataset.
Previously completed computations are not repeated.
Parameters
----------
method : str
The computational method to compute (B3LYP)
basis : Optional[str], optional
The computational basis to compute (6-31G)
keywords : Optional[str], optional
The keyword alias for the requested compute
program : Optional[str], optional
The underlying QC program
stoich : str, optional
The stoichiometry of the requested compute (cp/nocp/etc)
ignore_ds_type : bool, optional
Optionally only compute the "default" geometry
tag : Optional[str], optional
The queue tag to use when submitting compute requests.
priority : Optional[str], optional
The priority of the jobs low, medium, or high.
Returns
-------
ComputeResponse
An object that contains the submitted ObjectIds of the new compute. This object has the following fields:
- ids: The ObjectId's of the task in the order of input molecules
- submitted: A list of ObjectId's that were submitted to the compute queue
- existing: A list of ObjectId's of tasks already in the database
"""
self._check_client()
self._check_state()
entry_index = self.get_entries(force=True)
self._validate_stoich(stoich, subset=None, force=True)
compute_keys = {"program": program, "method": method, "basis": basis, "keywords": keywords, "stoich": stoich}
# Figure out molecules that we need
if (not ignore_ds_type) and (self.data.ds_type.lower() == "ie"):
monomer_stoich = "".join([x for x in stoich if not x.isdigit()]) + "1"
tmp_monomer = entry_index[entry_index["stoichiometry"] == monomer_stoich].copy()
ret1 = self._compute(compute_keys, tmp_monomer["molecule"], tag, priority)
tmp_complex = entry_index[entry_index["stoichiometry"] == stoich].copy()
ret2 = self._compute(compute_keys, tmp_complex["molecule"], tag, priority)
ret = ret1.merge(ret2)
else:
tmp_complex = entry_index[entry_index["stoichiometry"] == stoich].copy()
ret = self._compute(compute_keys, tmp_complex["molecule"], tag, priority)
# Update the record that this was computed
self.save()
return ret
[docs] def get_rxn(self, name: str) -> ReactionEntry:
"""
Returns the JSON object of a specific reaction.
Parameters
----------
name : str
The name of the reaction to query
Returns
-------
ret : dict
The JSON representation of the reaction
"""
found = []
for num, x in enumerate(self.data.records):
if x.name == name:
found.append(num)
if len(found) == 0:
raise KeyError("Dataset:get_rxn: Reaction name '{}' not found.".format(name))
if len(found) > 1:
raise KeyError("Dataset:get_rxn: Multiple reactions of name '{}' found. Dataset failure.".format(name))
return self.data.records[found[0]]
# Visualization
[docs] def ternary(self, cvals=None):
"""Plots a ternary diagram of the DataBase if available
Parameters
----------
cvals : None, optional
Description
"""
raise Exception("MPL not avail")
# return visualization.Ternary2D(self.df, cvals=cvals)
# Adders
[docs] def parse_stoichiometry(self, stoichiometry: List[Tuple[Union[Molecule, str], float]]) -> Dict[str, float]:
"""
Parses a stiochiometry list.
Parameters
----------
stoichiometry : list
A list of tuples describing the stoichiometry.
Returns
-------
Dict[str, float]
A dictionary describing the stoichiometry for use in the database.
Keys are molecule hashes. Values are stoichiometric coefficients.
Notes
-----
This function attempts to convert the molecule into its corresponding hash. The following will happen depending on the form of the Molecule.
- Molecule hash - Used directly in the stoichiometry.
- Molecule class - Hash is obtained and the molecule will be added to the database upon saving.
- Molecule string - Molecule will be converted to a Molecule class and the same process as the above will occur.
"""
mol_hashes = []
mol_values = []
for line in stoichiometry:
if len(line) != 2:
raise KeyError("Dataset: Parse stoichiometry: passed in as a list, must be of key : value type")
# Get the values
try:
mol_values.append(float(line[1]))
except:
raise TypeError("Dataset: Parse stoichiometry: must be able to cast second value to a float.")
# What kind of molecule is it?
mol = line[0]
if isinstance(mol, str) and (len(mol) == 40):
molecule_hash = mol
elif isinstance(mol, str):
qcf_mol = Molecule.from_data(mol)
molecule_hash = qcf_mol.get_hash()
if molecule_hash not in list(self._new_molecules):
self._new_molecules[molecule_hash] = qcf_mol
elif isinstance(mol, Molecule):
molecule_hash = mol.get_hash()
if molecule_hash not in list(self._new_molecules):
self._new_molecules[molecule_hash] = mol
else:
raise TypeError(
"Dataset: Parse stoichiometry: first value must either be a molecule hash, "
"a molecule str, or a Molecule class."
)
mol_hashes.append(molecule_hash)
# Sum together the coefficients of duplicates
ret: Dict[str, float] = {}
for mol, coef in zip(mol_hashes, mol_values):
if mol in list(ret):
ret[mol] += coef
else:
ret[mol] = coef
return ret
[docs] def add_rxn(
self,
name: str,
stoichiometry: Dict[str, List[Tuple[Molecule, float]]],
reaction_results: Optional[Dict[str, str]] = None,
attributes: Optional[Dict[str, Union[int, float, str]]] = None,
other_fields: Optional[Dict[str, Any]] = None,
) -> ReactionEntry:
"""
Adds a reaction to a database object.
Parameters
----------
name : str
Name of the reaction.
stoichiometry : list or dict
Either a list or dictionary of lists
reaction_results : dict or None, Optional, Default: None
A dictionary of the computed total interaction energy results
attributes : dict or None, Optional, Default: None
A dictionary of attributes to assign to the reaction
other_fields : dict or None, Optional, Default: None
A dictionary of additional user defined fields to add to the reaction entry
Returns
-------
ReactionEntry
A complete specification of the reaction
"""
if reaction_results is None:
reaction_results = {}
if attributes is None:
attributes = {}
if other_fields is None:
other_fields = {}
rxn_dict: Dict[str, Any] = {"name": name}
# Set name
if name in self.get_index():
raise KeyError(
"Dataset: Name '{}' already exists. "
"Please either delete this entry or call the update function.".format(name)
)
# Set stoich
if isinstance(stoichiometry, dict):
rxn_dict["stoichiometry"] = {}
if "default" not in list(stoichiometry):
raise KeyError("Dataset:add_rxn: Stoichiometry dict must have a 'default' key.")
for k, v in stoichiometry.items():
rxn_dict["stoichiometry"][k] = self.parse_stoichiometry(v)
elif isinstance(stoichiometry, (tuple, list)):
rxn_dict["stoichiometry"] = {}
rxn_dict["stoichiometry"]["default"] = self.parse_stoichiometry(stoichiometry)
else:
raise TypeError("Dataset:add_rxn: Type of stoichiometry input was not recognized:", type(stoichiometry))
# Set attributes
if not isinstance(attributes, dict):
raise TypeError("Dataset:add_rxn: attributes must be a dictionary, not '{}'".format(type(attributes)))
rxn_dict["attributes"] = attributes
if not isinstance(other_fields, dict):
raise TypeError("Dataset:add_rxn: other_fields must be a dictionary, not '{}'".format(type(attributes)))
rxn_dict["extras"] = other_fields
if "default" in list(reaction_results):
rxn_dict["reaction_results"] = reaction_results
elif isinstance(reaction_results, dict):
rxn_dict["reaction_results"] = {}
rxn_dict["reaction_results"]["default"] = reaction_results
else:
raise TypeError("Passed in reaction_results not understood.")
rxn = ReactionEntry(**rxn_dict)
self._new_records.append(rxn)
return rxn
[docs] def add_ie_rxn(self, name: str, mol: Molecule, **kwargs) -> ReactionEntry:
"""Add a interaction energy reaction entry to the database. Automatically
builds CP and no-CP reactions for the fragmented molecule.
Parameters
----------
name : str
The name of the reaction
mol : Molecule
A molecule with multiple fragments
**kwargs
Additional kwargs to pass into `build_id_fragments`.
Returns
-------
ReactionEntry
A representation of the new reaction.
"""
reaction_results = kwargs.pop("reaction_results", {})
attributes = kwargs.pop("attributes", {})
other_fields = kwargs.pop("other_fields", {})
stoichiometry = self.build_ie_fragments(mol, name=name, **kwargs)
return self.add_rxn(
name, stoichiometry, reaction_results=reaction_results, attributes=attributes, other_fields=other_fields
)
[docs] @staticmethod
def build_ie_fragments(mol: Molecule, **kwargs) -> Dict[str, List[Tuple[Molecule, float]]]:
"""
Build the stoichiometry for an Interaction Energy.
Parameters
----------
mol : Molecule class or str
Molecule to fragment.
do_default : bool
Create the default (noCP) stoichiometry.
do_cp : bool
Create the counterpoise (CP) corrected stoichiometry.
do_vmfc : bool
Create the Valiron-Mayer Function Counterpoise (VMFC) corrected stoichiometry.
max_nbody : int
The maximum fragment level built, if zero defaults to the maximum number of fragments.
Notes
-----
Returns
-------
ret : dict
A JSON representation of the fragmented molecule.
"""
do_default = kwargs.pop("do_default", True)
do_cp = kwargs.pop("do_cp", True)
do_vmfc = kwargs.pop("do_vmfc", False)
max_nbody = kwargs.pop("max_nbody", 0)
if not isinstance(mol, Molecule):
mol = Molecule.from_data(mol, **kwargs)
ret = {}
max_frag = len(mol.fragments)
if max_nbody == 0:
max_nbody = max_frag
if max_frag < 2:
raise AttributeError("Dataset:build_ie_fragments: Molecule must have at least two fragments.")
# Build some info
fragment_range = list(range(max_frag))
# Loop over the bodis
for nbody in range(1, max_nbody):
nocp_tmp = []
cp_tmp = []
for k in range(1, nbody + 1):
take_nk = nCr(max_frag - k - 1, nbody - k)
sign = (-1) ** (nbody - k)
coef = take_nk * sign
for frag in it.combinations(fragment_range, k):
if do_default:
nocp_tmp.append((mol.get_fragment(frag, orient=True, group_fragments=True), coef))
if do_cp:
ghost = list(set(fragment_range) - set(frag))
cp_tmp.append((mol.get_fragment(frag, ghost, orient=True, group_fragments=True), coef))
if do_default:
ret["default" + str(nbody)] = nocp_tmp
if do_cp:
ret["cp" + str(nbody)] = cp_tmp
# VMFC is a special beast
if do_vmfc:
raise KeyError("VMFC isnt quite ready for primetime!")
# ret.update({"vmfc" + str(nbody): [] for nbody in range(1, max_nbody)})
# nbody_range = list(range(1, max_nbody))
# for nbody in nbody_range:
# for cp_combos in it.combinations(fragment_range, nbody):
# basis_tuple = tuple(cp_combos)
# for interior_nbody in nbody_range:
# for x in it.combinations(cp_combos, interior_nbody):
# ghost = list(set(basis_tuple) - set(x))
# ret["vmfc" + str(interior_nbody)].append((mol.get_fragment(x, ghost), 1.0))
# Add in the maximal position
if do_default:
ret["default"] = [(mol, 1.0)]
if do_cp:
ret["cp"] = [(mol, 1.0)]
# if do_vmfc:
# ret["vmfc"] = [(mol, 1.0)]
return ret
register_collection(ReactionDataset)