Source code for mlcg_tk.input_generator.prior_gen

from functools import partial
from typing import Callable, Optional, Type

import mdtraj as md

from mlcg.data import AtomicData
from mlcg.nn.gradients import GradientsOut
from mlcg.nn.prior import (
    HarmonicBonds,
    HarmonicAngles,
    Dihedral,
    Repulsion,
    _Prior,
    GeneralBonds,
    GeneralAngles,
)

from .prior_fit.histogram import HistogramsNL


class PriorBuilder:
    """
    General prior builder object holding statistics computed for a given
    prior feature and functions that are used to build neighbour lists and
    fit potentials to the computed statistics.

    Attributes
    ----------
    histograms:
        HistogramsNL object for storing binned feature statistics
    nl_builder:
        Callable used to build the neighbour list; `build_nl` invokes it
        with the topology passed as the `topology` keyword
    prior_fit_fn:
        Function to be used in fitting potential from statistics
    prior_cls:
        Prior class (the class itself, not an instance) whose
        `compute_features` is used to evaluate features
    """

    def __init__(
        self,
        histograms: HistogramsNL,
        nl_builder: Callable,
        prior_fit_fn: Callable,
        prior_cls: Type[_Prior],
    ) -> None:
        self.histograms = histograms
        self.prior_fit_fn = prior_fit_fn
        self.nl_builder = nl_builder
        self.prior_cls = prior_cls

    def build_nl(
        self,
        topology: md.Topology,
        **kwargs,
    ):
        """
        Generates tagged and ordered edges using neighbour list builder function

        Parameters
        ----------
        topology:
            MDTraj topology object from which atom groups defining each prior
            term will be created
        kwargs:
            Accepted so that every builder can be called with the same
            arguments; this base implementation does not forward them.

        Returns
        -------
        Whatever `nl_builder` returns (described upstream as edges, orders,
        and tag for the given prior term)
        """
        return self.nl_builder(topology=topology)

    def accumulate_statistics(
        self, nl_name: str, data: AtomicData, key_dict: dict
    ) -> None:
        """
        Computes atom-type specific features and calculates statistics from
        a collated AtomicData structure

        Parameters
        ----------
        nl_name:
            Neighbour list tag; used to look up
            `data.neighbor_list[nl_name]["index_mapping"]`
        data:
            Collated list of individual AtomicData structures.
        key_dict:
            Passed through unchanged to `histograms.accumulate_statistics`.
        """
        mapping = data.neighbor_list[nl_name]["index_mapping"]
        values = self.prior_cls.compute_features(data.pos, mapping)
        # Per-frame weights are optional: fall back to None when absent.
        weights = getattr(data, "weights", None)
        self.histograms.accumulate_statistics(nl_name, values, key_dict, weights)
class Bonds(PriorBuilder):
    """
    Builder for order-2 groups of bond priors.

    Attributes
    ----------
    name:
        Name of specific prior (to match neighbour list name)
    nl_builder:
        Neighbour list class to be used in building neighbour list
    separate_termini:
        Whether statistics should be computed separately for terminal atoms
    n_bins:
        The number of bins over which 1-D feature histograms are constructed
        in order to estimate distributions
    bmin:
        Lower bound of bin edges
    bmax:
        Upper bound of bin edges
    prior_fit_fn:
        Function to be used in fitting potential from statistics
    """

    def __init__(
        self,
        name: str,
        nl_builder: Callable,
        separate_termini: bool,
        n_bins: int,
        bmin: float,
        bmax: float,
        prior_fit_fn: Callable,
    ) -> None:
        feature_histograms = HistogramsNL(n_bins=n_bins, bmin=bmin, bmax=bmax)
        super().__init__(
            histograms=feature_histograms,
            nl_builder=nl_builder,
            prior_fit_fn=prior_fit_fn,
            prior_cls=GeneralBonds,
        )
        self.name = name
        self.type = "bonds"
        self.separate_termini = separate_termini
        # Terminal-atom selections start unset; per the upstream note they are
        # filled in by get_terminal_atoms when separate_termini is True.
        self.n_term_atoms = self.c_term_atoms = None
        self.n_atoms = self.c_atoms = None

    def build_nl(self, topology, **kwargs):
        """
        Generates edges for order-2 atom groups for bond prior

        Parameters
        ----------
        topology:
            MDTraj topology object from which atom groups defining each prior
            term will be created
        kwargs:
            Accepted but not used by this builder.

        Returns
        -------
        Whatever `nl_builder` returns (described upstream as edges, orders,
        and tag for the bond prior term)
        """
        termini_options = {
            "separate_termini": self.separate_termini,
            "n_term_atoms": self.n_term_atoms,
            "c_term_atoms": self.c_term_atoms,
            "n_atoms": self.n_atoms,
            "c_atoms": self.c_atoms,
        }
        return self.nl_builder(topology=topology, **termini_options)

    def get_prior_model(self, statistics, name, targets="forces", **kwargs):
        """
        Wraps a bond prior built from `statistics` in a `GradientsOut` layer.

        Parameters
        ----------
        statistics:
            Gathered bond statistics
        name: str
            Name of the prior object (corresponding to nls name)
        targets:
            The gradient targets to produce from a model output. These can be
            any of the gradient properties referenced in `mlcg.data._keys`.
            At the moment only forces are implemented.
        kwargs:
            Accepted but not used by this builder.
        """
        bond_prior = self.prior_cls(statistics, name=name)
        return GradientsOut(bond_prior, targets=targets)
class Angles(PriorBuilder):
    """
    Builder for order-3 groups of angle priors.

    Attributes
    ----------
    name:
        Name of specific prior (to match neighbour list name)
    nl_builder:
        Neighbour list class to be used in building neighbour list
    separate_termini:
        Whether statistics should be computed separately for terminal atoms
    n_bins:
        The number of bins over which 1-D feature histograms are constructed
        in order to estimate distributions
    bmin:
        Lower bound of bin edges
    bmax:
        Upper bound of bin edges
    prior_fit_fn:
        Function to be used in fitting potential from statistics
    prior_cls:
        Prior class to be used. It must be able to be initialized from the
        output of the `prior_fit_fn`
    """

    def __init__(
        self,
        name: str,
        nl_builder: Callable,
        separate_termini: bool,
        n_bins: int,
        bmin: float,
        bmax: float,
        prior_fit_fn: Callable,
        prior_cls=GeneralAngles,
    ) -> None:
        feature_histograms = HistogramsNL(n_bins=n_bins, bmin=bmin, bmax=bmax)
        super().__init__(
            histograms=feature_histograms,
            nl_builder=nl_builder,
            prior_fit_fn=prior_fit_fn,
            prior_cls=prior_cls,
        )
        self.name = name
        self.type = "angles"
        self.separate_termini = separate_termini
        # Terminal-atom selections start unset; per the upstream note they are
        # filled in by get_terminal_atoms when separate_termini is True.
        self.n_term_atoms = self.c_term_atoms = None
        self.n_atoms = self.c_atoms = None

    def build_nl(self, topology, **kwargs):
        """
        Generates edges for order-3 atom groups for angle prior

        Parameters
        ----------
        topology:
            MDTraj topology object from which atom groups defining each prior
            term will be created
        kwargs:
            Accepted but not used by this builder.

        Returns
        -------
        Whatever `nl_builder` returns (described upstream as edges, orders,
        and tag for the angle prior term)
        """
        termini_options = {
            "separate_termini": self.separate_termini,
            "n_term_atoms": self.n_term_atoms,
            "c_term_atoms": self.c_term_atoms,
            "n_atoms": self.n_atoms,
            "c_atoms": self.c_atoms,
        }
        return self.nl_builder(topology=topology, **termini_options)

    def get_prior_model(self, statistics, name, targets="forces", **kwargs):
        """
        Wraps an angle prior built from `statistics` in a `GradientsOut` layer.

        Parameters
        ----------
        statistics:
            Gathered angle statistics
        name: str
            Name of the prior object (corresponding to nls name)
        targets:
            The gradient targets to produce from a model output. These can be
            any of the gradient properties referenced in `mlcg.data._keys`.
            At the moment only forces are implemented.
        kwargs:
            Accepted but not used by this builder.
        """
        angle_prior = self.prior_cls(statistics, name=name)
        return GradientsOut(angle_prior, targets=targets)
class NonBonded(PriorBuilder):
    """
    Builder for order-2 groups of nonbonded priors.

    Attributes
    ----------
    name:
        Name of specific prior (to match neighbour list name)
    nl_builder:
        Neighbour list class to be used in building neighbour list
    min_pair:
        Minimum number of bond edges between two atoms in order to be
        considered a member of the non-bonded set
    res_exclusion:
        If supplied, pairs within res_exclusion residues of each other are
        removed from the non-bonded set
    separate_termini:
        Whether statistics should be computed separately for terminal atoms
    n_bins:
        The number of bins over which 1-D feature histograms are constructed
        in order to estimate distributions
    bmin:
        Lower bound of bin edges
    bmax:
        Upper bound of bin edges
    prior_fit_fn:
        Function to be used in fitting potential from statistics. It is
        stored pre-bound (via `functools.partial`) with the `percentile`
        and `cutoff` keywords given here.
    percentile:
        If specified, the sigma value is calculated using the specified
        distance percentile (e.g. percentile = 1 sets the sigma value at the
        location of the 1st percentile of pairwise distances). This option is
        useful for estimating repulsions for distance distributions with long
        lower tails or lower distance outliers.
        NOTE(review): the upstream text also says this "must be a number
        from 0 to 1", which conflicts with the example above and with the
        default of 1 -- confirm the accepted range against `prior_fit_fn`.
    cutoff:
        If specified, only those input values below this cutoff will be used
        in evaluating the percentile
    """

    def __init__(
        self,
        name: str,
        nl_builder: Callable,
        min_pair: int,
        res_exclusion: int,
        separate_termini: bool,
        n_bins: int,
        bmin: float,
        bmax: float,
        prior_fit_fn: Callable,
        percentile: float = 1,
        cutoff: Optional[float] = None,
    ) -> None:
        # Freeze the percentile/cutoff choice into the fitting function so
        # later callers only have to supply the statistics.
        bound_fit_fn = partial(prior_fit_fn, percentile=percentile, cutoff=cutoff)
        feature_histograms = HistogramsNL(n_bins=n_bins, bmin=bmin, bmax=bmax)
        super().__init__(
            histograms=feature_histograms,
            nl_builder=nl_builder,
            prior_fit_fn=bound_fit_fn,
            prior_cls=Repulsion,
        )
        self.name = name
        self.type = "non_bonded"
        self.min_pair = min_pair
        self.res_exclusion = res_exclusion
        self.separate_termini = separate_termini
        # Terminal-atom selections start unset; per the upstream note they are
        # filled in by get_terminal_atoms when separate_termini is True.
        self.n_term_atoms = self.c_term_atoms = None
        self.n_atoms = self.c_atoms = None

    def build_nl(self, topology, **kwargs):
        """
        Generates edges for order-2 atom groups for nonbond prior

        Parameters
        ----------
        topology:
            MDTraj topology object from which atom groups defining each prior
            term will be created
        kwargs:
            bond_edges:
                Edges of bonded prior, to be omitted from nonbonded
                interactions
            angle_edges:
                Edges of angle prior, to be omitted from nonbonded
                interactions

        Returns
        -------
        Whatever `nl_builder` returns (described upstream as edges, orders,
        and tag for the nonbonded prior term)

        Raises
        ------
        KeyError
            If `bond_edges` or `angle_edges` is not supplied.
        """
        # Both exclusion sets are mandatory; a missing one raises KeyError.
        excluded_edges = {
            key: kwargs[key] for key in ("bond_edges", "angle_edges")
        }
        termini_atoms = {
            "n_term_atoms": self.n_term_atoms,
            "c_term_atoms": self.c_term_atoms,
            "n_atoms": self.n_atoms,
            "c_atoms": self.c_atoms,
        }
        return self.nl_builder(
            topology=topology,
            **excluded_edges,
            separate_termini=self.separate_termini,
            min_pair=self.min_pair,
            res_exclusion=self.res_exclusion,
            **termini_atoms,
        )

    def get_prior_model(self, statistics, name, targets="forces", **kwargs):
        """
        Wraps a nonbonded prior built from `statistics` in a `GradientsOut`
        layer.

        Parameters
        ----------
        statistics:
            Gathered nonbonded statistics
        name: str
            Name of the prior object (corresponding to nls name)
        targets:
            The gradient targets to produce from a model output. These can be
            any of the gradient properties referenced in `mlcg.data._keys`.
            At the moment only forces are implemented.
        kwargs:
            Accepted but not used by this builder.
        """
        nonbonded_prior = self.prior_cls(statistics)
        # The name is assigned after construction rather than passed in.
        nonbonded_prior.name = name
        return GradientsOut(nonbonded_prior, targets=targets)
class Dihedrals(PriorBuilder):
    """
    Builder for order-4 groups of dihedral priors.

    Attributes
    ----------
    name:
        Name of specific prior (to match neighbour list name)
    nl_builder:
        Neighbour list class to be used in building neighbour list
    n_bins:
        The number of bins over which 1-D feature histograms are constructed
        in order to estimate distributions
    bmin:
        Lower bound of bin edges
    bmax:
        Upper bound of bin edges
    prior_fit_fn:
        Function to be used in fitting potential from statistics
    """

    def __init__(
        self,
        name: str,
        nl_builder: Callable,
        n_bins: int,
        bmin: float,
        bmax: float,
        prior_fit_fn: Callable,
    ) -> None:
        feature_histograms = HistogramsNL(n_bins=n_bins, bmin=bmin, bmax=bmax)
        super().__init__(
            histograms=feature_histograms,
            nl_builder=nl_builder,
            prior_fit_fn=prior_fit_fn,
            prior_cls=Dihedral,
        )
        self.name = name
        self.type = "dihedrals"

    def get_prior_model(self, statistics, name, targets="forces", **kwargs):
        """
        Wraps a dihedral prior built from `statistics` in a `GradientsOut`
        layer.

        Parameters
        ----------
        statistics:
            Gathered dihedral statistics
        name: str
            Name of the prior object (corresponding to nls name)
        targets:
            The gradient targets to produce from a model output. These can be
            any of the gradient properties referenced in `mlcg.data._keys`.
            At the moment only forces are implemented.
        kwargs:
            n_degs:
                The maximum number of degrees to attempt to fit if using the
                AIC criterion for prior model selection. Required.

        Raises
        ------
        KeyError
            If `n_degs` is not supplied as a keyword argument.
        """
        max_degrees = kwargs["n_degs"]
        dihedral_prior = self.prior_cls(statistics, n_degs=max_degrees)
        # The name is assigned after construction rather than passed in.
        dihedral_prior.name = name
        return GradientsOut(dihedral_prior, targets=targets)