Source code for mlcg_tk.input_generator.prior_fit.polynomial

import numpy as np
import warnings
from scipy.integrate import trapezoid
from scipy.optimize import curve_fit
import torch
from typing import Optional

from .utils import compute_aic



[docs]
def polynomial(x: torch.Tensor, ks: torch.Tensor, V0: torch.Tensor):
    r"""Harmonic interaction in the form of a series. The shape of the tensors
        should match between each other.

    .. math:

        V(r) = V0 + \sum_{n=1}^{deg} k_n x^n

    """
    V = ks[0] * x
    for p, k in enumerate(ks[1:], start=2):
        V += k * torch.pow(x, p)
    V += V0
    return V




[docs]
def polynomial_wrapper_fit_func(x, *args):
    args = args[0]
    V0 = torch.tensor(args[0])
    ks = torch.tensor(args[1:])
    return polynomial(x, ks, V0)



def _init_parameters(n_degs, bin_centers_nz=None, dG_nz=None):
    ks = [1.0 for _ in range(n_degs)]
    V0 = -1.0
    p0 = [V0]
    p0.extend(ks)
    return p0


def _init_parameter_dict(n_degs):
    r"""Helper method for initializing the parameter dictionary"""
    stat = {"ks": {}, "v_0": 0.0}
    k_names = ["k_" + str(ii) for ii in range(1, n_degs + 1)]
    for ii in range(n_degs):
        k_name = k_names[ii]
        stat["ks"][k_name] = {}
    return stat


def _make_parameter_dict(stat, popt, n_degs):
    r"""Helper method for constructing a fitted parameter dictionary"""
    stat["v_0"] = popt[0]
    k_names = sorted(list(stat["ks"].keys()))
    for ii in range(n_degs):
        k_name = k_names[ii]
        stat["ks"][k_name] = popt[ii + 1]

    return stat


def _linear_regression(xs: torch.Tensor, ys: torch.Tensor, n_degs: int):
    r"""Vanilla linear regression"""
    features = [torch.ones_like(xs)]
    for n in range(n_degs):
        features.append(torch.pow(xs, n + 1))
    features = torch.stack(features).t()
    ys = ys.to(features.dtype)
    sol = torch.linalg.lstsq(features, ys.t())
    return sol


def _numpy_fit(xs: torch.Tensor, ys: torch.Tensor, n_degs: int):
    r"""Regression through numpy"""
    sol = np.polynomial.Polynomial.fit(xs.numpy(), ys.numpy(), deg=n_degs)
    return sol


def _scipy_fit(
    xs: torch.Tensor, ys: torch.Tensor, n_degs: int, bounds: Optional = None
):
    r"""Regression through scipy

    The `bounds` argument is passed to `scipy.optimize.curve_fit` to ensure bounds in the polynomial
    """
    if bounds is None:
        bounds = (-np.inf, np.inf)
    p0 = _init_parameters(n_degs, xs, ys)
    p0[-2] = 0
    popt, _ = curve_fit(
        lambda theta, *p0: polynomial_wrapper_fit_func(theta, p0),
        xs,
        ys,
        p0=p0,
        bounds=bounds,
        ftol=1e-5,
    )
    return popt


def _polynomial_fit(
    xs: torch.Tensor, ys: torch.Tensor, n_degs: int, regression_method: str = "scipy"
):
    _valid_regression_methods = ["linear", "numpy", "scipy"]
    popt = []
    if regression_method == "linear":
        sol = _linear_regression(xs, ys, n_degs)
        popt = sol.solution.numpy().tolist()
    elif regression_method == "numpy":
        sol = _numpy_fit(xs, ys, n_degs)
        popt = list(sol.convert().coef)
    elif regression_method == "scipy":
        low_bounds = [-np.inf for _ in range(n_degs + 1)]
        # enforce that the coefficient of the leading degree is positive
        low_bounds[-1] = 0
        high_bounds = [np.inf for _ in range(n_degs + 1)]
        popt = _scipy_fit(xs, ys, n_degs, bounds=(low_bounds, high_bounds))
        dev = [idx * popt[idx] for idx in range(1, n_degs + 1)]
        dev_val_at_min_1 = polynomial_wrapper_fit_func(torch.tensor(-1), dev)
        dev_val_at_plus_1 = polynomial_wrapper_fit_func(torch.tensor(1), dev)
        if dev_val_at_min_1 * dev_val_at_plus_1 > 0:
            # relaunch the fit making sure that the second largest degree has no coefficient
            low_bounds[-2] = -1e-2
            high_bounds[-2] = 1e-2
            popt = _scipy_fit(xs, ys, n_degs, bounds=(low_bounds, high_bounds))
    else:
        raise ValueError(
            f"regression method {regression_method} is not in {_valid_regression_methods} "
        )
    if popt[-1] <= 0:
        print("Warning: leading coefficient is negative, this might create problems")
    return popt



[docs]
def fit_polynomial_from_potential_estimates(
    bin_centers_nz: torch.Tensor,
    dG_nz: torch.Tensor,
    n_degs: int = 4,
    constrain_deg: Optional[int] = 4,
    regression_method: str = "scipy",
    **kwargs,
):
    r"""
    Loop over n_degs basins and use either the AIC criterion
    or a prechosen degree to select best fit. Parameter fitting
    occurs over unmaksed regions of the free energy only.

    Parameters
    ----------
    bin_centers_nz:
        Bin centers over which the fit is carried out
    dG_nz:
        The emperical free energy correspinding to the bin centers
    n_degs:
        The maximum number of degrees to attempt to fit if using the AIC
        criterion for prior model selection
    constrain_deg:
        If not None, a single fit is produced for the specified integer
        degree instead of using the AIC criterion for fit selection between
        multiple degrees

    Returns
    -------
        Statistics dictionary with fitted interaction parameters
    """

    integral = torch.tensor(
        float(trapezoid(dG_nz.cpu().numpy(), bin_centers_nz.cpu().numpy()))
    )
    mask = torch.abs(dG_nz) > 1e-4 * torch.abs(integral)
    if constrain_deg is not None:
        stat = _init_parameter_dict(n_degs)
        popt = _polynomial_fit(
            bin_centers_nz[mask], dG_nz[mask], n_degs, regression_method
        )
        stat = _make_parameter_dict(stat, popt, n_degs)
    else:
        try:
            # Determine best fit for unknown # of parameters
            stat = _init_parameter_dict(n_degs)
            popts = []
            aics = []
            degs = range(2, n_degs + 1, 2)
            for deg in degs:
                popt = _polynomial_fit(
                    bin_centers_nz[mask], dG_nz[mask], n_degs, regression_method
                )
                fitted_dG = polynomial_wrapper_fit_func(bin_centers_nz[mask], popt)
                free_parameters = deg + 1
                aic = compute_aic(fitted_dG, dG_nz[mask], free_parameters)
                popts.append(popt)
                aics.append(aic)
            # Select only priors that have upward curve at both regions of unexplored
            # phase space. i.e. (powers of 2)
            min_aic = min(aics)
            min_i_aic = aics.index(min_aic)
            popt = popts[min_i_aic]
            stat = _make_parameter_dict(stat, popt, n_degs)
        except RuntimeError:
            print("failed to fit potential estimate for Polynomial")
            stat = _init_parameter_dict(n_degs)
            k_names = sorted(list(stat["ks"].keys()))
            x_0_names = sorted(list(stat["x_0s"].keys()))
            for ii in range(n_degs):
                k1_name = k_names[ii]
                k2_name = x_0_names[ii]
                stat["ks"][k1_name] = torch.tensor(float("nan"))
                stat["x_0s"][k2_name] = torch.tensor(float("nan"))
    return stat