# Source code for gwin.sampler.kombine

# Copyright (C) 2016  Christopher M. Biwer
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.


#
# =============================================================================
#
#                                   Preamble
#
# =============================================================================
#
"""
This modules provides classes and functions for using the kombine sampler
packages for parameter estimation.
"""

from __future__ import absolute_import

import numpy
from .base import BaseMCMCSampler

#
# =============================================================================
#
#                                   Samplers
#
# =============================================================================
#


class KombineSampler(BaseMCMCSampler):
    """This class is used to construct the MCMC sampler from the kombine
    package.

    Parameters
    ----------
    model : model
        A model from ``gwin.models``.
    nwalkers : int
        Number of walkers to use in sampler.
    transd : bool
        If True, the sampler will operate across parameter spaces using a
        kombine.clustered_kde.TransdimensionalKDE proposal distribution. In
        this mode a masked array with samples in each of the possible sets
        of dimensions must be given for the initial ensemble distribution.
    pool : {None, pool}
        An open pool of worker processes for parallel likelihood
        evaluation. If None, a single process is used.
    model_call : {None, callable}
        The callable handed to kombine for evaluating the model; if None,
        ``model`` itself is used.
    update_interval : {None, int}
        Make the sampler update the proposal densities every
        `update_interval` iterations.
    """
    name = "kombine"

    def __init__(self, model, nwalkers, transd=False, pool=None,
                 model_call=None, update_interval=None):
        try:
            import kombine
        except ImportError:
            raise ImportError("kombine is not installed.")
        if model_call is None:
            model_call = model
        # construct sampler for use in KombineSampler
        ndim = len(model.variable_params)
        # the pool advertises how many worker processes it holds via
        # ``count``; with no pool, run serially
        count = 1 if pool is None else pool.count
        sampler = kombine.Sampler(nwalkers, ndim, model_call,
                                  transd=transd, pool=pool, processes=count)
        # initialize
        super(KombineSampler, self).__init__(sampler, model)
        self._nwalkers = nwalkers
        self.update_interval = update_interval

    @property
    def acceptance_fraction(self):
        """Get the fraction of steps accepted by each walker as an array.
        """
        # acceptance returned by kombine has shape iterations x nwalkers,
        # so averaging over axis 0 yields one fraction per walker
        return numpy.mean(self._sampler.acceptance, axis=0)
[docs] @classmethod def from_cli(cls, opts, model, pool=None, model_call=None): """Create an instance of this sampler from the given command-line options. Parameters ---------- opts : ArgumentParser options The options to parse. model : Model The model to use with the sampler. Returns ------- KombineSampler A kombine sampler initialized based on the given arguments. """ return cls(model, opts.nwalkers, model_call=model_call, pool=pool, update_interval=opts.update_interval)
[docs] def run(self, niterations, **kwargs): """Advance the sampler for a number of samples. Parameters ---------- niterations : int Number of samples to get from sampler. Returns ------- p : numpy.array An array of current walker positions with shape (nwalkers, ndim). lnpost : numpy.array The list of log posterior probabilities for the walkers at positions p, with shape (nwalkers, ndim). lnprop : numpy.array The list of log proposal densities for the walkers at positions p, with shape (nwalkers, ndim). """ # get starting point from pos; if it is None, means that sampler # hasn't run yet, so we get starting point from p0 p0 = self._pos if p0 is None: p0 = self.p0 # do the same for starting blob blob0 = self._currentblob if blob0 is None and self.model.return_all_stats: blob0 = [self.model(p0[wi, :])[1] for wi in range(self.nwalkers)] kwargs['blob0'] = blob0 if 'update_interval' not in kwargs: # use the internal update interval kwargs['update_interval'] = self.update_interval res = self._sampler.run_mcmc(niterations, p0=p0, **kwargs) p, lnpost, lnprop = res[0], res[1], res[2] # update the positions self._pos = p if self.model.return_all_stats: self._currentblob = self._sampler.blobs[-1] return p, lnpost, lnprop
@property def lnpost(self): """ Get the natural logarithm of the likelihood as an nwalkers x niterations array. """ # kombine returns niterations x nwaklers return self._sampler.lnpost.transpose() @property def chain(self): """Get all past samples as an nwalker x niterations x ndim array.""" # kombine returns niterations x nwalkers x ndim return self._sampler.chain.transpose((1, 0, 2))
[docs] def clear_chain(self): """Clears the chain and blobs from memory. """ # store the iteration that the clear is occuring on self.lastclear = self.niterations # kombine stores its chain as niterations x nwalkers x ndim current_shape = self._sampler._chain.shape new_shape = (0, current_shape[1], current_shape[2]) if isinstance(self._sampler._chain, numpy.ma.MaskedArray): self._sampler._chain = numpy.ma.resize(self._sampler._chain, new_shape) else: self._sampler._chain.resize(new_shape) self._sampler.stored_iterations = 0 # clear the blobs self._sampler._blobs = []
[docs] def burn_in(self): """Use kombine's `burnin` routine to advance the sampler. If a minimum number of burn-in iterations was specified, this will run the burn-in until it has advanced at least as many steps as desired. The initial positions (p0) must be set prior to running. For more details, see `kombine.sampler.burnin`. Returns ------- p : numpy.array An array of current walker positions with shape (nwalkers, ndim). lnpost : numpy.array The list of log posterior probabilities for the walkers at positions p, with shape (nwalkers, ndim). lnprop : numpy.array The list of log proposal densities for the walkers at positions p, with shape (nwalkers, ndim). """ # check that we haven't already burned in if self.pos is not None: raise ValueError("burn-in already run") # run once p0 = self.p0 if self.model.return_all_stats: blob0 = [self.model(p0[wi, :])[1] for wi in range(self.nwalkers)] else: blob0 = None res = self._sampler.burnin(self.p0, blob0=blob0) # store the number of iterations used self.burn_in_iterations = numpy.repeat(self.niterations, self.nwalkers) p, post, q = res[0], res[1], res[2] return p, post, q
    def _write_kde(self, fp, dataset_name, kde):
        """Writes the given kde to the file.

        Parameters
        ----------
        fp : InferenceFile
            Open (h5py-like) file to write to.
        dataset_name : str
            Full path of the dataset to store the KDE's data in.
        kde : KDE
            KDE object whose ``data`` array is written; assumes it exposes
            a ``data`` attribute with a numpy-style ``shape``.
        """
        shape = kde.data.shape
        try:
            if shape != fp[dataset_name].shape:
                # resize the dataset
                fp[dataset_name].resize(shape)
            fp[dataset_name][:] = kde.data
        except KeyError:
            # dataset doesn't exist yet; create it resizable up to the
            # sampler's maximum KDE size so later writes can resize it
            fp.create_dataset(dataset_name, shape,
                              maxshape=(self._sampler._kde_size,
                                        len(self.variable_params)),
                              dtype=float, fletcher32=True)
            fp[dataset_name][:] = kde.data
[docs] def write_state(self, fp): """Saves the state of the sampler in a file. In addition to the numpy random state, the current KDE used for the jump proposals is saved. Parameters ---------- fp : InferenceFile File to store sampler state. """ # save the numpy random state super(KombineSampler, self).write_state(fp) # save clustered KDE data subgroup = "clustered_kde" dataset_name = "/".join([fp.sampler_group, subgroup]) clustered_kde = self._sampler._kde self._write_kde(fp, dataset_name, clustered_kde) # metadata fp[dataset_name].attrs["nclusters"] = clustered_kde.nclusters fp[dataset_name].attrs["assignments"] = clustered_kde._assignments fp[dataset_name].attrs["centroids"] = clustered_kde.centroids fp[dataset_name].attrs["logweights"] = clustered_kde._logweights fp[dataset_name].attrs["mean"] = clustered_kde._mean fp[dataset_name].attrs["std"] = clustered_kde._std # save individual KDE data for i, kde in enumerate(clustered_kde._kdes): dataset_name = "/".join([fp.sampler_group, "kde" + str(i)]) self._write_kde(fp, dataset_name, kde)
    def set_state_from_file(self, fp):
        """Sets the state of the sampler back to the instance saved in a
        file.

        In addition to the numpy random state, the current KDE used for
        the jump proposals is loaded.

        Parameters
        ----------
        fp : InferenceFile
            File with sampler state stored.

        Raises
        ------
        ImportError
            If kombine is not installed.
        """
        try:
            import kombine
        except ImportError:
            raise ImportError("kombine is not installed.")
        # set the numpy random state
        super(KombineSampler, self).set_state_from_file(fp)
        # create a ClusteredKDE from the saved data and cluster count
        dataset_name = "/".join([fp.sampler_group, "clustered_kde"])
        clustered_kde = kombine.clustered_kde.ClusteredKDE(
            fp[dataset_name][:], fp[dataset_name].attrs["nclusters"])
        clustered_kde._assignments = fp[dataset_name].attrs["assignments"]
        # NOTE(review): ``write_state`` saves the public ``centroids`` and
        # the private ``_logweights``, while this restores the private
        # ``_centroids`` and the public ``logweights`` — confirm against
        # kombine's ClusteredKDE attribute/property names that these
        # asymmetric accesses restore the intended state.
        clustered_kde._centroids = fp[dataset_name].attrs["centroids"]
        clustered_kde.logweights = fp[dataset_name].attrs["logweights"]
        clustered_kde._mean = fp[dataset_name].attrs["mean"]
        clustered_kde._std = fp[dataset_name].attrs["std"]
        # add KDEs, one per cluster, from their saved datasets
        clustered_kde._kdes = []
        for i in range(clustered_kde.nclusters):
            dataset_name = "/".join([fp.sampler_group, "kde" + str(i)])
            clustered_kde._kdes.append(
                kombine.clustered_kde.KDE(fp[dataset_name][:]))
        # overwrite ClusteredKDE in Sampler instance
        self._sampler._kde = clustered_kde