Source code for pydl.pydlutils.bspline

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""This module corresponds to the bspline directory in idlutils.
"""
from warnings import warn
import numpy as np
from numpy.linalg.linalg import LinAlgError
from scipy.linalg import cholesky_banded, cho_solve_banded
from . import PydlutilsUserWarning
from .math import djs_reject
from .trace import fchebyshev
from .. import uniq
from ..goddard.math import flegendre


[docs] class bspline(object): """B-spline class. Functions in the idlutils bspline library are implemented as methods on this class. Parameters ---------- x : :class:`numpy.ndarray` The data. nord : :class:`int`, optional The order of the B-spline. Default is 4, which is cubic. npoly : :class:`int`, optional Polynomial order to fit over 2nd variable, if supplied. If not supplied the order is 1. bkpt : :class:`numpy.ndarray`, optional An initial breakpoint vector. If omitted, breakpoints will be computed from other options. bkspread : :class:`float`, optional Applies a scale factor to the difference between successive value of `bkpt`. placed : :class:`numpy.ndarray`, optional Precalculated breakpoint positions. bkspace : :class:`float`, optional Spacing of breakpoints in units of `x`. nbkpts : :class:`int`, optional Number of breakpoints to span `x` range; minimum is 2 (the endpoints). everyn : :class:`int`, optional Spacing of breakpoints in good pixels. Attributes ---------- breakpoints The processed breakpoints for the fit, based on the initialization options. nord The order of the B-spline. Default is 4, which is cubic. npoly Polynomial order to fit over 2nd variable, if supplied. If not supplied the order is 1. mask A mask for `breakpoints`. coeff Internal attribute used by :meth:`~pydl.pydlutils.bspline.bspline.fit`. icoeff Internal attribute used by :meth:`~pydl.pydlutils.bspline.bspline.fit`. xmin Normalization minimum for the second variable in 2-dimensional fits. xmax Normalization maximum for the second variable in 2-dimensional fits. funcname For 2-dimensional fits, this is the function for the second variable. The default is 'legendre'. Notes ----- Although this code has been tested (in the sense that it reproduces the original idlutils code) and appears to work for a subset 1-dimensional cases, it should not be used for 2 dimensions or more exotic 1-dimensional cases. """ def __init__(self, x, nord=4, npoly=1, bkpt=None, bkspread=1.0, placed=None, bkspace=None, nbkpts=None, everyn=None): """Init creates an object whose attributes are similar to the structure returned by the ``create_bsplineset()`` function. """ # # Set the breakpoints. # if bkpt is None: startx = x.min() rangex = x.max() - startx if placed is not None: w = ((placed >= startx) & (placed <= startx+rangex)) if w.sum() < 2: bkpt = np.arange(2, dtype='f') * rangex + startx else: bkpt = placed[w] elif bkspace is not None: nbkpts = int(rangex/bkspace) + 1 if nbkpts < 2: nbkpts = 2 tempbkspace = rangex/float(nbkpts-1) bkpt = np.arange(nbkpts, dtype='f')*tempbkspace + startx elif nbkpts is not None: if nbkpts < 2: nbkpts = 2 tempbkspace = rangex/float(nbkpts-1) bkpt = np.arange(nbkpts, dtype='f') * tempbkspace + startx elif everyn is not None: nx = x.size nbkpts = max(nx//everyn, 1) if nbkpts == 1: xspot = [0] else: xspot = int(nx/(nbkpts-1)) * np.arange(nbkpts, dtype='i4') bkpt = x[xspot].astype('f') else: raise ValueError('No information for bkpts.') imin = bkpt.argmin() imax = bkpt.argmax() if x.min() < bkpt[imin]: warn('Lowest breakpoint does not cover lowest x value: changing.', PydlutilsUserWarning) bkpt[imin] = x.min() if x.max() > bkpt[imax]: warn('Highest breakpoint does not cover highest x value: changing.', PydlutilsUserWarning) bkpt[imax] = x.max() nshortbkpt = bkpt.size fullbkpt = bkpt.copy() if nshortbkpt == 1: bkspace = np.float32(bkspread) else: bkspace = (bkpt[1] - bkpt[0]) * np.float32(bkspread) for i in np.arange(1, nord, dtype=np.float32): fullbkpt = np.insert(fullbkpt, 0, bkpt[0]-bkspace*i) fullbkpt = np.insert(fullbkpt, fullbkpt.shape[0], bkpt[nshortbkpt-1] + bkspace*i) # # Set the attributes # nc = fullbkpt.size - nord self.breakpoints = fullbkpt self.nord = nord self.npoly = npoly self.mask = np.ones((fullbkpt.size,), dtype='bool') if npoly > 1: self.coeff = np.zeros((npoly, nc), dtype='d') self.icoeff = np.zeros((npoly, nc), dtype='d') else: self.coeff = np.zeros((nc,), dtype='d') self.icoeff = np.zeros((nc,), dtype='d') self.xmin = 0.0 self.xmax = 1.0 self.funcname = 'legendre' return
[docs] def fit(self, xdata, ydata, invvar, x2=None): """Calculate a B-spline in the least-squares sense. Fit is based on two variables: `xdata` which is sorted and spans a large range where breakpoints are required `ydata` which can be described with a low order polynomial. Parameters ---------- xdata : :class:`numpy.ndarray` Independent variable. ydata : :class:`numpy.ndarray` Dependent variable. invvar : :class:`numpy.ndarray` Inverse variance of `ydata`. x2 : :class:`numpy.ndarray`, optional Orthogonal dependent variable for 2d fits. Returns ------- :class:`tuple` A tuple containing an integer error code, and the evaluation of the b-spline at the input values. An error code of -2 is a failure, -1 indicates dropped breakpoints, 0 is success, and positive integers indicate ill-conditioned breakpoints. """ goodbk = self.mask[self.nord:] nn = goodbk.sum() if nn < self.nord: yfit = np.zeros(ydata.shape, dtype='f') return (-2, yfit) nfull = nn * self.npoly bw = self.npoly * self.nord a1, lower, upper = self.action(xdata, x2=x2) foo = np.tile(invvar, bw).reshape(bw, invvar.size).transpose() a2 = a1 * foo alpha = np.zeros((bw, nfull+bw), dtype='d') beta = np.zeros((nfull+bw,), dtype='d') bi = np.arange(bw, dtype='i4') bo = np.arange(bw, dtype='i4') for k in range(1, bw): bi = np.append(bi, np.arange(bw-k, dtype='i4')+(bw+1)*k) bo = np.append(bo, np.arange(bw-k, dtype='i4')+bw*k) for k in range(nn-self.nord+1): itop = k*self.npoly ibottom = min(itop, nfull) + bw - 1 ict = upper[k] - lower[k] + 1 if ict > 0: work = np.dot(a1[lower[k]:upper[k]+1, :].T, a2[lower[k]:upper[k]+1, :]) wb = np.dot(ydata[lower[k]:upper[k]+1], a2[lower[k]:upper[k]+1, :]) alpha.T.flat[bo+itop*bw] += work.flat[bi] beta[itop:ibottom+1] += wb min_influence = 1.0e-10 * invvar.sum() / nfull errb = cholesky_band(alpha, mininf=min_influence) if isinstance(errb[0], int) and errb[0] == -1: a = errb[1] else: yfit, foo = self.value(xdata, x2=x2, action=a1, upper=upper, lower=lower) return (self.maskpoints(errb[0]), yfit) sol = cholesky_solve(a, beta) if self.npoly > 1: self.icoeff[:, goodbk] = np.array(a[0, 0:nfull].reshape(self.npoly, nn), dtype=a.dtype) self.coeff[:, goodbk] = np.array(sol[0:nfull].reshape(self.npoly, nn), dtype=sol.dtype) else: self.icoeff[goodbk] = np.array(a[0, 0:nfull], dtype=a.dtype) self.coeff[goodbk] = np.array(sol[0:nfull], dtype=sol.dtype) yfit, foo = self.value(xdata, x2=x2, action=a1, upper=upper, lower=lower) return (0, yfit)
[docs] def action(self, x, x2=None): """Construct banded B-spline matrix, with dimensions [ndata, bandwidth]. Parameters ---------- x : :class:`numpy.ndarray` Independent variable. x2 : :class:`numpy.ndarray`, optional Orthogonal dependent variable for 2d fits. Returns ------- :class:`tuple` A tuple containing the B-spline action matrix; the 'lower' parameter, a list of pixel positions, each corresponding to the first occurence of position greater than breakpoint indx; and 'upper', Same as lower, but denotes the upper pixel positions. """ nx = x.size nbkpt = self.mask.sum() if nbkpt < 2*self.nord: return (-2, 0, 0) n = nbkpt - self.nord # gb = self.breakpoints[self.mask] bw = self.npoly*self.nord lower = np.zeros((n - self.nord + 1,), dtype='i4') upper = np.zeros((n - self.nord + 1,), dtype='i4') - 1 indx = self.intrv(x) bf1 = self.bsplvn(x, indx) action = bf1 aa = uniq(indx, np.arange(indx.size, dtype='i4')) upper[indx[aa]-self.nord+1] = aa rindx = indx[::-1] bb = uniq(rindx, np.arange(rindx.size, dtype='i4')) lower[rindx[bb]-self.nord+1] = nx - bb - 1 if x2 is not None: if x2.size != nx: raise ValueError('Dimensions of x and x2 do not match.') x2norm = 2.0 * (x2 - self.xmin) / (self.xmax - self.xmin) - 1.0 if self.funcname == 'poly': temppoly = np.ones((nx, self.npoly), dtype='f') for i in range(1, self.npoly): temppoly[:, i] = temppoly[:, i-1] * x2norm elif self.funcname == 'poly1': temppoly = np.tile(x2norm, self.npoly).reshape(nx, self.npoly) for i in range(1, self.npoly): temppoly[:, i] = temppoly[:, i-1] * x2norm elif self.funcname == 'chebyshev': temppoly = fchebyshev(x2norm, self.npoly) elif self.funcname == 'legendre': temppoly = flegendre(x2norm, self.npoly) else: raise ValueError('Unknown value of funcname.') action = np.zeros((nx, bw), dtype='d') counter = -1 for ii in range(self.nord): for jj in range(self.npoly): counter += 1 action[:, counter] = bf1[:, ii]*temppoly[:, jj] return (action, lower, upper)
[docs] def intrv(self, x): """Find the segment between breakpoints which contain each value in the array `x`. The minimum breakpoint is ``nbkptord - 1``, and the maximum is ``nbkpt - nbkptord - 1``. Parameters ---------- x : :class:`numpy.ndarray` Data values, assumed to be monotonically increasing. Returns ------- :class:`numpy.ndarray` Position of array elements with respect to breakpoints. """ gb = self.breakpoints[self.mask] n = gb.size - self.nord indx = np.zeros((x.size,), dtype='i4') ileft = self.nord - 1 for i in range(x.size): while x[i] > gb[ileft+1] and ileft < n - 1: ileft += 1 indx[i] = ileft return indx
[docs] def bsplvn(self, x, ileft): """Calculates the value of all possibly nonzero B-splines at `x` of a certain order. Parameters ---------- x : :class:`numpy.ndarray` Independent variable. ileft : :class:`int` Breakpoint segements that contain `x`. Returns ------- :class:`numpy.ndarray` B-spline values. """ bkpt = self.breakpoints[self.mask] vnikx = np.zeros((x.size, self.nord), dtype=x.dtype) deltap = vnikx.copy() deltam = vnikx.copy() j = 0 vnikx[:, 0] = 1.0 while j < self.nord - 1: ipj = ileft+j+1 deltap[:, j] = bkpt[ipj] - x imj = ileft-j deltam[:, j] = x - bkpt[imj] vmprev = 0.0 for l in range(j+1): vm = vnikx[:, l]/(deltap[:, l] + deltam[:, j-l]) vnikx[:, l] = vm*deltap[:, l] + vmprev vmprev = vm*deltam[:, j-l] j += 1 vnikx[:, j] = vmprev return vnikx
[docs] def value(self, x, x2=None, action=None, lower=None, upper=None): """Evaluate a B-spline at specified values. Parameters ---------- x : :class:`numpy.ndarray` Independent variable. x2 : :class:`numpy.ndarray`, optional Orthogonal dependent variable for 2d fits. action : :class:`numpy.ndarray`, optional Action matrix to use. If not supplied it is calculated. lower : :class:`numpy.ndarray`, optional If the action parameter is supplied, this parameter must also be supplied. upper : :class:`numpy.ndarray`, optional If the action parameter is supplied, this parameter must also be supplied. Returns ------- :class:`tuple` A tuple containing the results of the bspline evaluation and a mask indicating where the evaluation was good. """ xsort = x.argsort() xwork = x[xsort] if x2 is not None: x2work = x2[xsort] else: x2work = None if action is not None: if lower is None or upper is None: raise ValueError('Must specify lower and upper if action is set.') else: action, lower, upper = self.action(xwork, x2=x2work) yfit = np.zeros(x.shape, dtype=x.dtype) bw = self.npoly * self.nord spot = np.arange(bw, dtype='i4') goodbk = self.mask.nonzero()[0] coeffbk = self.mask[self.nord:].nonzero()[0] n = self.mask.sum() - self.nord if self.npoly > 1: goodcoeff = self.coeff[:, coeffbk] else: goodcoeff = self.coeff[coeffbk] # maskthis = np.zeros(xwork.shape,dtype=xwork.dtype) for i in range(n-self.nord+1): ict = upper[i] - lower[i] + 1 if ict > 0: yfit[lower[i]:upper[i]+1] = np.dot( action[lower[i]:upper[i]+1, :], goodcoeff[i*self.npoly+spot]) yy = yfit.copy() yy[xsort] = yfit mask = np.ones(x.shape, dtype='bool') gb = self.breakpoints[goodbk] outside = ((x < gb[self.nord-1]) | (x > gb[n])) if outside.any(): mask[outside] = False hmm = ((np.diff(goodbk) > 2).nonzero())[0] for jj in range(hmm.size): inside = ((x >= self.breakpoints[goodbk[hmm[jj]]]) & (x <= self.breakpoints[goodbk[hmm[jj]+1]-1])) if inside.any(): mask[inside] = False return (yy, mask)
[docs] def maskpoints(self, err): """Perform simple logic of which breakpoints to mask. Parameters ---------- err : :class:`numpy.ndarray` The list of indexes returned by the cholesky routines. Returns ------- :class:`int` An integer indicating the results of the masking. -1 indicates that the error points were successfully masked. -2 indicates failure; the calculation should be aborted. Notes ----- The mask attribute is modified, assuming it is possible to create the mask. """ nbkpt = self.mask.sum() if nbkpt <= 2*self.nord: return -2 hmm = err[np.unique(err/self.npoly)]/self.npoly n = nbkpt - self.nord if np.any(hmm >= n): return -2 test = np.zeros(nbkpt, dtype='bool') for jj in range(-np.ceil(self.nord/2.0), self.nord/2.0): foo = np.where((hmm+jj) > 0, hmm+jj, np.zeros(hmm.shape, dtype=hmm.dtype)) inside = np.where((foo+self.nord) < n-1, foo+self.nord, np.zeros(hmm.shape, dtype=hmm.dtype)+n-1) test[inside] = True if test.any(): reality = self.mask[test] if self.mask[reality].any(): self.mask[reality] = False return -1 else: return -2 else: return -2
[docs] def cholesky_band(l, mininf=0.0): """Compute *lower* Cholesky decomposition of a banded matrix. This function provides informative error messages to pass back to the :class:`~pydl.pydlutils.bspline.bspline` machinery; the actual computation is delegated to :func:`scipy.linalg.cholesky_banded`. Parameters ---------- l : :class:`numpy.ndarray` A matrix on which to perform the Cholesky decomposition. The matrix must be in a special, *lower* form described in :func:`scipy.linalg.cholesky_banded`. In addition, the input must be padded. If the original, square matrix has size :math:`N \\times N`, and the width of the band is :math:`b`, `l` must be :math:`b \\times (N + b)`. mininf : :class:`float`, optional Entries in the `l` matrix are considered negative if they are less than this value (default 0.0). Returns ------- :class:`tuple` If problems were detected, the first item will be the index or indexes where the problem was detected, and the second item will simply be the input matrix. If no problems were detected, the first item will be -1, and the second item will be the Cholesky decomposition. """ bw, nn = l.shape n = nn - bw negative = l[0, 0:n] <= mininf if negative.any() or not np.all(np.isfinite(l)): warn('Bad entries: ' + str(negative.nonzero()[0]), PydlutilsUserWarning) return (negative.nonzero()[0], l) try: lower = cholesky_banded(l[:, 0:n], lower=True) except LinAlgError: # # Figure out where the error is. # lower = l.copy() kn = bw - 1 spot = np.arange(kn, dtype='i4') + 1 for j in range(n): lower[0, j] = np.sqrt(lower[0, j]) lower[spot, j] /= lower[0, j] x = lower[spot, j] if not np.all(np.isfinite(x)): warn('NaN found in cholesky_band.', PydlutilsUserWarning) return (j, l) # # Restore padding. # L = np.zeros(l.shape, dtype=l.dtype) L[:, 0:n] = lower return (-1, L)
[docs] def cholesky_solve(a, bb): """Solve the equation :math:`A x = b` where `a` is a *lower* Cholesky-banded matrix. In the :class:`~pydl.pydlutils.bspline.bspline` machinery, `a` needs to be padded. This function should only used with the output of :func:`~pydl.pydlutils.bspline.cholesky_band`, to ensure the proper padding on `a`. Otherwise the computation is delegated to :func:`scipy.linalg.cho_solve_banded`. Parameters ---------- a : :class:`numpy.ndarray` *Lower* Cholesky decomposition of :math:`A` in :math:`A x = b`. bb : :class:`numpy.ndarray` :math:`b` in :math:`A x = b`. Returns ------- :class:`numpy.ndarray` The solution, padded to be the same shape as `bb`. """ bw = a.shape[0] n = bb.shape[0] - bw x = np.zeros(bb.shape, dtype=bb.dtype) x[0:n] = cho_solve_banded((a[:, 0:n], True), bb[0:n]) return x
[docs] def iterfit(xdata, ydata, invvar=None, upper=5, lower=5, x2=None, maxiter=10, **kwargs): """Iteratively fit a B-spline set to data, with rejection. Additional keyword parameters are passed to :class:`~pydl.pydlutils.bspline.bspline`. Parameters ---------- xdata : :class:`numpy.ndarray` Independent variable. ydata : :class:`numpy.ndarray` Dependent variable. invvar : :class:`numpy.ndarray`, optional Inverse variance of `ydata`. If not set, it will be calculated based on the standard deviation. upper : :class:`int` or :class:`float`, optional Upper rejection threshold in units of sigma, defaults to 5 sigma. lower : :class:`int` or :class:`float`, optional Lower rejection threshold in units of sigma, defaults to 5 sigma. x2 : :class:`numpy.ndarray`, optional Orthogonal dependent variable for 2d fits. maxiter : :class:`int`, optional Maximum number of rejection iterations, default 10. Set this to zero to disable rejection. Returns ------- :class:`tuple` A tuple containing the fitted bspline object and an output mask. """ nx = xdata.size if ydata.size != nx: raise ValueError('Dimensions of xdata and ydata do not agree.') if invvar is not None: if invvar.size != nx: raise ValueError('Dimensions of xdata and invvar do not agree.') else: # # This correction to the variance makes it the same # as IDL's variance() # var = ydata.var()*(float(nx)/float(nx-1)) if var == 0: var = 1.0 invvar = np.ones(ydata.shape, dtype=ydata.dtype)/var if x2 is not None: if x2.size != nx: raise ValueError('Dimensions of xdata and x2 do not agree.') yfit = np.zeros(ydata.shape, dtype=ydata.dtype) if invvar.size == 1: outmask = True else: outmask = np.ones(invvar.shape, dtype='bool') xsort = xdata.argsort() maskwork = (outmask & (invvar > 0))[xsort] if 'oldset' in kwargs: sset = kwargs['oldset'] sset.mask = True sset.coeff = 0 else: if not maskwork.any(): raise ValueError('No valid data points.') if 'fullbkpt' in kwargs: # fullbkpt = kwargs['fullbkpt'] raise ValueError('Input via fullbkpt is not supported!') else: sset = bspline(xdata[xsort[maskwork]], **kwargs) if maskwork.sum() < sset.nord: warn('Number of good data points fewer than nord.', PydlutilsUserWarning) return (sset, outmask) if x2 is not None: if 'xmin' in kwargs: xmin = kwargs['xmin'] else: xmin = x2.min() if 'xmax' in kwargs: xmax = kwargs['xmax'] else: xmax = x2.max() if xmin == xmax: xmax = xmin + 1 sset.xmin = xmin sset.xmax = xmax if 'funcname' in kwargs: sset.funcname = kwargs['funcname'] xwork = xdata[xsort] ywork = ydata[xsort] invwork = invvar[xsort] if x2 is not None: warn('2D bspline fits may be buggy and will be fully removed in the future.', DeprecationWarning) x2work = x2[xsort] else: x2work = None iiter = 0 error = 0 qdone = -1 while (error != 0 or qdone == -1) and iiter <= maxiter: goodbk = sset.mask.nonzero()[0] if maskwork.sum() <= 1 or not sset.mask.any(): sset.coeff = 0 iiter = maxiter + 1 else: if 'requiren' in kwargs: i = 0 while xwork[i] < sset.breakpoints[goodbk[sset.nord]] and i < nx-1: i += 1 ct = 0 for ileft in range(sset.nord, sset.mask.sum()-sset.nord+1): while (xwork[i] >= sset.breakpoints[goodbk[ileft]] and xwork[i] < sset.breakpoints[goodbk[ileft+1]] and i < nx-1): ct += invwork[i]*maskwork[i] > 0 i += 1 if ct >= kwargs['requiren']: ct = 0 else: sset.mask[goodbk[ileft]] = False error, yfit = sset.fit(xwork, ywork, invwork*maskwork, x2=x2work) iiter += 1 inmask = maskwork if error == -2: return (sset, outmask) elif error == 0: maskwork, qdone = djs_reject(ywork, yfit, invvar=invwork, inmask=inmask, outmask=maskwork, upper=upper, lower=lower) else: pass outmask[xsort] = maskwork temp = yfit yfit[xsort] = temp return (sset, outmask)