# Univariate Splines # A univariate spline is an expansion in (univariate) piecewise # polynomial basis functions subject to osculatory restrictions. # The basis functions are polynomials over given intervals and # zero outside of those intervals, and the polynomials have # specified contact at the endpoints of the intervals. # The endpoints are called "knots". # The knots are chosen to be where the action is. # There are three types of spline basis functions commonly used. # * truncated power functions (or just power functions). # For k knots and degree p, there are k+p+1 of these: # 1, x, ..., x^p, ((x-z_1)_+)^p, ..., ((x-z_k)_+)^p # Sometimes the constant is not used, so there are only k+p functions. # These are nice when we are adding or deleting knots. Deletion of the # i^th knot, z_i, is equivalent to removal of the basis function ((x-z_i)_+)^p. # * B-splines. # For k knots and degree p, there are k+p+1 of these; they # are a little harder to define -- see text. # Sometimes the first function is not used, so there are only k+p functions. # These functions tend to be better conditioned than the power functions. # * "natural" polynomial splines. # These basis functions are such that the second derivative of the spline # expansion is 0 for all x beyond the boundary knots. # This condition can be imposed in various ways. An easy way is just to start # with any set of basis functions and replace the degrees of freedom from two # of them with the condition that every basis function have zero second # derivative for all x beyond the boundary knots. # For natural cubic splines with k knots, there are k of basis functions. # There is nothing "natural" about the natural polynomial splines. # A way of handling the end conditions that is usually better is to remove # the second and the penultimate knot and to replace them with the requirement # that the basis functions have one degree higher contact. (For cubics, # this means the 3^rd derivatives match.) # The "order" of a spline is the number of free parameters in # each interval. (For polynomial splines, the order is the # degree plus 1.) # Univariate Splines in S-Plus # Splines can be used in both "interpolation" and "approximation". # An interpolating spline fit matches each of a given set of # points. Each point is usually taken as a knot, and the # continuity conditions are imposed at each point. # It makes sense to interpolate points that are known to be exact. # The S-Plus function spline() yields an interpolating cubic # spline. # The reason to use an interpolating spline is usually to approximate # a function at points other than those given (maybe for quadrature), # and so applied mathematicians may refer to the results of the interpolating # spline as an "approximation". An interpolating spline is used # when a set of points are assumed to be known exactly (more-or-less). # The other way of using splines is for approximation or smoothing. # The individual points may be suject to error, so the spline may not # go through any of the given points. # In this usage, the splines are evaluated at each abscissa point, and # the ordinates are fitted by some criterion (such as least squares) to # the spline. # The choice of knots is a difficult problem when the points are measured # subject to error. One approach is to include the knots as decision variables # in the fitting optimization problem. This approach may be ill-posed. # A common approach is to add (pre-chosen) knots in a stepwise manner. # Another approach is to use a regularization method (addition of a component # to the fitting optimization objective function that increases for roughness # or for some other undesirable characteristic of the fit). # bs() in S-Plus # The S-Plus function bs() together with a fitting function yields a # fit that uses B-splines as the basis functions. # bs() generates values at a given set of points of the B-splines # of a specified order and for a given knot sequence. # Example # # Use cubic splines to approximate a discrete function over [xbeg,xend] # defined at nobs equally-spaced values, and with k interior # knots. Let's choose the knots to correspond to the # [nobs/k]th, [2*nobs/k]th, ... points. (The knots of course need not # correspond to observations.) # Generate points: nobs <- 100 xbeg <- 1 xend <- 100 step <- (xend-xbeg)/(nobs-1) x <- xbeg + (c(1:nobs)-1)*step # Knots: k <- 4 knots<-x[c(1:k)*nobs/(k+1)] # (x and the knots must be in ascending order) # First, let's look at some spline basis functions evaluated over the interval # of interest. par(mfrow=c(2,2)) # Generate values of B-splines at all values of x: intercept <- 1 degree <- 3 xb <- bs(x, knots=knots, degree=degree, intercept=intercept) # ??? could not use xframe here ??? # There are bb = k+degree+intercept bb <- k+degree+intercept # Plot the B-splines: plot(x, xb[,1], type="l",ylim=c(0,max(xb)),ylab="basis functions",xlab="x and the knots") for (i in c(2:bb)) {lines(x, xb[,i], type="l", lty=i)} lines(knots, c(rep(0,length(knots))), type="p",cex=1.5) title <- paste("B-Splines Basis Functions; Order =", degree+1) title <- paste(title,";", sep="") title <- paste(title, k, "Knots" ) title(title) ################################################################# # For comparison, look at degree 1 (2nd order) degree <- 1 xb1 <- bs(x, knots=knots, degree=degree, intercept=intercept) bb1 <- k+degree+intercept plot(x, xb1[,1], type="l",ylim=c(0,max(xb1)), ylab="basis functions",xlab="x and the knots") for (i in c(2:bb1)) {lines(x, xb1[,i], type="l", lty=i)} lines(knots, c(rep(0,length(knots))), type="p",cex=1.5) title <- paste("B-Splines Basis Functions; Order =", degree+1) title <- paste(title,";", sep="") title <- paste(title, k, "Knots" ) title(title) ################################################################# # For a different comparison, consider the power basis functions. # Let's look at degree 3 and the same knots as before. # For the power basis, we generally scale the range into [0,1] xscaled <- (x-x[1])/(x[nobs]-x[1]) degree <- 3 order <- degree+1 pp1 <- k+order xp1 <- matrix(rep(0,nobs*pp1),nrow=nobs) xp1[,1] <- 1 xp1[,2] <- xscaled xp1[,3] <- xscaled^2 xp1[,4] <- xscaled^3 for (i in c(1:order)) { xp1[(i*nobs/(k+1)):nobs,4+i] <- ((xscaled[(i*nobs/(k+1)):nobs]-xscaled[(i*nobs/(k+1))]) / (xscaled[nobs]-xscaled[(i*nobs/(k+1))]) )^3 } plot(x, xp1[,1], type="l",ylim=c(0,max(xp1)), ylab="basis functions",xlab="x and the knots") for (i in c(2:pp1)) {lines(x, xp1[,i], type="l", lty=i)} lines(knots, c(rep(0,length(knots))), type="p",cex=1.5) title <- paste("Power Basis Functions; Order =", degree+1) title <- paste(title,";", sep="") title <- paste(title, k, "Knots" ) title(title) ################################################################# # For yet another comparison, consider the "natural" cubic basis functions. # Use the same knots as before. # We will scale the range into [0,1]. # Let's use the natural cubic splines of the text: xscaled <- (x-x[1])/(x[nobs]-x[1]) xn1 <- matrix(rep(0,nobs*k),nrow=nobs) knotk <- xscaled[(k*nobs/(k+1))] adj <- rep(0,nobs) adj[(k*nobs/(k+1)):nobs] <- (xscaled[(k*nobs/(k+1)):nobs]-knotk)^3 dkm1 <- rep(0,nobs) dkm1[((k-1)*nobs/(k+1)):nobs] <- ((xscaled[((k-1)*nobs/(k+1)):nobs]-xscaled[((k-1)*nobs/(k+1))])^3 - adj[((k-1)*nobs/(k+1)):nobs])/ (knotk - xscaled[((k-1)*nobs/(k+1))]) xn1[,1] <- 1 xn1[,2] <- xscaled for (i in c(1:(k-2))) { xn1[(i*nobs/(k+1)):nobs,2+i] <- ((xscaled[(i*nobs/(k+1)):nobs]-xscaled[(i*nobs/(k+1))])^3 - adj[(i*nobs/(k+1)):nobs])/ (knotk - xscaled[(i*nobs/(k+1))]) - dkm1[(i*nobs/(k+1)):nobs] } plot(x, xn1[,1], type="l",ylim=c(0,max(xn1)), ylab="basis functions",xlab="x and the knots") for (i in c(2:k)) {lines(x, xn1[,i], type="l", lty=i)} lines(knots, c(rep(0,length(knots))), type="p",cex=1.5) title <- paste("Natural Cubic Spline Basis Functions with", k, "Knots" ) title(title) ################################################################# par(mfrow=c(1,1)) # Now let's consider some examples of the use of splines. # First, let's approximate funapp(x) over [xbeg,xend]. # Define the function (make sure it's defined over the # interval): funapp <- function(x){return(1/x)} y <- funapp(x) # If we begin with known values, we probably want to interpolate them, # but we'll do it both ways. intercept <- 0 degree <- 3 xb <- bs(x, knots=knots, degree=degree, intercept=intercept) # First we'll use order 4 and the 4 knots that we used above to illustrate # the basis functions. The basis functions we computed and stored in the # structure xb included the "intercept". S-Plus cannot handle this, so # let's first recompute the basis functions: intercept <- 0 degree <- 3 xb <- bs(x, knots=knots, degree=degree, intercept=intercept) # Now let's fit the function. This can be done in any way. # Let's use least squares: fit <- lm(y~xb) # Now, compute predicted values. # Coerce x to be a data frame. xframe <- as.data.frame(x) yhat <- predict(fit,xframe) # Must use xframe here ??? plot(x, y, type="l",ylab="Function and Approximation") lines(x, yhat, type="l", lty=2) # For comparison, let's also use the order 2 splines (with the same # knots). intercept <- 0 degree <- 1 xb1 <- bs(x, knots=knots, degree=degree, intercept=intercept) fit1 <- lm(y~xb1) yhat1 <- predict(fit1,xframe) lines(x, yhat1, type="l", lty=3) ################################################################ # For comparison, let's interpolate at some points (a smaller number). # Let's just choose nobs*frac points randomly: frac <- .2 kknots <- c(1,sample(nobs-2,floor(nobs*frac))+1,nobs) kknots <- sort(kknots) ispline <- spline(x[kknots],y[kknots],n=nobs,xmin=x[1],xmax=x[nobs]) plot(x, y, type="l",ylim=c(min(ispline$y,y),max(ispline$y,y)), ylab="Function and Approximations") lines(x, yhat, type="l", lty=2) lines(x, yhat1, type="l", lty=3) lines(ispline$x, ispline$y, type="l", lty=5) lines(x[kknots],y[kknots], type="p",cex=1.5) title("Various Spline Approximations")