#--------------Introduction--------------#
#Here we provide a sample of the R code used to carry out the testing procedure
#described in "Two sample inference in functional linear models" by Horvath, Kokoszka, and Reimherr.
#The first section of code simulates from a functional linear model with a
#functional response and functional explanatory variables. This is the code for the most general
#procedure, which does not assume equal covariances of the explanatory functions.
#"Application of FDA" and onwards applies our procedure to the simulated data.

library(fda) # We will be using the fda package

basisnumber=80
databasis<-create.bspline.basis(c(0,1),basisnumber) # Throughout, we will be using a B-spline basis

#--------------Functions for Simulating Data--------------#
#The two functions below are used to simulate random functions#
#We sample the functions at M points#

#Function generates a standard Brownian motion on the [0,T] interval#
BM.fn <- function(M,T){
  dt<-T/(M-1) # step size, so that the grid starts at 0 and ends at T
  W<-numeric(M)
  W[1]=0
  for(j in 2:M){
    W[j]=W[j-1]+sqrt(dt)*rnorm(1, mean=0, sd=1)
  }
  return(W)
}

#Function generates a standard Brownian bridge on the [0,T] interval#
BB.fn<-function(M,T){
  dt<-T/(M-1) # step size, so that the grid starts at 0 and ends at T
  W<-numeric(M)
  W[1]=0
  for(j in 2:M){
    W[j]=W[j-1]+sqrt(dt)*rnorm(1, mean=0, sd=1)
  }
  tt=(0:(M-1))*dt # time grid on [0,T]
  B= W - (tt/T)*W[M] # Brownian bridge: B(t) = W(t) - (t/T)*W(T)
  return(B)
}

#--------------Parameter Values--------------#
M=200 # Number of points at which each function is sampled
T=1 # End point of the interval for the functions
N=100 # Sample size for both samples
t=(0:(M-1))/(M-1) # time points where we observe the functions
xnharm=5 # number of principal components for the explanatory functions
ynharm=3 # number of principal components for the response functions
reps=100

#--------------Generating Data from Functional Linear Model--------------#
Kernal<-function(s,t,c){ c*min(s,t) } #This is the kernel of the operator used in the functional linear model#
c1=1 #value of c for the first sample
c2=5 #value of c for the second sample

# Since we are working with vectors that represent the observed functions,
# we have to construct the matrix approximation of the operator
KernalMatrix1<-matrix(0, M, M) #For the first sample
for(i in 1:M){
  for(j in 1:M){
    KernalMatrix1[i,j]=Kernal(t[i],t[j],c1)
  }
}
KernalMatrix2<-matrix(0, M, M) #For the second sample
for(i in 1:M){
  for(j in 1:M){
    KernalMatrix2[i,j]=Kernal(t[i],t[j],c2)
  }
}
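#--------------Sanity Check for the Operator Approximation (optional)--------------#
# A minimal check, not part of the original script: for the test function x(s) = s,
# the operator with kernel c*min(s,t) has the closed form
# integral_0^1 c*min(s,t)*s ds = c*(t/2 - t^3/6), so the Riemann-sum approximation
# (1/M)*x%*%KernalMatrix1 used below should be close to it (error roughly of order 1/M).
# The names x.test, approx.op, and exact.op are introduced here only for illustration.
x.test<-t # x(s) = s evaluated on the time grid
approx.op<-as.vector((1/M)*x.test%*%KernalMatrix1) # discretized operator applied to x
exact.op<-c1*(t/2 - t^3/6) # closed-form value of the integral
max(abs(approx.op - exact.op)) # should be small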
#We store the functions in matrices: different rows represent different observations,
#while different columns represent different time points
X1<-matrix(0, N, M) #Generate N i.i.d. Brownian motions as explanatory functions for the first sample
for(i in 1:N){
  X1[i,]=BM.fn(M,T)
}
X2<-matrix(0, N, M) #Generate N i.i.d. Brownian bridges as explanatory functions for the second sample
for(i in 1:N){
  X2[i,]=BB.fn(M,T)
}
Y1<-matrix(0, N, M) #Generate the response for the first sample
for(i in 1:N){
  Y1[i,]=(1/M)*X1[i,]%*%KernalMatrix1 + BM.fn(M,T)
}
Y2<-matrix(0, N, M) #Generate the response for the second sample
for(i in 1:N){
  Y2[i,]=(1/M)*X2[i,]%*%KernalMatrix2 + BM.fn(M,T)
}
X<-rbind(X1,X2) #Pooled explanatory functions
Y<-rbind(Y1,Y2) #Pooled response functions
Test<-numeric(0)
Lambda<-numeric(0)
TBasis<-numeric(0)

#--------------Application of FDA--------------#
#We start by expressing our data as functional data#
#The observations are expressed as functions with respect to the B-spline basis
#For our procedure we use the pooled samples
#(if data2fd is unavailable in your version of fda, Data2fd(t, t(X), databasis) is the newer equivalent)
X.f<-center.fd(data2fd(t(X),t,databasis))
Y.f<-center.fd(data2fd(t(Y),t,databasis))
#A PCA is then performed on the pooled samples#
pca_X=pca.fd(X.f,xnharm)
pca_Y=pca.fd(Y.f,ynharm)
#After the PCA we can separate the samples
X1scores=pca_X$scores[1:N,]
X2scores=pca_X$scores[(N+1):(2*N),]
Y1scores=pca_Y$scores[1:N,]
Y2scores=pca_Y$scores[(N+1):(2*N),]
#Since the PCA is performed on the pooled sample, both samples share a common
#eigenbasis, so the change-of-basis matrix between them reduces to (approximately) the identity
TBasis=inprod(pca_X$harmonics,pca_X$harmonics)

#--------------Testing Procedure--------------#
#We first estimate the contribution to the covariance from the explanatory functions#
gamma1<-(t(X1scores)%*%X1scores)*(1/N)
gamma2<-(t(X2scores)%*%X2scores)*(1/N)
gammainv1<-solve(gamma1)
gammainv2<-solve(gamma2)
#Then we fit a linear regression of the response scores on the explanatory scores within each sample
LM1<-lm(Y1scores ~ X1scores - 1)
LM2<-lm(Y2scores ~ X2scores - 1)
#We pull out the coefficient estimates
mu<-matrix(LM1$coeff,ncol=1)
mustar<-matrix(LM2$coeff,ncol=1)
#We estimate the covariance matrix of the errors
errorcov1<-(1/(N-xnharm))*(t(LM1$residuals)%*%LM1$residuals)
errorcov2<-(1/(N-xnharm))*(t(LM2$residuals)%*%LM2$residuals)
#For simplicity, we construct the test statistic in two stages:
#First we compute the estimated covariance matrix used in the quadratic form
sigma=errorcov1 %x% gammainv1 + errorcov2 %x% gammainv2
#And finally we compute the test statistic
Lambda=N*t(mu - mustar)%*%solve(sigma)%*%(mu - mustar)
#If the null is true (c1=c2 in this case), then Lambda should have an approximate
#chi-squared distribution with xnharm*ynharm degrees of freedom
P.Value<-1-pchisq(Lambda,df=xnharm*ynharm)
P.Value
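#--------------Decision at a Fixed Level (optional)--------------#
# A minimal follow-up, not part of the original script: compare Lambda with the
# chi-squared critical value at level alpha and report the decision.
# The names alpha, critical.value, and reject are introduced here only for illustration;
# presumably the unused variable reps above is intended for wrapping the simulation and
# test in a loop to estimate empirical size or power over reps repetitions.
alpha<-0.05
critical.value<-qchisq(1-alpha, df=xnharm*ynharm)
reject<-as.numeric(Lambda) > critical.value
reject # TRUE means we reject the null hypothesis of equal operators at level alpha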