#################################################
# C01: 維度縮減                                 #
# 吳漢銘 國立政治大學統計學系                   #
# https://hmwu.idv.tw/                          #
#################################################

# 9/144

X <- iris[,1:4]
(S <- cov(X))
          
(e <- eigen(S))

D <- diag(e$values)
C <- e$vectors
C%*%D%*%t(C)


S%*%C[,1]
D[1]*C[,1]

#10/144

eigen(cov(iris[,1:4]))


# 13/144

iris.sub <- iris[sample(1:150, 8),1:4]
iris.sub
M.svd <- svd(iris.sub)
M.svd
M.svd$u %*% (diag(M.svd$d) %*% t(M.svd$v)) 

d.sub <- diag(M.svd$d[1:2])   
u.sub <- as.matrix(M.svd$u[, 1:2])
v.sub <- as.matrix(M.svd$v[, 1:2])
iris.sub.approx <- u.sub %*% d.sub %*% t(v.sub)
iris.sub.approx

sum((iris.sub - iris.sub.approx)^2)


# 14/144
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("EBImage")

library(EBImage) #(Repositories: BioC Software)
lena <- readImage("lena.jpg")
dims <- dim(lena)
dims

plot(c(0, dims[1]), c(0, dims[2]), type='n', xlab="", ylab="")
rasterImage(lena, 0, 0, dims[1], dims[2])

library(jpeg)
lena <- readJPEG("lena.jpg")


#  15/144

lena.flip <- Image(flip(lena))
red.weight   <- .2989
green.weight <- .587
blue.weight  <- 0.114

lena.gray <- red.weight * imageData(lena.flip)[,,1] + 
  green.weight * imageData(lena.flip)[,,2] + 
  blue.weight  * imageData(lena.flip)[,,3]
dim(lena.gray)
lena.gray[1:5, 1:5]
image(lena.gray, col = grey(seq(0, 1, length = 256)))


# 16/144

lena.svd <- svd(lena.gray)
d <- diag(lena.svd$d)
dim(d)
u <- lena.svd$u
v <- lena.svd$v
plot(1:length(lena.svd$d), lena.svd$d, pch=19, xlab="i-th lena.svd$d", ylab="lena.svd$d")

used.no <- 20
u.sub <- as.matrix(u[, 1:used.no])
v.sub <- as.matrix(v[, 1:used.no])
d.sub <- as.matrix(d[1:used.no, 1:used.no])
lena.approx <- u.sub %*% d.sub %*% t(v.sub)
image(lena.approx, col = grey(seq(0, 1, length = 256)))


# 20/144

?cov
x <- iris[, 1:4]
cov(x)


# 21/144

x <- iris[, 1:4]
(covx <- cov(x))
e <- eigen(covx)
V <- e$vectors
V.inverse <- solve(e$vectors)
covx.hat <- V %*% diag(e$values) %*% V.inverse
covx.hat # same with covx


# 23/144

z <- as.matrix(x) %*% e$vectors[, 1:2]
plot(z[, 1], z[, 2], col=iris[, 5])


# 31/144

m1 <- matrix(sample(1:16,16),4,4)
m1

m1.scale.svd <- svd(scale(m1))
m1.scale.svd


m1.pca <- prcomp(m1, scale=T)
m1.pca


pca2 <- princomp(m1, cor=T)
pca2$scores

pca2$loadings


# 33/144

cell.matrix  <- read.table("YeastCellCycle_alpha.txt", header=TRUE, row.names=1)
n <- dim(cell.matrix)[1]
p <- dim(cell.matrix)[2]-1
cell.data <- cell.matrix[,2:p+1]
gene.phase <- cell.matrix[,1]
phase <- unique(gene.phase)
phase.name <- c("G1", "S", "S/G2", "G2/M", "M/G1")
cell.sdata <- t(scale(t(cell.data))) 
rc <- rainbow(5)[as.integer(gene.phase)]
cc <- rainbow(ncol(cell.sdata))
hv <- heatmap(cell.sdata, col = GBRcol, scale = "column", Colv=NA, Rowv=NA,
              RowSideColors = rc, ColSideColors = cc, margins = c(5,10),
              xlab = "Times", ylab =  "Genes", main = "Heatmap of Microarray Data")


# 34/144

cell.pca <- princomp(cell.sdata, cor=TRUE, scores=TRUE)
# 2D plot for first two components
pca.dim1 <- cell.pca$scores[,1]
pca.dim2 <- cell.pca$scores[,2]
plot(pca.dim1, pca.dim2, main="PCA for Cell Cycle Data on Genes", 
     xlab="1st PCA Component", ylab="2nd PCA Component",col=c(phase), pch=c(phase))
legend(3, 4, phase.name, pch=c(phase), col=c(phase))

# shows a screeplot.
plot(cell.pca) 
biplot(cell.pca)


# 35/144

# loadings plot
plot(loadings(cell.pca)[,1], loadings(cell.pca)[,2], xlab="1st PCA", 
     ylab="2nd PCA", main="Loadings Plot", type="n")
text(loadings(cell.pca)[,1], loadings(cell.pca)[,2], labels=paste(1:p))
abline(h=0)
abline(v=0)


# 36/144

library(MASS)
mu <- c(2, -1)
Sigma <- matrix(c(2.4, -0.5, -0.5, 1), 2)
n <- 250
X <- mvrnorm(n, mu, Sigma)
 
mycol <- terrain.colors(n)
sorted.x1 <- sort(X[,1])
order.x1 <- order(X[,1])
id <- 1:n
sorted.id <- id[order.x1]
x1.col <- mycol[order(sorted.id)]
 
par(mfrow=c(1, 2))
plot(X, col=x1.col, pch=16, main="simulated bivariate normal")
abline(h=0, v=0, col="gray")
X.pca <- princomp(X, cor = TRUE)
X.pca$sdev


X.pca$loadings
plot(X.pca$scores, col=x1.col, pch=16, main="PCA")
abline(h=0, v=0, col="gray")


# 37/144

pca.pkg <- c("FactoMineR", "factoextra", "corrplot")
install.packages(pca.pkg)
lapply(pca.pkg, library, character.only=TRUE)
data(decathlon2) 
head(decathlon2

dim(decathlon2)

x <- decathlon2[,1:10]


# 38/144

x.pca <- PCA(x, graph = FALSE)
class(x.pca)

str(x.pca)

print(x.pca)


# 39/144

eig.val <- get_eigenvalue(x.pca)
eig.val

fviz_eig(x.pca, addlabels = TRUE, ylim = c(0, 50))


# 40/144

var <- get_pca_var(x.pca)
var

head(var$coord, 4)

fviz_pca_var(x.pca, col.var = "black")


# 41/144

head(var$cos2, 4)
corrplot(var$cos2, is.corr=FALSE)
fviz_cos2(x.pca, choice = "var", axes = 1:2)
fviz_pca_var(x.pca, col.var = "cos2",
             gradient.cols = c("blue", "yellow", "red"), 
              repel = TRUE) # Avoid text overlapping


# 42/144

head(var$contrib, 4)
corrplot(var$contrib, is.corr=FALSE) 
fviz_contrib(x.pca, choice = "var", axes = 1, top = 10)
fviz_contrib(x.pca, choice = "var", axes = 2, top = 10)
fviz_contrib(x.pca, choice = "var", axes = 1:2, top = 10)


# 43/144

fviz_pca_var(x.pca, col.var = "contrib",
              gradient.cols = c("blue", "yellow", "red"))
set.seed(123)
var.kms <- kmeans(var$coord, centers = 3, nstart = 25)
kms.grp <- as.factor(var.kms$cluster)
fviz_pca_var(x.pca, col.var = kms.grp, palette = c("blue", "green", "red"),
              legend.title = "Cluster")


# 44/144

ind <- get_pca_ind(x.pca)
ind
ad(ind$coord, 3)
head(ind$cos2, 3)
head(ind$contrib, 3)


# 45/144

fviz_pca_ind(x.pca, col.ind = "cos2", gradient.cols = c("blue", "black", "red"),
             repel = TRUE)

fviz_pca_ind(x.pca, pointsize = "cos2", pointshape = 21, fill = "lightblue",
             repel = TRUE)


# 46/144

fviz_pca_ind(x.pca, geom.ind = "point", col.ind = decathlon2[,13],
             palette = c("blue", "red"), legend.title = "Competition")

fviz_cos2(x.pca, choice = "ind", top = 5)
fviz_contrib(x.pca, choice = "ind", axes = 1:2, top = 5)


# 48/144

x <- c(-0.9, 0.6, 0.1)
y <- c(-0.5, 0, 0.4)
plot(x, y, xlim=c(-1, 1), ylim=c(-1, 1), main="Data Input Space")
abline(h=0, v=0, col="blue", lwd=2)
text(x+0.05, y+0.05, c("s1", "s2", "s3"), col="red")
pca <- princomp(cbind(x, y)); ab <- pca$loadings
arrows(-ab[1,1], -ab[2,1], ab[1,1], ab[2,1], col="green", angle = 10, lwd=2)
text(-ab[1,1], -ab[2,1], "Comp.1")
arrows(-ab[1,2], -ab[2,2], ab[1,2], ab[2,2], col="green", angle = 10, lwd=2)
text(-ab[1,2], -ab[2,2], "Comp.2")


# 51/144

iris.pca <- princomp(iris[,1:4])
biplot(iris.pca, main="Biplot for iris data")


# 54/144

library(devtools)
install_github("vqv/ggbiplot")

library(ggbiplot)
iris.prcomp <- prcomp(iris[,1:4], scale. = TRUE)
ggbiplot(iris.prcomp, groups = iris[,5])
ggbiplot(iris.prcomp, obs.scale = 1, var.scale = 1,
         groups = iris[,5], ellipse = TRUE, circle = TRUE) +
  scale_color_discrete(name = '') +
  theme(legend.direction = 'horizontal', legend.position = 'top')


# 55/144

library(lattice)
state.spending <- read.table("StatePolicySpending.txt", header = T, row.names=1)
head(state.spending)

spend <- scale(state.spending)
spend.svd <- svd(spend, nu=2, nv=2)

D <- spend.svd$d[1:2]
V <- spend.svd$v[,1:2] 
U <- spend.svd$u[,1:2]

# Create matrices for variables and observations by weighting singular vectors with 
# the square roots of the first two singular values. These will be used to construct 
# a symmetric biplot.

spend.var <- V * (rep(1, length(V[,1])) %*% t(D^.5))
spend.obs <- U * (rep(1, length(U[,1])) %*% t(D^.5))
row.names(spend.var) <- colnames(spend)
row.names(spend.obs) <- row.names(spend)


# 56/144

# Within the panel function, "panel.xyplot" draws the observation points and 
# "panel.segments" draws the variable vectors. The first "panel.text" labels the vectors, 
# and the second "panel.text" provides labels for relatively extreme observation points.
xyplot(spend.obs[,2] ~ spend.obs[,1],
       aspect = 1,
       panel = function(x, y) {
         panel.xyplot(x, y, col = "black")
         panel.segments(rep(0, length(spend.var[,1])), 
                        rep(0, length(spend.var[,1])),
                        spend.var[,1], spend.var[,2], lty = 1, col = "blue")
         panel.text(spend.var[,1], spend.var[,2], 
                    row.names(spend.var), cex = .7, col = "green")
         panel.text(x[abs(x)>.7 | abs(y)>.7]+.04, y[abs(x)>.7 | abs(y)>.7],
                    row.names(spend.obs)[abs(x)>.7 | abs(y)>.7], cex = .7, 
                    adj = 0, col= "red")
       },
       xlim = c(-2.5, 2.5), ylim = c(-2.5, 2.5),
       xlab = " ", ylab = " ")

# Calculate proportion of variance explained by
# first two pairs of singular vectors
var.explained <- sum(D^2)/sum(spend.svd$d^2)
var.explained


# 57/144

biplot(princomp(scale(state.spending)), cex=0.6)


# 64/144

data(iris3)
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), 
                   Species = rep(c("setosa","versicolor","virginica"), rep(50,3)))

## FA
data <- Iris[,1:4]
class <- Iris[,5]
factanal(data, factors=1)
fa.dim1 <- as.matrix(data)%*%iris.fa$loadings[,1]


# 68/144

x <- as.matrix(iris[, 1:4])
B <- x %*% t(x)
d <- 2
eB <- eigen(B)
C <- eB$vectors
D <- eB$values
Z <- C[, 1:d] %*% diag(D[1:d])
plot(z[,1], z[,2], col=iris[,5], main="MDS")


# 71/144

# correlation matrix
cell.cor <- cor(t(cell.sdata)) 
# distance matrix
cell.dist<- sqrt(2*(1-cell.cor)) 
cell.mds<- cmdscale(cell.dist)
mds.dim1 <- cell.mds[,1]
mds.dim2 <- cell.mds[,2]

plot(mds.dim1, mds.dim2, type="n", xlab="MDS-1", ylab="MDS-2", main="MDS for Cell Cycle Data")
for(i in 0:4){
  text(mds.dim1[number[gene.phase==i]], mds.dim2[number[gene.phase==i]],      
       gene.phase[number[gene.phase==i]] , cex=0.8, col= i+1) 
}
legend(0.8, 1.0, phase.name, pch="01234", col=c(1,2,3,4,5))


# 73/144

no.group <- 5
no.iter <- 20
USArrests.kmeans <- kmeans(USArrests, no.group, no.iter)
plot(USArrests, col = USArrests.kmeans$cluster, main = "K-means: USArrests data")
# PCA
USArrests.pca <- princomp(USArrests, cor=TRUE, scores=TRUE)
pca.dim1 <- USArrests.pca$scores[,1]; pca.dim2 <- USArrests.pca$scores[,2]
plot(pca.dim1, pca.dim2,  main="PCA for USArrests Data with K-means", 
     xlab="PCA-1", ylab="PCA-2", col=USArrests.kmeans$cluster)
# MDS
USArrests.mds<- cmdscale(dist(USArrests))
mds.dim1 <- USArrests.mds[,1]; mds.dim2 <- USArrests.mds[,2]
plot(mds.dim1, mds.dim2, xlab="MDS-1", ylab="MDS-2", 
     main="MDS for USArrests Data with K-means", col = USArrests.kmeans$cluster)


# 86/144
swissroll <- function(n, sigma=0.05){
  
  angle <- (3*pi/2)*(1+2*runif(n)); 
  height <- runif(n);
  xdata <-  cbind(angle*cos(angle), height, angle*sin(angle))
  
  # angle <-  seq((1.5*pi): (4.5*pi), length.out=n);
  # height <- sample(0:21, n, replace = TRUE);
  # xdata <-  cbind(angle*cos(angle), height, angle*sin(angle))
  
  xdata <- scale(xdata) + matrix(rnorm(n*3, 0, sigma), n, 3)
  
  order.angle <- order(angle)
  sort.angle <- sort(order.angle, index.return=TRUE)	
  col.id <- rainbow130(n)
  my.color <- col.id[sort.angle$ix]
  
  colnames(xdata) <- paste("x", 1:3, sep="")
  
  list(xdata=xdata, angle=angle, color=my.color)
}


source("rainbow130.r")
sr <-  swissroll(800)
library(rgl); open3d()
plot3d(sr$xdata[,1], sr$xdata[,2], sr$xdata[,3], col=sr$color, size=3,
       xlab="", ylab="", zlab="", axes = T)	  
library(vegan)
sr.isomap <-  isomap(dist(sr$xdata), ndim=2, k=7) # try different k
plot(sr.isomap, col=sr$color)


# 87/144

Scurve <- function(n){
  # upper half S curve    
  theta1 <- runif((n/2), min=-0.9*(pi), max=(pi)/2)
  z1 <- runif((n/2), min=0, max=5)
  x1 <- -cos(theta1)
  y1 <- 2-sin(theta1)
  side.upper <- matrix(0, ncol=4, nrow=(n/2))
  for(i in 1:(n/2)){
    side.upper[i,1] <- x1[i]
    side.upper[i,2] <- y1[i]
    side.upper[i,3] <- z1[i]
    side.upper[i,4] <- theta1[i]
  }
  order.theta1 <- order(theta1)
  sort.theta1 <- sort(order.theta1, method="qu", index=TRUE)
  upper.color <- sort.theta1$ix
  
  # lower half S curve     
  theta2 <- runif((n/2), min=(pi)/2, max=1.9*(pi))
  z2 <- runif((n/2), min=0, max=5)
  x2 <- cos((pi)-theta2)
  y2 <- sin((pi)-theta2)
  side.lower <- matrix(0, ncol=4, nrow=(n/2))
  
  for(i in 1:(n/2)){
    side.lower[i,1] <- x2[i]
    side.lower[i,2] <- y2[i]
    side.lower[i,3] <- z2[i]
    side.lower[i,4] <- theta2[i]
  }
  order.theta2 <- order(theta2)
  sort.theta2 <- sort(order.theta2, method="qu", index=TRUE)
  lower.color <- sort.theta2$ix
  
  # 88/144
  
     xdata <- rbind(side.upper[,c(1,3,2)], side.lower[,c(1,3,2)])
    xdata <- scale(xdata) + matrix(rnorm(n*3,0, 0.05), n, 3)

    angle <- c(side.upper[,4], side.lower[,4])
    S.color <- c(upper.color, (n/2 + lower.color))
    my.color <- (rainbow130((1.2)*n)[S.color])[1:n]

    scatterplot3d(xdata, color=my.color, 
	         xlab="x", ylab="y", zlab="z", 	              
	         pch=20, angle=30)

    open3d()
    plot3d(xdata[,1], xdata[,2], xdata[,3], col=my.color, size=5, 
	  xlab="x", ylab="y", zlab="z")

    return(list(xdata=xdata, angle=angle, color=my.color))

}


# 95/144

data(iris3)
Iris <- data.frame(rbind(iris3[,,1], iris3[,,2], iris3[,,3]), 
                   Species = rep(c("setosa","versicolor","virginica"), rep(50,3)))

## LDA
data <- Iris[,1:4]
class <- Iris[,5]
Iris.lda <- lda(x=data, grouping=class)
Iris.lda <- lda(Species ~ ., Iris)
lda.dim1 <- as.matrix(data)%*%iris.lda$scaling[,1]
lda.dim2 <- as.matrix(data)%*%iris.lda$scaling[,2]

## LDA for classification
trainingIndex <- sample(1:150, 75)
trainingSample <- Iris[trainingIndex, ]
testSample <- Iris[-trainingIndex, ]
table(Iris$Species[trainingIndex])

ldaRule <- lda(Species ~ ., Iris, 
               subset = trainingIndex)
predict(ldaRule, testSample)$class


# 101/144

iris.sir <- dr(as.integer(Species) ~ Sepal.Length + Sepal.Width + 
                 Petal.Length + Petal.Width , data=iris, nslices=3, 
               chi2approx="wood", method="sir")
summary(iris.sir)
comp.sir <- as.matrix(iris[,1:4]) %*% as.matrix(iris.sir$evectors[,1:2])
plot(comp.sir, col=as.integer(iris$Species)+1, main="SIR to Iris Data")


# 102/144
Li6.1 <- function(n){
  p <- 5
  x <- matrix(0, ncol=p, nrow=n)
  for(i in 1:p){
    x[,i] <- rnorm(n, mean=0, sd=1)
  }
  
  epsilon <- rnorm(n, mean=0, sd=1)
  y <- x[,1] + x[,2] + x[,3]+ x[,4] + epsilon    
  colnames(x) <- c("x1","x2","x3","x4","x5")
  
  as.data.frame(cbind(y, x))    
}

mydata <- Li6.1(200)
attach(mydata)
library(dr)
my.sir <- dr(y~x1+x2+x3+x4+x5, method="sir", nslices=10)
my.sir

my.sir

 dr(formula = y ~ x1 + x2 + x3 + x4 + x5, method = "sir", 
     nslices = 10)


# 110/144

x <- matrix(rnorm(100), ncol=20)
dim(x)
princomp(x)


princomp 
methods(princomp) 
getAnywhere('princomp.default') # or stats:::princomp.default
?cov.wt


# 111/144
e.cor <- eigen(cor(x, use="pairwise.complete.obs"))
e.cor$values

pca.pkg <- c("FactoMineR", "factoextra", "corrplot")
lapply(pca.pkg, library, character.only=TRUE)
x.pca <- PCA(x, graph = FALSE) # works, why?
PCA # check the code

str(x.pca)


# 113/144

library(cancerdata)
data(VEER1) # Breast cancer gene expression data (van't Veer)
VEER1
dim(exprs(VEER1))
str(VEER1)
exprs(VEER1)[1:3, 1:5] X1     X2     X3     X4     X5

VEER1@phenoData@data$info 
97 Levels: Sample.1..5.yr.survival...
table(VEER1@phenoData@data$class)


# 114/144

library(FactoMineR); library(factoextra); library(corrplot)
x <- t(exprs(VEER1))
x.pca <- PCA(x, graph = FALSE)
x.pca
eig.val <- get_eigenvalue(x.pca)
plot(eig.val[,3], type="l", xlab="components", ylab="variance", +      main="cumulative.variance.percent")
hist(x.pca$svd$V[,1], xlab="first eigenvector", main="weights of the first eigenvector") 


# 115/144

x.pc <- rep(1:3, 3:1)
y.pc <- unlist(lapply(2:4, function(x) seq(x, 4)))

mypca.plot <- function(used.pc){
    fviz_pca_ind(x.pca, axes = used.pc, 
                   geom.ind = "point", 
                 col.ind = VEER1@phenoData@data$class, 
                 palette = c("blue", "red"), 
                 legend.title = "Groups")
}
plot.list <- apply(cbind(x.pc, y.pc), 1, mypca.plot)

library(gridGraphics)
library(grid)
grid.newpage() 
library(gridExtra)
grid.arrange(grobs=plot.list, 
               nrow=2, 
               ncol=3)


# 116/144
x.pca.scores <- get_pca_ind(x.pca)
dim(x.pca.scores$coord)
group.col <- c("blue", "red")[as.integer(VEER1@phenoData@data$class)]
library("rgl")
open3d()
plot3d(x.pca.scores$coord[,1:3], 
       col=group.col, 
       type ="p", size=10)
play3d(spin3d(), duration=10)


# 118/144

library("corpcor")

n <- 6 # try 20, 500
p <- 10 # try 100, 10
set.seed(123456)
sigma <- matrix(rnorm(p * p), ncol = p)
sigma <- crossprod(sigma) + diag(rep(0.1, p)) #  t(x) %*% x

sigsvd <- svd(sigma)
y <- t(sigsvd$v %*% (t(sigsvd$u) * sqrt(sigsvd$d)))
x <- matrix(rnorm(n * ncol(sigma)), nrow = n) %*% y # problem here! use mvrnorm {MASS}
s1 <- cov(x)
s2 <- cov.shrink(x)
par(mfrow=c(1,3))
image(t(sigma)[,p:1], main="true cov", xaxt="n", yaxt="n")
image(t(s1)[,p:1], main="empirical cov", xaxt="n", yaxt="n")
image(t(s2)[,p:1], main="shrinkage cov", xaxt="n", yaxt="n")
sum((s1 - sigma) ^ 2)
sum((s2 - sigma) ^ 2)

mvrnorm {MASS}:
mvrnorm(n = 1, mu, Sigma, ...)


# 119/144

is.positive.definite(sigma)
is.positive.definite(s1)
is.positive.definite(s2)

rc <- rbind(
  data.frame(rank.condition(sigma)),
  data.frame(rank.condition(s1)),
  data.frame(rank.condition(s2)))
rownames(rc) <- c("true", "empirical", "shrinkage")
rc
          rank condition          tol
true        10 256.35819 6.376444e-14
empirical    5       Inf 1.947290e-13
shrinkage   10  15.31643 1.022819e-13


e0 <- eigen(sigma, symmetric = TRUE)$values
e1 <- eigen(s1, symmetric = TRUE)$values
e2 <- eigen(s2, symmetric = TRUE)$values

matplot(data.frame(e0, e1, e2), type = "l", ylab="eigenvalues", lwd=2)
legend("top", legend=c("true", "empirical", "shrinkage"), lwd=2, lty=1:3, col=1:3)


# 124/144

head(LifeCycleSavings)
pop <- LifeCycleSavings[, 2:3]
oec <- LifeCycleSavings[, -(2:3)]

cancor(pop, oec)


# 128/144

library(CCA)
data(nutrimouse)
str(nutrimouse)

table(nutrimouse$genotype, nutrimouse$diet)


# 129/144

X <- as.matrix(nutrimouse$gene)
Y <- as.matrix(nutrimouse$lipid) 
correl <- matcor(X, Y)
img.matcor(correl)
img.matcor(correl, type = 2)
str(correl)


# 130/144

x.hm <- heatmap(correl$Xcor)
y.hm <- heatmap(correl$Ycor)
ordering <- c(x.hm$rowInd, y.hm$rowInd + 120)
correl.2 <- list(Xcor=correl$Xcor[x.hm$rowInd, x.hm$rowInd], 
                 Ycor=correl$Ycor[y.hm$rowInd, y.hm$rowInd], 
                 XYcor=correl$XYcor[ordering, ordering])
img.matcor(correl.2)
img.matcor(correl.2, type = 2)


# 131/144

X.subset <- as.matrix(nutrimouse$gene[, sample(1:120, size = 10)])
my.cca <- cc(X.subset, Y)
barplot(my.cca$cor, xlab="Dimension",
        ylab="Canonical correlations", 
        names.arg=1:10, ylim=c(0,1))
plt.cc(my.cca)
names(my.cca)

my.cca$cor


# 132/144

regul.par <- estim.regul(X, Y, plt = TRUE,
                         grid1 = seq(0.0001, 0.01, l=5), # l=50
                         grid2 = seq(0, 0.1, l=5)) # l=50
  lambda1 =  0.01 
  lambda2 =  0.075 
 CV-score =  0.884716 
my.rcca <- rcc(X, Y, regul.par$lambda1, regul.par$lambda2)
names(my.rcca)
[1] "cor"    "names"  "xcoef"  "ycoef"  "scores"
barplot(my.rcca$cor, xlab = "Dimension", 
        ylab = "Canonical correlations", names.arg = 1:21, ylim = c(0,1))
plt.cc(my.rcca, var.label = TRUE,
       ind.names = paste(nutrimouse$genotype, nutrimouse$diet, sep = "."))

names(my.rcca$scores)
[1] "xscores"        "yscores"        "corr.X.xscores"
[4] "corr.Y.xscores" "corr.X.yscores" "corr.Y.yscores


# 134/144

library("corpcor")
options(digits=4)

test <- function(x, s){
  image(t(s)[,nrow(s):1], main="cov(x)", col=terrain.colors(100))  
  image(t(x)[,nrow(x):1], main="x", col=terrain.colors(100))  
  cat("is.positive.definite:", is.positive.definite(s), "\n")
  cat("eigen:\n")
  print(eigen(s))
  cat("inverse:\n")
  print(solve(s))
}
layout(matrix(1:2, ncol=1), height=c(1,2))
n <- 100
p <- 4
x1 <- matrix(rnorm(n*p), ncol=p)
summary(x1)


# 135/144

s1 <- cov(x1)
test(x1, s1)


# 136/144

x2 <- matrix(rnorm(n*p, sd=0.0001), ncol=p)
id <- sample(1:p, floor(p/3))
x2[, id] <- x2[, id] + abs(rnorm(n*length(id), m=10000, sd=5000))
summary(x2)
s2 <- cov(x2)
test(x2, s2)


# 137/144

x3 <- scale(x2)
s3 <- cov(x3)
test(x3, s3)


# 138/144

library(fields); library("corpcor"); library(CCA)
cor.col <- two.colors(start="blue", middle="white", 
                        end="red") # length=255
par(mfrow=c(3,1))
n <- 100
p <- 4
q <- 5
set.seed(12345)
X <- matrix(rnorm(n*p), ncol=p); rX <- cor(X)
range.col <- floor((1+range(rX))*127+1)
image(t(rX)[,p:1], main="cor(X)", col=cor.col[range.col[1]: range.col[2]])
is.positive.definite(cov(X))
Y <- matrix(rnorm(n*q), ncol=q); rY <- cor(Y)
range.col <- floor((1+range(rY))*127+1)
image(t(rY)[,q:1], main="cor(Y)", col=cor.col[range.col[1]: range.col[2]])
is.positive.definite(cov(Y))
xy.cca <- cc(X, Y)

X[,1] <- 3*X[,4] + 1; rX <- cor(X)
range.col <- floor((1+range(rX))*127+1)
image(t(rX)[,p:1], main="cor(X)", col=cor.col[range.col[1]: range.col[2]])
xy.cca <- cc(X, Y)
is.positive.definite(cov(X))


# 140/144

LCMC
function (Q, K = 1:nrow(Q)) 
{
...
    nQ <- nrow(Q)
    nK <- length(K)
    N <- nQ + 1
    if (nK < 0.2 * nQ) {
        lcmc <- numeric(nK)
        for (i in 1:nK) {
            k <- K[i]
            lcmc[i] <- k/(1 - N) + sum(Q[cm.UL_K(k, nQ)])/N/k
        }
    }
    else {
        lcmc_ <- diag(apply(apply(Q, 2, cumsum), 1, cumsum))/(1:nQ)/N - (1:nQ)/nQ
        lcmc <- lcmc_[K]
    }
    lcmc
}
<environment: namespace:coRanking>


my.coranking <- function(X.high, X.low){
  # X.high <- iris[1:10,1:4]
  # X.low <- princomp(X.high)$score[,1:2]

  D.high <- as.matrix(dist(X.high)) 
  D.low <- as.matrix(dist(X.low))
  f <- function(x){
     rank(x, ties.method = 'first', na.last = FALSE)
  }
  diag(D.high) <- NA # NA is rank 1
  diag(D.low) <- NA
  R.high <- apply(D.high, 1, f) 
  R.low <- apply(D.low, 1, f) 
  table(R.high, R.low)[-1, -1]


# 142/144

library(coRanking) # install.packages("coRanking")
library(MASS)
par(mfrow=c(2, 3))
x <- iris[,1:4]
y <- iris[,5]
pca <- princomp(x)
Q.pca <- coranking(x, pca$score[,1:2], input = "data")
imageplot(Q.pca)
lcmc.pca <- LCMC(Q.pca, K = 5:10)

mds <- cmdscale(dist(x))
Q.mds <- coranking(x, mds[,1:2], input = "data")
imageplot(Q.mds)
lcmc.mds <- LCMC(Q.pca, K = 5:10)

mylda <- lda(x, grouping=y)
lda.dim <- as.matrix(x)%*%mylda$scaling[,1:2]
Q.lda <- coranking(x, lda.dim, input = "data")
imageplot(Q.lda)
lcmc.lda <- LCMC(Q.lda, K = 5:10)

names(lcmc.pca) <- paste0("K=", 5:10)
rbind(lcmc.pca, lcmc.mds, lcmc.lda)

plot(pca$score[,1:2], col=as.integer(y)+1, main="PCA")
plot(mds[,1:2], col=as.integer(y)+1, main="MDS")
plot(lda.dim[,1:2], col=as.integer(y)+1, main="LDA")

