#################################################
# B04: 假設檢定 & 變異數分析                    #
# 吳漢銘 國立政治大學統計學系                   #
# https://hmwu.idv.tw                           #
#################################################

# 11/22
protein <- c(20.70, 27.46, 22.15, 19.85, 21.29, 24.75, 20.75, 22.91, 25.34, 20.33, 21.54, 21.08,
             22.14, 19.56, 21.10, 18.04, 24.12, 19.95, 19.72, 18.28, 16.26, 17.46, 20.53, 22.12,
             25.06, 22.44, 19.08, 19.88, 21.39, 22.33, 25.79)

length(protein)

par(mfrow = c(1, 2))
hist(protein)
qqnorm(protein)
qqline(protein)

t.test(protein, mu = 20)
ks.test(log(protein), "pnorm")
shapiro.test(protein)


# 12/22
x <- iris$Sepal.Length
y <- iris$Petal.Length
alpha <- 0.05
(vt <- (var.test(x, y)$p.value <= alpha))
t.test(x, y, var.equal = !vt ) 



# 18/22
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("made4")

library(made4)
data(khan)
Anova.pvalues <- function(x){
  x <- unlist(x)
  SRBCT.aov.obj <- aov(x ~ khan$train.classes)
  SRBCT.aov.info <- unlist(summary(SRBCT.aov.obj))
  SRBCT.aov.info["Pr(>F)1"]
}
# perform anova for each gene
SRBCT.aov.p <- apply(khan$train, 1, Anova.pvalues)


# 19/22
order.p <- order(SRBCT.aov.p)
ranked.genes <- data.frame(pvalues = SRBCT.aov.p[order.p], 
                           ann = khan$annotation[order.p, ])
top5.gene.row.loc <- rownames(ranked.genes[1:5,  ])
summary(t(khan$train[top5.gene.row.loc, ]))

par(mfrow=c(1, 5), mai=c(0.3, 0.4, 0.3, 0.3))

usr <- par("usr")
myplot <- function(gene){ 
  boxplot(unlist(khan$train[gene, ]) ~ khan$train.classes, 
          ylim = c(0, 6), main = ranked.genes[gene, 4])
  text(2, usr[4]-1, labels = paste("p=", ranked.genes[gene, 1], sep = ""), 
       col = "blue")
  ranked.genes[gene,]
}


# 20/22
do.call(rbind, lapply(top5.gene.row.loc, myplot))


# 22/22
M <- as.table(rbind(c(762, 327, 468), 
                    c(484, 239, 477)))
dimnames(M) <- list(gender = c("F", "M"), 
                    party = c("Democrat", "Independent", "Republican"))
M
(res <- chisq.test(M))

