#################################################
# B02: 機率分佈                                 #
# 吳漢銘 國立政治大學統計學系                   #
# http://www.hmwu.idv.tw/                       #
#################################################

# 12/48
x <- letters[1:3]
library(combinat)
permn(x)
nCm(5, 3)

combn(5, 3)
combn(5, 3, min)
choose(5, 3)


# 19/48
curve(pnorm(x), -3, 3)
arrows(-1, 0, -1, pnorm(-1), col = "red")
arrows(-1, pnorm(-1), -3, pnorm(-1), col = "green")
pnorm(-1)


# 23/48
qnorm(0.025)
qnorm(0.5)
qnorm(0.975)


# 25/48
par(mfrow = c(2,2))
hist.sym <- hist(rnorm(10000),nclas = 100,freq = FALSE,  
                 main = "Symmetric Distribution", xlab = "rnorm")
hist.flat <- hist(runif(10000),nclas = 100,freq = FALSE, 
                  main = "Symmetric Flat Distribution", xlab = "runif")
hist.skr <- hist(rgamma(10000,shape = 2,scale = 1),freq = FALSE, nclas = 100, 
                 main = "Skewed to Right", xlab = "rgamma")
hist.skl <- hist(rbeta(10000,8,2),nclas = 100,freq = FALSE, 
                 main = "Skewed to Left", xlab = "rbeta")

# 26/48
sample(x, size, replace = FALSE, prob = NULL)
sample(1:40, 5)
sample(1:40, 5, replace = TRUE)
sample(c("H", "T"), 10, replace = T)
sample(c("succ", "fail"), 10, replace = T, prob = c(0.9, 0.1))


# 28/48
factorial(10)/(factorial(3)*factorial(7))*0.8^3*0.2^7
dbinom(3, 10, 0.8)
pbinom(3, 10, 0.8)- pbinom(2, 10, 0.8)
qbinom(0.1208, 10, 0.8)
pbinom(6, 10, 0.8)


# 30/48
dnorm(0)
pnorm(-1)
qnorm(0.975)

dnorm(10, 10, 2) # X~N(10, 4)
pnorm(1.96, 10, 2)
qnorm(0.975, 10, 2)
rnorm(5, 10, 2)
pnorm(15, 10, 2) - pnorm(8, 10, 2)  # P(8<= X<= 15)

par(mfrow = c(1,4))
curve(dnorm, -3, 3, xlab = "z", ylab = "Probability density", main = "Density")
curve(pnorm, -3, 3, xlab = "z", ylab = "Probability", main = "Probability")
curve(qnorm, 0, 1, xlab = "p", ylab = "Quantile(z)", main = "Quantiles")
hist(rnorm(1000), xlab = "z", ylab = "frequency", main = "Random numbers")


# 31/48
norm.sample <- rnorm(250)
summary(norm.sample)
hist(norm.sample, xlim = c(-5, 5), ylab = "probability", 
     main = "Histogram of N(0, 1)", prob = T)
x <- seq(from = -5, to = 5, length = 300)
lines(x, dnorm(x))

x <- seq(from = -5, to = 5, length = 300)
plot(x, dnorm(x), type = "l")
points(0, dnorm(0))
height <- round(dnorm(0), 4); height
text(1.5, height, paste("(0,", height, ")"))


# 33/48
par(mfrow = c(1, 2))
n <- 20 # 4
p <- 0.4 # 0.04
mu <- n * p
sigma <- sqrt(n * p * (1 - p))
x <- 0:n
plot(x, dbinom(x, n, p), type = 'h', lwd = 2, 
     xlab = "x", ylab = "P(X = x)",
     main = "B(20, 0.4)")
z <- seq(0, n, 0.1)
lines(z, dnorm(z, mu, sigma), col = "red", lwd = 2)
abline(h = 0, lwd = 2, col = "grey")


# 34/48
par(mfcol = c(2,4))
n <- 100
x <- rcauchy(n, 0, 0.1) # a long tail density 
hist(x, freq = FALSE, main = "long tail")
curve(dnorm(x, mean = mean(x), sd = sd(x)), add = TRUE, col = "red")
qqnorm(x, main = "rcauchy(n, 0, 1)"); 
qqline(x, col = "red")
# different shapes of distributions
x <- rbeta(n, 2, 2) # a short tail density 
x <- rbeta(n, 2, 20) # a right skewed density
x <- rbeta(n, 20, 2) # a left skewed density


# 36/48
qqnorm(iris[,1])
qqline(iris[,1])
qqnorm(scale(iris[,1]))
qqline(scale(iris[,1]))

my.qqplot <- function(x){
  x.mean <- mean(x)
  x.var <- var(x)
  n <- length(x)
  
  z <- (x-x.mean)/sqrt(x.var)
  z.mean <- mean(z)
  z.var <- var(z)
  z.sort <- sort(z)
  
  k <- 1:n
  p <- (k-0.5)/n
  q <- qnorm(p)
  
  plot(q, z.sort, xlim = c(-3, 3), ylim = c(-3, 3))
  title("QQ plot")
  lines(q, q, col = 2)
}
my.qqplot(iris[,1])


# 36/48
par(mfrow = c(1, 2))
hist(iris$Sepal.Width)
qqnorm(iris$Sepal.Width)
qqline(iris$Sepal.Width, col = "red")


# 39/48
z <- (126/210 -0.7)/sqrt(0.001) # 通過人數>126的機率
z
1 - pnorm(z)

pass.prob <- function(x, n, mu, sigma2, digit = m){
  xbar <- x/n
  z <- (xbar-mu)/sqrt(sigma2)
  zvalue <- round(z, digit)
  right.prob <- round(1-pnorm(z), digit)
  list(zvalue = zvalue, prob = right.prob)
}

pass.prob(126, 210, 0.7, 0.001, 4)


# 42/48
umin <- 5
umax <- 80
n.sample <- 20
n.repeated <- 500

RandomSample <- matrix(0, n.sample, n.repeated)
for(i in 1:n.repeated){
  rnumber <- runif(n.sample, umin, umax)
  RandomSample[,i] <- as.matrix(rnumber) 
}
dim(RandomSample)


# 43/48
par(mfrow=c(2,2))
for(i in 1:4){
  title <- paste(i,"-th sampling", sep = "")
  hist(RandomSample[,i], ylab = "f(x)", xlab = "random uniform", 
       pro = T, main = title)
}


# 44/48
SampleMean <- apply(RandomSample, 2, mean)
hist(SampleMean, ylab = "f(x)", xlab = "sample mean", pro = T, main = "n=20")


myfun <- function(x){
  mu <- (umin + umax)/2
  s2 <- (umax - umin)^2/12
  z <- (mean(x) - mu)/sqrt(s2/n)
  z
}

qqnorm(SampleMean)
qqline(SampleMean)


# 45/48
CLT.unif <- function(umin, umax, n.sample, n.repeated){ 
  RandomSample <- matrix(0, n.sample, n.repeated)
  for(i in 1:n.repeated){
    rnumber <- runif(n.sample, umin, umax)
    RandomSample[,i] <- as.matrix(rnumber) 
  }
  SampleMean <- apply(RandomSample, 2, mean)
  par(mfrow=c(1,2))
  title <- paste("n=", n.sample, sep = "")
  hist(SampleMean, breaks = 30, ylab = "f(x)", xlab = "sample mean", 
       freq = F, main = title)
  qqnorm(SampleMean)
  qqline(SampleMean)
}

CLT.unif(5, 80, 50, 500)
CLT.unif(5, 80, 1, 500)
CLT.unif(5, 80, 5, 500)
CLT.unif(5, 80, 10, 500)
CLT.unif(5, 80, 30, 500)


# 59/48
girl.born <- function(n, show.id = F){
  
  girl.count <- 0
  for (i in 1:n) {
    if (show.id) cat(i,": ")
    child.count <- 0
    repeat {
      rn <- sample(0:99, 1) # random number
      if (show.id) cat(paste0("(", rn, ")"))
      is.girl <- ifelse(rn <=  48, TRUE, FALSE)
      child.count <- child.count + 1
      if (is.girl){
        girl.count <- girl.count + 1
        if (show.id) cat("女+")
        break
      } else if (child.count  ==  3) {
        if (show.id) cat("男")        
        break
      } else{
        if (show.id) cat("男")        
      }
    }
    if (show.id) cat("\n")
  }
  p <- girl.count / n
  p
  
}

girl.p <- 0.49 + 0.51*0.49 + 0.51^2*0.49
girl.p
girl.born(n = 10, show.id = T)
girl.born(n = 10000)


