#################################################
# H01: Introduction to Deep Learning with R     #
# Han-Ming Wu, Department of Statistics,        #
# National Chengchi University                  #
# http://www.hmwu.idv.tw/                       #
#################################################

# 23/135
library(mlbench)
data(BostonHousing)
str(BostonHousing)

# 25/135
data("PimaIndiansDiabetes2", package="mlbench")
? mlbench::PimaIndiansDiabetes2
str(PimaIndiansDiabetes2)

# 26/135
# library(mlbench)
data(Shuttle)
head(Shuttle)
table(Shuttle$Class)


# 31/135
# Sigmoid 
AF_sigmoid <- function(x){
  1/(1+exp(-x))
}

# Hyperbolic Tangent
AF_tanh <- function(x){
  (exp(x)-exp(-x))/(exp(x)+exp(-x))
}

# Rectified Linear Unit
AF_ReLU <- function(x){
  ifelse(x > 0, x, 0)
}

# Leaky Rectified Linear Unit
AF_lReLU <- function(x, a=0.01){
  ifelse(x > 0, x, a*x)
}

# Softmax
AF_Softmax <- function(x){
  exp(x)/sum(exp(x))
}
# Softmax calculates the probabilities of each target class over all possible target classes
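
# quick check: softmax outputs are non-negative and sum to 1
AF_Softmax(c(1, 2, 3))       # ~0.090 0.245 0.665
sum(AF_Softmax(c(1, 2, 3)))  # 1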


library(reshape2)
library(ggplot2)
x <- seq(-6, 6, 0.01)
xyData <- data.frame(x=x, sigmoid=AF_sigmoid(x), tanh=AF_tanh(x), 
                     ReLU=AF_ReLU(x), lReLU=AF_lReLU(x), Softmax=AF_Softmax(x))
head(xyData)
xyData.lf <- melt(xyData, id.var="x")
head(xyData.lf)
ggplot(xyData.lf, aes(x=x, y=value, group=variable, colour=variable)) +
  geom_line(aes(linetype=variable), size=1) +
  geom_hline(yintercept=c(-1, 0, 1), color="gray") 

ggplot(xyData, aes(x=x, y=Softmax)) +
  geom_line(size=2, linetype="dashed") +
  ggtitle("Softmax")



# 32/135
Loan <- read.table("Loan_training.txt", header=T, sep="\t")
Loan
Loan$LoanDefaults <- as.factor(Loan$LoanDefaults)
income <- (Loan$Income - min(Loan$Income))/(max(Loan$Income)- min(Loan$Income))
no.class <- length(levels(Loan$LoanDefaults))
Target <- data.frame(matrix(0, nrow=length(income), ncol=no.class))
sapply(1:no.class, function(x){
  Target[which(Loan$LoanDefaults == levels(Loan$LoanDefaults)[x]), x] <<- 1
})

colnames(Target) <- c("NO", "YES")
data.frame(Loan$Sex, income, Target)
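
# The same one-hot target can be built (a sketch) with model.matrix();
# note the column names follow the factor levels rather than the manual "NO"/"YES":
Target.mm <- model.matrix(~ LoanDefaults - 1, data=Loan)
head(Target.mm)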


# 41/135
library(caret)
set.seed(12345)

iris.two.species <- iris[51:150, ]
# rows 51:150 contain only versicolor and virginica; code versicolor as 1
iris.two.species[,5] <- ifelse(iris.two.species[,5] == "versicolor", 1, 0)

id <- createDataPartition(y = 1:nrow(iris.two.species), p = 0.8)

# note that the dim of the data is p by n
train.data <- iris.two.species[id$Resample1, ]
myTrain.X <- t(train.data[, -5])
myTrain.y <- t(train.data[, 5])
head(myTrain.X)
dim(myTrain.X) 
length(myTrain.y) 

test.data <- iris.two.species[-id$Resample1, ]
myTest.X <- t(test.data[, -5])
myTest.y <- t(test.data[, 5])
dim(myTest.X) 
length(myTest.y) 

num.iter <- 1000
learning.rate <- 0.01

# 42/135
## install "EBImage" package from BioConductor
if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")
BiocManager::install("EBImage")

library(EBImage)
library(pbapply)

#######################################
# read and show original images       #
#######################################
#file.path.train <- "data/Dogs_vs_Cats/train"  # 25000 files
#file.path.test <- "data/Dogs_vs_Cats/test1"  # 12500 files

file.path.train <- "data/Dogs_vs_Cats/train_subset" # 1250 files
file.path.test <- "data/Dogs_vs_Cats/test1_subset" # 50 files

par(mfrow = c(1, 2))
show.train.id <- 1
images.train <- list.files(file.path.train)
img.train <- readImage(file.path(file.path.train, images.train[show.train.id]))
EBImage::display(img.train, method = "raster")

show.test.id <- 20
images.test <- list.files(file.path.test)
img.test <- readImage(file.path(file.path.test, images.test[show.test.id]))
EBImage::display(img.test, method = "raster")


# 43/135
#######################################
# extract image features by resize    #
#######################################
extract_feature <- function(file.dir, width, height) {
  
  # file.dir <- "data/Dogs_vs_Cats/test1_subset"
  # width <- 64
  # height <- 64
  img.size <- width * height
  images <- list.files(file.dir)
  
  print(paste("Processing", length(images), "images@", file.dir))
  label <- ifelse(grepl("dog", images) == T, 1, 0)
  
  # pbapply {pbapply}: Adding Progress Bar to '*apply' Functions
  feature.list <- pblapply(images, function(img.name) {
    
    # img.name <- "cat.9993.jpg"
    img <- readImage(file.path(file.dir, img.name))
    img.resized <- EBImage::resize(img, w = width, h = height)
    
    ## show images
    # display(img, method = "raster")
    # display(img.resized, method = "raster")
    
    # note: `channels` is taken from the enclosing (global) environment;
    # it must be defined before extract_feature() is called (see below)
    img.matrix <- matrix(reticulate::array_reshape(img.resized, 
                                                   (width*height*channels)), 
                         nrow = width*height*channels)
    img.vector <- as.vector(t(img.matrix))
    img.vector
  })
  
  feature.matrix <- do.call(rbind, feature.list)
  
  list(t(feature.matrix), label)
}

# 44/135
#######################################
# extract image features              #
#######################################
height <- 64
width <- 64
channels <- 3

train.data <- extract_feature(file.path.train, width, height)
myTrain.X <- train.data[[1]]
myTrain.y <- train.data[[2]]
dim(myTrain.X) 
length(myTrain.y) 

test.data <- extract_feature(file.path.test, width, height)
str(test.data)
myTest.X <- test.data[[1]]
myTest.y <- test.data[[2]]
length(myTest.y) 
dim(myTest.X) 


# 45/135
#######################################
# show resized images                 #
#######################################
data2Image <- function(xdata, width, height){
  ar <- array(0, dim=c(width, height, 3))
  lapply(1:3, function(i){
    ar[,,i] <<- matrix(xdata[seq(i, length(xdata), 3)], 
                       width, height, byrow=T)  
  })
  
  # return an Image class
  Image(ar, dim=c(width, height, 3), colormode=2)
}

images.train.resized <- data2Image(myTrain.X[, show.train.id], width, height)
images.test.resized <- data2Image(myTest.X[, show.test.id], width, height)
par(mfrow = c(1, 2))
EBImage::display(images.train.resized, method = "raster")
EBImage::display(images.test.resized, method = "raster")


#######################################
# normalization for each column?      #
#######################################
myTrain.X <- scale(myTrain.X)
myTest.X <- scale(myTest.X)

num.iter <- 1000
learning.rate <- 0.01


# 46/135
#######################################
# activation function:sigmoid         # 
#######################################
sigmoid <- function(x){
  1/(1+exp(-x))
}

#######################################
# initialization: weights, bias       #
#######################################
# initialize the weight vector and the bias scalar to zero.
initialize_with_zeros <- function(dim){
  
  W <- matrix(0, nrow = dim, ncol = 1)
  b <- 0
  
  list(W = W, b = b)
}
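
# Note: zero initialization works here because the model is a single logistic
# unit; in a multi-layer network, identical zero weights would keep hidden
# units symmetric, which is why deeper models use random initialization.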

##########################################
# Forward propagate,  Backward propagate #
##########################################
# one forward pass to compute the cost, and one backward pass for the gradients.

propagate <- function(W, b, X, y){
  
  n <- ncol(X)  # number of observations of the training data 
  
  # Forward Propagation
  A <- sigmoid((t(W) %*% X) + b)
  
  # cost is the negative log-likelihood cost of the logistic regression
  cost <- (-1/n) * sum(y * log(A) + (1-y) * log(1 - A))
  
  ## Backward Propagation
  # dw: the gradient of the cost with respect to w
  dW <- (1/n) * (X %*% t(A-y))
  # db: the gradient of the cost with respect to b
  db <- (1/n) * rowSums(A-y)
  
  list(gradient = list(dW=dW, db=db), cost = cost)
}
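
# Optional sanity check (a sketch, not from the slides): compare the analytic
# gradient from propagate() with a finite-difference approximation on toy data.
set.seed(1)
X.toy <- matrix(rnorm(6), nrow = 2)     # 2 features, 3 observations
y.toy <- matrix(c(0, 1, 1), nrow = 1)
W.toy <- matrix(0.1, nrow = 2)
g <- propagate(W.toy, b = 0, X = X.toy, y = y.toy)
eps <- 1e-6
W.eps <- W.toy; W.eps[1, 1] <- W.eps[1, 1] + eps
dW1.num <- (propagate(W.eps, b = 0, X = X.toy, y = y.toy)$cost - g$cost) / eps
c(analytic = g$gradient$dW[1, 1], numeric = dW1.num)  # should nearly agree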

# 47/135
#######################################
#                                     #
#######################################
optimize <- function(W, b, X, y, num.iter, learning.rate, 
                     print.cost = FALSE) {
  
  ## test code 
  # X <- train.X;  dim(X)
  # y <- train.y;  length(y)
  
  cost <- numeric(num.iter)
  
  for (i in 1:num.iter) {
    
    # i <- 100
    
    # calculate gradient and cost
    grad.cost <- propagate(W, b, X, y)
    gradient <- grad.cost$gradient
    cost[i] <- grad.cost$cost
    
    # Retrieve the derivatives
    dW <- matrix(gradient$dW)
    db <- gradient$db
    
    # Update the parameters
    W <- W - learning.rate * dW
    b <- b - learning.rate * db
    
    # Print the cost every 100th iteration
    if (print.cost && i %% 100 == 0) {
      cat(sprintf("Cost after iteration %d: %06f\n", i, cost[i]))
    }
    
  }
  
  # return the final parameters, the last gradients, and the cost history
  list(params = list(W = W, b = b),
       gradient = list(dW = dW, db = db),
       cost = cost)
}



#######################################
#                                     #
#######################################  
# Activation vector A to predict the probability of a dog/cat
pred <- function(W, b, X) {
  
  n <- ncol(X)
  Y.prediction <- numeric(n)
  
  A <- sigmoid((t(W) %*% X) + b)
  
  for (i in 1:n) {
    
    if (A[1, i] > 0.5) {
      Y.prediction[i] <- 1
    } else{
      Y.prediction[i] <- 0
    } 
    
  }
  
  Y.prediction
}
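
# Equivalent vectorized predictor (a sketch): threshold A directly, no loop.
pred.vec <- function(W, b, X) {
  A <- sigmoid((t(W) %*% X) + b)
  as.numeric(A[1, ] > 0.5)
}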

# 48/135
#######################################
#                                     #
#######################################
simple_model <- function(train.X, train.y, test.X, test.y,
                         num.iter, learning.rate, print.cost = FALSE){
  ## test code
  # train.X <- myTrain.X; dim(train.X) 
  # train.y <- myTrain.y; length(train.y)
  # test.X <- myTest.X; dim(test.X)
  # test.y <- myTest.y; length(test.y)
  # num.iter <- 1000
  # learning.rate <- 0.01
  # print.cost <- T
  
  # initialize parameters with zeros
  Wb <- initialize_with_zeros(nrow(train.X))
  
  W <- Wb$W #dim(W)
  b <- Wb$b
  
  # Gradient descent
  optFn.output <- optimize(W, b, train.X, train.y,
                           num.iter, learning.rate, print.cost)
  
  W <- as.matrix(optFn.output$params$W)
  b <- optFn.output$params$b
  
  pred.train <- pred(W, b, train.X)
  pred.test <- pred(W, b, test.X)
  
  # Print train/test Errors
  cat(sprintf("train accuracy: %#.2f \n", mean(pred.train == train.y) * 100))
  cat(sprintf("test accuracy: %#.2f \n", mean(pred.test == test.y) * 100))
  
  res <- list(cost = optFn.output$cost,
              pred.train = pred.train, pred.test = pred.test,
              W = W, b = b)
  res
}


# 49/135
#######################################
# Train model                         #
#######################################
SNN.model <- simple_model(myTrain.X, myTrain.y, myTest.X, myTest.y,
                          num.iter, learning.rate, print.cost = T)

plot(1:length(SNN.model$cost), SNN.model$cost, 
     type="l", xlab = "Iterations", ylab = "Cost")
legend(200, 0.9, c("Learning rate = 0.01"))




# 60/135
# install.packages("nnet")
library(nnet)
xdata <- iris[, 1:4]
ydata <- iris[, 5]
no.class <- length(unique(ydata))
# one-hot target matrix (avoid `T` as a name: it masks TRUE)
Target <- matrix(0, nrow=nrow(xdata), ncol=no.class)
sapply(1:no.class, function(x){
  Target[which(ydata == levels(ydata)[x]), x] <<- 1
})
id <- sample(1:nrow(iris), 100)
training.x <- xdata[id,]
training.y <- Target[id, ]
testing.x <- xdata[-id,]
testing.y <- Target[-id, ]
nn.iris <- nnet(training.x, training.y, size = 2, rang = 0.1,
                decay = 5e-4, maxit = 200)

pred <- max.col(predict(nn.iris, testing.x))
testing.true <- apply(Target[-id, ], 1, which.max)
table(testing.true, pred)
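
# nnet also ships class.ind(), which builds the same one-hot target matrix:
head(class.ind(ydata))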


# 61/135
str(nn.iris)
summary(nn.iris)
print(nn.iris)
coef(nn.iris)


# 62/135
# install.packages("NeuralNetTools")
library(NeuralNetTools)
plotnet(nn.iris)

# or
nn.iris.2 <- nnet(Species ~ ., data = iris, subset = id, 
                  size = 2, rang = 0.1,
                  decay = 5e-4, maxit = 200)
table(iris$Species[-id], predict(nn.iris.2, iris[-id,], type = "class"))
plotnet(nn.iris.2)


# 63/135
# install.packages("neuralnet")
library(neuralnet)

set.seed(12345)
x <- sample(seq(-2, 2, length.out=50), 50, replace=F)
f <- function(x){
  x^2
}
y <- f(x)
train.data <- data.frame(attribute=x, response=y)
head(train.data)
plot(train.data, main="f(x)=x^2")

fit <- neuralnet(response ~ attribute, data=train.data, 
                 hidden=c(3, 3),
                 threshold=0.01)
fit
plot(fit)


# 64/135
# use matrix(..., ncol=1): as.matrix() has no ncol argument (it was silently ignored)
test.data <- matrix(sample(seq(-2, 2, length.out=10), 10, replace=F), ncol=1)
head(test.data)
pred <- compute(fit, test.data)

data.frame(Test.attribute=test.data, Prediction=pred$net.result, 
           Actual.response=f(test.data))

dev.set(dev.prev())
points(test.data, pred$net.result, col="red", cex=2, pch=3)


# 65/135
data(Boston, package="MASS")
head(Boston)
str(Boston)

#data(BostonHousing, package="mlbench")
#head(BostonHousing)
#str(BostonHousing)

names(Boston)
used.variables <- c("crim", "indus", "nox", "rm", "age", "dis", "tax", 
                    "ptratio", "lstat", "medv")
Boston.used <- as.data.frame(scale(Boston[used.variables]))
str(Boston.used)

apply(Boston.used, 2, function(x) sum(is.na(x)))
pairs(Boston.used, cex=0.4)

library(psych)
pairs.panels(Boston.used)


# 66/135
library(caret)
set.seed(12345)
id <- createDataPartition(y=1:nrow(Boston.used), p=0.8)
train.data <- Boston.used[id$Resample1, ]
head(train.data)
train.X <- train.data[, -10]
train.y <- train.data[, 10]
test.data <- Boston.used[-id$Resample1, ]
head(test.data)
test.X <- test.data[, -10]
test.y <- test.data[, 10]

validation.index <- function(pred.value, real.value){
  require(Metrics) # fit metrics: mse(), rmse(), etc.
  cor.v <- cor(pred.value, real.value)^2
  mse.v <- mse(pred.value, real.value)
  rmse.v <- rmse(pred.value, real.value)
  index <- paste0("Cor^2=", round(cor.v, 4), 
                  ", MSE=", round(mse.v, 4),
                  ", RMSE=", round(rmse.v, 4))
  index
}
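
# quick usage check with toy numbers (requires the Metrics package):
validation.index(pred.value=c(1, 2, 3), real.value=c(1.1, 1.9, 3.2))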

# 67/135
fit.lm <- lm(formula=medv ~ ., data=train.data)
summary(fit.lm)


# 68/135
pred.lm <- predict(fit.lm, test.X)
lm.pred.data <- data.frame(pred.y=pred.lm, test.y=test.y)
ggplot(data=lm.pred.data, aes(x=test.y, y=pred.y)) + 
  geom_point() +
  ggtitle(paste("lm fit:", validation.index(pred.lm, test.y))) +
  geom_abline(intercept=0, slope=1, color="red", size=1) +
  coord_fixed(ratio=1)



# 69/135
# library(neuralnet)
fit.nn <- neuralnet(formula=medv ~ ., 
                    data=train.data,
                    hidden=c(10, 12, 20),
                    threshold=0.1,
                    algorithm="rprop+",
                    err.fct="sse",
                    act.fct="logistic",
                    linear.output=T)
str(fit.nn)
summary(fit.nn)

pred.nn <- compute(fit.nn,  test.X)
nn.pred.data <- data.frame(pred.y=pred.nn$net.result, test.y=test.y)
ggplot(data=nn.pred.data, aes(x=test.y, y=pred.y)) + 
  geom_point() +
  ggtitle(paste("nn fit:", validation.index(pred.nn$net.result, test.y))) +
  geom_abline(intercept=0, slope=1, color="red", size=1) +
  coord_fixed(ratio=1) 




# 71/135
require(deepnet)

train.X <- as.matrix(train.X)
test.X <- as.matrix(test.X)

fit.dn <- nn.train(x=train.X, y=train.y,
                   initW=NULL, initB=NULL,
                   hidden=c(10, 12, 20),
                   learningrate=0.58,
                   momentum=0.74,
                   learningrate_scale=1,
                   activationfun="sigm",  #linear, tanh
                   output="linear", #sigm, softmax
                   numepochs=970,
                   batchsize=60,
                   hidden_dropout=0,
                   visible_dropout=0)


pred.dn <- nn.predict(fit.dn, test.X)

dn.pred.data <- data.frame(pred.y=pred.dn, test.y=test.y)


# 72/135
ggplot(data=dn.pred.data, aes(x=test.y, y=pred.y)) + 
  geom_point() +
  ggtitle(paste("deepnet fit:", validation.index(pred.dn, test.y))) +
  geom_abline(intercept=0, slope=1, color="red", size=1) +
  coord_fixed(ratio=1) 


# 73/135
data("PimaIndiansDiabetes2", package="mlbench")

# missing values
sapply(PimaIndiansDiabetes2, function(x) sum(is.na(x)))

pima <- PimaIndiansDiabetes2
dim(pima)
pima$insulin <- NULL
pima$triceps <- NULL
dim(pima)
pima <- na.omit(pima)
dim(pima)

names(pima)
pima$diabetes
pima <- data.frame(scale(pima[, -7]), diabetes = pima[, 7])

set.seed(12345)

train.id <- sample(1:nrow(pima), 600)
train.X <- pima[train.id, -7]
train.y <- as.integer(pima[train.id, 7])
test.X <- pima[-train.id, -7]
test.y <- as.integer(pima[-train.id, 7])


# 75/135
require(RSNNS)
fit.mlp <- mlp(x=train.X, y=train.y,
               size=c(12, 8),
               maxit=1000,
               initFun="Randomiza_Weights",
               initFuncParams=c(-0.3, 0.3),
               learnFunc="Std_Backpropagation",
               learnFuncParams=c(0.2, 0),
               updateFunc="Topological_Order",
               updateFuncParams=c(0),
               hiddenActFunc="Act_Logistic",
               shufflePatterns=T,
               linOut=T)


summary(fit.mlp)
fit.mlp
weightMatrix(fit.mlp)
extractNetInfo(fit.mlp)
plotIterativeError(fit.mlp)



pred.mlp <- ifelse(predict(fit.mlp, test.X) >= 1.5, 2, 1)

# 76/135
library(caret)
caret::confusionMatrix(data=as.factor(pred.mlp), 
                       reference=as.factor(test.y))




# 78/135
detach(package:RSNNS, unload=T)
library(AMORE)

amore.net <- newff(n.neurons=c(6, 12, 8, 1),
                   learning.rate.global=0.01,
                   momentum.global=0.5,
                   error.criterium="LMLS",
                   Stao=NA,
                   hidden.layer="sigmoid",
                   output.layer="purelin",
                   method="ADAPTgdwm")

fit.amore <- AMORE::train(net=amore.net, 
                          P=as.matrix(train.X), T=as.numeric(train.y),
                          error.criterium="LMLS",
                          report=T,
                          n.shows=5,
                          show.step=100)


pred.amore <- ifelse(sim(fit.amore$net, as.matrix(test.X)) >= 1.5, 2, 1)


# 79/135
caret::confusionMatrix(data=as.factor(pred.amore), 
                       reference=as.factor(test.y))


# 80/135
data.zip <- "OnlineNewsPopularity.zip"
uci <- "https://archive.ics.uci.edu/ml/machine-learning-databases/00332/"
url.loc <- paste0(uci, data.zip)
download.file(url.loc, destfile=data.zip, method="libcurl")
unzip(data.zip, exdir="data")
dir()

file.loc <- "data/OnlineNewsPopularity/OnlineNewsPopularity.csv"
ONP.data <- read.csv(file.loc)
head(ONP.data)
str(ONP.data)

summary(ONP.data$shares)
ggplot(data=ONP.data, aes(x=shares)) + geom_histogram()
ggplot(data=ONP.data, aes(x=shares)) + geom_histogram() + 
  scale_y_log10() +
  labs(x="#shares", y="log(count)", 
       title="histogram of the shares (Online News Popularity Data)")


# 81/135
response <- as.numeric(ONP.data$shares > median(ONP.data$shares))

scale_01 <- function(x){
  (x-min(x))/(max(x)-min(x))
}
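
# quick check: the minimum maps to 0 and the maximum to 1
scale_01(c(2, 4, 6))  # 0.0 0.5 1.0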

names(ONP.data)
channel <- ONP.data[, 14:19] # binary indicators
kword <- apply(ONP.data[, 20:28], 2, scale_01)
day <- ONP.data[, 32:39] # binary indicators
lda <- ONP.data[, 40:44] # LDA topic proportions in [0, 1]

ONP.used <- cbind(channel, kword, day, lda, response)

# 82/135
library(caret)
set.seed(12345)
id <- createDataPartition(y=response, p=0.8)
# locate the response column in ONP.used (using a stale train.data from an
# earlier section would give the wrong index)
response.at <- which(names(ONP.used) == "response")

train.data <- ONP.used[id$Resample1, ]
head(train.data)
train.X <- train.data[, -response.at]
train.y <- train.data[, response.at]
dim(train.X)

test.data <- ONP.used[-id$Resample1, ]
head(test.data)
test.X <- test.data[, -response.at]
test.y <- test.data[, response.at]
dim(test.X)


# 83/135
#######################################
# deepnet                             #
#######################################
require(deepnet)
set.seed(12345)

fit.dn <- nn.train(x=as.matrix(train.X), y=train.y,
                   hidden=c(60, 30),
                   numepochs=10,
                   activationfun="sigm",
                   output="sigm")

attributes(fit.dn)
pred.dn <- nn.predict(fit.dn, as.matrix(test.X))
pred.dn.class <- ifelse(pred.dn > 0.5, 1, 0)

caret::confusionMatrix(data=as.factor(pred.dn.class), 
                       reference=as.factor(test.y))



# 88/135
library(imager)
vd.img <- load.image("data/Vd-Orig.png")
dim(vd.img)
plot(vd.img)


Edge.detection.k1 <- as.cimg(matrix(c(1, 0, -1, 0, 0, 0, -1, 0, 1), ncol=3))
Edge.detection.k2 <- as.cimg(matrix(c(0, -1, 0, -1, 4, -1, 0, -1, 0), ncol=3))
Edge.detection.k3 <- as.cimg(matrix(c(-1, -1, -1, -1, 8, -1, -1, -1, -1), ncol=3))
Sharpen.k <- as.cimg(matrix(c(0, -1, 0, -1, 5, -1, 0, -1, 0), ncol=3))
Boxblur.k <- as.cimg(matrix(rep(1, 9), ncol=3)/9)
Gaussianblur.3x3.k <- as.cimg(matrix(c(1, 2, 1, 2, 4, 2, 1, 2, 1), ncol=3)/16)
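
# To see numerically what a 3x3 kernel does (a toy sketch in base R, valid
# region only; imager::convolve additionally handles image boundaries):
toy <- matrix(1:25, 5, 5)
k2  <- matrix(c(0, -1, 0, -1, 4, -1, 0, -1, 0), 3, 3)  # Edge.detection.k2
out <- matrix(0, 3, 3)
for (i in 1:3) for (j in 1:3)
  out[i, j] <- sum(toy[i:(i+2), j:(j+2)] * k2)
out  # all zeros: the Laplacian-style kernel cancels on a linear ramp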

edk1.img <- imager::convolve(vd.img, Edge.detection.k1)
edk2.img <- imager::convolve(vd.img, Edge.detection.k2)
edk3.img <- imager::convolve(vd.img, Edge.detection.k3)
sk.img <- imager::convolve(vd.img, Sharpen.k)
bk.img <- imager::convolve(vd.img, Boxblur.k)
g3k.img <- imager::convolve(vd.img, Gaussianblur.3x3.k)

par(mfrow=c(2, 3))
plot(edk1.img, main="Edge Detection (1)")
plot(edk2.img, main="Edge Detection (2)")
plot(edk3.img, main="Edge Detection (3)")
plot(sk.img, main="Sharpen")
plot(bk.img, main="Box blur")
plot(g3k.img, main="Gaussian blur (3x3)")


# 106/135
writeLines('PATH="${RTOOLS40_HOME}\\usr\\bin;${PATH}"', con = "~/.Renviron")
# Restart R within Rstudio
.rs.restartR() 
# test: show the path to your Rtools installation. 
Sys.which("make") 


# 107/135
# Run the following commands in RGui or RStudio
install.packages("tensorflow")
# load tensorflow
library(tensorflow)
# install the tensorflow/keras/python environments
install_tensorflow()
# after a successful installation the R session restarts; load tensorflow again
library(tensorflow)
# test whether the installation succeeded;
# on success it prints "tf.Tensor(b'Hello TensorFlow', shape=(), dtype=string)"
tf$constant("Hello TensorFlow")


# 108/135
## 1: Installation
#(1) tensorflow: R Interface to 'tensorflow'
install.packages("tensorflow")   
library(tensorflow)
install_tensorflow()
# ...
Sys.setenv(TENSORFLOW_PYTHON='C:/Users/userpc/AppData/Local/r-miniconda/envs/r-reticulate/Lib/site-packages/tensorflow')
library(tensorflow)
tf_config()
tf$constant("Hellow Tensorflow")
# 印出 tf.Tensor(b'Hellow Tensorflow', shape=(), dtype=string)

# if you get the error "Installation of TensorFlow not found":
install_tensorflow(version = "2.0.0b1", method = "conda", envname = "r-reticulate")
tf_config()
tf$constant("Hellow Tensorflow")
# tf.Tensor(b'Hellow Tensorflow', shape=(), dtype=string)

#(2) keras: R Interface to 'keras'
# install.packages("keras")
# library(keras)
# install_keras()

#(3) reticulate: R Interface to 'Python'
install.packages("reticulate") 
library(reticulate)
conda_version()
py_available()
py_module_available("tensorflow")
py_config()
conda_list()

## some useful commands
sessionInfo()
Sys.getenv()  # check system environment variables
packageVersion("tensorflow")
packageVersion("keras")



# 109/135
require(tensorflow)
# print a line of text
# TF2.0: use tf$compat$v1$Session() instead of tf$Session()
# sess <- tf$Session()
tf$compat$v1$disable_eager_execution()
sess <- tf$compat$v1$Session()
sess
hello <- tf$constant('Hello, TensorFlow!')
hello
sess$run(hello)
# do some arithmetic
alpha <- tf$constant(5)
alpha
beta <- tf$constant(4)
beta
prod <- tf$multiply(alpha, beta)
sess$run(prod)
sess$close()


# 110/135
install.packages("keras")
library(keras)
install_keras()

model <- keras_model_sequential() %>% 
  layer_flatten(input_shape = c(28, 28)) %>% 
  layer_dense(units = 128, activation = "relu") %>% 
  layer_dropout(0.2) %>% 
  layer_dense(10, activation = "softmax")

summary(model)


# 111/135
install.packages("reticulate") 
library(reticulate)
conda_version()
py_available() # Check if Python is available on this system
py_module_available("tensorflow")
py_config() # Python configuration
conda_list() # the names and paths to the respective python binaries of available environments
conda_binary() # the location of the main conda binary 

# 111/135
sessionInfo()
Sys.getenv()  # system environment variables
packageVersion("tensorflow")
packageVersion("keras")
## detach the package
detach("package:tensorflow", unload=TRUE)
## remove (uninstall) the package
remove.packages("tensorflow")



# 113/135
library(tensorflow)
install_tensorflow()
library(tensorflow)
tf$constant("Hellow Tensorflow")

sessionInfo()

# 114/135
# tf$Session() errors under TF2 (use tf$compat$v1$Session(), as above)
tf$Session()
library(tensorflow)
install_tensorflow(version = "2.0.0b1", method = "conda", envname = "r-reticulate")


# 123/135
# install the GPU build of TensorFlow
# (a single-user / desktop environment):
library(tensorflow)
install_tensorflow(version = "gpu")

# or install Keras together with the GPU build of TensorFlow:
library(keras)
install_keras(tensorflow = "gpu")


# test:
library(keras)
tensorflow::tf$test$is_gpu_available() 
tensorflow::tf$config$list_physical_devices('GPU')


# 126/135
sessionInfo()
Sys.getenv()
tensorflow::tf_config()
keras:::keras_version()
reticulate::py_config()


# 129/135
library(keras)

# loading the MNIST dataset
mnist <- dataset_mnist()
str(mnist)

# Display the first 25 images from the training set
par(mfrow=c(5, 5), mai=c(0.1, 0.2, 0.4, 0.2))
empty <- lapply(1:25, function(i){
  x <- mnist$train$x[i,,]
  #x <- mnist$test$x[i,,]
  image(t(x)[, nrow(x):1], axes=FALSE, col=grey(255:0/255))
  box()
})
mtext("First 25 training samples of MNIST dataset of handwritten digits", 
      side=3, line=-2, outer=TRUE, cex=1.5)

# convert pixel values from [0, 255] to [0, 1]
mnist$train$x <- mnist$train$x/255
mnist$test$x <- mnist$test$x/255


# 130/135
# define a Keras model using the sequential API
model <- keras_model_sequential() 

# Building the model
# Note that when using the Sequential API,
# the first layer must specify the input_shape argument,
# which gives the dimensions of the input (28x28 images).
model %>% 
  layer_flatten(input_shape=c(28, 28)) %>% 
  layer_dense(units=128, activation="relu") %>% 
  layer_dropout(0.2) %>% 
  layer_dense(10, activation="softmax")

# print out layers, number of parameters, etc 
summary(model)



# 131/135
# Compiling the model
# define which loss will be optimized and which optimizer will be used;
# you can also specify metrics, callbacks, etc. to be run during model fitting.
model %>% 
  compile(loss="sparse_categorical_crossentropy",
          optimizer="adam",
          metrics="accuracy")


# Fit the model
model %>% 
  fit(x=mnist$train$x, 
      y=mnist$train$y,
      epochs=5,
      validation_split=0.3,
      verbose=2)
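
# fit() invisibly returns a training-history object; capturing it, e.g.
# history <- model %>% fit(...), lets you inspect and plot(history) the
# per-epoch loss/accuracy curves.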


# 132/135
# make predictions
predictions <- predict(model, mnist$test$x)
head(predictions, 2)


# By default predict() returns the output of the last Keras layer;
# in our case this is the probability for each class.
# predict_classes() and predict_proba() generate classes and probabilities:
predictions.classes <- predict_classes(model, mnist$test$x)
head(predictions.classes, 2)

predictions.proba <- predict_proba(model, mnist$test$x)
head(predictions.proba, 2)
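
# Note: newer keras releases removed predict_classes()/predict_proba();
# with a softmax output, the class can be recovered from predict() directly
# (a sketch):
pred.class <- apply(predictions, 1, which.max) - 1  # digit labels 0-9
head(pred.class, 2)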


# assess the model performance
model %>% 
  evaluate(mnist$test$x, mnist$test$y, verbose = 0)








