#################################################
# B06: 時間序列資料分析 Time Series Analysis    #
# 吳漢銘 國立政治大學統計學系                   #
# https://hmwu.idv.tw                          #
#################################################

# 3/53
browseURL("https://cran.r-project.org/web/views/TimeSeries.html")


# 9/53
(lct <- Sys.getlocale("LC_TIME"))
Sys.setlocale("LC_TIME", "C")
Sys.getlocale("LC_TIME")

as.numeric(as.Date("1970/1/1")) 

as.Date("1985-6-16")
as.Date("2019/02/17")
as.Date(1000, origin = "1900-01-01")
as.Date("2/15/2011", format = "%m/%d/%Y")

as.Date("April 26, 1993", format = "%B %d, %Y")
as.Date("22JUN01", format = "%d%b%y") 
seq(as.Date('1976-7-4'), by = 'days', length = 10)
seq(as.Date('2010-2-1'), to = as.Date('2010-4-1'), by = '2 weeks')


# 10/53
Sys.time()
substr(as.character(Sys.time()), 1, 10)
substr(as.character(Sys.time()), 12, 19)
date()

now <- Sys.time()
as.POSIXlt(now)
class(now)

my.date <- as.POSIXlt(Sys.time())
my.date
my.date$sec
my.date$min
my.date$hour

my.date$mday
my.date$mon
my.date$year + 1900
my.date$wday
my.date$yday


# 11/53
as.POSIXct("1969-12-31 23:59:59", format = "%Y-%m-%d %H:%M:%S", tz = "UTC") 
as.POSIXlt(Sys.time(), "GMT") 


# 12/53
x1 <- c("20040227", "20050412", "19930922")
strptime(x1, format = "%Y%m%d")
x2 <- c("27/02/2004", "27/02/2005", "14/01/2003")
strptime(x2, format = "%d/%m/%Y")
x3 <- c("1jan1960", "2jan1960", "31mar1960", "30jul1960")
strptime(x3, "%d%b%Y")
dates <- c("02/27/92", "02/27/92", "01/14/92", "02/28/92", "02/01/92")
times <- c("23:03:20", "22:29:56", "01:03:30", "18:21:03", "16:56:26")
x <- paste(dates, times)
strptime(x, "%m/%d/%y %H:%M:%S")


# 13/53
mydate <- read.table("mydate.txt", 
                     sep = ";")
mydate
lapply(mydate, class)

varNames <- c("ID", "Values", "DateTime")
mydate <- read.table("mydate.txt", sep = ";", 
                     col.names = varNames)
mydate
lapply(mydate, class)

mydate$DateTime <- strptime(mydate$DateTime, 
                            "%Y/%m/%d %H:%M:%S")
lapply(mydate, class)



# 14/53
library(readxl)
mydate <- read_excel("mydate.xlsx", col_names = FALSE)
mydate
lapply(mydate, class)

mydate2 <- read_excel("mydate2.xlsx", col_names = FALSE)
mydate2
lapply(mydate2, class)


library(writexl)
write_xlsx(iris, path = "iris.xlsx")
write_xlsx(list(sheet1 = iris[, 1:2], sheet2 = iris[, 3:4]), path = "iris.xlsx")


# 18/53
library(lubridate)
today()
current1 <- now()
current1
class(current1)

current2 <- date()
current2
class(current2)

my_date <- ymd("20230910")
my_date
class(my_date)

mdy("09-10-2023")
dmy("10/09/2023")

ymd_hms("20131219101707") 
ymd_hms("2013 Dec 19 10:17:07") 
mdy("Dec 19, 2013") 
mdy_hms("December 19, 2013 10:17:07") 
dmy_hms("19-Dec, 2013 10:17:07") 

two_dates <- ymd_hms("2023-01-01 01:15:00", "2023-02-01 01:30:00")
minute(two_dates)
mean(minute(two_dates))


# 19/53
OlsonNames() ## typically around six hundred names
Sys.timezone()
Sys.setenv(TZ = "UTC")
ymd_hms("2023-08-17 08:50:00", tz = "Asia/Taipei")
ymd_hms("2023-08-17 19:25:00", tz = "Europe/London")

departure <- ymd_hms("2023-08-17 08:50:00", tz = "Asia/Taipei")
departure
with_tz(departure, "Europe/London")

changed <- force_tz(departure, "America/Chicago")
changed
with_tz(changed, "Europe/London")


# 20/53
departure <- ymd_hms("2023-08-17 08:50:00", tz = "Asia/Taipei")
year(departure) 
month(departure) 
week(departure) 
yday(departure) 
mday(departure) 
wday(departure) 
hour(departure) 
minute(departure) 
second(departure)

month(departure, label = TRUE) 
month(departure, label = TRUE, abbr = FALSE)
wday(departure, label = TRUE)

Sys.getenv()
Sys.setenv(LANG = "en")
Sys.setenv(LANG = "zh")


# 21/53
summer_camp_start <- ymd_hms("2023-07-20 08:00:00")
summer_camp_end <- ymd_hms("2023-08-03 12:00:00")
camp_period <- interval(summer_camp_start, summer_camp_end)

# Toronto
# Current: 	EDT — Eastern Daylight Time
# Next Change: 	EST — Eastern Standard Time

JSM2023 <- interval(ymd(20230805, tz = "Canada/Eastern"), 
                    ymd(20230810, tz = "Canada/Eastern"))
JSM2023

int_overlaps(JSM2023, camp_period)
union(JSM2023, camp_period)


# 22/53
ymd(20230805) + 1
ymd(20230805) + days(1)
days(0:5)
weeks(1:5)
months(1)
ymd(20230805) + months(1)
years(1:2)
ymd(20230805) + years(1:2)

dweeks(1)
ddays(6) 
dhours(1)
dminutes(1)
dseconds(60)



# 23/53
jan31 <- ymd("2013-01-31")
jan31

#(1)
jan31 + months(1)
jan31 + months(0:11)

#(2)
floor_date(jan31, "month")
floor_date(jan31, "month") + months(0:11) + days(31)

#(3)
# %m+% and %m-%: roll dates back to the last day of the month
jan31 %m+% months(0:11)



# 24/53
Observation <- ts(c(123, 39, 78, 52, 110), start = 2012)
ts_matrix <- matrix(round(rnorm(60), 2), 20, 3)
ts_matrix
ts_obj <- ts(ts_matrix, start = c(1961, 1), frequency = 12)
ts_obj 
class(ts_obj)
is.mts(ts_obj)


# 25/53
stockdata <- read.table("stock-data.txt", skip = 1, header = T)
head(stockdata, 5)
sapply(stockdata, class)
for(at in 7:9){
  stockdata[,at] <- as.numeric(gsub(",", "", stockdata[,at]))
}
sapply(stockdata, class)
ts.plot(stockdata$加權平均價[1:12])



# 27/53
mat <- as.data.frame(matrix(stockdata$加權平均價, ncol = 5))
colnames(mat) <- unique(stockdata$半導體公司)
mat

matplot(1:12, mat, type = "o", lty = 1:5, col = 1:5, pch = 1:5, 
        main = "民國100年5家半導體公司股票月成交資訊(元,股)", 
        xlab = "月份", ylab = "價位(元/股)")
legend(10, 240, legend = colnames(mat), lty = 1:5, col = 1:5,
       pch = 1:5, cex = 0.9)


ts.plot(mat, lty = 1:5, col = 1:5, type = "b")



# 28/53
library(ggplot2)
ggplot(stockdata, aes(x = 月份, y = 加權平均價, colour = 半導體公司)) +
  geom_point(aes(size = 週轉率百分比))+
  geom_line(aes(lty = 半導體公司)) + 
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  labs(title="民國100年三家半導體公司股票月成交資訊(元,股)")

stockdata$年月日 <- as.Date(paste0("2011/", stockdata$月份, "/01"))
ggplot(stockdata, aes(x = 年月日 , y = 加權平均價, colour = 半導體公司)) +
  geom_point(aes(size = 週轉率百分比))+
  geom_line(aes(lty = 半導體公司)) + 
  scale_x_date(date_labels = "%b/%Y") +
  labs(title="民國100年三家半導體公司股票月成交資訊(元,股)")


# 29/53
library(forecast)
library(ggplot2)
library(fpp) # Forecasting: principles and practice

data(melsyd) # Total weekly air passenger numbers on Ansett airline flights between Melbourne and Sydney, 1987-1992. 
head(melsyd)
class(melsyd)
colnames(melsyd)
time(melsyd)

autoplot(melsyd[, "Economy.Class"]) +
  ggtitle("Passenger numbers on Ansett airline (Melbourne - Sydney)") +
  xlab("Year") +
  ylab("Thousand")



# 30/53
data(a10) #Monthly anti-diabetic drug sales in Australia from 1992 to 2008. 
a10
class(a10)

autoplot(a10) +
  xlab("Month/Year")+
  ylab("Million (USD)") +
  ggtitle("Anti-diabetic drug sales in Australia (1992-2008)")


ggseasonplot(a10, year.labels = TRUE) +
  xlab("Month")+
  ylab("Million (USD)") +
  ggtitle("Monthly anti-diabetic drug sales in Australia (1992-2008)")


# 31/53  
library(plotly)
# data.frame with one date/time column 
date_mat <- data.frame(date = seq(as.Date("2011/1/1"), by = "month", 
                                  length.out = 12), mat)
sapply(date_mat, class)
ts_plot(date_mat)

# data.frame to ts, and then ts_plot
mat_ts <- as.ts(mat, start = c(2011, 1), frequency = 12)
mat_ts
class(mat_ts)
str(mat_ts)
ts_plot(mat_ts)
ts_plot(mat_ts, type = "multiple") 




# 35/53  
Gasoline_Sales <- ts(c(68, 84, 76, 92, 72, 64, 80, 72, 88, 80, 60, 88))
Gasoline_Sales
acf(Gasoline_Sales) # base package
Acf(Gasoline_Sales) # forecast package
ggAcf(Gasoline_Sales) # forecast package


class(melsyd[, "Economy.Class"])
acf(melsyd[, "Economy.Class"], na.action = na.pass)
acf(coredata(melsyd[, "Economy.Class"]), na.action = na.pass)
Acf(melsyd[, "Economy.Class"])
ggAcf(melsyd[, "Economy.Class"])


# 36/53  
library(readxl)
Gasoline <- read_excel("Gasoline.xlsx")
Gasoline
plot(Sales ~ Week, data = Gasoline, type = "o", 
     main = "Gasoline Sales Time Series Plot",
     ylim = c(0, 100), pch = 16)

Gasoline_lm <- lm(Sales ~ Week, data = Gasoline)
library(lmtest)
dwtest(formula = Gasoline_lm,  alternative = "two.sided")



# 39/53  
library(TTR)
data(ttrc)
dim(ttrc)
head(ttrc)

t <- 1:100
sma.20 <- SMA(ttrc[t, "Close"], 20)
ema.20 <- EMA(ttrc[t, "Close"], 20)
wma.20 <- WMA(ttrc[t, "Close"], 20)

plot(ttrc[t,"Close"], type = "l", main = "ttrc")
lines(sma.20, col = "red", lwd = 2)
lines(ema.20, col = "blue", lwd = 2)
lines(wma.20, col = "green", lwd = 2)
legend("topright", legend=c("sma.20", "ema.20", "wma.20"), col = c("red", "blue", "green"), lty = 1, lwd = 2)



# 41/53  
library(readxl)
Gasoline <- read_excel("Gasoline.xlsx")
Gasoline

plot(Sales ~ Week, data = Gasoline, type = "o", 
     main = "Gasoline Sales Time Series Plot",
     ylim = c(0, 100), pch = 16)

library(forecast)
Gasoline_ts <- ts(Gasoline$Sales)
plot(Gasoline_ts)

Gasoline_fc_naive <- naive(Gasoline_ts)
Gasoline_fc_naive$fitted
summary(Gasoline_fc_naive)
accuracy(Gasoline_fc_naive)



# 42/53  
library(TTR)
Gasoline_fc_sma3 <- SMA(Gasoline_ts, n = 3)
Gasoline_fc_wma3 <- WMA(Gasoline_ts, n = 3)
Gasoline_fc_ema3 <- EMA(Gasoline_ts, n = 3)

accuracy(Gasoline_fc_sma3, Gasoline_ts[2:12])
accuracy(Gasoline_fc_wma3, Gasoline_ts[2:12]) 
accuracy(Gasoline_fc_ema3, Gasoline_ts[2:12]) 

plot(Gasoline_ts, type = "o", ylim = c(0, 100), lwd = 2, pch = 16,
     xlab = "Week", ylab = "Sales", 
     main = "Gasoline Sales Time Series Plot and \n Three-Week Moving Average Forecasts")
points(Gasoline_fc_sma3, type = "o", lwd = 2, col = 2, pch = 16)
points(Gasoline_fc_wma3, type = "o", lwd = 2, col = 3, pch = 16)
points(Gasoline_fc_ema3, type = "o", lwd = 2, col = 4, pch = 16)
legend(8, 40, legend = c("Gasoline_ts", "SMA(3)", "WMA(3)", "EMA(3)"), 
       col = 1:4, lty = 1, lwd = 2)


# 44/53  
SmartphoneSales <- read_excel("SmartphoneSales.xlsx")
SmartphoneSales
SmartphoneSales_ts <- ts(SmartphoneSales$`Sales (1000s)`, 
                         start =1, frequency = 4)

plot(SmartphoneSales_ts, type = "o", pch = 16, 
     ylim = c(0, 9), xlab = "Year/Quarter", ylab = "Sales", 
     xaxt = "n", 
     main = "Smartphone Sales Time Series Plot")
axis(1, at = seq(from = 1, by = 0.25, length.out = length(SmartphoneSales_ts)),
     labels = paste0(rep(1:4, each = 4), "/", rep(1:4, 4)))



# 44/53  
SmartphoneSales$Quarter <- factor(SmartphoneSales$Quarter, levels = 4:1) 
SmartphoneSales$Period <- 1:nrow(SmartphoneSales)

SmartphoneSales_lm <- lm(`Sales (1000s)` ~ Quarter + Period, data = SmartphoneSales)
summary(SmartphoneSales_lm)
anova(SmartphoneSales_lm)


# forecast quarterly sales for next year. Next year is
# year 5 for the smartphone sales time series; 
# that is, time periods 17, 18, 19, and 20.
predict(SmartphoneSales_lm, newdata = data.frame(Quarter = factor(1:4), 
                                                 Period = 17:20))

points(x = seq(from = 1, by = 0.25, length.out = length(SmartphoneSales_ts)),
       y = SmartphoneSales_lm$fitted.values, 
       type = "o", pch = 15, col = "red")
legend(3, 3, legend = c("Time Series", "Regression Fit"), lty = 1, 
       col = c("black", "red"), pch = c(16, 15))




# 46/53  
library(tseries)
my_ts_data <- ts(c(13, 54, 54, 65, 66, 71, 67, 67, 79, 88, 59, 52, 60))
plot(my_ts_data)
ggAcf(my_ts_data)
adf.test(my_ts_data)



# 50/53  
library(forecast) 
autoplot(WWWusage) +
  xlab("Time")+
  ylab("Numbers of users") +
  ggtitle("Internet Usage per Minute")

ggAcf(WWWusage)


# 51/53  
WWWusage_arima <- Arima(WWWusage, order = c(2, 1, 2))
WWWusage_arima
WWWusage_arima_best <- auto.arima(WWWusage)
WWWusage_arima_best
forecast(WWWusage_arima_best, h = 10)
plot(forecast(WWWusage_arima_best, h = 10))