#Author: Lorena Santos and Karine Ferreira

#----------------------------------- Distances -------------------------------------

#Library zoo for irregular time series
library(zoo)
library(TSdist)
library(dtw)
library(dplyr)
library(kohonen)
library(ggplot2)

# Two time series of equal length: the same rectangular pulse, with the pulse
# in Y starting one step earlier than in X.
X <- c(1, 1, 1, 4, 4, 4, 4, 4, 1, 1)
Y <- c(1, 1, 4, 4, 4, 4, 4, 1, 1, 1)

# Plot both series in separate, stacked panels
zoo::plot.zoo(cbind(X, Y), plot.type = "multiple", col = c("red", "blue"),
              lwd = 2, main = "Time Series")

# DTW distance (TSdist implementation)
TSdist::DTWDistance(X, Y)

# Euclidean distance
TSdist::EuclideanDistance(X, Y)

# DTW alignment computed with the dtw package.
# keep.internals = TRUE keeps the internal structures of the alignment in the
# returned object so that it can be plotted afterwards.
alignment <- dtw::dtw(X, Y, keep.internals = TRUE, step.pattern = symmetric1)

# Three-way plot: both series together with the warping path
plot(alignment, type = "threeway")

# Two-way plot of an alignment that uses a Rabiner-Juang step pattern;
# offset = -2 is passed on to the two-way plot.
plot(
  dtw::dtw(X, Y, keep.internals = TRUE,
           step.pattern = rabinerJuangStepPattern(6, "c")),
  type = "twoway", offset = -2
)

# Two-way plot of an alignment that uses the asymmetricP1 step pattern
dtwPlotTwoWay(dtw(X, Y, step.pattern = asymmetricP1, keep.internals = TRUE))

#--------------------------- Read Time series from file ---------------------------------

# Read the set of time series from a comma-separated file with a header row.
# The file name is relative to the current working directory.
timeseries.zoo <- zoo::read.zoo(
  file = "timeseries.csv",
  header = TRUE,
  sep = ","
)

# Define one color per group (4 groups).
# The group plots below take 10 consecutive columns per group (1:10, 11:20,
# 21:30, 31:40), so each group color is repeated 10 times: every series then
# gets the color of its own group instead of a recycled 16-color pattern.
colors <- rep(
  c("#FF0000FF", "#00FF9FFF", "#009FFFFF", "#DFFF00FF"),
  each = 10
)

# Plot all time series, one panel per series
zoo::plot.zoo(timeseries.zoo, plot.type = "multiple",
              col = colors, lwd = 2,
              main = "Set of time series")

# or: one ggplot per group of 10 consecutive columns.
# facets = NULL draws all series of a group in a single panel; the argument
# name is spelled out in full rather than relying on partial matching.
plot1 <- ggplot2::autoplot(timeseries.zoo[, 1:10], facets = NULL) +
  ylab("y") + geom_line(size = 1) + ggtitle("Group 1")
plot2 <- ggplot2::autoplot(timeseries.zoo[, 11:20], facets = NULL) +
  ylab("y") + geom_line(size = 1) + ggtitle("Group 2")
plot3 <- ggplot2::autoplot(timeseries.zoo[, 21:30], facets = NULL) +
  ylab("y") + geom_line(size = 1) + ggtitle("Group 3")
plot4 <- ggplot2::autoplot(timeseries.zoo[, 31:40], facets = NULL) +
  ylab("y") + geom_line(size = 1) + ggtitle("Group 4")

# Arrange the four group plots in a 2 x 2 grid
gridExtra::grid.arrange(plot1, plot2, plot3, plot4,
                        ncol = 2, nrow = 2, top = "Time Series")


#-----------------------------  Starting SOM ---------------------------------------------

# SOM parameters.
# Grid: 4 x 4 neurons on a rectangular topology with a gaussian
# neighbourhood function.
somgrid <- kohonen::somgrid(
  xdim = 4,
  ydim = 4,
  topo = "rectangular",
  neighbourhood.fct = "gaussian"
)

# Train the SOM. The zoo object is converted to a matrix and transposed
# before being handed to supersom.
# rlen = 100 and euclidean distance are used for training.
# NOTE(review): the kohonen documentation describes alpha as a vector of two
# numbers (start and end learning rate); confirm that a single value of 1 is
# intended here.
som <- kohonen::supersom(
  data = t(as.matrix(timeseries.zoo)),
  grid = somgrid,
  rlen = 100,
  alpha = 1,
  dist.fcts = "euclidean"
)

# Codebook vectors drawn as lines, then the mapping of series onto the grid
plot(som, type = "codes", codeRendering = "lines")
plot(som, type = "mapping")


# Neuron to which each time series was allocated.
# Position i of the vector holds the neuron number of time series i in the
# dataset; e.g. a value of 9 in position 1 would mean that time series 1 was
# allocated to neuron 9.
neuron_timeseries <- som$unit.classif

# Data frame with the name of each time series and the neuron it was
# allocated to.
info_timeSeries <- data.frame(
  time_series = rownames(t(timeseries.zoo)),
  neuron = as.integer(som$unit.classif)
)

# Get the time series that represent the signature (weight vector) of the
# neurons, stored as a zoo object with one column per neuron.
codes <- zoo::zoo(t(som$codes[[1]]))

# Plot the weight vectors, one panel per neuron
zoo::plot.zoo(codes, plot.type = "multiple", col = "black", lwd = 2,
              main = "Weight of neurons")

# Plot all weight vectors (neurons) in a single panel.
# facets is spelled out in full rather than relying on partial matching.
ggplot2::autoplot(codes, facets = NULL) +
  ylab("y") + xlab("time") + geom_line(size = 1) +
  ggtitle("Weight of neurons")



library(proxy)
library(stats)

## Hierarchical clustering of the codebook vectors (method "ward.D2").
# The distance matrix is computed on t(codes).
# proxy is attached, so a bare dist() call resolves to proxy::dist; the
# namespace is written out here to make that explicit.
hc <- stats::hclust(proxy::dist(t(codes)), method = "ward.D2")

# Cut the tree produced by hclust into 4 groups. cutree can cut either by a
# desired number of groups or by a cut height; the number of groups is used.
som_cluster <- stats::cutree(hc, k = 4)

library(dendextend)
# Build the dendrogram and style it: branches and labels colored by the
# 4 groups, plus line width, label size and leaf markers.
dend <- hc %>%
  as.dendrogram() %>%
  dendextend::set("branches_k_color", k = 4) %>%
  dendextend::set("branches_lwd", 1.2) %>%
  dendextend::set("labels_cex", 0.8) %>%
  dendextend::set("labels_colors", k = 4) %>%
  dendextend::set("leaves_pch", 19) %>%
  dendextend::set("leaves_cex", 0.5)

# Convert to a ggplot-compatible object and draw it
ggd1 <- dendextend::as.ggdend(dend)
ggplot(ggd1, horiz = FALSE) + ggtitle("Dendrogram")

# A cluster is made of a set of neurons: one row per neuron with the cluster
# it was assigned to. seq_len(ncol(codes)) numbers the neurons 1..n and,
# unlike 1:n, stays empty when there are no columns.
cluster <- data.frame(cluster = som_cluster, neuron = seq_len(ncol(codes)))

# Time series, neuron and cluster joined on the neuron number
cluster_info <- merge(cluster, info_timeSeries, by = "neuron")

# Show the table ordered by time series name, then by cluster
cluster_info[order(cluster_info$time_series), ]
cluster_info[order(cluster_info$cluster), ]

# Get the neurons that correspond to each group
neuron_group1 <- which(som_cluster == 1)
neuron_group2 <- which(som_cluster == 2)
neuron_group3 <- which(som_cluster == 3)
neuron_group4 <- which(som_cluster == 4)

# Color the background of each neuron by its cluster (as found by the
# hierarchical clustering) and plot the SOM twice: codebook vectors as lines,
# then the mapping of the time series.
plot(
  som,
  type = "codes",
  codeRendering = "lines",
  bgcol = terrain.colors(4)[som_cluster],
  main = "Clusters"
)
plot(
  som,
  type = "mapping",
  codeRendering = "lines",
  bgcol = terrain.colors(4)[som_cluster],
  main = "Clusters"
)
# Draw the cluster boundaries on top of the last plot (the mapping plot)
kohonen::add.cluster.boundaries(x = som, clustering = som_cluster)

# Plot the weight vectors of the neurons, one plot per cluster.
# Are they similar to the groups of the time series set?
# facets = NULL draws all neurons of a cluster in a single panel; the argument
# name is spelled out in full rather than relying on partial matching.
v1 <- ggplot2::autoplot(codes[, neuron_group1], facets = NULL) +
  ylab("y") + geom_line(size = 1)
v2 <- ggplot2::autoplot(codes[, neuron_group2], facets = NULL) +
  ylab("y") + geom_line(size = 1)
v3 <- ggplot2::autoplot(codes[, neuron_group3], facets = NULL) +
  ylab("y") + geom_line(size = 1)
v4 <- ggplot2::autoplot(codes[, neuron_group4], facets = NULL) +
  ylab("y") + geom_line(size = 1)

# Arrange the four cluster plots in a 2 x 2 grid
gridExtra::grid.arrange(v1, v2, v3, v4,
                        ncol = 2, nrow = 2, top = "Weight of Neurons")



