Similarity Distances
library(zoo)
library(kohonen)
library(ggplot2)
library(dtw)
# Two toy series of length 10: a plateau of 4s on a baseline of 1s.
# Y has the same plateau as X, starting one step earlier.
X <- rep(c(1, 4, 1), times = c(3, 5, 2))
Y <- rep(c(1, 4, 1), times = c(2, 5, 3))

# Draw both series in separate stacked panels.
zoo::plot.zoo(
  cbind(X, Y),
  plot.type = "multiple",
  col = c("red", "blue"),
  lwd = 2,
  main = "Time Series"
)

# DTW (dynamic time warping) distance between X and Y
TSdist::DTWDistance(X, Y)
[1] 0
# Euclidean distance between the same two series, for comparison with DTW
TSdist::EuclideanDistance(x = X, y = Y)
[1] 4.242641
# DTW alignments of X and Y under three different step patterns.
# Argument names are written out in full (`step.pattern`, `keep.internals`)
# instead of relying on partial matching, and TRUE is used instead of T.

# symmetric1 step pattern; internals are kept because the three-way plot
# is drawn from the stored alignment object.
alignment <- dtw::dtw(
  X, Y,
  keep.internals = TRUE,
  step.pattern = dtw::symmetric1
)
plot(alignment, type = "threeway")

# Rabiner-Juang step pattern (type 6, slope weighting "c"), shown as a
# two-way plot with the second series shifted by -2 for readability.
plot(
  dtw::dtw(
    X, Y,
    keep.internals = TRUE,
    step.pattern = dtw::rabinerJuangStepPattern(6, "c")
  ),
  type = "twoway",
  offset = -2
)

# asymmetricP1 step pattern, drawn directly with the two-way plot helper.
dtw::dtwPlotTwoWay(
  dtw::dtw(X, Y, step.pattern = dtw::asymmetricP1, keep.internals = TRUE)
)
Read the time series from a CSV file and plot all of them
# Load the set of time series from a comma-separated file whose first row
# holds the column names.
timeseries.zoo <- zoo::read.zoo(
  file = "timeseries.csv",
  header = TRUE,
  sep = ","
)

# One colour per group (4 groups), each repeated for 4 consecutive series,
# giving 16 colour entries in total.
# NOTE(review): this assumes 16 series ordered group by group; if the file
# holds more series the colours will not line up with the groups - confirm
# against timeseries.csv.
colors <- rep(
  c("#FF0000FF", "#00FF9FFF", "#009FFFFF", "#DFFF00FF"),
  each = 4
)

# Plot every series in its own panel, coloured by group.
zoo::plot.zoo(
  timeseries.zoo,
  plot.type = "multiple",
  col = colors,
  lwd = 2,
  main = "Set of time series"
)
Self-organizing maps using the kohonen package
# Self-organizing map: 4 x 4 rectangular grid with a Gaussian neighbourhood.
somgrid <- kohonen::somgrid(
  xdim = 4,
  ydim = 4,
  topo = "rectangular",
  neighbourhood.fct = "gaussian"
)

# Train on the transposed data so that each row is one time series.
# NOTE(review): alpha is given as a single value here; the kohonen docs
# describe it as a vector of two numbers (start and end learning rate) -
# confirm this is intended.
# NOTE(review): no seed is set before training, so the neuron assignment
# presumably differs between runs - confirm whether that is acceptable.
som <- kohonen::supersom(
  data = t(as.matrix(timeseries.zoo)),
  grid = somgrid,
  rlen = 100,
  alpha = 1
)

# Show the codebook vector of every neuron as a line.
plot(som, type = "codes", codeRendering = "lines")
Each time series is allocated to one neuron of the map. Each number in the vector below is the index of the neuron to which the corresponding time series was assigned.
Example: if position 1 of the vector holds 9, then time series 1 of the dataset was allocated to neuron 9.
# Index of the neuron assigned to each time series (one entry per series).
neuron_timeseries <- som$unit.classif

# Lookup table: series name -> neuron index.
info_timeSeries <- data.frame(
  time_series = rownames(t(timeseries.zoo)),
  neuron = as.integer(neuron_timeseries)
)

# Codebook (weight) vectors of the first data layer, transposed so that each
# column is one neuron, wrapped as a zoo object for plotting.
codes <- zoo::zoo(t(som$codes[[1]]))

# One panel per neuron.
zoo::plot.zoo(
  codes,
  plot.type = "multiple",
  col = "black",
  lwd = 2,
  main = "Weight of neurons"
)

# All neurons overlaid in a single panel.
ggplot2::autoplot(codes, facet = NULL) +
  ylab("y") +
  xlab("time") +
  geom_line(size = 1) +
  ggtitle("Weight of neurons")
library(proxy)
library(stats)
## Group the SOM neurons by hierarchical clustering of their codebook vectors.
# Pairwise distances between neurons (one row per neuron after transposing).
# NOTE(review): `dist` is unqualified, so which package supplies it depends
# on what is attached at this point (proxy is loaded just above) - confirm.
neuron_distances <- dist(t(codes))

# Agglomerative clustering using the "ward.D2" method.
hc <- stats::hclust(neuron_distances, method = "ward.D2")

# Cut the dendrogram into 4 groups; the result holds one cluster label
# per neuron.
som_cluster <- stats::cutree(hc, k = 4)
library(dendextend)
# Build a styled dendrogram: branches and labels coloured by the 4 clusters.
dend <- hc %>%
  as.dendrogram() %>%
  set("branches_k_color", k = 4) %>%
  set("branches_lwd", 1.2) %>%
  set("labels_cex", 0.8) %>%
  set("labels_colors", k = 4) %>%
  set("leaves_pch", 19) %>%
  set("leaves_cex", 0.5)

# Convert to a ggplot-compatible object and draw it vertically.
ggd1 <- as.ggdend(dend)
ggplot(ggd1, horiz = FALSE) +
  ggtitle("Dendrogram")
# Each cluster is a set of neurons: pair every neuron index with the cluster
# label it received from cutree(). seq_len() is used instead of 1:n so an
# empty codebook cannot produce the sequence c(1, 0).
cluster <- data.frame(
  cluster = som_cluster,
  neuron = seq_len(ncol(codes))
)

# Join on the neuron index to get series name, neuron and cluster together.
cluster_info <- merge(cluster, info_timeSeries, by = "neuron")

# Print the table sorted by series name.
cluster_info[order(cluster_info$time_series), ]
neuron cluster time_series
1 1 1 t1_1
2 1 1 t1_10
16 5 1 t1_2
17 5 1 t1_3
18 5 1 t1_4
4 1 1 t1_5
19 5 1 t1_6
20 5 1 t1_7
3 1 1 t1_8
5 1 1 t1_9
11 4 2 t2_1
8 3 2 t2_10
9 3 2 t2_2
13 4 2 t2_3
7 3 2 t2_4
14 4 2 t2_5
6 3 2 t2_6
12 4 2 t2_7
10 3 2 t2_8
15 4 2 t2_9
25 12 4 t3_1
29 12 4 t3_10
30 12 4 t3_2
38 16 4 t3_3
28 12 4 t3_4
26 12 4 t3_5
27 12 4 t3_6
40 16 4 t3_7
31 12 4 t3_8
39 16 4 t3_9
24 10 3 t4_1
37 14 3 t4_10
22 9 3 t4_2
32 13 3 t4_3
35 14 3 t4_4
23 10 3 t4_5
21 9 3 t4_6
33 13 3 t4_7
36 14 3 t4_8
34 13 3 t4_9
# Indices of the neurons that belong to each of the 4 clusters.
neuron_group1 <- which(som_cluster == 1)
neuron_group2 <- which(som_cluster == 2)
neuron_group3 <- which(som_cluster == 3)
neuron_group4 <- which(som_cluster == 4)

# Background colour of every neuron, chosen by its cluster label.
cluster_bg <- terrain.colors(4)[som_cluster]

# Codebook vectors with neurons painted by cluster.
plot(
  som,
  type = "codes",
  codeRendering = "lines",
  bgcol = cluster_bg,
  main = "Clusters"
)

# Mapping of the series onto the grid, same colouring, with the cluster
# boundaries drawn on top.
plot(
  som,
  type = "mapping",
  codeRendering = "lines",
  bgcol = cluster_bg,
  main = "Clusters"
)
kohonen::add.cluster.boundaries(x = som, clustering = som_cluster)
# Plot the weight vectors of the neurons, one panel per cluster, to compare
# them visually with the original set of time series.

# Layers shared by all four panels: y-axis label and a thicker line.
line_layers <- list(ylab("y"), geom_line(size = 1))

v1 <- ggplot2::autoplot(codes[, neuron_group1], facet = NULL) + line_layers
v2 <- ggplot2::autoplot(codes[, neuron_group2], facet = NULL) + line_layers
v3 <- ggplot2::autoplot(codes[, neuron_group3], facet = NULL) + line_layers
v4 <- ggplot2::autoplot(codes[, neuron_group4], facet = NULL) + line_layers

# Arrange the four panels in a 2 x 2 grid under a common title.
gridExtra::grid.arrange(
  v1, v2, v3, v4,
  ncol = 2,
  nrow = 2,
  top = "Weight of Neurons"
)