Time Series Clustering Using Self-Organizing Maps

Similarity Distances

library(zoo)
library(kohonen)
library(ggplot2)
library(dtw)
# Two time series
X <- c(1,1,1,4,4,4,4,4,1,1)
Y <- c(1,1,4,4,4,4,4,1,1,1)
# Plot
zoo::plot.zoo(cbind(X, Y), plot.type = "multiple", col = c("red", "blue"), lwd = 2, main= "Time Series")

# Euclidean distance
TSdist::DTWDistance(X, Y)
[1] 0
# Euclidean distance
TSdist::EuclideanDistance(X, Y)
[1] 4.242641
# DTW distance
alignment<-dtw::dtw(X, Y, keep.internals = TRUE, step=symmetric1, k=T)
plot(alignment,type="threeway")

plot(dtw::dtw(X, Y, keep.internals = TRUE, step.pattern =rabinerJuangStepPattern(6,"c")),
     type="twoway",offset=-2)

dtwPlotTwoWay(dtw(X, Y, step = asymmetricP1, keep = T))

Read the time series from csv file and plot all

#read time series from file
timeseries.zoo <- read.zoo("timeseries.csv", header = TRUE, sep = ",")
#zoo::plot.zoo(z, main = "Set of time series")
#Define a color for each group (4 groups)
colors<-c("#FF0000FF","#FF0000FF","#FF0000FF","#FF0000FF",
          "#00FF9FFF","#00FF9FFF","#00FF9FFF","#00FF9FFF",
          "#009FFFFF","#009FFFFF","#009FFFFF","#009FFFFF",
          "#DFFF00FF","#DFFF00FF","#DFFF00FF","#DFFF00FF")
#Plot all time series
zoo::plot.zoo(timeseries.zoo, plot.type = "multiple", col = colors, lwd = 2, main= "Set of time series")

Self-organizing maps using Kohonen package

# SOM
somgrid=kohonen::somgrid(xdim=4, ydim=4, topo="rectangular", neighbourhood.fct = "gaussian")
som<-kohonen::supersom(t(as.matrix(timeseries.zoo)),somgrid,rlen = 100,  alpha=1)
plot(som,"codes",codeRendering="lines")

The time series were allocated in these neurons. Each number represents the number of neuron.

Example: the position 1 of vector represents the time series 1 in a dataset, this time series was allocated in neuron 9.

neuron_timeseries<-som$unit.classif
info_timeSeries<- data.frame(time_series=(rownames(t(timeseries.zoo))), 
                             neuron=as.integer(som$unit.classif))
                        
codes<-zoo::zoo(t(som$codes[[1]]))
                             
zoo::plot.zoo(codes, plot.type = "multiple", col = "black", lwd = 2, main= "Weight of neurons")

#plot all time series (neurons)
ggplot2::autoplot((codes), facet = NULL) + ylab("y") +xlab("time") + geom_line(size = 1) + ggtitle("Weight of neurons")

library(proxy)
library(stats)
## use hierarchical clustering to cluster the codebook vectors.
# Cut dendrogram in 4 groups
hc <- stats::hclust(dist(t(codes)),method = "ward.D2")
#Cuts a tree, e.g., as resulting from hclust, into several groups either 
#by specifying the desired number(s) of groups or the cut height(s).
som_cluster <- stats::cutree(hc, 4)
library(dendextend)
#plot dendrogram
dend <- hc%>% as.dendrogram %>%
  set("branches_k_color", k = 4) %>% set("branches_lwd", 1.2) %>%
  set("labels_cex", 0.8) %>% set("labels_colors", k = 4) %>%
  set("leaves_pch", 19) %>% set("leaves_cex", 0.5) 
ggd1 <- as.ggdend(dend)
ggplot(ggd1, horiz = FALSE) + ggtitle("Dendrogram")

#Cluster is created by a set of neurons. 
cluster<-data.frame(cluster=som_cluster,neuron= 1:dim(codes)[2])
#Timeseries, neuron and cluster
cluster_info<-merge(cluster,info_timeSeries, by="neuron")
cluster_info[order(cluster_info$time_series),]
   neuron cluster time_series
1       1       1        t1_1
2       1       1       t1_10
16      5       1        t1_2
17      5       1        t1_3
18      5       1        t1_4
4       1       1        t1_5
19      5       1        t1_6
20      5       1        t1_7
3       1       1        t1_8
5       1       1        t1_9
11      4       2        t2_1
8       3       2       t2_10
9       3       2        t2_2
13      4       2        t2_3
7       3       2        t2_4
14      4       2        t2_5
6       3       2        t2_6
12      4       2        t2_7
10      3       2        t2_8
15      4       2        t2_9
25     12       4        t3_1
29     12       4       t3_10
30     12       4        t3_2
38     16       4        t3_3
28     12       4        t3_4
26     12       4        t3_5
27     12       4        t3_6
40     16       4        t3_7
31     12       4        t3_8
39     16       4        t3_9
24     10       3        t4_1
37     14       3       t4_10
22      9       3        t4_2
32     13       3        t4_3
35     14       3        t4_4
23     10       3        t4_5
21      9       3        t4_6
33     13       3        t4_7
36     14       3        t4_8
34     13       3        t4_9
#get the neurons that corresponds each group
neuron_group1<-which(som_cluster==1)
neuron_group2<-which(som_cluster==2)
neuron_group3<-which(som_cluster==3)
neuron_group4<-which(som_cluster==4)
#Paint neurons and plot som with groups separated by hierarchical clustering.
plot(som, type="codes",codeRendering="lines", bgcol = terrain.colors(4)[som_cluster], main = "Clusters")

plot(som, type="mapping",codeRendering="lines", bgcol = terrain.colors(4)[som_cluster], main = "Clusters") 
kohonen::add.cluster.boundaries(som, som_cluster)

#Plot the weight vector of neurons. 
#Are they similar with the time series set?
v1<-ggplot2::autoplot(codes[,neuron_group1], facet = NULL) + ylab("y") + geom_line(size = 1) 
v2<-ggplot2::autoplot(codes[,neuron_group2], facet = NULL) + ylab("y") + geom_line(size = 1) 
v3<-ggplot2::autoplot(codes[,neuron_group3], facet = NULL) + ylab("y") + geom_line(size = 1) 
v4<-ggplot2::autoplot(codes[,neuron_group4], facet = NULL) + ylab("y") + geom_line(size = 1) 
gridExtra::grid.arrange(v1, v2,v3, v4, ncol=2,nrow=2, top="Weight of Neurons")

LS0tDQp0aXRsZTogIlRpbWUgU2VyaWVzIENsdXN0ZXJpbmcgVXNpbmcgU2VsZi1Pcmdhbml6aW5nIE1hcHMiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KU2ltaWxhcml0eSBEaXN0YW5jZXMNCg0KDQpgYGB7cn0NCmxpYnJhcnkoem9vKQ0KbGlicmFyeShrb2hvbmVuKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShkdHcpDQpgYGANCg0KYGBge3J9DQoNCiMgVHdvIHRpbWUgc2VyaWVzDQpYIDwtIGMoMSwxLDEsNCw0LDQsNCw0LDEsMSkNClkgPC0gYygxLDEsNCw0LDQsNCw0LDEsMSwxKQ0KDQojIFBsb3QNCnpvbzo6cGxvdC56b28oY2JpbmQoWCwgWSksIHBsb3QudHlwZSA9ICJtdWx0aXBsZSIsIGNvbCA9IGMoInJlZCIsICJibHVlIiksIGx3ZCA9IDIsIG1haW49ICJUaW1lIFNlcmllcyIpDQoNCiMgRXVjbGlkZWFuIGRpc3RhbmNlDQpUU2Rpc3Q6OkRUV0Rpc3RhbmNlKFgsIFkpDQoNCiMgRXVjbGlkZWFuIGRpc3RhbmNlDQpUU2Rpc3Q6OkV1Y2xpZGVhbkRpc3RhbmNlKFgsIFkpDQoNCiMgRFRXIGRpc3RhbmNlDQphbGlnbm1lbnQ8LWR0dzo6ZHR3KFgsIFksIGtlZXAuaW50ZXJuYWxzID0gVFJVRSwgc3RlcD1zeW1tZXRyaWMxLCBrPVQpDQoNCnBsb3QoYWxpZ25tZW50LHR5cGU9InRocmVld2F5IikNCg0KcGxvdChkdHc6OmR0dyhYLCBZLCBrZWVwLmludGVybmFscyA9IFRSVUUsIHN0ZXAucGF0dGVybiA9cmFiaW5lckp1YW5nU3RlcFBhdHRlcm4oNiwiYyIpKSwNCiAgICAgdHlwZT0idHdvd2F5IixvZmZzZXQ9LTIpDQoNCmR0d1Bsb3RUd29XYXkoZHR3KFgsIFksIHN0ZXAgPSBhc3ltbWV0cmljUDEsIGtlZXAgPSBUKSkNCg0KYGBgDQoNCg0KUmVhZCB0aGUgdGltZSBzZXJpZXMgZnJvbSBjc3YgZmlsZSBhbmQgcGxvdCBhbGwNCg0KYGBge3J9DQojcmVhZCB0aW1lIHNlcmllcyBmcm9tIGZpbGUNCnRpbWVzZXJpZXMuem9vIDwtIHJlYWQuem9vKCJ0aW1lc2VyaWVzLmNzdiIsIGhlYWRlciA9IFRSVUUsIHNlcCA9ICIsIikNCg0KI3pvbzo6cGxvdC56b28oeiwgbWFpbiA9ICJTZXQgb2YgdGltZSBzZXJpZXMiKQ0KDQojRGVmaW5lIGEgY29sb3IgZm9yIGVhY2ggZ3JvdXAgKDQgZ3JvdXBzKQ0KY29sb3JzPC1jKCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLA0KICAgICAgICAgICIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLA0KICAgICAgICAgICIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLA0KICAgICAgICAgICIjREZGRjAwRkYiLCIjREZGRjAwRkYiLCIjREZGRjAwRkYiLCIjREZGRjAwRkYiKQ0KYGBgDQoNCg0KYGBge3J9DQoNCiNQbG90IGFsbCB0aW1lIHNlcmllcw0Kem9vOjpwbG90Lnpvbyh0aW1lc2VyaWVzLnpvbywgcGxvdC50eXBlID0gIm11bHRpcGxlIiwgY29sID0gY29sb3JzLCBsd2QgPSAyLCBtYWluPSAiU2V0IG9mIHRpbWUgc2VyaWVzIikNCg0KYGBgDQoNClNlbGYtb3JnYW5pemluZyBtYXBzIHVzaW5nIEtvaG9uZW4gcGFja2FnZQ0KYGBge3J9DQojIFNPTQ0Kc29tZ3JpZD1rb2hvbmVuOjpzb21ncmlkKHhkaW09NCwgeWRpbT00LCB0b3BvPSJyZWN0YW5ndWxhciIsIG5laWdoYm91cmhvb2QuZmN0ID0gImdhdXNzaWFuIikNCnNvbTwta29ob25lbjo6c3VwZXJzb20odChhcy5tYXRyaXgodGltZXNlcmllcy56b28pKSxzb21ncmlkLHJsZW4gPSAxMDAsICBhbHBoYT0xKQ0KDQoNCmBgYA0KDQoNCmBgYHtyfQ0KcGxvdChzb20sImNvZGVzIixjb2RlUmVuZGVyaW5nPSJsaW5lcyIpDQpgYGANClRoZSB0aW1lIHNlcmllcyB3ZXJlIGFsbG9jYXRlZCBpbiB0aGVzZSBuZXVyb25zLg0KRWFjaCBudW1iZXIgcmVwcmVzZW50cyB0aGUgbnVtYmVyIG9mIG5ldXJvbi4NCg0KRXhhbXBsZTogdGhlIHBvc2l0aW9uIDEgb2YgdmVjdG9yIHJlcHJlc2VudHMgdGhlIHRpbWUgc2VyaWVzIDEgaW4gYSBkYXRhc2V0LCB0aGlzIHRpbWUgc2VyaWVzDQp3YXMgYWxsb2NhdGVkIGluIG5ldXJvbiA5Lg0KDQpgYGB7cn0NCm5ldXJvbl90aW1lc2VyaWVzPC1zb20kdW5pdC5jbGFzc2lmDQoNCmluZm9fdGltZVNlcmllczwtIGRhdGEuZnJhbWUodGltZV9zZXJpZXM9KHJvd25hbWVzKHQodGltZXNlcmllcy56b28pKSksIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICBuZXVyb249YXMuaW50ZWdlcihzb20kdW5pdC5jbGFzc2lmKSkNCiAgICAgICAgICAgICAgICAgICAgICAgIA0KY29kZXM8LXpvbzo6em9vKHQoc29tJGNvZGVzW1sxXV0pKQ0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICANCmBgYA0KDQoNCmBgYHtyfQ0KDQp6b286OnBsb3Quem9vKGNvZGVzLCBwbG90LnR5cGUgPSAibXVsdGlwbGUiLCBjb2wgPSAiYmxhY2siLCBsd2QgPSAyLCBtYWluPSAiV2VpZ2h0IG9mIG5ldXJvbnMiKQ0KI3Bsb3QgYWxsIHRpbWUgc2VyaWVzIChuZXVyb25zKQ0KZ2dwbG90Mjo6YXV0b3Bsb3QoKGNvZGVzKSwgZmFjZXQgPSBOVUxMKSArIHlsYWIoInkiKSAreGxhYigidGltZSIpICsgZ2VvbV9saW5lKHNpemUgPSAxKSArIGdndGl0bGUoIldlaWdodCBvZiBuZXVyb25zIikNCg0KDQoNCmxpYnJhcnkocHJveHkpDQpsaWJyYXJ5KHN0YXRzKQ0KDQojIyB1c2UgaGllcmFyY2hpY2FsIGNsdXN0ZXJpbmcgdG8gY2x1c3RlciB0aGUgY29kZWJvb2sgdmVjdG9ycy4NCiMgQ3V0IGRlbmRyb2dyYW0gaW4gNCBncm91cHMNCmhjIDwtIHN0YXRzOjpoY2x1c3QoZGlzdCh0KGNvZGVzKSksbWV0aG9kID0gIndhcmQuRDIiKQ0KDQojQ3V0cyBhIHRyZWUsIGUuZy4sIGFzIHJlc3VsdGluZyBmcm9tIGhjbHVzdCwgaW50byBzZXZlcmFsIGdyb3VwcyBlaXRoZXIgDQojYnkgc3BlY2lmeWluZyB0aGUgZGVzaXJlZCBudW1iZXIocykgb2YgZ3JvdXBzIG9yIHRoZSBjdXQgaGVpZ2h0KHMpLg0Kc29tX2NsdXN0ZXIgPC0gc3RhdHM6OmN1dHJlZShoYywgNCkNCg0KbGlicmFyeShkZW5kZXh0ZW5kKQ0KI3Bsb3QgZGVuZHJvZ3JhbQ0KZGVuZCA8LSBoYyU+JSBhcy5kZW5kcm9ncmFtICU+JQ0KICBzZXQoImJyYW5jaGVzX2tfY29sb3IiLCBrID0gNCkgJT4lIHNldCgiYnJhbmNoZXNfbHdkIiwgMS4yKSAlPiUNCiAgc2V0KCJsYWJlbHNfY2V4IiwgMC44KSAlPiUgc2V0KCJsYWJlbHNfY29sb3JzIiwgayA9IDQpICU+JQ0KICBzZXQoImxlYXZlc19wY2giLCAxOSkgJT4lIHNldCgibGVhdmVzX2NleCIsIDAuNSkgDQpnZ2QxIDwtIGFzLmdnZGVuZChkZW5kKQ0KZ2dwbG90KGdnZDEsIGhvcml6ID0gRkFMU0UpICsgZ2d0aXRsZSgiRGVuZHJvZ3JhbSIpDQoNCiNDbHVzdGVyIGlzIGNyZWF0ZWQgYnkgYSBzZXQgb2YgbmV1cm9ucy4gDQpjbHVzdGVyPC1kYXRhLmZyYW1lKGNsdXN0ZXI9c29tX2NsdXN0ZXIsbmV1cm9uPSAxOmRpbShjb2RlcylbMl0pDQoNCiNUaW1lc2VyaWVzLCBuZXVyb24gYW5kIGNsdXN0ZXINCmNsdXN0ZXJfaW5mbzwtbWVyZ2UoY2x1c3RlcixpbmZvX3RpbWVTZXJpZXMsIGJ5PSJuZXVyb24iKQ0KY2x1c3Rlcl9pbmZvW29yZGVyKGNsdXN0ZXJfaW5mbyR0aW1lX3NlcmllcyksXQ0KDQoNCiNnZXQgdGhlIG5ldXJvbnMgdGhhdCBjb3JyZXNwb25kcyBlYWNoIGdyb3VwDQpuZXVyb25fZ3JvdXAxPC13aGljaChzb21fY2x1c3Rlcj09MSkNCm5ldXJvbl9ncm91cDI8LXdoaWNoKHNvbV9jbHVzdGVyPT0yKQ0KbmV1cm9uX2dyb3VwMzwtd2hpY2goc29tX2NsdXN0ZXI9PTMpDQpuZXVyb25fZ3JvdXA0PC13aGljaChzb21fY2x1c3Rlcj09NCkNCg0KI1BhaW50IG5ldXJvbnMgYW5kIHBsb3Qgc29tIHdpdGggZ3JvdXBzIHNlcGFyYXRlZCBieSBoaWVyYXJjaGljYWwgY2x1c3RlcmluZy4NCnBsb3Qoc29tLCB0eXBlPSJjb2RlcyIsY29kZVJlbmRlcmluZz0ibGluZXMiLCBiZ2NvbCA9IHRlcnJhaW4uY29sb3JzKDQpW3NvbV9jbHVzdGVyXSwgbWFpbiA9ICJDbHVzdGVycyIpDQpwbG90KHNvbSwgdHlwZT0ibWFwcGluZyIsY29kZVJlbmRlcmluZz0ibGluZXMiLCBiZ2NvbCA9IHRlcnJhaW4uY29sb3JzKDQpW3NvbV9jbHVzdGVyXSwgbWFpbiA9ICJDbHVzdGVycyIpIA0Ka29ob25lbjo6YWRkLmNsdXN0ZXIuYm91bmRhcmllcyhzb20sIHNvbV9jbHVzdGVyKQ0KDQojUGxvdCB0aGUgd2VpZ2h0IHZlY3RvciBvZiBuZXVyb25zLiANCiNBcmUgdGhleSBzaW1pbGFyIHdpdGggdGhlIHRpbWUgc2VyaWVzIHNldD8NCnYxPC1nZ3Bsb3QyOjphdXRvcGxvdChjb2Rlc1ssbmV1cm9uX2dyb3VwMV0sIGZhY2V0ID0gTlVMTCkgKyB5bGFiKCJ5IikgKyBnZW9tX2xpbmUoc2l6ZSA9IDEpIA0KdjI8LWdncGxvdDI6OmF1dG9wbG90KGNvZGVzWyxuZXVyb25fZ3JvdXAyXSwgZmFjZXQgPSBOVUxMKSArIHlsYWIoInkiKSArIGdlb21fbGluZShzaXplID0gMSkgDQp2MzwtZ2dwbG90Mjo6YXV0b3Bsb3QoY29kZXNbLG5ldXJvbl9ncm91cDNdLCBmYWNldCA9IE5VTEwpICsgeWxhYigieSIpICsgZ2VvbV9saW5lKHNpemUgPSAxKSANCnY0PC1nZ3Bsb3QyOjphdXRvcGxvdChjb2Rlc1ssbmV1cm9uX2dyb3VwNF0sIGZhY2V0ID0gTlVMTCkgKyB5bGFiKCJ5IikgKyBnZW9tX2xpbmUoc2l6ZSA9IDEpIA0KZ3JpZEV4dHJhOjpncmlkLmFycmFuZ2UodjEsIHYyLHYzLCB2NCwgbmNvbD0yLG5yb3c9MiwgdG9wPSJXZWlnaHQgb2YgTmV1cm9ucyIpDQoNCmBgYA0KDQoNCg==
</html>