Time Series Clustering Using Self-Organizing Maps
Similarity Distances
library(zoo)
library(kohonen)
library(ggplot2)
library(dtw)
# Two time series
X <- c(1,1,1,4,4,4,4,4,1,1)
Y <- c(1,1,4,4,4,4,4,1,1,1)
# Plot
zoo::plot.zoo(cbind(X, Y), plot.type = "multiple", col = c("red", "blue"), lwd = 2, main= "Time Series")
# Euclidean distance
TSdist::DTWDistance(X, Y)
[1] 0
# Euclidean distance
TSdist::EuclideanDistance(X, Y)
[1] 4.242641
# DTW distance
alignment<-dtw::dtw(X, Y, keep.internals = TRUE, step=symmetric1, k=T)
plot(alignment,type="threeway")
plot(dtw::dtw(X, Y, keep.internals = TRUE, step.pattern =rabinerJuangStepPattern(6,"c")),
type="twoway",offset=-2)
dtwPlotTwoWay(dtw(X, Y, step = asymmetricP1, keep = T))
Read the time series from csv file and plot all
#read time series from file
timeseries.zoo <- read.zoo("timeseries.csv", header = TRUE, sep = ",")
#zoo::plot.zoo(z, main = "Set of time series")
#Define a color for each group (4 groups)
colors<-c("#FF0000FF","#FF0000FF","#FF0000FF","#FF0000FF",
"#00FF9FFF","#00FF9FFF","#00FF9FFF","#00FF9FFF",
"#009FFFFF","#009FFFFF","#009FFFFF","#009FFFFF",
"#DFFF00FF","#DFFF00FF","#DFFF00FF","#DFFF00FF")
#Plot all time series
zoo::plot.zoo(timeseries.zoo, plot.type = "multiple", col = colors, lwd = 2, main= "Set of time series")
Self-organizing maps using Kohonen package
# SOM
somgrid=kohonen::somgrid(xdim=4, ydim=4, topo="rectangular", neighbourhood.fct = "gaussian")
som<-kohonen::supersom(t(as.matrix(timeseries.zoo)),somgrid,rlen = 100, alpha=1)
plot(som,"codes",codeRendering="lines")
The time series were allocated in these neurons. Each number represents the number of neuron.
Example: the position 1 of vector represents the time series 1 in a dataset, this time series was allocated in neuron 9.
neuron_timeseries<-som$unit.classif
info_timeSeries<- data.frame(time_series=(rownames(t(timeseries.zoo))),
neuron=as.integer(som$unit.classif))
codes<-zoo::zoo(t(som$codes[[1]]))
zoo::plot.zoo(codes, plot.type = "multiple", col = "black", lwd = 2, main= "Weight of neurons")
#plot all time series (neurons)
ggplot2::autoplot((codes), facet = NULL) + ylab("y") +xlab("time") + geom_line(size = 1) + ggtitle("Weight of neurons")
library(proxy)
library(stats)
## use hierarchical clustering to cluster the codebook vectors.
# Cut dendrogram in 4 groups
hc <- stats::hclust(dist(t(codes)),method = "ward.D2")
#Cuts a tree, e.g., as resulting from hclust, into several groups either
#by specifying the desired number(s) of groups or the cut height(s).
som_cluster <- stats::cutree(hc, 4)
library(dendextend)
#plot dendrogram
dend <- hc%>% as.dendrogram %>%
set("branches_k_color", k = 4) %>% set("branches_lwd", 1.2) %>%
set("labels_cex", 0.8) %>% set("labels_colors", k = 4) %>%
set("leaves_pch", 19) %>% set("leaves_cex", 0.5)
ggd1 <- as.ggdend(dend)
ggplot(ggd1, horiz = FALSE) + ggtitle("Dendrogram")
#Cluster is created by a set of neurons.
cluster<-data.frame(cluster=som_cluster,neuron= 1:dim(codes)[2])
#Timeseries, neuron and cluster
cluster_info<-merge(cluster,info_timeSeries, by="neuron")
cluster_info[order(cluster_info$time_series),]
neuron cluster time_series
1 1 1 t1_1
2 1 1 t1_10
16 5 1 t1_2
17 5 1 t1_3
18 5 1 t1_4
4 1 1 t1_5
19 5 1 t1_6
20 5 1 t1_7
3 1 1 t1_8
5 1 1 t1_9
11 4 2 t2_1
8 3 2 t2_10
9 3 2 t2_2
13 4 2 t2_3
7 3 2 t2_4
14 4 2 t2_5
6 3 2 t2_6
12 4 2 t2_7
10 3 2 t2_8
15 4 2 t2_9
25 12 4 t3_1
29 12 4 t3_10
30 12 4 t3_2
38 16 4 t3_3
28 12 4 t3_4
26 12 4 t3_5
27 12 4 t3_6
40 16 4 t3_7
31 12 4 t3_8
39 16 4 t3_9
24 10 3 t4_1
37 14 3 t4_10
22 9 3 t4_2
32 13 3 t4_3
35 14 3 t4_4
23 10 3 t4_5
21 9 3 t4_6
33 13 3 t4_7
36 14 3 t4_8
34 13 3 t4_9
#get the neurons that corresponds each group
neuron_group1<-which(som_cluster==1)
neuron_group2<-which(som_cluster==2)
neuron_group3<-which(som_cluster==3)
neuron_group4<-which(som_cluster==4)
#Paint neurons and plot som with groups separated by hierarchical clustering.
plot(som, type="codes",codeRendering="lines", bgcol = terrain.colors(4)[som_cluster], main = "Clusters")
plot(som, type="mapping",codeRendering="lines", bgcol = terrain.colors(4)[som_cluster], main = "Clusters")
kohonen::add.cluster.boundaries(som, som_cluster)
#Plot the weight vector of neurons.
#Are they similar with the time series set?
v1<-ggplot2::autoplot(codes[,neuron_group1], facet = NULL) + ylab("y") + geom_line(size = 1)
v2<-ggplot2::autoplot(codes[,neuron_group2], facet = NULL) + ylab("y") + geom_line(size = 1)
v3<-ggplot2::autoplot(codes[,neuron_group3], facet = NULL) + ylab("y") + geom_line(size = 1)
v4<-ggplot2::autoplot(codes[,neuron_group4], facet = NULL) + ylab("y") + geom_line(size = 1)
gridExtra::grid.arrange(v1, v2,v3, v4, ncol=2,nrow=2, top="Weight of Neurons")
LS0tDQp0aXRsZTogIlRpbWUgU2VyaWVzIENsdXN0ZXJpbmcgVXNpbmcgU2VsZi1Pcmdhbml6aW5nIE1hcHMiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KU2ltaWxhcml0eSBEaXN0YW5jZXMNCg0KDQpgYGB7cn0NCmxpYnJhcnkoem9vKQ0KbGlicmFyeShrb2hvbmVuKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShkdHcpDQpgYGANCg0KYGBge3J9DQoNCiMgVHdvIHRpbWUgc2VyaWVzDQpYIDwtIGMoMSwxLDEsNCw0LDQsNCw0LDEsMSkNClkgPC0gYygxLDEsNCw0LDQsNCw0LDEsMSwxKQ0KDQojIFBsb3QNCnpvbzo6cGxvdC56b28oY2JpbmQoWCwgWSksIHBsb3QudHlwZSA9ICJtdWx0aXBsZSIsIGNvbCA9IGMoInJlZCIsICJibHVlIiksIGx3ZCA9IDIsIG1haW49ICJUaW1lIFNlcmllcyIpDQoNCiMgRXVjbGlkZWFuIGRpc3RhbmNlDQpUU2Rpc3Q6OkRUV0Rpc3RhbmNlKFgsIFkpDQoNCiMgRXVjbGlkZWFuIGRpc3RhbmNlDQpUU2Rpc3Q6OkV1Y2xpZGVhbkRpc3RhbmNlKFgsIFkpDQoNCiMgRFRXIGRpc3RhbmNlDQphbGlnbm1lbnQ8LWR0dzo6ZHR3KFgsIFksIGtlZXAuaW50ZXJuYWxzID0gVFJVRSwgc3RlcD1zeW1tZXRyaWMxLCBrPVQpDQoNCnBsb3QoYWxpZ25tZW50LHR5cGU9InRocmVld2F5IikNCg0KcGxvdChkdHc6OmR0dyhYLCBZLCBrZWVwLmludGVybmFscyA9IFRSVUUsIHN0ZXAucGF0dGVybiA9cmFiaW5lckp1YW5nU3RlcFBhdHRlcm4oNiwiYyIpKSwNCiAgICAgdHlwZT0idHdvd2F5IixvZmZzZXQ9LTIpDQoNCmR0d1Bsb3RUd29XYXkoZHR3KFgsIFksIHN0ZXAgPSBhc3ltbWV0cmljUDEsIGtlZXAgPSBUKSkNCg0KYGBgDQoNCg0KUmVhZCB0aGUgdGltZSBzZXJpZXMgZnJvbSBjc3YgZmlsZSBhbmQgcGxvdCBhbGwNCg0KYGBge3J9DQojcmVhZCB0aW1lIHNlcmllcyBmcm9tIGZpbGUNCnRpbWVzZXJpZXMuem9vIDwtIHJlYWQuem9vKCJ0aW1lc2VyaWVzLmNzdiIsIGhlYWRlciA9IFRSVUUsIHNlcCA9ICIsIikNCg0KI3pvbzo6cGxvdC56b28oeiwgbWFpbiA9ICJTZXQgb2YgdGltZSBzZXJpZXMiKQ0KDQojRGVmaW5lIGEgY29sb3IgZm9yIGVhY2ggZ3JvdXAgKDQgZ3JvdXBzKQ0KY29sb3JzPC1jKCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLCIjRkYwMDAwRkYiLA0KICAgICAgICAgICIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLCIjMDBGRjlGRkYiLA0KICAgICAgICAgICIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLCIjMDA5RkZGRkYiLA0KICAgICAgICAgICIjREZGRjAwRkYiLCIjREZGRjAwRkYiLCIjREZGRjAwRkYiLCIjREZGRjAwRkYiKQ0KYGBgDQoNCg0KYGBge3J9DQoNCiNQbG90IGFsbCB0aW1lIHNlcmllcw0Kem9vOjpwbG90Lnpvbyh0aW1lc2VyaWVzLnpvbywgcGxvdC50eXBlID0gIm11bHRpcGxlIiwgY29sID0gY29sb3JzLCBsd2QgPSAyLCBtYWluPSAiU2V0IG9mIHRpbWUgc2VyaWVzIikNCg0KYGBgDQoNClNlbGYtb3JnYW5pemluZyBtYXBzIHVzaW5nIEtvaG9uZW4gcGFja2FnZQ0KYGBge3J9DQojIFNPTQ0Kc29tZ3JpZD1rb2hvbmVuOjpzb21ncmlkKHhkaW09NCwgeWRpbT00LCB0b3BvPSJyZWN0YW5ndWxhciIsIG5laWdoYm91cmhvb2QuZmN0ID0gImdhdXNzaWFuIikNCnNvbTwta29ob25lbjo6c3VwZXJzb20odChhcy5tYXRyaXgodGltZXNlcmllcy56b28pKSxzb21ncmlkLHJsZW4gPSAxMDAsICBhbHBoYT0xKQ0KDQoNCmBgYA0KDQoNCmBgYHtyfQ0KcGxvdChzb20sImNvZGVzIixjb2RlUmVuZGVyaW5nPSJsaW5lcyIpDQpgYGANClRoZSB0aW1lIHNlcmllcyB3ZXJlIGFsbG9jYXRlZCBpbiB0aGVzZSBuZXVyb25zLg0KRWFjaCBudW1iZXIgcmVwcmVzZW50cyB0aGUgbnVtYmVyIG9mIG5ldXJvbi4NCg0KRXhhbXBsZTogdGhlIHBvc2l0aW9uIDEgb2YgdmVjdG9yIHJlcHJlc2VudHMgdGhlIHRpbWUgc2VyaWVzIDEgaW4gYSBkYXRhc2V0LCB0aGlzIHRpbWUgc2VyaWVzDQp3YXMgYWxsb2NhdGVkIGluIG5ldXJvbiA5Lg0KDQpgYGB7cn0NCm5ldXJvbl90aW1lc2VyaWVzPC1zb20kdW5pdC5jbGFzc2lmDQoNCmluZm9fdGltZVNlcmllczwtIGRhdGEuZnJhbWUodGltZV9zZXJpZXM9KHJvd25hbWVzKHQodGltZXNlcmllcy56b28pKSksIA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICBuZXVyb249YXMuaW50ZWdlcihzb20kdW5pdC5jbGFzc2lmKSkNCiAgICAgICAgICAgICAgICAgICAgICAgIA0KY29kZXM8LXpvbzo6em9vKHQoc29tJGNvZGVzW1sxXV0pKQ0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICANCmBgYA0KDQoNCmBgYHtyfQ0KDQp6b286OnBsb3Quem9vKGNvZGVzLCBwbG90LnR5cGUgPSAibXVsdGlwbGUiLCBjb2wgPSAiYmxhY2siLCBsd2QgPSAyLCBtYWluPSAiV2VpZ2h0IG9mIG5ldXJvbnMiKQ0KI3Bsb3QgYWxsIHRpbWUgc2VyaWVzIChuZXVyb25zKQ0KZ2dwbG90Mjo6YXV0b3Bsb3QoKGNvZGVzKSwgZmFjZXQgPSBOVUxMKSArIHlsYWIoInkiKSAreGxhYigidGltZSIpICsgZ2VvbV9saW5lKHNpemUgPSAxKSArIGdndGl0bGUoIldlaWdodCBvZiBuZXVyb25zIikNCg0KDQoNCmxpYnJhcnkocHJveHkpDQpsaWJyYXJ5KHN0YXRzKQ0KDQojIyB1c2UgaGllcmFyY2hpY2FsIGNsdXN0ZXJpbmcgdG8gY2x1c3RlciB0aGUgY29kZWJvb2sgdmVjdG9ycy4NCiMgQ3V0IGRlbmRyb2dyYW0gaW4gNCBncm91cHMNCmhjIDwtIHN0YXRzOjpoY2x1c3QoZGlzdCh0KGNvZGVzKSksbWV0aG9kID0gIndhcmQuRDIiKQ0KDQojQ3V0cyBhIHRyZWUsIGUuZy4sIGFzIHJlc3VsdGluZyBmcm9tIGhjbHVzdCwgaW50byBzZXZlcmFsIGdyb3VwcyBlaXRoZXIgDQojYnkgc3BlY2lmeWluZyB0aGUgZGVzaXJlZCBudW1iZXIocykgb2YgZ3JvdXBzIG9yIHRoZSBjdXQgaGVpZ2h0KHMpLg0Kc29tX2NsdXN0ZXIgPC0gc3RhdHM6OmN1dHJlZShoYywgNCkNCg0KbGlicmFyeShkZW5kZXh0ZW5kKQ0KI3Bsb3QgZGVuZHJvZ3JhbQ0KZGVuZCA8LSBoYyU+JSBhcy5kZW5kcm9ncmFtICU+JQ0KICBzZXQoImJyYW5jaGVzX2tfY29sb3IiLCBrID0gNCkgJT4lIHNldCgiYnJhbmNoZXNfbHdkIiwgMS4yKSAlPiUNCiAgc2V0KCJsYWJlbHNfY2V4IiwgMC44KSAlPiUgc2V0KCJsYWJlbHNfY29sb3JzIiwgayA9IDQpICU+JQ0KICBzZXQoImxlYXZlc19wY2giLCAxOSkgJT4lIHNldCgibGVhdmVzX2NleCIsIDAuNSkgDQpnZ2QxIDwtIGFzLmdnZGVuZChkZW5kKQ0KZ2dwbG90KGdnZDEsIGhvcml6ID0gRkFMU0UpICsgZ2d0aXRsZSgiRGVuZHJvZ3JhbSIpDQoNCiNDbHVzdGVyIGlzIGNyZWF0ZWQgYnkgYSBzZXQgb2YgbmV1cm9ucy4gDQpjbHVzdGVyPC1kYXRhLmZyYW1lKGNsdXN0ZXI9c29tX2NsdXN0ZXIsbmV1cm9uPSAxOmRpbShjb2RlcylbMl0pDQoNCiNUaW1lc2VyaWVzLCBuZXVyb24gYW5kIGNsdXN0ZXINCmNsdXN0ZXJfaW5mbzwtbWVyZ2UoY2x1c3RlcixpbmZvX3RpbWVTZXJpZXMsIGJ5PSJuZXVyb24iKQ0KY2x1c3Rlcl9pbmZvW29yZGVyKGNsdXN0ZXJfaW5mbyR0aW1lX3NlcmllcyksXQ0KDQoNCiNnZXQgdGhlIG5ldXJvbnMgdGhhdCBjb3JyZXNwb25kcyBlYWNoIGdyb3VwDQpuZXVyb25fZ3JvdXAxPC13aGljaChzb21fY2x1c3Rlcj09MSkNCm5ldXJvbl9ncm91cDI8LXdoaWNoKHNvbV9jbHVzdGVyPT0yKQ0KbmV1cm9uX2dyb3VwMzwtd2hpY2goc29tX2NsdXN0ZXI9PTMpDQpuZXVyb25fZ3JvdXA0PC13aGljaChzb21fY2x1c3Rlcj09NCkNCg0KI1BhaW50IG5ldXJvbnMgYW5kIHBsb3Qgc29tIHdpdGggZ3JvdXBzIHNlcGFyYXRlZCBieSBoaWVyYXJjaGljYWwgY2x1c3RlcmluZy4NCnBsb3Qoc29tLCB0eXBlPSJjb2RlcyIsY29kZVJlbmRlcmluZz0ibGluZXMiLCBiZ2NvbCA9IHRlcnJhaW4uY29sb3JzKDQpW3NvbV9jbHVzdGVyXSwgbWFpbiA9ICJDbHVzdGVycyIpDQpwbG90KHNvbSwgdHlwZT0ibWFwcGluZyIsY29kZVJlbmRlcmluZz0ibGluZXMiLCBiZ2NvbCA9IHRlcnJhaW4uY29sb3JzKDQpW3NvbV9jbHVzdGVyXSwgbWFpbiA9ICJDbHVzdGVycyIpIA0Ka29ob25lbjo6YWRkLmNsdXN0ZXIuYm91bmRhcmllcyhzb20sIHNvbV9jbHVzdGVyKQ0KDQojUGxvdCB0aGUgd2VpZ2h0IHZlY3RvciBvZiBuZXVyb25zLiANCiNBcmUgdGhleSBzaW1pbGFyIHdpdGggdGhlIHRpbWUgc2VyaWVzIHNldD8NCnYxPC1nZ3Bsb3QyOjphdXRvcGxvdChjb2Rlc1ssbmV1cm9uX2dyb3VwMV0sIGZhY2V0ID0gTlVMTCkgKyB5bGFiKCJ5IikgKyBnZW9tX2xpbmUoc2l6ZSA9IDEpIA0KdjI8LWdncGxvdDI6OmF1dG9wbG90KGNvZGVzWyxuZXVyb25fZ3JvdXAyXSwgZmFjZXQgPSBOVUxMKSArIHlsYWIoInkiKSArIGdlb21fbGluZShzaXplID0gMSkgDQp2MzwtZ2dwbG90Mjo6YXV0b3Bsb3QoY29kZXNbLG5ldXJvbl9ncm91cDNdLCBmYWNldCA9IE5VTEwpICsgeWxhYigieSIpICsgZ2VvbV9saW5lKHNpemUgPSAxKSANCnY0PC1nZ3Bsb3QyOjphdXRvcGxvdChjb2Rlc1ssbmV1cm9uX2dyb3VwNF0sIGZhY2V0ID0gTlVMTCkgKyB5bGFiKCJ5IikgKyBnZW9tX2xpbmUoc2l6ZSA9IDEpIA0KZ3JpZEV4dHJhOjpncmlkLmFycmFuZ2UodjEsIHYyLHYzLCB2NCwgbmNvbD0yLG5yb3c9MiwgdG9wPSJXZWlnaHQgb2YgTmV1cm9ucyIpDQoNCmBgYA0KDQoNCg==