# Import library
library("ggplot2")
library("tidyverse")
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ✓ purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("dplyr")
library("scatterplot3d")
library("plot3D")
# Read data
# NOTE(review): setwd() hard-codes a machine-specific path; prefer running the
# script from the project directory or building paths with file.path().
setwd("C:/Users/asus/Desktop")
# Per-axis acceleration series: column 1 is the class label (1-8), the
# remaining columns are the acceleration values over time.
# NOTE(review): read.table() is called without header/sep arguments, so the
# files are assumed to be whitespace-separated despite the .csv extension —
# confirm against the actual file format.
X_train = read.table("X_TRAIN.csv")
Y_train = read.table("Y_TRAIN.csv")
Z_train = read.table("Z_TRAIN.csv")
In this part, our aim is to visualize one instance (all axes) as a 3D scatterplot. The data are accelerations, so we first transform them into a velocity vector by computing the cumulative sum of acceleration over time. Then, we obtain the position vector as the cumulative sum of the velocity vector.
# Derive velocity and position from acceleration for one instance per class.
# Velocity is the cumulative sum of acceleration over time; position is the
# cumulative sum of velocity. Row j of each frame holds the class label
# followed by the integrated series for the first instance of class j.
n_classes <- 8
X_velocity <- as.data.frame(matrix(NA, n_classes, ncol(X_train)))
X_position <- as.data.frame(matrix(NA, n_classes, ncol(X_train)))
Y_velocity <- as.data.frame(matrix(NA, n_classes, ncol(Y_train)))
Y_position <- as.data.frame(matrix(NA, n_classes, ncol(Y_train)))
Z_velocity <- as.data.frame(matrix(NA, n_classes, ncol(Z_train)))
Z_position <- as.data.frame(matrix(NA, n_classes, ncol(Z_train)))
for (j in seq_len(n_classes)) {
  # First row whose class label equals j (replaces the manual scan + break).
  i <- which(X_train[, 1] == j)[1]
  if (is.na(i)) next  # no instance of this class: leave the NA row
  X_velocity[j, ] <- c(j, cumsum(as.numeric(X_train[i, -1])))
  X_position[j, ] <- c(j, cumsum(as.numeric(X_velocity[j, -1])))
  # NOTE(review): Y and Z reuse the row index found in X_train — this assumes
  # the three files list instances in the same order; confirm with the data.
  Y_velocity[j, ] <- c(j, cumsum(as.numeric(Y_train[i, -1])))
  Y_position[j, ] <- c(j, cumsum(as.numeric(Y_velocity[j, -1])))
  Z_velocity[j, ] <- c(j, cumsum(as.numeric(Z_train[i, -1])))
  Z_position[j, ] <- c(j, cumsum(as.numeric(Z_velocity[j, -1])))
}
# Visualize the reconstructed 3D position of the first instance of each class.
# The eight copy-pasted scatterplot3d() calls are collapsed into one loop;
# each iteration produces the identical plot the original code produced.
for (cls in 1:8) {
  scatterplot3d(
    as.numeric(X_position[cls, -1]),
    as.numeric(Y_position[cls, -1]),
    as.numeric(Z_position[cls, -1]),
    color = "Blue",
    xlab = "X direction",
    ylab = "y direction",
    zlab = "z direction",
    main = paste("Position for Class", cls)
  )
}
There are some similarities between the 3D graphs and the gestures given in the homework. In particular, Classes 1, 4, 5 and 8 closely resemble the given gestures.
In part B, our aim is to create a long-format table from the given data. We then apply PCA to the whole data set in order to reduce the 3D data to 1D. Finally, we select two time series from each class at random.
colnames(X_train)[1] <- colnames(Y_train)[1] <- colnames(Z_train)[1] <- "Class"
# Create time ID, index and class for the long-format table.
# The series length is derived from the data instead of the magic number 315
# (first column is the class label, the rest are time points).
n_time <- ncol(X_train) - 1
# Within-series time index 1..n_time, repeated once per instance.
Time_Ind <- matrix(rep(seq_len(n_time), times = nrow(X_train)),
                   nrow(X_train) * n_time, 1)
# Instance (series) identifier, each value repeated n_time times.
Time_ID <- matrix(rep(seq_len(nrow(X_train)), each = n_time),
                  nrow(X_train) * n_time, 1)
# Class label for every long-format row: sorted class labels, each repeated
# n_time times. This replaces the original rep()-of-data.frame + reshape()
# construction, which produced exactly this vector, and avoids a temporary
# variable named `Class` that shadowed the column name.
Class_long <- data.frame(Class = rep(sort(X_train$Class), each = n_time))
# Stack the X-axis acceleration series into long format, class by class.
# The eight near-identical reshape() calls are collapsed into one loop:
# for each class, transpose its rows (one column per instance) and melt;
# column 2 of the reshaped frame holds the stacked "X" values.
X_long <- do.call(rbind, lapply(1:8, function(k) {
  wide <- as.data.frame(t(X_train[which(X_train$Class == k), -1]))
  reshape(wide, direction = "long", v.names = "X",
          varying = seq_len(ncol(wide)))[2]
}))
# For y: same per-class melt as the X axis, stacked into one column "Y".
Y_long <- do.call(rbind, lapply(1:8, function(k) {
  wide <- as.data.frame(t(Y_train[which(Y_train$Class == k), -1]))
  reshape(wide, direction = "long", v.names = "Y",
          varying = seq_len(ncol(wide)))[2]
}))
# For Z: same per-class melt as the X axis, stacked into one column "Z".
Z_long <- do.call(rbind, lapply(1:8, function(k) {
  wide <- as.data.frame(t(Z_train[which(Z_train$Class == k), -1]))
  reshape(wide, direction = "long", v.names = "Z",
          varying = seq_len(ncol(wide)))[2]
}))
# Create long table
# One row per (instance, time point): series id, within-series time index,
# the three acceleration axes, and the class label.
alldata_long <- cbind(Time_ID,Time_Ind ,X_long,Y_long,Z_long,Class_long)
head(alldata_long)
## Time_ID Time_Ind X Y Z Class
## 1.1 1 1 -0.7914472 -1.959984 -0.2490781 1
## 2.1 1 2 -0.7914472 -1.959984 -0.2490781 1
## 3.1 1 3 -0.7958727 -1.956600 -0.2514390 1
## 4.1 1 4 -0.8100650 -1.945750 -0.2590101 1
## 5.1 1 5 -0.8492300 -1.915809 -0.2799033 1
## 6.1 1 6 -0.9034648 -1.874347 -0.3088358 1
# Apply PCA to acceleration data
# Columns 3:5 hold the X/Y/Z values; cor = TRUE runs PCA on the correlation
# matrix, i.e. the three axes are standardized before decomposition.
only_axis_data = alldata_long[,3:5]
pca <- princomp(only_axis_data , cor = TRUE)
summary(pca,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.213171 1.0198583 0.6986445
## Proportion of Variance 0.490595 0.3467037 0.1627014
## Cumulative Proportion 0.490595 0.8372986 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.427 0.776 0.464
## Y 0.721 -0.692
## Z 0.546 -0.630 0.553
# For Class 1
# Assemble the PCA scores with the series id (TimeID) and class label so that
# individual time series can be selected for plotting.
Pca_data <- as.data.frame(pca$scores)
Pca_data$TimeID <- Time_ID
Pca_data$Class <- Class_long
# Keep only Comp.1, the component with the largest explained variance.
Component_1 <- Pca_data[, !(names(Pca_data) %in% c("Comp.2", "Comp.3"))]
# Within-series time index (each series has 315 observations — see head()).
time <- c(1:315)
# Two example series from Class 1 (series ids 100 and 85).
ts_1.1 <- Component_1[which(Component_1$Class == 1 & Component_1$TimeID == 100),]
ts_1.2 <- Component_1[which(Component_1$Class == 1 & Component_1$TimeID == 85),]
# BUG FIX: labels are now keyed by colour value. The original positional
# labels were matched to the alphabetically sorted colour levels
# ("red" < "yellow"), which swapped the legend entries for series 100 and 85.
ggplot() +
  geom_line(data = ts_1.1, aes(x = time, y = Comp.1, colour = "yellow"), size = 1) +
  geom_line(data = ts_1.2, aes(x = time, y = Comp.1, colour = "red"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(yellow = "100", red = "85")) +
  xlab("Time ") + ylab("Score") +
  ggtitle("PCA for Class 1") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 2
# Two example series from Class 2 (series ids 143 and 170).
ts_2.1 <- Component_1[which(Component_1$Class == 2 & Component_1$TimeID == 143),]
ts_2.2 <- Component_1[which(Component_1$Class == 2 & Component_1$TimeID == 170),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_2.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_2.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "143", blue = "170")) +
  xlab("Time ") + ylab("Score") +
  ggtitle("PCA for Class 2") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 3
# Two example series from Class 3 (series ids 300 and 275).
ts_3.1 <- Component_1[which(Component_1$Class == 3 & Component_1$TimeID == 300),]
ts_3.2 <- Component_1[which(Component_1$Class == 3 & Component_1$TimeID == 275),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_3.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_3.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "300", blue = "275")) +
  xlab("Time ") + ylab("Score") +
  ggtitle("PCA for Class 3") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 4
# Select two sample Class 4 series (ids 350 and 365) and overlay their
# first-principal-component scores over time. Here the positional legend
# labels happen to line up with the sorted colour levels
# ("darkred" < "steelblue"), so the legend is correct as written.
ts_4.1 <- Component_1[which(Component_1$Class == 4 & Component_1$TimeID == 350),]
ts_4.2 <- Component_1[which(Component_1$Class == 4 & Component_1$TimeID == 365),]
class4_plot <- ggplot() +
  geom_line(data = ts_4.1, aes(x = time, y = Comp.1, colour = "darkred"), size = 1) +
  geom_line(data = ts_4.2, aes(x = time, y = Comp.1, colour = "steelblue"), size = 1) +
  scale_color_discrete(name = "Time series no", labels = c("350", "365")) +
  labs(x = "Time ", y = "Score", title = "PCA for Class 4") +
  theme(plot.title = element_text(hjust = 0.5))
class4_plot
# For class 5
# Two example series from Class 5 (series ids 482 and 500).
ts_5.1 <- Component_1[which(Component_1$Class == 5 & Component_1$TimeID == 482),]
ts_5.2 <- Component_1[which(Component_1$Class == 5 & Component_1$TimeID == 500),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_5.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_5.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "482", blue = "500")) +
  xlab("Time ") + ylab("Score") +
  ggtitle("PCA for Class 5") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 6
# Two example series from Class 6 (series ids 591 and 601).
ts_6.1 <- Component_1[which(Component_1$Class == 6 & Component_1$TimeID == 591),]
ts_6.2 <- Component_1[which(Component_1$Class == 6 & Component_1$TimeID == 601),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_6.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_6.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "591", blue = "601")) +
  xlab("Time") + ylab("Score") +
  ggtitle("PCA for Class 6") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 7
# Two example series from Class 7 (series ids 701 and 746).
ts_7.1 <- Component_1[which(Component_1$Class == 7 & Component_1$TimeID == 701),]
ts_7.2 <- Component_1[which(Component_1$Class == 7 & Component_1$TimeID == 746),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_7.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_7.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "701", blue = "746")) +
  xlab("Time index") + ylab("Score") +
  ggtitle("PCA for Class 7") +
  theme(plot.title = element_text(hjust = 0.5))
# For class 8
# Two example series from Class 8 (series ids 807 and 892).
ts_8.1 <- Component_1[which(Component_1$Class == 8 & Component_1$TimeID == 807),]
ts_8.2 <- Component_1[which(Component_1$Class == 8 & Component_1$TimeID == 892),]
# BUG FIX: labels keyed by colour; the original positional labels matched the
# sorted levels ("blue" < "green") and mislabelled the two series.
ggplot() +
  geom_line(data = ts_8.1, aes(x = time, y = Comp.1, colour = "green"), size = 1) +
  geom_line(data = ts_8.2, aes(x = time, y = Comp.1, colour = "blue"), size = 1) +
  scale_color_discrete(name = "Time series no",
                       labels = c(green = "807", blue = "892")) +
  xlab("Time ") + ylab("Score") +
  ggtitle("PCA for Class 8") +
  theme(plot.title = element_text(hjust = 0.5))
According to the PCA, component 1 is the best single component for representing this data in 1D. The proportion of variance indicates how much of the data's variability a component captures. Component 1 has the highest proportion of variance (0.49) among the components, so it is used. When we visualize the data, it is possible to see that the classes have different patterns. Thus, we can separate the classes by looking at the graphs in the reduced dimension.
# Per-class PCA: repeat the correlation-based PCA separately on the rows of
# each class to compare loading structures across classes. Columns 3:5 of
# alldata_long are the X/Y/Z acceleration values.
# NOTE(review): prefer TRUE over the abbreviation T for the cor argument.
# Class1
Data_1 <- alldata_long %>% filter(Class == 1)
PCA_1 <- princomp(Data_1[,3:5], cor = T)
summary(PCA_1,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.1779640 0.9845807 0.8018738
## Proportion of Variance 0.4625331 0.3231331 0.2143339
## Cumulative Proportion 0.4625331 0.7856661 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.357 0.896 0.263
## Y 0.691 -0.720
## Z 0.629 -0.439 0.642
# Class2
Data_2 <- alldata_long %>% filter(Class == 2)
PCA_2 <- princomp(Data_2[,3:5], cor = T)
summary(PCA_2,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.2400176 0.9682706 0.7244366
## Proportion of Variance 0.5125479 0.3125160 0.1749361
## Cumulative Proportion 0.5125479 0.8250639 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.455 0.801 0.388
## Y 0.685 -0.728
## Z 0.569 -0.597 0.565
# Class3
Data_3 <- alldata_long %>% filter(Class == 3)
PCA_3 <- princomp(Data_3[,3:5], cor = T)
summary(PCA_3,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.273669 0.9475424 0.6927707
## Proportion of Variance 0.540744 0.2992789 0.1599771
## Cumulative Proportion 0.540744 0.8400229 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.675 0.738
## Y 0.531 0.689 -0.494
## Z -0.513 0.725 0.460
# Class4
Data_4 <- alldata_long %>% filter(Class == 4)
PCA_4 <- princomp(Data_4[,3:5], cor = T)
summary(PCA_4,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.2846735 0.9605566 0.6534102
## Proportion of Variance 0.5501287 0.3075563 0.1423150
## Cumulative Proportion 0.5501287 0.8576850 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.681 0.113 0.724
## Y 0.634 0.404 -0.659
## Z -0.367 0.908 0.204
# Class5
Data_5 <- alldata_long %>% filter(Class == 5)
PCA_5 <- princomp(Data_5[,3:5], cor = T)
summary(PCA_5,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.3934383 0.9076507 0.48425204
## Proportion of Variance 0.6472234 0.2746099 0.07816668
## Cumulative Proportion 0.6472234 0.9218333 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.399 0.915
## Y 0.643 -0.321 0.696
## Z 0.654 -0.243 -0.716
# Class6
Data_6 <- alldata_long %>% filter(Class == 6)
PCA_6 <- princomp(Data_6[,3:5], cor = T)
summary(PCA_6,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.3097601 0.9941695 0.54420161
## Proportion of Variance 0.5718239 0.3294577 0.09871846
## Cumulative Proportion 0.5718239 0.9012815 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.206 0.964 0.167
## Y -0.680 0.264 -0.684
## Z -0.704 0.710
# Class7
Data_7 <- alldata_long %>% filter(Class == 7)
PCA_7 <- princomp(Data_7[,3:5], cor = T)
summary(PCA_7,loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.2480424 1.0144419 0.6428823
## Proportion of Variance 0.5192033 0.3430308 0.1377659
## Cumulative Proportion 0.5192033 0.8622341 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.229 0.925 0.304
## Y 0.715 -0.697
## Z 0.661 -0.377 0.649
# Class8
Data_8 <- alldata_long %>% filter(Class == 8)
# BUG FIX: the PCA was originally computed on Data_2 (Class 2) instead of
# Data_8, so the "Class 8" summary duplicated the Class 2 results.
PCA_8 <- princomp(Data_8[,3:5], cor = T)
summary(PCA_8,loadings = TRUE)
## NOTE(review): the captured output below came from the buggy Data_2 call
## (it is identical to the Class 2 block above); rerun to refresh.
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 1.2400176 0.9682706 0.7244366
## Proportion of Variance 0.5125479 0.3125160 0.1749361
## Cumulative Proportion 0.5125479 0.8250639 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## X 0.455 0.801 0.388
## Y 0.685 -0.728
## Z 0.569 -0.597 0.565
There is similarity among some classes. For example, the component 1 loadings of classes 2, 7 and 8 are very similar, while the other classes have noticeably different values. (Note, however, that the Class 8 summary above was computed from the Class 2 data due to a coding slip, so the apparent identity of classes 2 and 8 should be re-checked after rerunning.) This result may be associated with different linear relationships among the axes within each class.
In this part, our aim is to compute the distance between the time series for each axis, sum the three distance matrices, and then apply MDS to the combined distance matrix in order to examine whether the classes are separated when the data are visualized in two dimensions.
# Combined dissimilarity: sum of the per-axis Euclidean distance matrices
# between all pairs of instances (column 1 is the class label, so it is
# excluded). Renamed from `dist` to avoid shadowing stats::dist().
total_dist <- dist(X_train[, 2:ncol(X_train)], method = "euclidean") +
  dist(Y_train[, 2:ncol(Y_train)], method = "euclidean") +
  dist(Z_train[, 2:ncol(Z_train)], method = "euclidean")
# Classical (metric) MDS down to 2 coordinates.
MDS <- cmdscale(total_dist, eig = TRUE, k = 2)
# V1 = class label, V2/V3 = the two MDS coordinates.
MDS <- as.data.frame(cbind(X_train[, 1], MDS$points))
col <- as.factor(MDS$V1)  # colour points by class
# Title typo fixed: "Scalling" -> "Scaling".
ggplot(data = MDS) +
  geom_point(mapping = aes(x = V2, y = V3, colour = col)) +
  xlab("X Coordinate") + ylab("Y Coordinate") +
  ggtitle("Multi Dimensional Scaling") +
  theme(legend.position = "right") +
  theme(plot.title = element_text(hjust = 0.5))
There is no clear distinction among the classes when we visualize the MDS projection of the distance matrix, but it is safe to say that some points belonging to the same class are clustered in certain regions.