Hierarchical Cluster Analysis in RStudio

#### Load the packages ####
# Install each package only when it is not already available, so re-running
# the script does not download and reinstall it every time.
if (!requireNamespace("factoextra", quietly = TRUE)) {
  install.packages("factoextra")
}
library(factoextra)

if (!requireNamespace("cluster", quietly = TRUE)) {
  install.packages("cluster")
}
library(cluster)

#### Prepare the data ####
# Start from the built-in USArrests data, drop any rows containing missing
# values, then standardise every column to mean 0 and standard deviation 1.
df <- scale(na.omit(USArrests))

#### Preview the first six rows of the scaled data ####
head(df, n = 6L)

############output of head(df): first six rows of the scaled dataset###############

Output:

> head(df)
               Murder   Assault   UrbanPop         Rape
Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
Arizona    0.07163341 1.4788032  0.9989801  1.042878388
Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
California 0.27826823 1.2628144  1.7589234  2.067820292
Colorado   0.02571456 0.3988593  0.8608085  1.864967207
#### Linkage methods to compare ####
# Each entry is named after itself, so anything computed per method is
# labelled with the method name.
m <- setNames(nm = c("average", "single", "complete", "ward"))

############## function to compute agglomerative coefficient ##############
# Fits agnes() (from the cluster package) with a single linkage method and
# returns the `ac` component of the fitted object.
#
# x:    linkage method name, passed to agnes() as `method`
#       (for example "average", "single", "complete" or "ward").
# data: object to cluster, passed to agnes() as its first argument.
#       Defaults to the script-level `df`, so existing one-argument calls
#       such as sapply(m, ac) behave exactly as before.
ac <- function(x, data = df) {
  agnes(data, method = x)$ac
}

######### calculate agglomerative coefficient for each clustering linkage method ##
# vapply() requires every call to ac() to return exactly one numeric value, so
# the result is always a numeric vector named after the linkage methods in `m`;
# a malformed result raises an error instead of silently changing the shape.
vapply(m, ac, numeric(1))

Output:

> sapply(m, ac)
  average    single  complete      ward 
0.7379371 0.6276128 0.8531583 0.9346210 
#### Hierarchical clustering with Ward's minimum variance linkage ####
clust <- agnes(df, method = "ward")

#### Draw the dendrogram of the fitted clustering ####
pltree(
  clust,
  main = "Dendrogram",
  hang = -1,
  cex = 0.6
)

Leave a Reply

Your email address will not be published.