####Load the packages#########
# Install a dependency only when it is not already available, so re-running
# the script does not re-download packages or require network access.
for (pkg in c("factoextra", "cluster")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(factoextra)
library(cluster)
#### Load and prepare the data ####
# Start from the built-in USArrests data, drop rows containing missing
# values, then standardise every column (mean 0, standard deviation 1).
df <- scale(na.omit(USArrests))

#### Preview the first six rows of the scaled data ####
head(df)
############output of head(df)###############
# Output:
# > head(df)
#                Murder   Assault   UrbanPop         Rape
# Alabama    1.24256408 0.7828393 -0.5209066 -0.003416473
# Alaska     0.50786248 1.1068225 -1.2117642  2.484202941
# Arizona    0.07163341 1.4788032  0.9989801  1.042878388
# Arkansas   0.23234938 0.2308680 -1.0735927 -0.184916602
# California 0.27826823 1.2628144  1.7589234  2.067820292
# Colorado   0.02571456 0.3988593  0.8608085  1.864967207
#### Define the linkage methods to compare ####
# Each element is named after itself so that results computed over `m`
# are labelled with the linkage method they belong to.
m <- setNames(nm = c("average", "single", "complete", "ward"))
#### Function to compute the agglomerative coefficient ####
# Runs agnes() on `data` with the given linkage method and returns the `ac`
# component of the fit.
#
# Arguments:
#   x    - linkage method name passed to agnes() as `method`
#          (e.g. "average", "single", "complete", "ward").
#   data - data to cluster; defaults to the script-level `df`, so existing
#          calls of the form ac(x) behave exactly as before.
#
# Returns: the agglomerative coefficient of the resulting clustering.
ac <- function(x, data = df) {
  agnes(data, method = x)$ac
}
#### Calculate the agglomerative coefficient for each linkage method ####
# vapply() requires ac() to return exactly one numeric value per method, so an
# unexpected result raises an error instead of silently changing the output
# type as sapply() would. The result is a numeric vector named by method.
vapply(m, ac, numeric(1))
# Output:
# > sapply(m, ac)
#   average    single  complete      ward
# 0.7379371 0.6276128 0.8531583 0.9346210
#### Hierarchical clustering with Ward's minimum variance linkage ####
clust <- agnes(x = df, method = "ward")

#### Draw the dendrogram of the fitted clustering ####
# hang = -1 and cex = 0.6 are forwarded to the underlying tree plot.
pltree(
  x = clust,
  main = "Dendrogram",
  hang = -1,
  cex = 0.6
)
