hierarchical.knit

Hierarchical Clustering

Data Loading

# Read the book dataset and keep only its first 100 rows.
book <- head(read.csv("data/book_clustering.csv"), n = 100)
# Preview the first five rows of the truncated data.
head(book, n = 5)
##   X                                                               Name
## 1 0                                      10-Day Green Smoothie Cleanse
## 2 1                                                  11/22/63: A Novel
## 3 2                            12 Rules for Life: An Antidote to Chaos
## 4 3                                             1984 (Signet Classics)
## 5 4 5,000 Awesome Facts (About Everything!) (National Geographic Kids)
##   User.Rating    Reviews      Price Year Genre_n
## 1  0.35999008  0.4604527 -0.4708098 2016       0
## 2 -0.08097772 -0.8447859  0.8216092 2011       1
## 3  0.35999008  0.5994404  0.1753997 2018       0
## 4  0.35999008  0.8080499 -0.6554410 2017       1
## 5  0.80095789 -0.3658799 -0.1015472 2019       0
# Pick the clustering features by name instead of by position (3:7), so a
# reordered or extended CSV fails loudly rather than silently feeding the
# wrong columns into the clustering.
# NOTE(review): in the preview above, Year is on its raw scale (e.g. 2016)
# while the other features are close to zero, so it may dominate the cosine
# similarity -- confirm that including it unscaled is intended.
book_cluster <- book[c("User.Rating", "Reviews", "Price", "Year", "Genre_n")]

Cosine similarity, adapted from: https://stats.stackexchange.com/questions/31565/compute-a-cosine-dissimilarity-matrix-in-r

# Cosine distance between books: divide every feature row by its Euclidean
# norm, so the product of the scaled matrix with its own transpose holds the
# pairwise cosine similarities.
Matrix <- as.matrix(book_cluster)
row_norms <- sqrt(rowSums(Matrix^2))
unit_rows <- Matrix / row_norms
sim <- unit_rows %*% t(unit_rows)

# Turn similarity into a dissimilarity (1 - cosine) and run complete-linkage
# hierarchical clustering on it.
cos_dist <- as.dist(1 - sim)
book_cluster.label <- hclust(d = cos_dist, method = "complete")

# File export for k = 4 is disabled; the dendrogram is drawn on the active
# device with the four clusters outlined in colours 2 to 5.
# png(filename="book_dendrogram_4.png")
plot(book_cluster.label)
rect.hclust(tree = book_cluster.label, k = 4, border = 2:5)

# Save the dendrogram with k = 5, 6 and 7 outlined clusters, one PNG per k.
# Each png() device is closed with dev.off() right after drawing: a PNG file
# is only guaranteed to be complete once its device is closed, and an open
# device would otherwise keep capturing every later plot.
for (k in 5:7) {
  png(filename = sprintf("img/book_dendrogram_%d.png", k))
  plot(book_cluster.label)
  # Only four border colours are supplied; presumably they are recycled when
  # k > 4 -- TODO confirm the rectangles are coloured as intended.
  rect.hclust(book_cluster.label, k = k, border = 2:5)
  dev.off()
}
Author

Bofan Zheng

Posted on

2023-02-27

Updated on

2023-02-27

Licensed under

NN