Hierarchical Clustering
Data Loading
# Read the book data set and keep only its first 100 rows.
book <- read.csv(file = "data/book_clustering.csv")
book <- head(book, n = 100)
# Preview the first five rows of the truncated data set.
head(book, n = 5)
## X Name
## 1 0 10-Day Green Smoothie Cleanse
## 2 1 11/22/63: A Novel
## 3 2 12 Rules for Life: An Antidote to Chaos
## 4 3 1984 (Signet Classics)
## 5 4 5,000 Awesome Facts (About Everything!) (National Geographic Kids)
## User.Rating Reviews Price Year Genre_n
## 1 0.35999008 0.4604527 -0.4708098 2016 0
## 2 -0.08097772 -0.8447859 0.8216092 2011 1
## 3 0.35999008 0.5994404 0.1753997 2018 0
## 4 0.35999008 0.8080499 -0.6554410 2017 1
## 5 0.80095789 -0.3658799 -0.1015472 2019 0
# Keep columns 3 to 7 as the clustering features (User.Rating, Reviews,
# Price, Year and Genre_n according to the preview printed above).
# NOTE(review): in that preview Year is on its raw scale (~2016) while the
# other columns look standardised; it will probably dominate the cosine
# similarity computed later -- confirm that this is intended.
book_cluster <- book[, 3:7]
Cosine similarity, adapted from https://stats.stackexchange.com/questions/31565/compute-a-cosine-dissimilarity-matrix-in-r
# Cosine dissimilarity between rows, followed by complete-linkage clustering.
Matrix <- as.matrix(book_cluster)
# Divide every row by its Euclidean length: the vector of row norms is
# recycled down the columns, so element [i, j] is divided by the norm of row i.
sim <- Matrix / sqrt(rowSums(Matrix^2))
# Dot products between unit-length rows are the pairwise cosine similarities.
sim <- sim %*% t(sim)
# Convert similarity to a dissimilarity and store it as a "dist" object.
cos_dist <- as.dist(1 - sim)
book_cluster.label <- hclust(d = cos_dist, method = "complete")
# Draw the dendrogram with the k-cluster solution outlined, for k = 4..7.

# k = 4 is drawn on the currently active graphics device (the PNG export for
# this one is intentionally left disabled).
#png(filename="book_dendrogram_4.png")
plot(book_cluster.label)
rect.hclust(book_cluster.label, k = 4, border = 2:5)

# k = 5, 6, 7 are each written to their own PNG file. Every png() device is
# closed with dev.off() right after drawing; without that call the devices
# stay open and the files may never be completely written to disk.
for (k in 5:7) {
  png(filename = sprintf("img/book_dendrogram_%d.png", k))
  plot(book_cluster.label)
  rect.hclust(book_cluster.label, k = k, border = 2:5)
  dev.off()
}