par(mar=c(4.5,4.5,1,1)) plot(faithful$eruptions,faithful$waiting,cex=1,pch=19,xlab="Durata",ylab="Attesa",cex.lab=2) par(mfrow=c(1,2),mar=c(4.5,4.5,1,1)) hist(faithful$eruptions,xlab="Durata",ylab="",main="",cex.lab=2,col="gray",breaks = 40) hist(faithful$waiting,xlab="Attesa",ylab="",main="",cex.lab=2,col="gray",breaks = 40) clus <- kmeans(faithful,2) table(clus$cluster) par(mfrow=c(1,1),mar=c(4.5,4.5,1,1)) plot(faithful$eruptions,faithful$waiting,cex=1,pch=19,xlab="Durata",ylab="Attesa",cex.lab=2,col=clus$cluster+1) clus <- kmeans(scale(faithful),2) table(clus$cluster) par(mfrow=c(1,1),mar=c(4.5,4.5,1,1)) plot(faithful$eruptions,faithful$waiting,cex=1,pch=19,xlab="Durata",ylab="Attesa",cex.lab=2,col=clus$cluster+1) library(cluster) par(mar=c(5,5,1,1)) clusplot(faithful, clus$cluster, color=TRUE, shade=TRUE, labels=2, main="",lines=0) clusplot(pam(faithful,2), color=TRUE, shade=TRUE, labels=2, main="",lines=0) load("/Users/utente/Documents/prova r pkg/lucidi del corso/datibi16.RData") datibi16tr <- (datibi16^(1/5)-1)*5 pairs(datibi16tr[,c(2,6,11)],cex=0.5,pch=19) clbi16 <- clara(datibi16tr[,c(2,6,11)],k=4,samples = 50, sampsize = 500) table(clbi16$clustering) pairs(datibi16tr[,c(2,6,11)],cex=0.5,pch=19,col=c("red","yellow","orange","gray","blue")[clbi16$clustering]) tt <- table(clbi16$clustering,datibi16$NASCAREA) tt / rowSums(tt) load("~/Documents/prova r pkg/lucidi del corso/esoplan.RData") massa <- (dati$MASS^(1/30)-1)*30 perio <- (dati$PERIOD^(1/30)-1)*30 x <- scale(cbind(massa,perio),center=T,scale=T) x <- x[complete.cases(x),] par(mar=c(4.5,4.5,1,1)) plot(x,cex=0.5,pch=19,xlab="Massa",ylab="Periodo",cex.lab=2) pampl <- pam(x, 3) plot(x,cex=0.5,pch=19,xlab="Massa",ylab="Periodo",cex.lab=2,col=c("red","yellow","orange")[pampl$clustering]) pairs(USArrests,cex=0.5,pch=19) prusa <- prcomp(USArrests,4,scale=T) Distanza <- dist(prusa$x, method = "euclidean") fit <- hclust(Distanza, method="ward.D") par(mar=c(5,5,1,1)) plot(fit,main="",cex.lab=1.5) groups <- cutree(fit, k=4) rect.hclust(fit, k=4, border="red") library(usmap) library(ggplot2) newdat <- as.data.frame(statepop) newdat$clust <- rep(0,51) newdat$clust[1:8] <- groups[1:8] newdat$clust[10:51] <- groups[9:50] plot_usmap(data = newdat, values = "clust", lines = "red") + scale_fill_continuous(low = "yellow", high = "red", name = "Gruppi Crimine", label = scales::comma) + theme(legend.position = "right") library(factoextra) UScale <- scale(USArrests) par(mar=c(4.5,4.5,1,1)) fviz_nbclust(UScale, kmeans, method = "wss")+geom_vline(xintercept = 4, linetype = 2)+labs(subtitle = "Elbow method") fviz_nbclust(UScale, kmeans, method = "silhouette")+labs(subtitle = "Silhouette method") set.seed(12345) fviz_nbclust(UScale, kmeans, nstart = 25, method = "gap_stat", nboot = 50)+labs(subtitle = "Gap statistic method") library(NbClust) nbus <- NbClust(data = UScale, distance = "euclidean", min.nc = 2, max.nc = 15, method = "kmeans") load("/Users/utente/Documents/prova r pkg/lucidi del corso/fertvsasvita.RData") plot(wdd$`value.Fertility rate, total (births per woman)`,wdd$`value.Life expectancy at birth, total (years)`,cex=1,pch=19,col=wdd$region) library(mclust) risgap <- Mclust(na.omit(wdd[,3:4]),G=1:9) plot(risgap) load("/Users/utente/Documents/prova r pkg/lucidi del corso/RemSens.RData") library(raster) plotRGB(ls8) nr <- getValues(ls8) kmls8 <- kmeans(na.omit(nr), centers = 10, iter.max = 500, nstart = 5, algorithm="Lloyd") kmls8 table(kmls8$cluster) sclu <- ls8[[1]] knr <- setValues(sclu, kmls8$cluster) plot(knr, main = 'Unsupervised classification', col = terrain.colors(10)) plotRGB(sent3) nr <- getValues(sent3) kmsent3 <- kmeans(na.omit(nr), centers = 4, iter.max = 500, nstart = 5, algorithm="Lloyd") kmsent3 table(kmsent3$cluster) sclu <- sent3[[1]] knr <- setValues(sclu, kmsent3$cluster) plot(knr, main = 'Unsupervised classification', col = c("darkgreen","gray","cyan","gold")) load("/Users/utente/Documents/prova r pkg/lucidi del corso/dna.RData") set.seed(12345) fviz_nbclust(dna[,2:13], kmeans, nstart = 25, method = "gap_stat", nboot = 50)+labs(subtitle = "Gap statistic method") library(NbClust) nbus <- NbClust(data = dna[,2:13], distance = "euclidean", min.nc = 2, max.nc = 15, method = "kmeans") cldna <- kmeans(dna[,2:13],centers = 3) kk <- prcomp(dna[,2:13],2)$x[,1:2] dna$Region <- as.character(dna$Region) par(mfrow=c(1,1),mar=c(4.5,4.5,1,1)) plot(kk,cex=1,pch=19,col=c("red","blue","green")[cldna$cluster]) text(kk,dna$Region,pos=1,col=c("red","blue","green")[cldna$cluster],cex=1.2) load("/Users/utente/Documents/prova r pkg/lucidi del corso/fifa19.RData") cargioc <- fifa19[rowSums(is.na(fifa19[,55:88])) == 0,] fifa19cl <- kmeans(cargioc[,55:88],centers = 10) clusplot(cargioc[,55:88], fifa19cl$cluster) fifa19pr <- prcomp(cargioc[,55:88])$x[,1:2] par(mar=c(4.5,4.5,1,1)) plot(fifa19pr,cex=0.5,pch=19,col=fifa19cl$cluster) table(cargioc$Position,fifa19cl$cluster) aggregate(cargioc[,90:92],by=list(clus=fifa19cl$cluster),mean,na.rm=T) load("/Users/utente/Documents/prova r pkg/lucidi del corso/musica.RData") pairs(evol[,28:41],cex=0.5,pch=19,col=evol$cluster) kk <- table(evol$year,evol$cluster) vv <- kk/rowSums(kk) tt <- t(vv) barplot(tt,xlab="Year",ylab="Perc. cluster",main="Evoluzione dei generi") kk <- table(evol$artist_name,evol$cluster) prinart <- matrix("",ncol(kk),10) freqart <- matrix(0,ncol(kk),10) for (i in 1:ncol(kk)) { prinart[i,] <- names(sort(kk[,i],decreasing = T)[1:10]) freqart[i,] <- sort(kk[,i],decreasing = T)[1:10] }