DataCamp Multivariate Probability Distributions in R
Principal Component Analysis
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
Principal Component Analysis - Surajit Ray, Reader, University of Glasgow - PowerPoint PPT Presentation
DataCamp Multivariate Probability Distributions in R MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R Principal Component Analysis Surajit Ray Reader, University of Glasgow DataCamp Multivariate Probability Distributions in R Principal Component
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
princomp() function calculates PCs
DataCamp Multivariate Probability Distributions in R
x: a numeric matrix or data frame
cor: use the correlation matrix instead of the covariance matrix
scores: whether the scores (projections of the data onto the principal components) are produced
princomp(x, cor = FALSE, scores = TRUE)
DataCamp Multivariate Probability Distributions in R
The mtcars dataset contains 11 variables on fuel consumption and automobile design and performance for 32 automobiles
head(mtcars,5)
                   mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Mazda RX4         21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
Mazda RX4 Wag     21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
Datsun 710        22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
Hornet 4 Drive    21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
Hornet Sportabout 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
DataCamp Multivariate Probability Distributions in R
mtcars.sub <- mtcars[ , -c(8,9)]
cars.pca <- princomp(mtcars.sub, cor = TRUE, scores = TRUE)
DataCamp Multivariate Probability Distributions in R
cars.pca   # Output of cars.pca
Standard deviations:
Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9
 2.378  1.443  0.710  0.515  0.428  0.352  0.324  0.242  0.149

summary(cars.pca)   # Summary of cars.pca
Importance of components:
                       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8  Comp.9
Standard deviation      2.378  1.443  0.710 0.5148 0.4280 0.3518 0.3241 0.2419 0.14896
Proportion of Variance  0.628  0.231  0.056 0.0294 0.0204 0.0138 0.0117 0.0065 0.00247
Cumulative Proportion   0.628  0.860  0.916 0.9453 0.9656 0.9794 0.9910 0.9975 1.00000
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
summary(cars.pca)   # Summary of cars.pca
Importance of components:
                       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8  Comp.9
Standard deviation      2.378  1.443  0.710 0.5148 0.4280 0.3518 0.3241 0.2419 0.14896
Proportion of Variance  0.628  0.231  0.056 0.0294 0.0204 0.0138 0.0117 0.0065 0.00247
Cumulative Proportion   0.628  0.860  0.916 0.9453 0.9656 0.9794 0.9910 0.9975 1.00000
DataCamp Multivariate Probability Distributions in R
screeplot(cars.pca, type = "lines")
DataCamp Multivariate Probability Distributions in R
summary(cars.pca)   # Summary of cars.pca
Importance of components:
                       Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8  Comp.9
Standard deviation      2.378  1.443  0.710 0.5148 0.4280 0.3518 0.3241 0.2419 0.14896
Proportion of Variance  0.628  0.231  0.056 0.0294 0.0204 0.0138 0.0117 0.0065 0.00247
Cumulative Proportion   0.628  0.860  0.916 0.9453 0.9656 0.9794 0.9910 0.9975 1.00000
DataCamp Multivariate Probability Distributions in R
# Variance explained
pc.var <- cars.pca$sdev^2
# Proportion of variation
pc.pvar <- pc.var / sum(pc.var)
# Cumulative proportion
plot(cumsum(pc.pvar), type = 'b')
abline(h = 0.9)
DataCamp Multivariate Probability Distributions in R
# Variance explained
pc.var <- cars.pca$sdev^2
# Proportion of variation
pc.pvar <- pc.var / sum(pc.var)
# Cumulative proportion
plot(cumsum(pc.pvar), type = 'b')
abline(h = 0.9)
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
cars.pca <- princomp(mtcars.sub, cor = TRUE, scores = TRUE)
attributes(cars.pca)
$names
[1] "sdev"     "loadings" "center"   "scale"    "n.obs"    "scores"   "call"
DataCamp Multivariate Probability Distributions in R
cars.pca$loadings # or loadings(cars.pca) Loadings: Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8 Comp.9 mpg 0.393 -0.221 -0.321 0.720 0.381 0.125 -0.115 cyl -0.403 -0.252 0.117 0.224 0.159 -0.810 -0.163 disp -0.397 0.339 -0.487 0.182 0.662 hp -0.367 -0.269 -0.295 0.354 -0.696 0.166 -0.252 drat 0.312 -0.342 0.150 0.846 0.162 -0.135 wt -0.373 0.172 0.454 0.191 -0.187 0.428 0.198 -0.569 qsec 0.224 0.484 0.628 -0.148 0.258 -0.276 -0.356 0.169 gear 0.209 -0.551 0.207 -0.282 -0.562 -0.323 -0.316 carb -0.245 -0.484 0.464 -0.214 0.400 0.357 0.206 0.108 0.320
DataCamp Multivariate Probability Distributions in R
cars.pca$loadings[, 1:2] Loadings: Comp.1 Comp.2 mpg 0.393 cyl -0.403 disp -0.397 hp -0.367 -0.269 drat 0.312 -0.342 wt -0.373 0.172 qsec 0.224 0.484 gear 0.209 -0.551 carb -0.245 -0.484
DataCamp Multivariate Probability Distributions in R
biplot(cars.pca, col = c("gray","steelblue"), cex = c(0.5, 1.3))
DataCamp Multivariate Probability Distributions in R
head(cars.pca$scores)   # PC scores of first 6 observations
                  Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7  Comp.8  Comp.9
Mazda RX4           0.67  -1.19  -0.21 -0.128  0.764 -0.127  0.430  0.0033  0.1697
Mazda RX4 Wag       0.65  -0.99   0.11 -0.087  0.667 -0.067  0.456 -0.0575  0.0727
Datsun 710          2.34   0.33  -0.21 -0.110 -0.077 -0.576 -0.392  0.2053 -0.1163
Hornet 4 Drive      0.22   2.01  -0.33 -0.313 -0.248  0.085 -0.034  0.0241  0.1476
Hornet Sportabout  -1.61   0.84  -1.05  0.150 -0.226  0.186  0.059 -0.1548  0.1571
Valiant            -0.05   2.49   0.11 -0.885 -0.128 -0.234 -0.228 -0.1002  0.0043
DataCamp Multivariate Probability Distributions in R
head(cars.pca$scores[, 1:2])   # First two PC scores of first 6 observations
                  Comp.1 Comp.2
Mazda RX4           0.67  -1.19
Mazda RX4 Wag       0.65  -0.99
Datsun 710          2.34   0.33
Hornet 4 Drive      0.22   2.01
Hornet Sportabout  -1.61   0.84
Valiant            -0.05   2.49
DataCamp Multivariate Probability Distributions in R
biplot(cars.pca, col = c("steelblue", "white"), cex = c(0.8, 0.01))
DataCamp Multivariate Probability Distributions in R
scores <- data.frame(cars.pca$scores)
ggplot(data = scores, aes(x = Comp.1, y = Comp.2, label = rownames(scores))) +
  geom_text(size = 4, col = "steelblue")
DataCamp Multivariate Probability Distributions in R
cylinder <- factor(mtcars$cyl)
ggplot(data = scores, aes(x = Comp.1, y = Comp.2, label = rownames(scores), color = cylinder)) +
  geom_text(size = 4)
DataCamp Multivariate Probability Distributions in R
Functions from the factoextra package: fviz_pca_biplot(), fviz_pca_ind(), fviz_pca_var()
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
cmdscale() function
isoMDS() sammon()
cmdscale(d, k = 2, ...)
DataCamp Multivariate Probability Distributions in R
# UScitiesD dataset
              Atlanta Chicago Denver Houston LosAngeles Miami NewYork SanFrancisco Seattle
Chicago           587
Denver           1212     920
Houston           701     940    879
LosAngeles       1936    1745    831    1374
Miami             604    1188   1726     968       2339
NewYork           748     713   1631    1420       2451  1092
SanFrancisco     2139    1858    949    1645        347  2594    2571
Seattle          2182    1737   1021    1891        959  2734    2408          678
Washington.DC     543     597   1494    1220       2300   923     205         2442    2329
DataCamp Multivariate Probability Distributions in R
usloc <- cmdscale(UScitiesD)
usloc
               [,1]   [,2]
Atlanta        -719  143.0
Chicago        -382 -340.8
Denver          482  -25.3
Houston        -161  572.8
LosAngeles     1204  390.1
Miami         -1134  581.9
NewYork       -1072 -519.0
SanFrancisco   1421  112.6
Seattle        1342 -579.7
Washington.DC  -980 -335.5

ggplot(data = data.frame(usloc), aes(x = X1, y = X2, label = rownames(usloc))) +
  geom_text()
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
cars.dist <- dist(mtcars)
cars.mds <- cmdscale(cars.dist, k = 2)
cars.mds <- data.frame(cars.mds)
ggplot(data = cars.mds, aes(x = X1, y = X2, label = rownames(cars.mds))) +
  geom_text()
DataCamp Multivariate Probability Distributions in R
cars.dist <- dist(mtcars)
cmds3 <- data.frame(cmdscale(cars.dist, k = 3))
scatterplot3d(cmds3, type = "h", pch = 19, lty.hplot = 2)
DataCamp Multivariate Probability Distributions in R
cars.dist <- dist(mtcars)
cmds3 <- data.frame(cmdscale(cars.dist, k = 3))
scatterplot3d(cmds3, type = "h", pch = 19, lty.hplot = 2, color = mtcars$cyl)
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
DataCamp Multivariate Probability Distributions in R
MULTIVARIATE PROBABILITY DISTRIBUTIONS IN R