amv18 - Multivariate Correlation Matrix

preview_player
Показать описание
Help this channel to remain great! Donating to Patreon or Paypal can do this!
Рекомендации по теме
Комментарии
Автор

begin video 1

# Sample mean and variance of one variable, written as vector algebra.
n <- 22 # sample size
y <- rnorm(n, mean = 7, sd = 3)
j <- rep(1, n) # vector of n ones

# Sample mean three ways: built-in, y'j / n, and the crossprod() shortcut.
mean(y)
t(y) %*% j / n
crossprod(y, j) / n

# Built-in sample variance, for comparison with the versions below.
var(y)

# Center y by subtracting j * (y'j / n), then form z'z / (n - 1).
z <- y - j %*% (y %*% j / n)
t(z) %*% z / (n - 1)

# Same centering using mean(y) directly; crossprod(z) is t(z) %*% z.
z <- y - mean(y) * j
t(z) %*% z / (n - 1)
crossprod(z) / (n - 1)

# Same again, with the 1 x 1 product converted to a plain number first.
z <- y - as.numeric(y %*% j / n) * j
t(z) %*% z / (n - 1)

# Standard normal density curve with a rug of one random sample of size n.
n <- 22 # sample size
x <- seq(from = -3.5, to = 3.5, by = 0.01) # grid on which the curve is drawn
plot(
  x, dnorm(x, mean = 0, sd = 1),
  type = "l",
  main = "Standard Normal Density",
  ylab = "f(y)",
  xlab = paste("Random Sample of Size =", n)
)
# Dashed vertical segment at 0, from the axis up to the density value there.
segments(0, 0, 0, dnorm(0), lty = 2)
legend(
  "topright",
  legend = c(expression(mu ~ "= 0"), expression(sigma^2 ~ "= 1"))
)
rug(rnorm(n)) # tick marks for n fresh standard normal draws

# Same density plot, but the rug now marks 100 sample means, each computed
# from a sample of size n.
n <- 22 # sample size

y <- matrix(rnorm(100 * n), ncol = n) # 100 rows; each row is a sample of n = 22
y <- rowMeans(y) # one mean per row
# x is not created in this section; it is the grid built earlier in the script.
plot(
  x, dnorm(x, mean = 0, sd = 1),
  type = "l",
  main = "Standard Normal Density",
  ylab = "f(y)",
  xlab = paste("Random Sample of Size =", n)
)
# Dashed vertical segment at 0, from the axis up to the density value there.
segments(0, 0, 0, dnorm(0), lty = 2)
legend(
  "topright",
  legend = c(expression(mu ~ "= 0"), expression(sigma^2 ~ "= 1"))
)
rug(y) # tick marks at the 100 row means

end video 1

begin video 2

# Load the cars data, inspect it, and plot stopping distance against speed.
data(cars)
str(cars)  # structure of cars data set
head(cars) # prints first 6 rows
summary(cars)
plot(
  cars$speed, cars$dist,
  xlab = "Speed (mph)",
  ylab = "Stopping distance (ft)",
  las = 1,
  main = "cars data"
)
abline(lm(cars$dist ~ cars$speed), col = "red") # fitted regression line
legend("topleft", "Simple Linear Regression", lty = 1, col = "red")

# Sample mean vector two ways: Y'j / n with a vector of ones, and colMeans().
j <- rep(1, nrow(cars))
t(cars) %*% j / nrow(cars) # sample mean vector
colMeans(cars)             # sample mean vector

end video 2

begin video 3

# Scatterplot with the regression line plus lines at the two marginal means.
plot(
  cars$speed, cars$dist,
  xlab = "Speed (mph)",
  ylab = "Stopping distance (ft)",
  las = 1,
  main = "cars data"
)
abline(lm(cars$dist ~ cars$speed), col = "red")
# Vertical line at the mean speed, horizontal line at the mean distance.
abline(v = mean(cars$speed), h = mean(cars$dist))
legend("topleft", "Marginal Mean", lty = 1)

# Variances and covariance, selecting each column by name and by position.
var(cars$speed)
var(cars[, 1])
var(cars$dist)
var(cars[, 2])
cov(cars$speed, cars$dist)
cov(cars[, 1], cars[, 2])

# Fitted coefficients, then the slope rebuilt as cov(x, y) / var(x).
lm(cars$dist ~ cars$speed)
cov(cars$speed, cars$dist) / var(cars$speed) # matches slope of regression line

# Sample covariance matrix: built-ins first, then via the centering matrix.
Y <- as.matrix(cars) # create data matrix from data frame
var(Y)
cov(Y)

n <- nrow(Y)
j <- rep(1, n)   # vector of n ones
J <- j %*% t(j)  # n x n matrix of ones; grows quadratically with n

# Y'(I - J/n)Y / (n - 1), for comparison with cov(Y) above.
t(Y) %*% (diag(n) - (J / n)) %*% Y / (n - 1)

# Check that J/n and I - J/n are unchanged when multiplied by themselves.
all.equal((J / n) %*% (J / n), (J / n))
all.equal(
  (diag(n) - (J / n)) %*% (diag(n) - (J / n)),
  (diag(n) - (J / n))
)

end video 3

begin video 4

# Scatterplot of the cars data with the fitted regression line.
plot(
  cars$speed, cars$dist,
  xlab = "Speed (mph)",
  ylab = "Stopping distance (ft)",
  las = 1,
  main = "cars data"
)
abline(lm(cars$dist ~ cars$speed), col = "red")
legend("topleft", "Regression Line", lty = 1, col = "red")

# Correlation three ways: built-in, cov / (sd * sd), and cov / sqrt(var * var).
cor(cars$speed, cars$dist)
cov(cars$speed, cars$dist) / (sd(cars$speed) * sd(cars$dist))
cov(cars$speed, cars$dist) / sqrt(var(cars$speed) * var(cars$dist))

# Fitted coefficients, then the slope rebuilt as r * sd(y) / sd(x).
lm(cars$dist ~ cars$speed)
cor(cars$speed, cars$dist) * sd(cars$dist) / sd(cars$speed) # matches slope of regression line

# Correlation matrix rebuilt from the covariance matrix S.
Y <- as.matrix(cars) # create data matrix from data frame
S <- cov(Y)
D.5 <- diag(sqrt(diag(S))) # diagonal matrix of sqrt of the diagonal of S
cor(Y)
# inverse(D.5) %*% S %*% inverse(D.5), for comparison with cor(Y) above.
solve(D.5) %*% S %*% solve(D.5)

end video 4


begin video 6

# Multivariate summaries for columns 1, 3 to 7 and 11 of the mtcars data.
data(mtcars)
str(mtcars)  # structure of mtcars data set
head(mtcars) # prints first 6 rows
pairs(
  mtcars[, c(1, 3:7, 11)],
  main = "Motor Trend Car Road Tests Data"
)
round(colMeans(mtcars[, c(1, 3:7, 11)]), 2) # sample mean vector
round(cov(mtcars[, c(1, 3:7, 11)]), 2)      # sample covariance matrix
round(var(mtcars[, c(1, 3:7, 11)]), 2)      # sample covariance matrix
round(cor(mtcars[, c(1, 3:7, 11)]), 2)      # sample correlation matrix

end video 6

statisticsmatt