matrices variable from the FamilyIQ dataset.
Proposed answer:
# Descriptive statistics for the `matrices` column of the FamilyIQ data,
# which ships with the MPsychoR package.
library(MPsychoR)
data("FamilyIQ")

# Location
mean(FamilyIQ[["matrices"]])
## [1] 29.73434

# Spread: variance and standard deviation
var(FamilyIQ[["matrices"]])
## [1] 24.44181
sd(FamilyIQ[["matrices"]])
## [1] 4.943866

# Minimum, quartiles and maximum, followed by the interquartile range
quantile(FamilyIQ[["matrices"]])
## 0% 25% 50% 75% 100%
## 15 26 30 33 46
IQR(FamilyIQ[["matrices"]])
## [1] 7
iris dataset. (Check only Species and Sepal.Length.)
Proposed answer:
# Chi-squared test of independence between Species and Sepal.Length (iris).
# Preview the first rows of the data.
head(iris)

# NOTE(review): the reported df = 68 implies a sparse 3 x 35 contingency table,
# so the chi-squared approximation may be unreliable here -- confirm.
# The columns are passed as iris$... so the printed "data:" line is unchanged.
test <- chisq.test(iris$Species, iris$Sepal.Length)
print(test)
##
## Pearson's Chi-squared test
##
## data: iris$Species and iris$Sepal.Length
## X-squared = 156.27, df = 68, p-value = 6.666e-09

# p-value < 0.05: we can reject the null hypothesis
Proposed answer:
# Chi-squared goodness-of-fit test: do the observed counts match the
# hypothesised proportions?
observed <- c(700, 557, 228)
expected <- c(0.5, 0.4, 0.1) # passed as `p`, i.e. probabilities, not counts
result <- chisq.test(x = observed, p = expected)
result
##
## Chi-squared test for given probabilities
##
## data: observed
## X-squared = 47.298, df = 2, p-value = 5.363e-11

# p-value < 0.05: we can reject the null hypothesis
Proposed answer:
# McNemar's test on a paired 2 x 2 table of approval ratings.
# Rows are the 1st survey, columns the 2nd survey; the counts are entered
# row by row (byrow = TRUE).
Performance <- matrix(
  c(600, 380,
    50, 570),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    "1st Survey" = c("Approve", "Disapprove"),
    "2nd Survey" = c("Approve", "Disapprove")
  )
)
result <- mcnemar.test(Performance)
result
##
## McNemar's Chi-squared test with continuity correction
##
## data: Performance
## McNemar's chi-squared = 251.72, df = 1, p-value < 2.2e-16

# p-value < 0.05: we can reject the null hypothesis
height variable from the women dataset comes from a population with mean = 69.
Proposed answer:
# One sample t-test
# Preview the data, then check normality of height before testing its mean.
head(women)
shapiro.test(x = women$height)
##
## Shapiro-Wilk normality test
##
## data: women$height
## W = 0.96359, p-value = 0.7545

# H0: the true mean of height equals 69.
# The p-value below is < 0.05, so we can reject the null hypothesis.
t.test(x = women$height, mu = 69)
##
## One Sample t-test
##
## data: women$height
## t = -3.4641, df = 14, p-value = 0.003797
## alternative hypothesis: true mean is not equal to 69
## 95 percent confidence interval:
## 62.52341 67.47659
## sample estimates:
## mean of x
## 65
Murder variable from the USArrests dataset, whether there are, on average, fewer murders in
less urbanized regions. Split the data into more and less urbanized
regions using UrbanPop >= 72 and UrbanPop < 72.
Proposed answer:
# The independent samples t-test
library(dplyr)

# Label every state as more (UrbanPop >= 72) or less (UrbanPop < 72) urbanized.
# The factor levels are fixed explicitly so that "Less_urbanized" is always the
# first group: the one-sided test below (alternative = "less") then tests
# mean(Less_urbanized) - mean(More_urbanized) < 0 no matter how the labels sort.
USArrests_by_urbanpop <- USArrests %>%
  mutate(
    type = factor(
      case_when(
        UrbanPop >= 72 ~ "More_urbanized",
        UrbanPop < 72 ~ "Less_urbanized"
      ),
      levels = c("Less_urbanized", "More_urbanized")
    )
  )

# Normality of Murder within each group (Shapiro-Wilk p-values)
USArrests_by_urbanpop %>%
  group_by(type) %>%
  summarise(p.value = shapiro.test(Murder)$p.value)

# Homogeneity of variances; p-value > 0.05 supports var.equal = TRUE below
(bartlett.test(USArrests_by_urbanpop$Murder, USArrests_by_urbanpop$type))
##
## Bartlett test of homogeneity of variances
##
## data: USArrests_by_urbanpop$Murder and USArrests_by_urbanpop$type
## Bartlett's K-squared = 1.6031, df = 1, p-value = 0.2055

# One-sided two-sample t-test with pooled variance
(result <- t.test(
  USArrests_by_urbanpop$Murder ~ USArrests_by_urbanpop$type,
  alternative = "less",
  var.equal = TRUE
))
##
## Two Sample t-test
##
## data: USArrests_by_urbanpop$Murder by USArrests_by_urbanpop$type
## t = -0.094554, df = 48, p-value = 0.4625
## alternative hypothesis: true difference in means between group Less_urbanized and group More_urbanized is less than 0
## 95 percent confidence interval:
## -Inf 2.029055
## sample estimates:
## mean in group Less_urbanized mean in group More_urbanized
## 7.741935 7.863158

# p-value > 0.05: we can not reject the null hypothesis
Murder variable from the USArrests dataset, check whether the tightening of penalties has
affected the average level of murder in all states.
Proposed answer:
# The paired samples t-test
# Before: murder rates taken from USArrests; After: the second measurement for
# the same 50 states, in the same order.
Before <- USArrests$Murder
After <- c(
  14.2, 3.4, 3.2, 6.3, 9.5, 3.3, 3.0, 9.9, 5.5, 8.8,
  4.3, 12.1, 17.0, 4.4, 8.8, 10.2, 8.3, 0.3, 2.1, 13.6,
  9.7, 10.1, 8.8, 13.2, 3.5, 16.2, 8.5, 9.2, 13.2, 5.1,
  2.6, 11.1, 0.3, 10.8, 8.2, 5.3, 4.1, 1.8, 3.0, 1.9,
  13.2, 9.6, 8.8, 3.2, 12.2, 2.1, 11.7, 3.8, 19.0, 9.7
)

# Normality of the pairwise differences
shapiro.test(Before - After)
##
## Shapiro-Wilk normality test
##
## data: Before - After
## W = 0.9847, p-value = 0.7585

result <- t.test(Before, After, paired = TRUE)
result
##
## Paired t-test
##
## data: Before and After
## t = 0.027395, df = 49, p-value = 0.9783
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.881227 1.933227
## sample estimates:
## mean of the differences
## 0.026

# p-value > 0.05: we can not reject the null hypothesis
Sepal.Width in each of the iris species. Use the iris dataset.
Proposed answer:
# One-way ANOVA
# Overview of the data: Species has 3 levels with 50 rows each.
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##

# Assumption checks: per-species variance and Shapiro-Wilk p-value,
# then Bartlett's test for equal variances across species.
library(dplyr)
summarise(
  group_by(iris, Species),
  variance = var(Sepal.Width),
  p.value = shapiro.test(Sepal.Width)$p.value
)
bartlett.test(iris$Sepal.Width, iris$Species)
##
## Bartlett test of homogeneity of variances
##
## data: iris$Sepal.Width and iris$Species
## Bartlett's K-squared = 2.0911, df = 2, p-value = 0.3515

# Fit the ANOVA; Pr(>F) < 0.05, so we can reject the null hypothesis
result <- aov(formula = Sepal.Width ~ Species, data = iris)
summary(result)
## Df Sum Sq Mean Sq F value Pr(>F)
## Species 2 11.35 5.672 49.16 <2e-16 ***
## Residuals 147 16.96 0.115
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Post-hoc pairwise comparisons: TukeyHSD() and agricolae's HSD.test()
library(agricolae)
TukeyHSD(result)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Sepal.Width ~ Species, data = iris)
##
## $Species
## diff lwr upr p adj
## versicolor-setosa -0.658 -0.81885528 -0.4971447 0.0000000
## virginica-setosa -0.454 -0.61485528 -0.2931447 0.0000000
## virginica-versicolor 0.204 0.04314472 0.3648553 0.0087802

# Every species gets its own letter below: all group combinations differ
HSD.test(result, "Species", group = TRUE, console = TRUE)
##
## Study: result ~ "Species"
##
## HSD Test for Sepal.Width
##
## Mean Square Error: 0.1153878
##
## Species, means
##
## Sepal.Width std r Min Max
## setosa 3.428 0.3790644 50 2.3 4.4
## versicolor 2.770 0.3137983 50 2.0 3.4
## virginica 2.974 0.3224966 50 2.2 3.8
##
## Alpha: 0.05 ; DF Error: 147
## Critical Value of Studentized Range: 3.348424
##
## Minimun Significant Difference: 0.1608553
##
## Treatments with the same letter are not significantly different.
##
## Sepal.Width groups
## setosa 3.428 a
## virginica 2.974 b
## versicolor 2.770 c
Proposed answer:
# Outlier check on a small sample: Shapiro-Wilk normality test first,
# then Grubbs' test for a single outlier.
result <- c(12, 34, 22, 14, 22, 17, 24, 22, 18, 14, 18, 12)
shapiro.test(x = result)
##
## Shapiro-Wilk normality test
##
## data: result
## W = 0.89548, p-value = 0.1387

library(outliers)
grubbs.test(x = result)
##
## Grubbs test for one outlier
##
## data: result
## G = 2.3833, U = 0.4367, p-value = 0.0295
## alternative hypothesis: highest value 34 is an outlier

# p-value < 0.05: the highest value (34) is an outlier
cars dataset.
Proposed answer:
# Simple linear regression of dist on speed using the cars dataset.
# Preview the data, then fit and print the model.
head(cars)
lm(formula = dist ~ speed, data = cars)
##
## Call:
## lm(formula = dist ~ speed, data = cars)
##
## Coefficients:
## (Intercept) speed
## -17.579 3.932