Below is the outline of tomorrow’s class
A brief introduction to looping (for loops and while loops) and the use of apply functions in R (lapply, sapply, etc.). How to store results from loops as a vector, list or data frame for use later.
Introduction to simulating probability distributions (binomial, Poisson, uniform, normal, lognormal and exponential probability distribution, amongst others).We shall introduce Monte Carlo Simulation for a throw or toss of a fair die.
Next meeting outline:
We shall prove the most famous theory in Statistics called the Central Limit Theory as well based on these simulations.
Introduction to two important methods of estimation: Bootstrapping and Jacknife estimation techniques.
NB: Assignment tasks based on sections 1 to 3 will be given at the end of the class. You may still need knowledge of our first 3 meetings (already hosted on YouTube) while doing the assigned tasks. The assignment will be put on Piazza as well including all R scripts that I will write for tomorrow’s class/meeting.
NB: Avoid loops if not needed. Only use them when necessary.
You don't need a loop to add, substract, multiple or divide entries of two or more vectors.
Addition without loops
#Addition without loops
x <- 1:10
y <- 21:30
x+y
#y-x
#y/x
Addition with for loops as saving results as a vector
results1<-numeric(length(x)) #or results1<-rep(NA,length(x)) or results1<-rep(FALSE,length(x))
for(i in seq_along(x)){
results1[i]<- x[i]+y[i]
#print(results[i])
}
results1
#seq_along(x) returns all the indecies of x
results2<-rep(FALSE,length(x)) #or results<-rep(NA,length(x)) or results<-rep(FALSE,length(x))
for(i in seq_along(x)){
results2[i]<- x[i]+y[i]
}
results2
all.equal(results1, results2)# to check whether entries of two or more vectors are the same
Creating or extracting sequence of numbers (even numbers, odd numbers and prime numbers) using loops
1000/3
1000%/%3
#try running the codes below to see what each does
20/3 #returns 20 divided by 3
20%%3 #returns the modulus or the remainder after dividing 20 by 3
20%/%3 #returns the whole numberafter dividing 20 by 3
seq(2,100,by=2)
88%%3
How many even numbers are there from 1 to 54859 using a for loop?
n=11:20
seq_along(n)
#first approach
n <- 1: 564000
count<- 0
for (k in seq_along(n)) {
if(k%% 2 == 0) {
#print(k)
count = count+1
}
}
print(count)
1:10
for (k in 1:10) {
print(k^3)
}
#second approach (long approach)
n <- 1: 54859
multiple_2<-list() #or multiple_2<-c() or multiple_2<- NULL
for (val in n) {
if(val%% 2 == 0) {
multiple_2[[val]]<- val
}
}
multiples_of_2<-unlist(multiple_2) #stores all multiples of 2
#multiples_of_2
length(multiples_of_2) #total number of even numbers
How many multiples of 8 are there from 1 to 1000 and what are they using for loops?
Try: extracting odds numbers, multiples of 5 and multiples of 12 from 10 to 10000 using for loops
# How many multiples of 8 are there from 1 to 1000 and what are they?
#First approach
n <- 1: 1000
multiple_8<-list()
#try multiple_8<-c() or multiple_8<-NULL #to see it does the job but with NA's if a number is not a multiple of 8
#multiple_8<-list()
for (k in seq_along(n)) {
if(k%% 8 == 0) {
multiple_8[[k]]<- k
}
}
multiples_of_8<-unlist(multiple_8) #stores all multiples of 8
print(paste("The total number of multiples of 8 is", "", length(multiples_of_8))) #total number of even numbers
#print(multiples_of_8)
#second approach
n <- 1: 1000
#try multiple_8<-c() or multiple_8<-NULL #to see it does the job but with NA's if a number is not a multiple of 8
multiple_8<- NULL
for (k in seq_along(n)) {
if(k%% 8 == 0) {
multiple_8[[k]]<- k
}
}
#na.omit(multiples_of_8)
#multiples_of_8<-unlist(multiple_8) #stores all multiples of 8
#print(multiples_of_8)
#print(paste("The total number of multiples of 8 is", "", length(multiples_of_8))) #total number of even numbers
multiples_of_8<- na.omit(multiples_of_8)
paste("The total number of multiples of 8 is", "", length(multiples_of_8)) #total number of even numbers
multiples_of_8
How many multiples of 8 are there from 1 to 1000 and what are they using a while loop?
Try: extracting odds numbers, multiples of 5 and multiples of 12 from 10 to 10000 using while loop
multiples_8<-NULL
n=1
while(n<=1000){
if(n%%8==0) multiples_8[[n]]<- n
n=n+1
}
na.omit(unlist(multiples_8))
#multiples_of_8<-unlist(multiples_8)
#multiples_of_8
We will also learn apply() sapply(), lapply() and tapply(). The apply collection can be viewed as a substitute to the loop.
Let's create a data frame and use the apply functions while finding summary measures
rep(c("Male","Female"),5)
#let's create a matrix and use the apply functions while finding summary measures
Age<- c(5,7,8,9,10,12,14,36,60,50)
Size<-seq(1,30,length.out=10)
Height<-rep(c(2,4,5,7,8), 2)
Gender<-rep(c("Male","Female"),5)
Data_artifical<- data.frame(Age,Size,Height,Gender) # Data_artifical<- cbind(Age,Size,Height,Gender)
Data_artifical
#to change variable names, you to say age, size, height & gender.
# names(Data_artifical)<- c("age", "size","height","gender")
#Data_artifical[1] #
#Data_artifical[, 1]
Data_artifical[[1]]
Find mean across columns or each variable
se<- function(x){
error= sd(x)/length(x)
return(error)
}
apply(Data_artifical[,1:2],2,sum)
#apply(Data_artifical[,1:3],1,mean)
#Find mean across columns or each variable: 2=column in apply() function
apply(Data_artifical[,1:3],2,mean)
Find mean across rows
# find mean across rows: 1=row & 2=column in apply() function
#1=rows
apply(Data_artifical[,1:3],1,mean)
tapply(Data_artifical$Age,Data_artifical$Gender, mean)
mean(Data_artifical$Age)
# tapply(X, INDEX, FUN = NULL)
tapply(Data_artifical[[1]],Data_artifical[[4]], mean)
round(tapply(Data_artifical[,2],Data_artifical[[4]], mean),3) #round to 3 d.p
apply(Data_artifical[,1:3],2,sum)
x=lapply(Data_artifical[,1:3], median)
x
sapply(Data_artifical[,1:3], median)
results=lapply(Data_artifical[,1:3], sum)
results[[1]]
results[[2]]
results[[3]]
sapply(Data_artifical[,1:3], sum)
#Creating function to find standard error
Std_err<-function(x){
se<- sd(x)/length(x)
return(se)
}
lapply(Data_artifical[,1:3],Std_err)
tapply(Data_artifical$Age,Data_artifical$Gender,Std_err)
Simulating 1000 samples from binomial, Poisson, uniform, normal, lognormal and exponential distributions.
We shall prove the most famous theory in Statistics called the Central Limit Theory as well based on these simulations.
options(repr.plot.width=8, repr.plot.height=8,repr.plot.res = 300) #Setting plot size
set.seed(125)
rbinom(n=10,size=10,prob=0.5)
set.seed(125)
rbinom(n=10,size=10,prob=0.5)
set.seed(123) #for reproducibility
names_prob_distn<-c("binomial","poisson","uniform","exponential","normal","lognormal")
prob_distn=NULL #list( )# c()
prob_distn[[1]]<-rbinom(n=1000,size=10,prob=0.5) #binomial sample
prob_distn[[2]]<-rpois(n=1000, lambda=1)#poisson_sample
prob_distn[[3]]<-runif(n=1000,min=0, max=1)#uniform sample
prob_distn[[4]]<-rexp(n=1000, rate=1)#exponential sample
prob_distn[[5]]<-rnorm(n=1000, mean = 0, sd = 1)#normal sample
prob_distn[[6]]<-rlnorm(n=1000, meanlog = 0, sdlog = 1)#lognormal sample
options(repr.plot.width=8, repr.plot.height=8,repr.plot.res = 300) #Setting plot size & resolution
paste("position",seq_along(names_prob_distn))
par(mfrow=c(3,2),mar=c(4,4,1,0))
for(i in seq_along(names_prob_distn)){
hist(prob_distn[[i]],col=i,main=paste(names_prob_distn[i]),xlab="Samples")
}
#trying n=1000 and increase n to 100000 (returns an elliptical shape for bivariate normality)
set.seed(1)
normal_sample1<- rnorm(n=100000, mean = 2, sd = 1)
normal_sample2<- rnorm(n=100000, mean = 3, sd = 1)
plot(normal_sample1,normal_sample2,col="blue",type="p",xlab="Normal samples 1",ylab="Normal sample 2")
SIMULATING A TOSS OF A DIE
Monte Carlo Simulations works perfect as the number of simulations increases
runif(n=1,0,1)
numeric(6)
rep(0,6)
count=rep(0,6) #numeric(6)
rand=runif(n=1,0,1) #a random variable to decide which event to occur (a single toss)
for (i in rand){
if (i<1/6){
count[1]=count[1]+1
print("face 1")
} else if (i<2/6){ # 1/6<i <2/6
count[2]=count[2]+1
print("face 2")
} else if (i<3/6){
count[3]=count[3]+1
print("face 3")
} else if (i<4/6){
count[4]=count[4]+1
print("face 4")
} else if (i<5/6){
count[5]=count[5]+1
print("face 5")
} else {
count[6]=count[6]+1
print("face 6")
}
}
count=rep(0,6)
tosses=1000 #increase the number of tosses (to 1000000) to show its a fair die
rand=runif(n=tosses,0,1) #a random variable to decide which event to occur (for n tosses)
for (i in rand){
if (i<1/6){
count[1]=count[1]+1
#print("face 1")
} else if (i<2/6){
count[2]=count[2]+1
#print("face 2")
} else if (i<3/6){
count[3]=count[3]+1
#print("face 3")
} else if (i<4/6){
count[4]=count[4]+1
#print("face 4")
} else if (i<5/6){
count[5]=count[5]+1
#print("face 5")
} else {
count[6]=count[6]+1
#print("face 6")
}
}
count
for (i in seq_along(count)) {
print(paste("Die number",i,"", "appears",count[i], "times"))
}
barplot(count/tosses,col="red",xlab="Die outcome",horiz = FALSE,
names.arg =c("face 1","face 2","face 3","face 4","face 5","face 6"))