Session: One
Date: 17/03/2021
Instructor: Clement Twumasi
Website: https://twumasiclement.wixsite.com/website
YouTube channel: https://www.youtube.com/channel/UCxrpjLYi_Akmbc2QICDvKaQ
Brief introduction to the R environment and its IDEs (R base, R studio and Jupyter Notebook).
Objects, variable types, Arrays and matrices & installing R packages.
Vector and matrix arithmetics (using R as a calculator).
Setting the working directory and importing different data formats (.CSV, .TXT, XLS/XLSX, SPSS, SAS, STATA, etc.).
Attaching and detaching variables and their effects.
Exporting data or results from R as a CSV file into desired working directory.
Attached is also a pdf just in case you want to learn on your own
1.https://cran.r-project.org/doc/manuals/r-release/R-intro.pdf
2.https://www.tutorialspoint.com/r/r_tutorial.pdf
For extremely good free books on R, click the link to get names of range of good books👇
# Make the course folder the working directory, so that the relative file
# names used below for importing and exporting data resolve against it.
setwd("C:/Users/user/Desktop/DataAnalysis_results_R/NUGSChina_R_class")

# Jupyter notebook only: size and resolution of the plots shown inline.
options(
  repr.plot.width = 8,
  repr.plot.height = 8,
  repr.plot.res = 300
)
NB: You only install a package once; afterwards, you will only need to load it with library() anytime you want to use it
# "foreign" is used to import data written by other statistical software
# (SPSS, STATA, etc.). A package only has to be installed once, so the
# download is skipped when the package is already available instead of
# reinstalling it on every run.
# NB: Package "haven" can also be used to import data types like SPSS,
# STATA, etc.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages("foreign")
}

# R as a calculator: mean and standard deviation of the integers 1 to 1000.
x <- 1:1000
mean(x)
sd(x)
# plot(x)

# Fallback (author's note: the install above only produced a warning that
# the package is not available for their version of R): install an archived
# release from its source tarball. repos = NULL and type = "source" state
# explicitly that the argument is a source file/URL rather than a package
# name to look up in a repository.
if (!requireNamespace("foreign", quietly = TRUE)) {
  install.packages(
    "https://cran.r-project.org/src/contrib/Archive/foreign/foreign_0.8-76.tar.gz",
    repos = NULL,
    type = "source"
  )
}

# Install either package "devtools" or "remotes" to install R packages
# from GitHub.
# install.packages("devtools")
# install.packages("remotes")
# library(devtools) # loading the "devtools" package
Installing the package "haven" from GitHub (author: Hadley)
# To install "haven" you can use:
# install.packages("haven")
# Alternatively, from GitHub:
# remotes::install_github("hadley/haven")

# Load the packages used below for importing data.
library(foreign) # SPSS and Stata files
library(readxl)  # Excel files
library(haven)   # SAS files (also SPSS, STATA, etc.)

# R as a calculator.
2 + 3
Creating vectors and matrices
# Multiples of 3 that do not exceed 100.
multiples_of_3 <- seq(from = 3, to = 100, by = 3)
multiples_of_3

# 100 equally spaced values from 1 to 20.
x <- seq(from = 1, to = 20, length.out = 100)
# x
print(x)

is.vector(multiples_of_3) # asks whether it is a vector
multiples_of_3
length(multiples_of_3)    # number of elements
# multiples_of_3
print(multiples_of_3)

# c() combines individual values into a vector.
x <- c(4, 80, 1, 4, 6)
x

# Arithmetic on two vectors of equal length works element by element.
x <- c(2, 4, 6, 8, 10)
y <- c(3, 6, 9, 12, 15)
x / (y^2)
3 * y # multiplies each value of y by 3
x + y # adds element-wise
x - y # subtracts element-wise
y / x # divides element-wise
x * y # multiplies element-wise

# Open the help page of sort().
help("sort")
z <- c(5, 8, 2, 4, 7, 10)
z
sort(z)                    # sorts the vector in ascending order
order(z)                   # returns the indices that put z in sorted order
sort(z, decreasing = TRUE) # sorts the vector in descending order
rev(z)                     # reverses z

# Character vectors are indexed the same way as numeric ones.
Countries <- c("Ghana", "UK", "France")
Countries
Countries[1]
Countries[1:2]
Countries[3]
Countries[c(3, 2)]
Method 1
# A single value is stored and printed like any other object.
mean_value <- 3
mean_value

# Build a 3 x 3 matrix column by column: cbind() binds the vectors as
# columns and names the columns after them (c1, c2, c3).
c1 <- c(3, 8, 7)
c2 <- c(2, 5, 2)
c3 <- c(3, 6, 9)
A1 <- cbind(c1, c2, c3)
A1
# OR
# byrow = FALSE fills the matrix column-wise. FALSE is spelled out because
# the shorthand F is an ordinary variable that can be reassigned.
A2 <- matrix(c(3, 8, 7, 2, 5, 2, 3, 6, 9), nrow = 3, ncol = 3, byrow = FALSE)
A2
Method 2
# Build the matrix row by row: rbind() binds the vectors as rows.
r1 <- c(3, 2, 3)
r2 <- c(8, 5, 6)
r3 <- c(7, 2, 9)
A3 <- rbind(r1, r2, r3)
A3
# OR
# byrow = TRUE fills the matrix row-wise.
A4 <- matrix(c(3, 2, 3, 8, 5, 6, 7, 2, 9), nrow = 3, ncol = 3, byrow = TRUE)
A4
# NB: A1 = A4 would be an assignment (creating an object A1 with entries
# equal to A4), whereas A1 == A4 checks entry by entry whether the two
# matrices are equal.
A1 == A4
Matrix Arithmetics
det(A1) # determinant of matrix A1
2 * 3   # * is ordinary multiplication
# %*% is the matrix product; here A1 is multiplied by A4.
A1_square <- A1 %*% A4
A1_square
solve(A1) # inverse of matrix A1
eigen(A1)[["values"]]
eigen(A1)              # returns eigenvalues and eigenvectors
eigen(A1)[["values"]]  # extracts only the eigenvalues, as a vector
eigen(A1)[["vectors"]] # extracts only the eigenvectors, as a matrix
Note that it is more convenient to convert Excel files into CSV files before importing them into R.
# Importing CSV data
MurderRates <- read.csv("MurderRates_data.csv")
head(MurderRates, n = 10) # view the first 10 rows
tail(MurderRates, n = 6)  # view the last 6 rows

# Importing Excel data directly without changing it to CSV
library("readxl")
Excel_data <- read_excel("Transformed_data.xlsx")
head(Excel_data)
Excel_data <- Excel_data[, -1] # drop the first column
head(Excel_data)

# Importing SPSS data into R.
# The argument name use.value.labels is written out in full instead of
# relying on partial argument matching of an abbreviated name.
SPSS_data <- read.spss(
  "Combined_data_SPSS.sav",
  use.value.labels = TRUE,
  to.data.frame = TRUE
)
head(SPSS_data)

# Importing Stata data into R using package "foreign"
Stata_data <- read.dta("imm23.dta")
head(Stata_data)
Run in SAS to convert data into CSV before importing into R (Long approach) :)
proc export data=dataset
outfile="dataset.csv"
dbms=csv;
run;
And then, run this in R
# Long approach: read the CSV file that was exported from SAS.
# TRUE is spelled out because the shorthand T is an ordinary variable that
# can be reassigned.
#   header = TRUE: the first line of the file holds the column names.
#   as.is = TRUE: character columns are kept as character vectors.
df <- read.csv("dataset.csv", header = TRUE, as.is = TRUE)

# Alternatively (simple approach) using package haven
# library(haven)
SAS_data <- read_sas("imm10.sas7bdat")
head(SAS_data)
summary(SAS_data) # not recommended

# Look at the raw race and sex variables before recoding them.
print(SAS_data[["race"]])
print(SAS_data[["sex"]])
levels(as.factor(SAS_data[["sex"]]))
is.factor(SAS_data[["sex"]])
levels(as.factor(SAS_data[["sex"]]))

# Recode sex as a labelled factor: 1 -> "male", 2 -> "female".
SAS_data[["sex"]] <- factor(
  SAS_data[["sex"]],
  levels = c(1, 2),
  labels = c("male", "female")
)
head(SAS_data)
table(SAS_data[["sex"]])

# Recode race as a labelled factor in the same way (codes 1 to 4).
print(SAS_data[["race"]])
table(as.factor(SAS_data[["race"]]))
SAS_data[["race"]] <- factor(
  SAS_data[["race"]],
  levels = c(1, 2, 3, 4),
  labels = c("asian", "Hispanic", "Black", "White")
)
print(SAS_data[["race"]])

# Cross-tabulation of race (rows) by sex (columns).
table(SAS_data[["race"]], SAS_data[["sex"]])
# attach() makes the variables of SAS_data accessible by name alone.
attach(SAS_data)
print(sex)

# detach() undoes the attach (it can also be used to detach an R package).
detach(SAS_data)

# Effect of detaching: referring to `sex` by name alone now raises an error
# (unless some other object called `sex` exists). The error is caught and
# shown as a message so that a script run does not stop here, before the
# export below.
invisible(tryCatch(
  print(sex),
  error = function(e) message("After detach(SAS_data): ", conditionMessage(e))
))

head(SAS_data)

# Export the updated data as a CSV file into the working directory.
write.csv(SAS_data, "SAS_data_updated.csv")