# Basic arithmetic: R evaluates each expression and prints its value
2 + 2    # addition
2 - 3    # subtraction
6 / 2    # division
10 / 3   # division with a non-integer result
10 %% 3  # modulo: remainder of the integer division of 10 by 3


mean(c(1,2)) # several values must first be concatenated into one vector with c() before being passed to mean()
exp(-2) # exponential function: e raised to the power -2


round(exp(-2), 2) # nested call: the result of exp(-2) is rounded to 2 decimal places


log(100,base=10) # logarithm of 100 in base 10


help(round)

ceiling(x)
floor(x)
trunc(x, ...)

round(x, digits = 0)
signif(x, digits = 6)

round(.5 + -2:4) # IEEE / IEC rounding: -2  0  0  2  2  4  4
## (this is *good* behaviour -- do *NOT* report it as bug !)

( x1 <- seq(-2, 4, by = .5) )
round(x1) #-- IEEE / IEC rounding !
x1[trunc(x1) != floor(x1)]
x1[round(x1) != floor(x1 + .5)]
(non.int <- ceiling(x1) != floor(x1))

x2 <- pi * 100^(-1:3)
round(x2, 3)
signif(x2, 3)


?exp

log(x, base = exp(1))
logb(x, base = exp(1))
log10(x)
log2(x)

log1p(x)

exp(x)
expm1(x)

log(exp(3))
log10(1e7) # = 7

x <- 10^-(1+2*1:9)
cbind(x, log(1+x), log1p(x), exp(x)-1, expm1(x))


x <- 2

x

x+x


y <- x+3

y


x <- 4
y


y <- x+3
y


s <- "this is a string of characters"
s


class(x)
class(s)


"1"
class("1")
class(1)


try("1" + 3)# I added the try function to avoid stopping the notebook if you want to run all the cells

Error in "1" + 3 : non-numeric argument to binary operator


1 + 3


sessionInfo()

R version 4.0.2 (2020-06-22)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.0.2/lib/libopenblasp-r0.3.10.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] compiler_4.0.2  ellipsis_0.3.1  IRdisplay_0.7.0 pbdZMQ_0.3-3.1 
 [5] tools_4.0.2     htmltools_0.5.1 pillar_1.4.7    base64enc_0.1-3
 [9] crayon_1.3.4    uuid_0.1-4      IRkernel_1.1.1  jsonlite_1.7.2 
[13] digest_0.6.27   lifecycle_0.2.0 repr_1.1.0      rlang_0.4.10   
[17] evaluate_0.14


getwd()


setwd('/shared/home/cvandiedonck/RSession1') #change with your login!!!
getwd() #change is visible


ls()


rm(y)
ls()


dir()


# list the notebook files of the working directory; `pattern` is a regular
# expression, so the dot is escaped and the match is anchored to the end of the name
list.files(pattern = "\\.ipynb$")


save(x,file="x.RData")


rm(x)
ls()


load("x.RData")
ls()
x #x is again accessible


file.remove("x.RData") #remove file: returns TRUE on successful removal


save(x,s, file="xands.RData")


file.remove("xands.RData")# to clean the working directory


ls()
save.image(file="AllMyData.RData")


rm(list=ls()) # this command removes all the objects on the R session
ls() #all variables have been removed


load("AllMyData.RData")
ls() #all variables are accessible again
file.remove("AllMyData.RData")
ls()


# ls()
# savehistory(file="MyHistory.Rhistory") #save all previously run commands in a special formatted file
# loadhistory("MyHistory.Rhistory") #load all commands stored in the specified file
# my_history <- read.delim("MyHistory.Rhistory") #see how the file is formatted: number of line and associated command
# head(my_history)


x <- c(3,7,1,2) # we define a variable x with 4 numeric values concatenated
x


print(x)

[1] 3 7 1 2


is.numeric(x)


x<2 # element-wise test: is each of the 4 values of x < 2?


x==2


class(x)
class(s)
is.character(s)
is.numeric(s)
print(as.numeric(x<2))
is.numeric("1")
is.numeric(as.numeric("1"))
is.numeric(c(1,"1"))

[1] 0 0 1 0


a <- c()
a

NULL


weight <- c(60, 72, 57, 90, 95, 72)
weight


print(weight)

[1] 60 72 57 90 95 72


4:10
print(4:10)

[1]  4  5  6  7  8  9 10


print(seq(4,10))

[1]  4  5  6  7  8  9 10


print(seq(2,10,2))

[1]  2  4  6  8 10


print(rep(4,2))

[1] 4 4


print(rep(seq(4,10,2)))
print(c(rep(1,4),rep(2,4)))
print(c(5,s))

[1]  4  6  8 10
[1] 1 1 1 1 2 2 2 2
[1] "5"                              "this is a string of characters"


class(c(5,s))
length(1:10)
length(weight)
str(weight)

 num [1:6] 60 72 57 90 95 72


size <- c(1.75, 1.8, 1.65, 1.9, 1.74, 1.91)
print(size^2)
print(bmi <- weight/size^2 )
print(bmi)

[1] 3.0625 3.2400 2.7225 3.6100 3.0276 3.6481
[1] 19.59184 22.22222 20.93664 24.93075 31.37799 19.73630
[1] 19.59184 22.22222 20.93664 24.93075 31.37799 19.73630


print(sort(size))
mean(size)
sd(size)
median(size)
min(size)
max(size)
print(range(size))
summary(size)

[1] 1.65 1.74 1.75 1.80 1.90 1.91

[1] 1.65 1.91

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.650   1.742   1.775   1.792   1.875   1.910


print(size)
size[1]
size[2]
size[6]
size[c(2,6)]
size[c(6,2)]
min(size[c(6,2)])

[1] 1.75 1.80 1.65 1.90 1.74 1.91


names(size)
names(size) <- c("Fabien","Pierre","Sandrine","Claire","Bruno","Delphine")
size
str(size)

NULL

 Named num [1:6] 1.75 1.8 1.65 1.9 1.74 1.91
 - attr(*, "names")= chr [1:6] "Fabien" "Pierre" "Sandrine" "Claire" ...


myData <- matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3)
myData
class(myData)


myData <- matrix(c(1,2,3, 11,12,13), nrow = 2, ncol = 3, byrow = TRUE)
myData


print(dim(myData))
str(myData)
nrow(myData)
ncol(myData)

[1] 2 3
 num [1:2, 1:3] 1 11 2 12 3 13


print(myData)

     [,1] [,2] [,3]
[1,]    1    2    3
[2,]   11   12   13


myData[1,2] # returns the value of the 1st row and 2nd column


myData[2,1] # returns the value of the 2nd row and 1st column


print(myData[,1]) # returns the values of the vector corresponding to the 1st column

[1]  1 11


print(myData[2,])  # returns the values of the vector corresponding to the 2nd row

[1] 11 12 13


myData[,2:3] # subsets the initial matrix returning a sub-matrix
             # with all rows of the 2nd and 3rd columns from the initial matrix
             # the generated matrix has 2 rows and 2 columns


print(dim(myData[,2:3])) # the generated matrix has 2 rows and 2 columns

[1] 2 2


class(myData[,1]) # we extract a vector -> thus the class is numeric and no more matrix
length(myData[1,])
length(myData[,1])


myData2 <- cbind(weight, size, bmi)
myData2
myData3 <- rbind(weight, size, bmi)
myData3


myData2*2
summary(myData2)
mean(myData2)
mean(myData2[,1])

     weight           size            bmi       
 Min.   :57.00   Min.   :1.650   Min.   :19.59  
 1st Qu.:63.00   1st Qu.:1.742   1st Qu.:20.04  
 Median :72.00   Median :1.775   Median :21.58  
 Mean   :74.33   Mean   :1.792   Mean   :23.13  
 3rd Qu.:85.50   3rd Qu.:1.875   3rd Qu.:24.25  
 Max.   :95.00   Max.   :1.910   Max.   :31.38


myDataf <- data.frame(weight, size, bmi)
myDataf


class(myDataf)


str(myDataf)

'data.frame':	6 obs. of  3 variables:
 $ weight: num  60 72 57 90 95 72
 $ size  : num  1.75 1.8 1.65 1.9 1.74 1.91
 $ bmi   : num  19.6 22.2 20.9 24.9 31.4 ...


print(dim(myDataf))

[1] 6 3


d <- data.frame()
d
dim(d)


class(myData2)
class(as.data.frame(myData2))
str(as.data.frame(myData2))

'data.frame':	6 obs. of  3 variables:
 $ weight: num  60 72 57 90 95 72
 $ size  : num  1.75 1.8 1.65 1.9 1.74 1.91
 $ bmi   : num  19.6 22.2 20.9 24.9 31.4 ...


d2 <- as.data.frame(cbind(1:2, 10:11))
str(d2)

'data.frame':	2 obs. of  2 variables:
 $ V1: int  1 2
 $ V2: int  10 11


d <- as.data.frame(matrix(NA,2,3))
d
dim(d)
str(d)

'data.frame':	2 obs. of  3 variables:
 $ V1: logi  NA NA
 $ V2: logi  NA NA
 $ V3: logi  NA NA


rownames(d)
colnames(d)


row.names(d)
names(d)


print(myDataf)

         weight size      bmi
Fabien       60 1.75 19.59184
Pierre       72 1.80 22.22222
Sandrine     57 1.65 20.93664
Claire       90 1.90 24.93075
Bruno        95 1.74 31.37799
Delphine     72 1.91 19.73630


print(myDataf[,2])
print(myDataf[,"size"])
print(myDataf$size)

[1] 1.75 1.80 1.65 1.90 1.74 1.91
[1] 1.75 1.80 1.65 1.90 1.74 1.91
[1] 1.75 1.80 1.65 1.90 1.74 1.91


myDataf[2,]


myDataf["Pierre",]


class(myDataf["Pierre",])


temp <- unlist(myDataf["Pierre",])
print(temp)
class(temp)

  weight     size      bmi 
72.00000  1.80000 22.22222


d2$new <- 1:2
d2


gender <- c("Man","Man","Woman","Woman","Man","Woman")
print(gender)
myDataf$sex <- gender
print(myDataf$sex)
myDataf
str(myDataf)

[1] "Man"   "Man"   "Woman" "Woman" "Man"   "Woman"
[1] "Man"   "Man"   "Woman" "Woman" "Man"   "Woman"

'data.frame':	6 obs. of  4 variables:
 $ weight: num  60 72 57 90 95 72
 $ size  : num  1.75 1.8 1.65 1.9 1.74 1.91
 $ bmi   : num  19.6 22.2 20.9 24.9 31.4 ...
 $ sex   : chr  "Man" "Man" "Woman" "Woman" ...


d3 <-  data.frame(d, d2)
d3


# read a tab-delimited text file with a header line into a dataframe,
# keeping character columns as character vectors (not factors)
path_to_file <- "/shared/projects/dubii2021/trainers/module3/data/Temperatures.txt"
temperatures <- read.table(path_to_file, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
temperatures
str(temperatures)

'data.frame':	12 obs. of  2 variables:
 $ Month    : chr  "January" "February" "March" "April" ...
 $ Mean_Temp: num  2 2.6 7.9 11.2 15.3 22.2 22.9 22.5 17.3 11.7 ...


# read from path_to_file again, this time converting character columns to factors
temperatures.2 <- read.table(path_to_file, sep = "\t", header = TRUE, stringsAsFactors = TRUE)
str(temperatures.2)

'data.frame':	12 obs. of  2 variables:
 $ Month    : Factor w/ 12 levels "April","August",..: 5 4 8 1 9 7 6 2 12 11 ...
 $ Mean_Temp: num  2 2.6 7.9 11.2 15.3 22.2 22.9 22.5 17.3 11.7 ...


levels(temperatures.2$Month)


# save a dataframe as a tab-delimited text file in the working directory
# (row names are written as well; character strings are not quoted)
write.table(myDataf, file = "bmi_data.txt", sep = "\t", quote = FALSE, col.names = TRUE)


rm(myDataf)
# read the file back: the header line has one field fewer than the data lines,
# so read.table() uses the first column as row names
myDataf <- read.table("bmi_data.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
head(myDataf) #myDataf is again accessible
file.remove("bmi_data.txt") #to clean the working directory


print(which ( myDataf$sex == "Woman") )

[1] 3 4 6


myDataf [ which ( myDataf$sex == "Woman") , ]


str(myDataf [ which ( myDataf$sex == "Woman") , ])

'data.frame':	3 obs. of  4 variables:
 $ weight: int  57 90 72
 $ size  : num  1.65 1.9 1.91
 $ bmi   : num  20.9 24.9 19.7
 $ sex   : chr  "Woman" "Woman" "Woman"


print(which ( myDataf$sex != "Man"))

[1] 3 4 6


print(which (! myDataf$sex == "Man"))

[1] 3 4 6


myDataf2 <- myDataf
myDataf2["Claire", "sex"] <- NA
myDataf2


myDataf2[myDataf2$sex == "Woman",]


myDataf2[which(myDataf2$sex == "Woman"),]


print(grep("Wom", myDataf$sex))

[1] 3 4 6


print(grep("Woman", myDataf$sex))

[1] 3 4 6


myDataf [grep("Woman", myDataf$sex), ]


print(grep("a", row.names(myDataf)))

[1] 1 3 4


myDataf [grep("a", row.names(myDataf)),]


# keep only the rows whose `sex` column equals "Woman"; the condition given to
# subset() is evaluated inside the dataframe, so the column is referred to by
# its name (not through a separate vector of the global environment)
WomenDataf <- subset(myDataf, sex == "Woman")
WomenDataf


filteredData <- myDataf [ which ( myDataf$sex == "Woman" & myDataf$weight < 80 & myDataf$bmi > 20), ]
filteredData


subset( myDataf, sex == "Woman" & weight < 80 & bmi > 20)


# add a row index column, derived from the number of rows rather than hard-coded
myDataf$index <- seq_len(nrow(myDataf))
myDataf


# second dataframe: an ID column (1 to 5, then 7) and an alternating
# handedness label; the columns are named directly at creation
OtherData <- data.frame(
  ID = c(1:5, 7),
  handedness = rep(c("right-handed", "left-handed"), times = 3)
)
OtherData


# merge the two dataframes using myDataf$index and OtherData$ID as the key;
# all.x / all.y = TRUE keep the rows without a match in the other dataframe
# (filled with NA), and sort = FALSE does not sort the result on the key
myMergedDataf <- merge(myDataf, OtherData, by.x = "index", by.y = "ID", all.x = TRUE, all.y = TRUE, sort = FALSE)
myMergedDataf


plot(myDataf$weight~myDataf$size) # scatter plot: weight (y axis) as a function of size (x axis)


boxplot(myDataf$weight) # distribution of weight, all rows together


boxplot(myDataf$weight~myDataf$sex) # one box per value of sex


a <- rnorm(1000) # to sample 1000 values from a normal distribution of mean 0 and standard deviation 1
hist(a, breaks=20) # the argument breaks is used to specify the number of intervals


ls()


save(myDataf,temperatures, file="RSession1_tutorial.RData")


sessionInfo()

R version 4.0.2 (2020-06-22)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.0.2/lib/libopenblasp-r0.3.10.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] digest_0.6.27   crayon_1.3.4    IRdisplay_0.7.0 repr_1.1.0     
 [5] lifecycle_0.2.0 jsonlite_1.7.2  evaluate_0.14   pillar_1.4.7   
 [9] rlang_0.4.10    uuid_0.1-4      vctrs_0.3.6     ellipsis_0.3.1 
[13] IRkernel_1.1.1  Cairo_1.5-12.2  tools_4.0.2     compiler_4.0.2 
[17] base64enc_0.1-3 htmltools_0.5.1 pbdZMQ_0.3-3.1

`x`	a numeric vector. Or, for `round` and `signif`, a complex vector.
`digits`	integer indicating the number of decimal places (`round`) or significant digits (`signif`) to be used. Negative values are allowed (see ‘Details’).
`...`	arguments to be passed to methods.

`x`	a numeric or complex vector.
`base`	a positive or complex number: the base with respect to which logarithms are computed. Defaults to e=`exp(1)`.

	weight	size	bmi
Fabien	120	3.50	39.18367
Pierre	144	3.60	44.44444
Sandrine	114	3.30	41.87328
Claire	180	3.80	49.86150
Bruno	190	3.48	62.75598
Delphine	144	3.82	39.47260

	weight	size	bmi
Fabien	60	1.75	19.59184
Pierre	72	1.80	22.22222
Sandrine	57	1.65	20.93664
Claire	90	1.90	24.93075
Bruno	95	1.74	31.37799
Delphine	72	1.91	19.73630

	Fabien	Pierre	Sandrine	Claire	Bruno	Delphine
weight	60.00000	72.00000	57.00000	90.00000	95.00000	72.0000
size	1.75000	1.80000	1.65000	1.90000	1.74000	1.9100
bmi	19.59184	22.22222	20.93664	24.93075	31.37799	19.7363

	weight	size	bmi
	<dbl>	<dbl>	<dbl>
Fabien	60	1.75	19.59184
Pierre	72	1.80	22.22222
Sandrine	57	1.65	20.93664
Claire	90	1.90	24.93075
Bruno	95	1.74	31.37799
Delphine	72	1.91	19.73630

Month	Mean_Temp
<chr>	<dbl>
January	2.0
February	2.6
March	7.9
April	11.2
May	15.3
June	22.2
July	22.9
August	22.5
September	17.3
October	11.7
November	5.2
December	2.8

	weight	size	bmi	sex
	<int>	<dbl>	<dbl>	<chr>
Fabien	60	1.75	19.59184	Man
Pierre	72	1.80	22.22222	Man
Sandrine	57	1.65	20.93664	Woman
Claire	90	1.90	24.93075	Woman
Bruno	95	1.74	31.37799	Man
Delphine	72	1.91	19.73630	Woman

ID	handedness
<dbl>	<chr>
1	right-handed
2	left-handed
3	right-handed
4	left-handed
5	right-handed
7	left-handed

object	Can it be heterogeneous?
vector	no
matrix	no
dataframe	yes
list	yes

A data.frame: 2 × 3
V1	V2	V3
<lgl>	<lgl>	<lgl>
NA	NA	NA
NA	NA	NA

A data.frame: 2 × 6
V1	V2	V3	V1.1	V2.1	new
<lgl>	<lgl>	<lgl>	<int>	<int>	<int>
NA	NA	NA	1	10	1
NA	NA	NA	2	11	2

DU Bii - module 3: R and stats¶

Session 1: tutorial on dataframes¶

Before going further¶

I. Some reminders on R basics¶

I.0 What is R ?¶

I.1 - R as a calculator¶

Rounding of Numbers

Description

Usage

Arguments

Details

S4 methods

Warning

References

See Also

Examples

Logarithms and Exponentials

Description

Usage

Arguments

Details

Value

S4 methods

Source

References

See Also

Examples

I-2 - Assigning data into R objects, using and reading them¶

I.3 - Managing your session¶

I-4 - Managing objects in your R Session and working directory¶

I.5 - Saving your data, session, and history¶

a - Saving specific data (or functions)¶

b - Saving all variables (and functions) at once¶

c- Save "history" = all past commands¶

I.6 - Classes and types of R objects¶

a - Classes of R objects¶

b. Main data structures in R¶

1. Vectors¶

2 - Matrices¶

II - Dataframes¶

II.1. - Creating a dataframe:¶

II.2. - Reading a text file into R and vice versa¶

a. reading a text file into R¶

b. writing a dataframe on your computer¶

II.3. - Subsetting a dataframe¶

a. The function which() returns the index of what is TRUE in a tested condition:¶

b. One can also search for a pattern with grep():¶

c. The function subset() is even simpler than which():¶

d. You can even combine conditions:¶

II.4. -Merging dataframes: using a column as a "key"¶

II.5 - Some basic plotting¶

a. scatter plot with the function plot()¶

b. Representation of quantitative data distribution:¶

**b - Saving all variables (and functions) at once**¶

a. The function `which()` returns the index of what is TRUE in a tested condition:¶

b. One can also search for a pattern with `grep()`:¶

c. The function `subset()` is even simpler than `which()`:¶

a. scatter plot with the function `plot()`¶