\(MOSAIC_{data}\)

Block 0: Introduction to R, Part 2

Oliver Nakoinz, Lizzie Scholtus, Néhémie Strupler

2022-07-11
License: CC BY-SA 4.0

Problem

a <- log(as.matrix(data.frame(id = 1:4,
                              b = c(5, 4, 2, 3),
                              c = c(7, 7, 6, 7))))

Answer

log(as.matrix(data.frame(id = 1:4,
                         b = c(5, 4, 2, 3),
                         c = c(7, 7, 6, 7))))
##             id         b        c
## [1,] 0.0000000 1.6094379 1.945910
## [2,] 0.6931472 1.3862944 1.945910
## [3,] 1.0986123 0.6931472 1.791759
## [4,] 1.3862944 1.0986123 1.945910

Problem

data.frame(id = 1:3
           b = c("a", "b", "c"),
           c = (9, 4, 5))

Answer

data.frame(id = 1:3
           b = c("a", "b", "c"),
           c = (9, 4, 5))
data.frame(id = 1:3,
           b = c("a", "b", "c"),
           c = c(9, 4, 5))
id b c
1 a 9
2 b 4
3 c 5

Problem

a <- log(as.matrix(data.frame(id = 1:4,
                              b = c(5, 4, 2, 3),
                              c = c(7, 7, 6, 7))))
d <- meen(a[2:4, 2:4]))

Answer

a <- log(as.matrix(data.frame(id = 1:4,
                              b = c(5, 4, 2, 3),
                              c = c(7, 7, 6, 7))))
d <- meen(a[2:4, 2:4]))
a <- log(as.matrix(data.frame(id = 1:4,
                              b = c(5, 4, 2, 3),
                              c = c(7, 7, 6, 7))))
d <- mean(a[2:4, 1:3])
d
## [1] 1.337743

Create data objects

a <- c(1, 2, 3)
b <- c(6, 3, 6)
df <- data.frame(a,
                 col_b = b)
df
a col_b
1 6
2 3
3 6
a <- c(1, 2, 3)
b <- c(6, 3, 6)
df2 <- cbind(a,
             col_b = b)
df2
##      a col_b
## [1,] 1     6
## [2,] 2     3
## [3,] 3     6

Create data objects

str(df)
## 'data.frame':    3 obs. of  2 variables:
##  $ a    : num  1 2 3
##  $ col_b: num  6 3 6
str(df2)
##  num [1:3, 1:2] 1 2 3 6 3 6
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:2] "a" "col_b"
str(as.data.frame(df2))
## 'data.frame':    3 obs. of  2 variables:
##  $ a    : num  1 2 3
##  $ col_b: num  6 3 6

Create data objects

explain the Code!

a <- c(1, 6)
b <- c(2, 3)
c <- c(3, 6)
df3 <- as.data.frame(rbind(a, b, c))
df3
V1 V2
a 1 6
b 2 3
c 3 6

… and update of column names:

colnames(df3) <- c("a", "b")
df3
a b
a 1 6
b 2 3
c 3 6

Create data externally

id a b c
1 5 8,3 “abc”
2 4 7,5 “def”
3 5 5,2 “ghi”
4 2 4,8 “jkl”
5 6 8,9 “mno”

Save data as xxx.csv gespeichert werden. The parameters can be ajusted:

.csv can be edited in any texteditor (including RStudio).

Note

Why don’t we recommend direct database access in most cases?

Answer

Working directory

getwd()
## [1] "/home/fon/Dropbox/daten/lehre/lv/LV100_uebEinfR/6praes"
wd <- "C:\\Folder\\Subfolder\\SubSubFolder"    # Windows
wd <- "/home/xxx/subFolder/SubSubFolder"       # Linux
setwd(wd)

Read files

tdaten_name <- "../2data/22archdata/testdaten1.csv"
test_df <- read.csv(tdaten_name)
head(test_df)
id a b c
1 5 8,3 abc
2 4 7,5 def
3 5 5,2 ghi
4 2 4,8 jkl
5 6 8,9 mno

Is the result all right?

Read files

test_df$c <- as.character(test_df$c)
head(test_df)
id a b c
1 5 8,3 abc
2 4 7,5 def
3 5 5,2 ghi
4 2 4,8 jkl
5 6 8,9 mno

Since R Version 4.0 text is loaded as text and not as factor.

Read files

as.double(test_df$b)
## [1] 4 3 2 1 5

Is this correct? Why not?

Read files

as.numeric(sub(",",
               ".",
               test_df$b,
               fixed = TRUE))
## [1] 8.3 7.5 5.2 4.8 8.9
tdaten_name <- "../2data/22archdata/testdaten1.csv"
test_df <- read.csv(tdaten_name,
                    dec = ",",
                    stringsAsFactors = FALSE)   # since R 4.0 default
head(test_df)
id a b c
1 5 8.3 abc
2 4 7.5 def
3 5 5.2 ghi
4 2 4.8 jkl
5 6 8.9 mno

Hinweis

Write files

tdaten_name <- "../2data/22archdata/testdaten2.csv"
write.csv(test_df,
          tdaten_name,
          row.names = FALSE)

functions with underscores such as write_csv() or read_csv() are part of the tidyverse

write.csv2() uses ; as separator by default

Databases

library("RSQLite")
con = RSQLite::dbConnect(SQLite(),
                dbname = "../2data/22archdata/neckRings.sqlite")
RSQLite::dbListTables(con)
rings = RSQLite::dbGetQuery(con,
                   'select * from rings')
RSQLite::dbDisconnect(con)
rings[1:7, 1:3]
## [1] "rings"
id no type
1 02.08.00.01.01 M2.1.1.1.1.2.4.1
2 02.09.00.01.01 M2.1.1.1.1.2.1.1
3 02.22.00.01.01 M2.1.1.1.1.2.4.1
4 02.23.00.01.01 M2.1.1.1.1.1.1
5 03.01.00.02.02 M2.1.1.1.3.1
6 03.01.00.06.02 M2.1.1.1.3.3.1
7 03.01.00.07.01 M2.1.1.1.3.1

RSQLite: Müller et al. (2020)

SQL on dataframes

library(sqldf)
sqldf::sqldf('select type from rings where id is 3')
sqldf::sqldf("select * from rings where type LIKE 'M2.1.1.1.3.1%'")
type
M2.1.1.1.1.2.4.1
id no type Material Form d Seitenansicht Querschnittmaß glatt tordiert gewendelt wendstZahl Strichgruppen Gruppenzahl Strichzahl Zwischenmuster Zwischenwülste Wulst W.Gruppenzahl Wulstzahl Zwischenmuster2 Zwischenrippenz. Rippen Rippengruppenzahl Rippenzahl Längsrillen Rillenzahl Punzen Knöpfe Ösen Ösenzahl
5 03.01.00.02.02 M2.1.1.1.3.1 Bronze 2 19,5 1 1,0 1 0
7 03.01.00.07.01 M2.1.1.1.3.1 Bronze 2 14,0 1 0,4 1 0
44 05.22.01.03.01 M2.1.1.1.3.1 Bronze 2 18,4 1 1,0 1 0
54 05.31.01.01.01 M2.1.1.1.3.1 Bronze 2 19,5 1 0,4 1 0
83 09.03.13.01.01 M2.1.1.1.3.1.1 Bronze 2 18,0 3 0,9 1 0
93 11.07.01.01.01 M2.1.1.1.3.1.1 Bronze 2 19,7 1 0,8 1 0
101 14.06.00.01.01 M2.1.1.1.3.1 Bronze 2 10,0 3 0,7 1 0
112 14.18.02.00.02 M2.1.1.1.3.1 Bronze 2 1 0,3 1 0
113 15.01.01.01.01 M2.1.1.1.3.1.2 Bronze 2 18,0 3 1,2 1 0
116 15.10.00.01.01 M2.1.1.1.3.1.2 Bronze 2 18,6 1 1,0 1 0
118 16.03.01.01.01 M2.1.1.1.3.1.3 Bronze 2 18,3 1 0,8 1 0
128 16.07.02.03.01 M2.1.1.1.3.1.3 Bronze 2 15,2 1 0,4 1 0
130 16.08.01.01.01 M2.1.1.1.3.1 Bronze 2 1 0,6 1 0
132 16.10.00.01.01 M2.1.1.1.3.1.3 Bronze 2 18,0 1 0,4 1 0
152 20.03.05.01.01 M2.1.1.1.3.1 Bronze 2 18,6 3 0,8 1 0
159 20.16.00.01.01 M2.1.1.1.3.1.3 Bronze 2 16,0 1 0,6 1 0
160 20.18.00.01.01 M2.1.1.1.3.1.2 Bronze 2 16,6 3 0,8 1 0
161 20.19.02.01.01 M2.1.1.1.3.1.3 Bronze 2 17,0 1 0,8 1 0
177 20.37.00.01.01 M2.1.1.1.3.1.3 Bronze 2 18,6 1 0,8 1 0
183 20.38.00.00.31 M2.1.1.1.3.1.3 Bronze 2 14,0 1 0,6 1 0
184 20.38.00.00.32 M2.1.1.1.3.1.3 Bronze 2 17,7 3 0,9 1 0
186 20.47.00.01.01 M2.1.1.1.3.1.3 Bronze 2 20,1 1 0,7 1 0
188 20.47.00.05.01 M2.1.1.1.3.1 Bronze 2 16,5 1 0,9 1 0
189 20.47.00.06.01 M2.1.1.1.3.1.2 Bronze 2 18,3 1 1,0 1 0
208 21.05.00.01.01 M2.1.1.1.3.1.3 Bronze 2 15,8 1 0,8 1 0
211 21.06.00.03.01 M2.1.1.1.3.1.1 Bronze 2 18,0 3 0,8 1 0
213 21.06.00.06.01 M2.1.1.1.3.1 Bronze 2 20,0 1 0
214 21.06.00.07.01 M2.1.1.1.3.1 Bronze 2 18,0 1 0
215 21.06.00.08.01 M2.1.1.1.3.1.2 Bronze 2 18,0 1 0
216 21.06.00.09.01 M2.1.1.1.3.1.3 Bronze 2 15,7 1 0,8 1 0
221 21.07.00.09.01 M2.1.1.1.3.1.2 Bronze 2 18,0 1 0,8 1 0
228 21.10.00.01.01 M2.1.1.1.3.1.3 Bronze 2 15,5 1 0,7 1 0
229 21.10.00.03.01 M2.1.1.1.3.1 Bronze 1 15,0 1 0
233 21.11.00.05.01 M2.1.1.1.3.1 Bronze 2 18,0 1 0
251 23.09.00.00.01 M2.1.1.1.3.1 Bronze 2 17,5 1 1,1 1 0
254 23.12.01.01.01 M2.1.1.1.3.1.3 Bronze 2 14,3 1 1,0 1 0
271 24.13.10.01.01 M2.1.1.1.3.1.3 Bronze 2 20,0 1 1,1 1 0
327 36.04.04.01.01 M2.1.1.1.3.1.3 Bronze 2 16,5 1 0,6 1 0
398 47.01.04.02.01 M2.1.1.1.3.1.1 Bronze 2 17,8 1 1,2 1 0
421 47.19.00.00.08 M2.1.1.1.3.1.2 Bronze 2 16,0 1 0,8 1 0
422 47.19.00.00.09 M2.1.1.1.3.1.1 Bronze 2 19,0 1 1,1 1 0
426 47.19.00.00.13 M2.1.1.1.3.1 Bronze 2 11,0 1 1 0
431 47.19.00.00.26 M2.1.1.1.3.1.1 Bronze 2 18,5 1 0,8 1 0
434 47.24.03.01.01 M2.1.1.1.3.1.1 Bronze 1 19,5 1 0,9 1 0
461 53.01.07.02.01 M2.1.1.1.3.1 Bronze 2 18,0 3 0,6 1 0
465 53.03.02.01.01 M2.1.1.1.3.1.3 Bronze 2 18,0 1 0,8 1 0
469 53.10.15.01.04 M2.1.1.1.3.1.3 Bronze 2 18,3 1 0,7 1 0
478 55.06.00.01.01 M2.1.1.1.3.1.1 Bronze 2 18,0 1 1,3 1 0
500 58.01.10.01.01 M2.1.1.1.3.1.3 Bronze 2 14,2 1 0,7 1 0
501 58.01.10.01.02 M2.1.1.1.3.1.3 Bronze 2 19,5 1 0,6 1 0
502 58.03.00.01.01 M2.1.1.1.3.1.1 Bronze 2 15,5 1 1,2 1 0
508 61.08.00.02.04 M2.1.1.1.3.1.2 Bronze 2 17,0 1 1,0 1 0
515 70.10.00.00.01 M2.1.1.1.3.1.2 Bronze 2 1 1 0
516 70.10.00.00.14 M2.1.1.1.3.1 Bronze 2 1 1 0
517 70.10.00.00.26 M2.1.1.1.3.1 Bronze 2 1 1 0
522 70.29.00.08.02 M2.1.1.1.3.1.1 Bronze 2 12,4 1 0,5 1 0

sqldf: Grothendieck (2017)

Links:

5 min. Break

Visualisation

plot(test_df[, 2:3])

Visualisation

plot(test_df[, 2:3],
     main  = "Testplot")
lines(test_df[, 2:3])
points(test_df[1:3, 2:3],
       pch = 17,
       col = "red",
       cex = 3)
text(test_df[, 2:3],
       labels = test_df[, 1],
       pos    = 4,
       cex    = 1)

Visualisation

boxplot(test_df[, 2:3],
        main = "Boxplot")

barplot(test_df[, 2],
        main = "Barplot")

Visualisation

hist(test_df[, 2],
     main = "Histogramm",
     col = "grey")

table(test_df[, 2])
## 
## 2 4 5 6 
## 1 1 2 1

Export plots

png(filename  = "../4figures/histogr.png",
    width     = 480,
    height    = 480,
    units     = "px",
    pointsize = 12,
    bg        = "white"
    )
        hist(test_df[, 2],
             main = "Histogramm",
             col  = "grey"
             )
dev.off()

Explore parameter for .pdf, .svg, .jpeg and .tiff.

Note

A tutorial for data wrangling in base R is here.

Archaeological data

The package archdata (Carlson/Roth 2021) provides some archaeological data.

library(archdata)
data("Fibulae")
head(Fibulae)
Grave Mno FL BH BFA FA CD BRA ED FEL C BW BT FEW Coils Length
149 389 93 24 7 10 16 1 13 31 47 3.5 3.5 NA 4 114
190 615 21 7 6 9 6 5 2 11 10 3.5 1.7 NA 12 35
161 125 33 15 2 8 7 3 8 10 20 3.9 3.2 NA 4 60
31 812 23 26 4 7 9 5 12 1 16 6.2 7.7 2.8 4 74
49 798 20 23 2 8 7 1 8 5 16 7.7 5.2 2.6 6 68
6 673 27 15 6 8 7 5 3 11 11 3.7 3.5 1.8 4 55

Exercise

id a b c
1 5 8,3 “abc”
2 4 7,5 “def”
3 5 5,2 “ghi”
4 2 4,8 “jkl”
5 6 8,9 “mno”

Break

References

Carlson/Roth 2021: D. L. Carlson/G. Roth, Archdata: Example datasets from archaeological research (2021). https://cran.r-project.org/package=archdata.
Dowle/Srinivasan 2019: M. Dowle/A. Srinivasan, Data.table: Extension of ‘data.frame‘ (2019). https://cran.r-project.org/package=data.table.
Grothendieck 2017: G. Grothendieck, Sqldf: Manipulate r data frames using SQL (2017). https://cran.r-project.org/package=sqldf.
Kunst 2022: J. Kunst, Highcharter: A wrapper for the ’highcharts’ library (2022). https://cran.r-project.org/package=highcharter.
Magnusson/Burgos 2014: A. Magnusson/J. Burgos, r2d2: Bivariate (two-dimensional) confidence region and frequency distribution (2014). https://cran.r-project.org/package=r2d2.
Müller et al. 2020: K. Müller/H. Wickham/D. A. James/S. Falcon, RSQLite: ’SQLite’ interface for r (2020). https://cran.r-project.org/package=rsqlite.
Sarkar 2008: D. Sarkar, Lattice: Multivariate data visualization with r (New York 2008). http://lmdvr.r-forge.r-project.org.
Wickham 2016: H. Wickham, ggplot2: Elegant graphics for data analysis (2016). https://ggplot2.tidyverse.org.
Wickham/Hester 2021: H. Wickham/J. Hester, Readr: Read rectangular text data (2021). https://cran.r-project.org/package=readr.