diff --git a/student_reports.Rmd b/student_reports.Rmd new file mode 100644 index 0000000000000000000000000000000000000000..67f5e9ec8f8bd655518e6989697790b31c2f94a0 --- /dev/null +++ b/student_reports.Rmd @@ -0,0 +1,103 @@ +--- +title: "Student reports" +author: "Laurent" +date: "16/03/2020" +output: pdf_document +--- + +## Data input + +Add a first section called Data input, in which you will load the rWSBIM1207 package, use the interroA.csv() function to get the name of a csv file containing test results for a set of students, and read these data into R using read_csv. Display the first few observations and write a short sentence explaining the data. + +```{r, message=FALSE, warning=FALSE} +## install.packages("BiocManager") +## install.packages("remotes") +## BiocManager::install("UCLouvain-CBIO/rWSBIM1207") +library(rWSBIM1207) +interroA.csv() +library("tidyverse") +x <- read_csv(interroA.csv()) +``` + +Attention, ne pas faire ceci!!! +```{r} +## read_csv("/usr/local/lib/R/site-library/rWSBIM1207/extdata/interroA.csv") +``` + + +ou + + + +```{r, message=FALSE} +x <- interroA.csv() %>% + read_csv() +``` + + +Affichage + +```{r} +x +``` + + +## Visualisation + + +Here, the goal is to visualise the score distributions for the four tests using ggplot2. These distributions will be visualised using boxplots. You will need to visualise these distributions for each test separately, and for male and female students. + +As discussed during the course, we need data in a long format to be able to use ggplot2. Start by converting these data into a long format using pivot_longer() (or gather()). Display the first rows of these new data and write a short sentence describing them and the transformation you just applied.
+ + +```{r} +xl <- x %>% + pivot_longer(names_to = "interro", + values_to = "res", + starts_with("interro")) +``` + +```{r} +x %>% + gather(key = "interro", + value = "res", + starts_with("interro")) +``` + +```{r} +x %>% + pivot_longer(names_to = "interro", + values_to = "res", + 5:8) +``` + +```{r} +x %>% + pivot_longer(names_to = "interro", + values_to = "res", + c(interro1, interro2, + interro3, interro4)) +``` + +```{r} +x %>% + pivot_longer(names_to = "interro", + values_to = "res", + -(1:4)) +``` + + +Use ggplot2 to visualise the score distributions using boxplots for each test and for female and male students. + + +```{r} +ggplot(xl, aes(x = interro, y = res)) + + geom_boxplot() + + facet_wrap(~ gender) +``` + +```{r} +ggplot(xl, aes(x = gender, y = res)) + + geom_boxplot() + + facet_wrap(~ interro) +``` diff --git a/student_reports.html b/student_reports.html new file mode 100644 index 0000000000000000000000000000000000000000..7be6ab14778873bc5bd41183ce97e52d6ee38eae --- /dev/null +++ b/student_reports.html @@ -0,0 +1,483 @@ + + + + +
+ + + + + + + + + +Add a first section called Data input, in which you will load the rWSBIM1207 package, use the interroA.csv() function to get the name of a csv file containing test results for a set of students, and read these data into R using read_csv. Display the first few observations and write a short sentence explaining the data.
+## install.packages("BiocManager")
+## install.packages("remotes")
+## BiocManager::install("UCLouvain-CBIO/rWSBIM1207")
+library(rWSBIM1207)
+##
+## This is 'rWSBIM1207' version 0.1.9
+interroA.csv()
+## [1] "/usr/local/lib/R/site-library/rWSBIM1207/extdata/interroA.csv"
+library("tidyverse")
+## ── Attaching packages ─────────── tidyverse 1.3.0 ──
+## ✓ ggplot2 3.3.0 ✓ purrr 0.3.3
+## ✓ tibble 2.1.3 ✓ dplyr 0.8.5
+## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
+## ✓ readr 1.3.1 ✓ forcats 0.5.0
+## ── Conflicts ────────────── tidyverse_conflicts() ──
+## x dplyr::filter() masks stats::filter()
+## x dplyr::lag() masks stats::lag()
+x <- read_csv(interroA.csv())
+## Parsed with column specification:
+## cols(
+## id = col_character(),
+## height = col_double(),
+## gender = col_character(),
+## X = col_double(),
+## interro1 = col_double(),
+## interro2 = col_double(),
+## interro3 = col_double(),
+## interro4 = col_double()
+## )
+ou
+ +x <- interroA.csv() %>%
+ read_csv()
+## Parsed with column specification:
+## cols(
+## id = col_character(),
+## height = col_double(),
+## gender = col_character(),
+## X = col_double(),
+## interro1 = col_double(),
+## interro2 = col_double(),
+## interro3 = col_double(),
+## interro4 = col_double()
+## )
+Affichage
+x
+## # A tibble: 100 x 8
+## id height gender X interro1 interro2 interro3 interro4
+## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
+## 1 A74890 168 M 1.43 16 18 7 10
+## 2 A85494 167 M 1.05 15 18 13 NA
+## 3 A51820 166 M 0.435 4 10 NA 7
+## 4 A98669 164 M 0.715 15 15 18 13
+## 5 A75521 171 M 0.917 18 10 17 NA
+## 6 A96704 178 F -2.66 11 20 14 17
+## 7 A23214 155 M 1.11 12 2 8 14
+## 8 A31124 177 M -0.485 19 4 8 20
+## 9 A80471 187 F 0.231 19 16 16 8
+## 10 A21783 195 F -0.295 13 11 8 20
+## # … with 90 more rows
+