Writing functions

Using the gapminder dataset, calculate the range (maximum minus minimum) of life expectancy, population, and GDP per capita

# Range (max - min) of life expectancy, rounded to 1 decimal place
round(max(gapminder$lifeExp) - min(gapminder$lifeExp), 1)
## [1] 59
# Range of population
round(max(gapminder$pop) - min(gapminder$pop), 1)
## [1] 1318623085
# Range of GDP per capita
round(max(gapminder$gdpPercap) - min(gapminder$gdpPercap), 1)
## [1] 113282

Make a function for calculating range

# Spread of a numeric vector: largest value minus smallest value, rounded.
#
# Arguments:
#   x      numeric vector
#   digits number of decimal places kept in the result (default 1)
#   na.rm  if TRUE, missing values are dropped before taking max/min;
#          with the default FALSE any NA in x makes the result NA
# Returns: a single number, round(max(x) - min(x), digits)
max_minus_min <- function(x, digits = 1, na.rm = FALSE) {
  round(max(x, na.rm = na.rm) - min(x, na.rm = na.rm), digits)
}

Test-run your function

# Expect 59, the same life-expectancy range computed by hand earlier
max_minus_min(gapminder$lifeExp)
## [1] 59

Iterations using purrr package

Apply the class() function to each column of the gapminder data

# Call class() on every column; map() returns a list with one entry per column
map(gapminder, class)
## $country
## [1] "factor"
## 
## $continent
## [1] "factor"
## 
## $year
## [1] "integer"
## 
## $lifeExp
## [1] "numeric"
## 
## $pop
## [1] "integer"
## 
## $gdpPercap
## [1] "numeric"

The default output of map() is a list. If you want a character vector as output, use map_chr()

# Same per-column class() call, collected into a named character vector
map_chr(gapminder, class)
##   country continent      year   lifeExp       pop gdpPercap 
##  "factor"  "factor" "integer" "numeric" "integer" "numeric"

Another example: what is the number of distinct values in each column? Hint: use n_distinct()

# Count the distinct values in each column; map_int() gives integer output
map_int(gapminder, n_distinct)
##   country continent      year   lifeExp       pop gdpPercap 
##       142         5        12      1626      1704      1704

What is the median of all numeric columns?

# Keep only the numeric columns, then take the median of each one.
# select(where(...)) is the current dplyr idiom; select_if() is superseded.
gapminder %>%
  dplyr::select(where(is.numeric)) %>%
  map_dbl(median)
##         year      lifeExp          pop    gdpPercap 
##    1979.5000      60.7125 7023595.5000    3531.8470

Using ~ with map()

~ helps reduce the amount of typing when you want to pass complex functions through map()

Example

my_vector <- c(1, 2, 3)
# Add 10 to every element using a full anonymous function
map_dbl(my_vector, function(x) {
  x + 10
})
## [1] 11 12 13

Shortcut of the same code using ~

my_vector <- c(1, 2, 3)
# The ~ formula is shorthand for an anonymous function;
# .x stands for the current element
map_dbl(my_vector, ~ .x + 10)
## [1] 11 12 13

More complex example:

Fitting a linear model with different groups of the data

# Fit lifeExp ~ pop separately for each continent; the result is a list
# holding one fitted lm object per continent.
gapminder %>% 
  split(.$continent) %>%  # split the dataset into one data frame per continent
  map(function(df) lm(lifeExp ~ pop, data = df)) # fit a linear model to each piece
## $Africa
## 
## Call:
## lm(formula = lifeExp ~ pop, data = df)
## 
## Coefficients:
## (Intercept)          pop  
##   4.816e+01    7.150e-08  
## 
## 
## $Americas
## 
## Call:
## lm(formula = lifeExp ~ pop, data = df)
## 
## Coefficients:
## (Intercept)          pop  
##   6.353e+01    4.587e-08  
## 
## 
## $Asia
## 
## Call:
## lm(formula = lifeExp ~ pop, data = df)
## 
## Coefficients:
## (Intercept)          pop  
##   5.992e+01    1.901e-09  
## 
## 
## $Europe
## 
## Call:
## lm(formula = lifeExp ~ pop, data = df)
## 
## Coefficients:
## (Intercept)          pop  
##   7.162e+01    1.650e-08  
## 
## 
## $Oceania
## 
## Call:
## lm(formula = lifeExp ~ pop, data = df)
## 
## Coefficients:
## (Intercept)          pop  
##   7.207e+01    2.545e-07

Shortcut of the same code using ~

# Same per-continent models as the anonymous-function version; inside the
# ~ lambda, `.` refers to the current continent's data frame.
gapminder %>% 
  split(.$continent) %>% # split the dataset into one data frame per continent
  map(~lm(lifeExp ~ pop, data = .)) # fit a linear model to each piece
## $Africa
## 
## Call:
## lm(formula = lifeExp ~ pop, data = .)
## 
## Coefficients:
## (Intercept)          pop  
##   4.816e+01    7.150e-08  
## 
## 
## $Americas
## 
## Call:
## lm(formula = lifeExp ~ pop, data = .)
## 
## Coefficients:
## (Intercept)          pop  
##   6.353e+01    4.587e-08  
## 
## 
## $Asia
## 
## Call:
## lm(formula = lifeExp ~ pop, data = .)
## 
## Coefficients:
## (Intercept)          pop  
##   5.992e+01    1.901e-09  
## 
## 
## $Europe
## 
## Call:
## lm(formula = lifeExp ~ pop, data = .)
## 
## Coefficients:
## (Intercept)          pop  
##   7.162e+01    1.650e-08  
## 
## 
## $Oceania
## 
## Call:
## lm(formula = lifeExp ~ pop, data = .)
## 
## Coefficients:
## (Intercept)          pop  
##   7.207e+01    2.545e-07

Reading multiple files using purrr

Download the data here - Google drive link

This is data from different countries. The “_gm” suffix stands for gapminder, the source the data is borrowed from. In the example below, the files are in a folder called “data”

Steps 1 and 2: Make a list of all .csv files with the _gm suffix

# List every file in the project's "data" folder whose name ends in "_gm.csv".
# `pattern` is a regular expression, not a shell glob: the dot is escaped so it
# matches a literal ".", and "$" anchors the match to the end of the file name.
my_files <- dir(here("data"), # specify file path
                pattern = "_gm\\.csv$", # look for .csv files with _gm suffix
                full.names = TRUE) # preserve file path
my_files
## [1] "/Users/meenakshikushwaha/Dropbox/R projects/github/CSTEP_R_course/data/china_gm.csv"
## [2] "/Users/meenakshikushwaha/Dropbox/R projects/github/CSTEP_R_course/data/india_gm.csv"
## [3] "/Users/meenakshikushwaha/Dropbox/R projects/github/CSTEP_R_course/data/japan_gm.csv"
## [4] "/Users/meenakshikushwaha/Dropbox/R projects/github/CSTEP_R_course/data/nepal_gm.csv"

Step 3: Read and combine all files using map_dfr()

# Read each file in my_files with read_csv() and row-bind the results
# into a single data frame.
my_df <- map_dfr(my_files, read_csv)

my_df
## # A tibble: 16 × 6
##    country continent  year lifeExp       pop gdpPercap
##    <chr>   <chr>     <dbl>   <dbl>     <dbl>     <dbl>
##  1 China   Asia       1952    44   556263527      400.
##  2 China   Asia       1957    50.5 637408000      576.
##  3 China   Asia       1962    44.5 665770000      488.
##  4 China   Asia       1967    58.4 754550000      613.
##  5 India   Asia       1952    37.4 372000000      547.
##  6 India   Asia       1957    40.2 409000000      590.
##  7 India   Asia       1962    43.6 454000000      658.
##  8 India   Asia       1967    47.2 506000000      701.
##  9 Japan   Asia       1952    63.0  86459025     3217.
## 10 Japan   Asia       1957    65.5  91563009     4318.
## 11 Japan   Asia       1962    68.7  95831757     6577.
## 12 Japan   Asia       1967    71.4 100825279     9848.
## 13 Nepal   Asia       1952    36.2   9182536      546.
## 14 Nepal   Asia       1957    37.7   9682338      598.
## 15 Nepal   Asia       1962    39.4  10332057      652.
## 16 Nepal   Asia       1967    41.5  11261690      676.
LS0tCnRpdGxlOiAiRnVuY3Rpb25zIGFuZCBJdGVyYXRpb25zIgphdXRob3I6ICJNZWVuYWtzaGkgS3VzaHdhaGEiCmRhdGU6ICIyMDIyLTA4LTE4IgoKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIHRoZW1lOiBmbGF0bHkKICAgIGNvZGVfZm9hbGRpbmc6IHRydWUKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCBtZXNzYWdlID0gRkFMU0UsIGNhY2hlID0gVFJVRSwgd2FybmluZyA9IEZBTFNFKQpsaWJyYXJ5KGdhcG1pbmRlcikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoaGVyZSkKYGBgCgojIyBXcml0aW5nIGZ1bmN0aW9ucwpJbiB0aGUgYGdhcG1pbmRlcmAgZGF0YXNldCBjYWxjdWxhdGUgdGhlIHJhbmdlIG9mIHBvcHVsYXRpb24sIGdkcCwgYW5kIGxpZmUgZXhwZWN0YW5jeQoKYGBge3J9CnJvdW5kKG1heChnYXBtaW5kZXIkbGlmZUV4cCkgLSBtaW4oZ2FwbWluZGVyJGxpZmVFeHApLDEpCgpyb3VuZChtYXgoZ2FwbWluZGVyJHBvcCkgLSBtaW4oZ2FwbWluZGVyJHBvcCksMSkKCnJvdW5kKG1heChnYXBtaW5kZXIkcG9wKSAtIG1pbihnYXBtaW5kZXIkcG9wKSwxKQpgYGAKCk1ha2UgYSBmdW5jdGlvbiBmb3IgY2FsY3VsYXRpbmcgcmFuZ2UKYGBge3J9Cm1heF9taW51c19taW4gPC0gZnVuY3Rpb24oeCl7CiAgcm91bmQoKG1heCh4KSAtIG1pbih4KSksIDEpCiAgfQpgYGAKClRlc3QtcnVuIHlvdXIgZnVuY3Rpb24KYGBge3J9Cm1heF9taW51c19taW4oZ2FwbWluZGVyJGxpZmVFeHApCmBgYAoKIyMgSXRlcmF0aW9ucyB1c2luZyBgcHVycnJgIHBhY2thZ2UKCkFwcGx5IGBjbGFzcygpYCBmdW5jdGlvbiB0byBlYWNoIGNvbHVtbiBvZiBnYW1wbWluZGVyIGRhdGEKCmBgYHtyfQpnYXBtaW5kZXIgJT4lIAogIG1hcChjbGFzcykKCmBgYAoKVGhlIGRlZmF1bHQgb3V0cHV0IG9mIGBtYXAoKWAgaXMgYSBsaXN0LiBJZiB5b3Ugd2FudCBhIGNoYXJhY3RlciB2ZWN0b3IgYXMgb3VwdXQgdXNlIGBtYXBfY2hyKClgCgpgYGB7cn0KZ2FwbWluZGVyICU+JSAKICBtYXBfY2hyKGNsYXNzKQpgYGAKCkFub3RoZXIgZXhhbXBsZSwgV2hhdCBpcyB0aGUgbnVtYmVyIG9mIGRpc3RpbmN0IHZhbHVlcyBpbiBlYWNoIGNvbHVtbj8gCkhpbnQ6IHVzZSBgbl9kaXN0aW5jdCgpYAoKYGBge3J9CmdhcG1pbmRlciAlPiUgCiAgbWFwX2ludChuX2Rpc3RpbmN0KSAjIHVzaW5nIG1hcF9pbnQgZm9yIGludGVnZXIgb3V0cHV0CmBgYAoKV2hhdCBpcyB0aGUgbWVkaWFuIG9mIGFsbCBudW1lcmljIGNvbHVtbnM/CmBgYHtyfQpnYXBtaW5kZXIgJT4lIAogIGRwbHlyOjpzZWxlY3RfaWYoaXMubnVtZXJpYykgJT4lIAogIG1hcF9kYmwobWVkaWFuKQpgYGAKCiMjIFVzaW5nIGB+YCB3aXRoIGBtYXAoKWAKCn4gaGVscHMgcmVkdWNlIHRoZSBhbW91bnQgb2YgdHlwaW5nIHdoZW4g
eW91IHdhbnQgdG8gcGFzcyBjb21wbGV4IGZ1bmN0aW9ucyB0aHJvdWdoIGBtYXAoKWAKCkV4YW1wbGUKYGBge3J9Cm15X3ZlY3RvciA8LSBjKDEsIDIsIDMpCm1hcF9kYmwobXlfdmVjdG9yLCBmdW5jdGlvbih4KXt4KzEwfSkKYGBgCgpTaG9ydGN1dCBvZiB0aGUgc2FtZSBjb2RlIHVzaW5nIGB+YApgYGB7cn0KbXlfdmVjdG9yIDwtIGMoMSwgMiwgMykKbWFwX2RibChteV92ZWN0b3IsIH4oLisxMCkpCmBgYAoKTW9yZSBjb21wbGV4IGV4YW1wbGU6CgpGaXR0aW5nIGEgbGluZWFyIG1vZGVsIHdpdGggZGlmZmVyZW50IGdyb3VwcyBvZiB0aGUgZGF0YQoKYGBge3J9CmdhcG1pbmRlciAlPiUgCiAgc3BsaXQoLiRjb250aW5lbnQpICU+JSAgIyBzcGxpdCBkYXRhc2V0IGJ5IGNvbnRpbmVudAogIG1hcChmdW5jdGlvbihkZikgbG0obGlmZUV4cCB+IHBvcCwgZGF0YSA9IGRmKSkgIyBsaW5lYXIgbW9kZWwgZm9yIGVhY2ggZ3JvdXAKYGBgCgpTaG9ydGN1dCBvZiB0aGUgc2FtZSBjb2RlIHVzaW5nIGB+YAoKYGBge3J9CmdhcG1pbmRlciAlPiUgCiAgc3BsaXQoLiRjb250aW5lbnQpICU+JSAjIHNwbGl0IGRhdGFzZXQgYnkgY29udGluZW50CiAgbWFwKH5sbShsaWZlRXhwIH4gcG9wLCBkYXRhID0gLikpICMgbGluZWFyIG1vZGVsIGZvciBlYWNoIGdyb3VwCmBgYAoKIyMgUmVhZGluZyBtdWx0aXBsZSBmaWxlcyB1c2luZyBgcHVycnIoKWAKRG93bmxvYWQgdGhlIGRhdGEgaGVyZSAtIFtHb29nbGUgZHJpdmUgbGlua10oaHR0cHM6Ly9kcml2ZS5nb29nbGUuY29tL2RyaXZlL2ZvbGRlcnMvMVg1NEhzSlVXbng5a0FLWW9UZGNlZVBvRVAxMTM5YktOP3VzcD1zaGFyaW5nKQoKVGhpcyBpcyBkYXRhIGZyb20gZGlmZnJlbnQgY291bnRyaWVzLiAiX2dtIiBzdWZmaXggaXMgZm9yIGdhcG1pbmRlciBmcm9tIHdoZXJlIGRhdGEgaXMgYm9ycm93ZWQuIEluIHRoZSBleGFtcGxlIGJlbG93LCB0aGUgZmlsZXMgaW4gYSBmb2xkZXIgY2FsbGVkICJkYXRhIgoKClN0ZXAgMSBhbmQgMjogTWFrZSBhIGxpc3Qgb2YgYWxsIGAuY3N2YCBmaWxlcyB3aXRoIF9nbSBzdWZmaXgKYGBge3J9Cm15X2ZpbGVzIDwtIGRpcihoZXJlKCJkYXRhIiksICMgc3BlY2lmeSBmaWxlIHBhdGgKICAgICAgICAgICAgICAgIHBhdHRlcm4gPSAiKl9nbS5jc3YiLCAgIyBsb29rIGZvciAuY3N2IGZpbGVzIHdpdGggX2dtIHN1ZmZpeAogICAgICAgICAgICAgICAgZnVsbC5uYW1lcyA9IFRSVUUpICAjIHByZXNlcnZlIGZpbGUgcGF0aApteV9maWxlcwpgYGAKClN0ZXAgMzogUmVhZCBhbmQgY29tYmluZSBhbGwgZmlsZXMgdXNpbmcgYG1hcF9kZnIoKWAKYGBge3J9CgpteV9kZiA8LSBteV9maWxlcyAlPiUgCiAgbWFwX2RmcihyZWFkX2NzdikgCgpteV9kZgpgYGAKCg==