Presenting Summary Statistics

Simple table in R

gapminder %>% 
  filter(country == "India",
         year > 1980) %>% 
  select(year, pop, lifeExp ) %>% 
  knitr::kable() # print in table format
year pop lifeExp
1982 708000000 56.596
1987 788000000 58.553
1992 872000000 60.223
1997 959000000 61.765
2002 1034172547 62.879
2007 1110396331 64.698

Summary statistics

Generate summary statistics by continent for gapminder dataset

gapminder %>% 
  select(-country) %>% # drop country and year variable
  tbl_summary()
Characteristic N = 1,7041
continent
Africa 624 (37%)
Americas 300 (18%)
Asia 396 (23%)
Europe 360 (21%)
Oceania 24 (1.4%)
year 1,980 (1,966, 1,993)
lifeExp 61 (48, 71)
pop 7,023,596 (2,793,664, 19,585,222)
gdpPercap 3,532 (1,202, 9,325)
1 n (%); Median (IQR)

Customizing your table

Include/exclude variables

gapminder %>% 
  select(-country) %>% # drop country variable
  tbl_summary(include = -year) # include everything but year
Characteristic N = 1,7041
continent
Africa 624 (37%)
Americas 300 (18%)
Asia 396 (23%)
Europe 360 (21%)
Oceania 24 (1.4%)
lifeExp 61 (48, 71)
pop 7,023,596 (2,793,664, 19,585,222)
gdpPercap 3,532 (1,202, 9,325)
1 n (%); Median (IQR)
gapminder %>% 
  select(-country) %>% # drop country variable
  tbl_summary(include = c(continent, lifeExp, gdpPercap))
Characteristic N = 1,7041
continent
Africa 624 (37%)
Americas 300 (18%)
Asia 396 (23%)
Europe 360 (21%)
Oceania 24 (1.4%)
lifeExp 61 (48, 71)
gdpPercap 3,532 (1,202, 9,325)
1 n (%); Median (IQR)

Format labels

gapminder %>% 
  select(-country, - year) %>% # drop country and year variable
  tbl_summary() %>% 
  bold_labels()
Characteristic N = 1,7041
continent
Africa 624 (37%)
Americas 300 (18%)
Asia 396 (23%)
Europe 360 (21%)
Oceania 24 (1.4%)
lifeExp 61 (48, 71)
pop 7,023,596 (2,793,664, 19,585,222)
gdpPercap 3,532 (1,202, 9,325)
1 n (%); Median (IQR)

Saving your table

Save your table as .docx file

gapminder %>% 
  select(-country, - year) %>% # drop country and year variable
  tbl_summary() %>% 
  as_flex_table() %>% # save table as Word file 
  save_as_docx(path = here("myfile.docx")) 

Simple linear models in R

lm(lifeExp ~ pop, data = gapminder)
## 
## Call:
## lm(formula = lifeExp ~ pop, data = gapminder)
## 
## Coefficients:
## (Intercept)          pop  
##   5.924e+01    7.904e-09

Store model as object

my_model <- gapminder %>% 
  lm(lifeExp ~ year, data = .) 
summary(my_model)
## 
## Call:
## lm(formula = lifeExp ~ year, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.949  -9.651   1.697  10.335  22.158 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -585.65219   32.31396  -18.12   <2e-16 ***
## year           0.32590    0.01632   19.96   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.63 on 1702 degrees of freedom
## Multiple R-squared:  0.1898, Adjusted R-squared:  0.1893 
## F-statistic: 398.6 on 1 and 1702 DF,  p-value: < 2.2e-16

Testing model assumptions

Make a model for Argentina and plot diagnostics

arg_model <- gapminder %>% 
  filter(country == "Argentina") %>% 
  lm(lifeExp ~ year, data = .)
plot(arg_model) # generates four diagnostic plots

Try the same for Rwanda and compare

rwanda_model <- gapminder %>% 
  filter(country == "Rwanda") %>% 
  lm(lifeExp ~ year, data = .)
plot(rwanda_model) # generates four diagnostic plots

Multiple models [Home Exercise]

How does life expectancy change with time in each country?

Global vs local trend

gapminder %>%
  ggplot(aes(x = year, y = lifeExp, group = country)) +
  geom_line(alpha = 1/3)

Linear model per country

by_country_lm <- by_country %>% 
  mutate(model = map(data, ~lm(lifeExp ~ year1950, data = .)))# to each element of data column apply lm() function
by_country_lm
## # A tibble: 142 × 4
##    continent country                                data model 
##    <fct>     <fct>                    <list<tibble[,5]>> <list>
##  1 Africa    Algeria                            [12 × 5] <lm>  
##  2 Africa    Angola                             [12 × 5] <lm>  
##  3 Africa    Benin                              [12 × 5] <lm>  
##  4 Africa    Botswana                           [12 × 5] <lm>  
##  5 Africa    Burkina Faso                       [12 × 5] <lm>  
##  6 Africa    Burundi                            [12 × 5] <lm>  
##  7 Africa    Cameroon                           [12 × 5] <lm>  
##  8 Africa    Central African Republic           [12 × 5] <lm>  
##  9 Africa    Chad                               [12 × 5] <lm>  
## 10 Africa    Comoros                            [12 × 5] <lm>  
## # … with 132 more rows

Plot for one country

by_country_lm %>% 
  filter(country=="India") %>% 
  unnest(data) %>% 
  ggplot(aes(x = year1950, y = lifeExp)) +
  geom_line()

Linear model for India

by_country_lm%>% 
  filter(country == "India") %>% 
  pluck("model", 1) %>% 
  summary()
## 
## Call:
## lm(formula = lifeExp ~ year1950, data = .)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.36441 -1.57472  0.04961  1.35537  2.30522 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 38.25911    0.98600   38.80 3.09e-12 ***
## year1950     0.50532    0.02885   17.52 7.81e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.725 on 10 degrees of freedom
## Multiple R-squared:  0.9684, Adjusted R-squared:  0.9653 
## F-statistic: 306.8 on 1 and 10 DF,  p-value: 7.813e-09

Linear model for Rwanda

by_country_lm%>% 
  filter(country == "Rwanda") %>% 
  pluck("model", 1) %>% 
  summary()
## 
## Call:
## lm(formula = lifeExp ~ year1950, data = .)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.310  -1.445   2.410   3.073   6.021 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 42.83361    3.74890  11.426 4.63e-07 ***
## year1950    -0.04583    0.10969  -0.418    0.685    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.558 on 10 degrees of freedom
## Multiple R-squared:  0.01716,    Adjusted R-squared:  -0.08112 
## F-statistic: 0.1746 on 1 and 10 DF,  p-value: 0.6849

Verify model results by plotting for Rwanda

by_country_lm %>% 
  filter(country=="Rwanda") %>% 
  unnest(data) %>% 
  ggplot(aes(x = year1950, y = lifeExp)) +
  geom_line()

Cleaning model results with broom

gm_models <- by_country_lm %>% 
  mutate(glance= map(model, glance),
         tidy = map(model, tidy),
         augment = map(model, augment),
         rsq = map_dbl(glance, "r.squared"))
gm_models
## # A tibble: 142 × 8
##    continent country                data model glance   tidy     augment     rsq
##    <fct>     <fct>              <list<t> <lis> <list>   <list>   <list>    <dbl>
##  1 Africa    Algeria            [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.985 
##  2 Africa    Angola             [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.888 
##  3 Africa    Benin              [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.967 
##  4 Africa    Botswana           [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.0340
##  5 Africa    Burkina Faso       [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.919 
##  6 Africa    Burundi            [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.766 
##  7 Africa    Cameroon           [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.680 
##  8 Africa    Central African R… [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.493 
##  9 Africa    Chad               [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.872 
## 10 Africa    Comoros            [12 × 5] <lm>  <tibble> <tibble> <tibble> 0.997 
## # … with 132 more rows

Visualizing all models

gm_models %>%
  unnest(tidy) %>%
  select(continent, country, rsq, term, estimate) %>%
  pivot_wider(names_from = term, values_from = estimate) %>%
  ggplot(aes(x = `(Intercept)`, y = year1950)) +
  geom_point(aes(colour = continent, size = rsq)) +
  geom_smooth(se = FALSE, method = "loess") +
  scale_size_area() + labs(x = "Life expectancy (1950)", y = "Yearly improvement")

LS0tCnRpdGxlOiAiU3RhdGlzdGljcyBhbmQgTW9kZWxsaW5nIgphdXRob3I6ICJNZWVuYWtzaGkgS3VzaHdhaGEiCmRhdGU6ICIyMDIyLTA5LTAxIgoKb3V0cHV0OiAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIHRoZW1lOiBmbGF0bHkKICAgIGNvZGVfZm9hbGRpbmc6IHRydWUKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Ka25pdHI6Om9wdHNfY2h1bmskc2V0KGVjaG8gPSBUUlVFLCBjYWNoZSA9IFRSVUUsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFKQpsaWJyYXJ5KGdhcG1pbmRlcikKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoaGVyZSkKbGlicmFyeShndHN1bW1hcnkpCmxpYnJhcnkoZmxleHRhYmxlKQpsaWJyYXJ5KGJyb29tKQpgYGAKCiMgUHJlc2VudGluZyBTdW1tYXJ5IFN0YXRpc3RpY3MKIyMgU2ltcGxlIHRhYmxlIGluIFIKCmBgYHtyfQpnYXBtaW5kZXIgJT4lIAogIGZpbHRlcihjb3VudHJ5ID09ICJJbmRpYSIsCiAgICAgICAgIHllYXIgPiAxOTgwKSAlPiUgCiAgc2VsZWN0KHllYXIsIHBvcCwgbGlmZUV4cCApICU+JSAKICBrbml0cjo6a2FibGUoKSAjIHByaW50IGluIHRhYmxlIGZvcm1hdApgYGAKCiMjIFN1bW1hcnkgc3RhdGlzdGljcwpHZW5lcmF0ZSBzdW1tYXJ5IHN0YXRpc3RpY3MgYnkgY29udGluZW50IGZvciBgZ2FwbWluZGVyYCBkYXRhc2V0CgpgYGB7cn0KZ2FwbWluZGVyICU+JSAKICBzZWxlY3QoLWNvdW50cnkpICU+JSAjIGRyb3AgY291bnRyeSBhbmQgeWVhciB2YXJpYWJsZQogIHRibF9zdW1tYXJ5KCkKYGBgCgojIyBDdXN0b21pemluZyB5b3VyIHRhYmxlCgojIyMgSW5jbHVkZS9leGNsdWRlIHZhcmlhYmxlcwpgYGB7cn0KZ2FwbWluZGVyICU+JSAKICBzZWxlY3QoLWNvdW50cnkpICU+JSAjIGRyb3AgY291bnRyeSB2YXJpYWJsZQogIHRibF9zdW1tYXJ5KGluY2x1ZGUgPSAteWVhcikgIyBpbmNsdWRlIGV2ZXJ5dGhpbmcgYnV0IHllYXIKYGBgCgoKYGBge3J9CmdhcG1pbmRlciAlPiUgCiAgc2VsZWN0KC1jb3VudHJ5KSAlPiUgIyBkcm9wIGNvdW50cnkgdmFyaWFibGUKICB0Ymxfc3VtbWFyeShpbmNsdWRlID0gYyhjb250aW5lbnQsIGxpZmVFeHAsIGdkcFBlcmNhcCkpCmBgYAoKRm9ybWF0IGxhYmVscwpgYGB7cn0KZ2FwbWluZGVyICU+JSAKICBzZWxlY3QoLWNvdW50cnksIC0geWVhcikgJT4lICMgZHJvcCBjb3VudHJ5IGFuZCB5ZWFyIHZhcmlhYmxlCiAgdGJsX3N1bW1hcnkoKSAlPiUgCiAgYm9sZF9sYWJlbHMoKQpgYGAKCiMjIyBTYXZpbmcgeW91ciB0YWJsZQpTYXZlIHlvdXIgdGFibGUgYXMgLmRvY3ggZmlsZQpgYGB7cn0KZ2FwbWluZGVyICU+JSAKICBzZWxlY3QoLWNvdW50cnksIC0geWVhcikgJT4lICMgZHJvcCBjb3VudHJ5IGFuZCB5ZWFyIHZhcmlhYmxlCiAgdGJsX3N1bW1hcnkoKSAlPiUgCiAgYXNfZmxleF90YWJsZSgpICU+JSAjIHNhdmUgdGFibGUgYXMgV29yZCBmaWxlIAogIHNhdmVfYXNfZG9jeChwYXRoID0gaGVyZSgibXlmaWxlLmRvY3giKSkgCmBgYAoKIyMgU2ltcGxlIGxpbmVhciBtb2RlbHMgaW4gUgoKYGBge3J9CmxtKGxpZmVFeHAgfiBwb3AsIGRhdGEgPSBnYXBtaW5kZXIpCmBgYAoKU3RvcmUgbW9kZWwgYXMgb2JqZWN0IApgYGB7cn0KbXlfbW9kZWwgPC0gZ2FwbWluZGVyICU+JSAKICBsbShsaWZlRXhwIH4geWVhciwgZGF0YSA9IC4pIApzdW1tYXJ5KG15X21vZGVsKQpgYGAKCiMjIFRlc3RpbmcgbW9kZWwgYXNzdW1wdGlvbnMKCk1ha2UgYSBtb2RlbCBmb3IgQXJnZW50aW5hIGFuZCBwbG90IGRpYWdub3N0aWNzCmBgYHtyfQphcmdfbW9kZWwgPC0gZ2FwbWluZGVyICU+JSAKICBmaWx0ZXIoY291bnRyeSA9PSAiQXJnZW50aW5hIikgJT4lIAogIGxtKGxpZmVFeHAgfiB5ZWFyLCBkYXRhID0gLikKcGxvdChhcmdfbW9kZWwpICMgZ2VuZXJhdGVzIGZvdXIgZGlhZ25vc3RpYyBwbG90cwpgYGAKClRyeSB0aGUgc2FtZSBmb3IgUndhbmRhIGFuZCBjb21wYXJlCmBgYHtyfQpyd2FuZGFfbW9kZWwgPC0gZ2FwbWluZGVyICU+JSAKICBmaWx0ZXIoY291bnRyeSA9PSAiUndhbmRhIikgJT4lIAogIGxtKGxpZmVFeHAgfiB5ZWFyLCBkYXRhID0gLikKcGxvdChyd2FuZGFfbW9kZWwpICMgZ2VuZXJhdGVzIGZvdXIgZGlhZ25vc3RpYyBwbG90cwpgYGAKCiMjIE11bHRpcGxlIG1vZGVscyBbSG9tZSBFeGVyY2lzZV0gCkhvdyBkb2VzIGxpZmUgZXhwZWN0YW5jeSBjaGFuZ2Ugd2l0aCB0aW1lIGluIGVhY2ggY291bnRyeT8KCiMjIyAgR2xvYmFsIHZzIGxvY2FsIHRyZW5kCmBgYHtyfQpnYXBtaW5kZXIgJT4lCiAgZ2dwbG90KGFlcyh4ID0geWVhciwgeSA9IGxpZmVFeHAsIGdyb3VwID0gY291bnRyeSkpICsKICBnZW9tX2xpbmUoYWxwaGEgPSAxLzMpCmBgYAoKCiMjIyBLZWVwIHJlbGF0ZWQgdGhpbmdzIHRvZ2V0aGVyIGBncm91cF9uZXN0KClgCk5lc3QgYnkgY291bnRyeQpgYGB7cn0KYnlfY291bnRyeSA8LSBnYXBtaW5kZXIgJT4lIAogIG11dGF0ZSh5ZWFyMTk1MCA9IHllYXIgLSAxOTUwKSAlPiUgCiAgZ3JvdXBfbmVzdChjb250aW5lbnQsIGNvdW50cnkpCmhlYWQoYnlfY291bnRyeSkKYGBgCgojIyMgTGluZWFyIG1vZGVsIHBlciBjb3VudHJ5CmBgYHtyfQpieV9jb3VudHJ5X2xtIDwtIGJ5X2NvdW50cnkgJT4lIAogIG11dGF0ZShtb2RlbCA9IG1hcChkYXRhLCB+bG0obGlmZUV4cCB+IHllYXIxOTUwLCBkYXRhID0gLikpKSMgdG8gZWFjaCBlbGVtZW50IG9mIGRhdGEgY29sdW1uIGFwcGx5IGxtKCkgZnVuY3Rpb24KYnlfY291bnRyeV9sbQpgYGAKCiMjIyBQbG90IGZvciBvbmUgY291bnRyeQpgYGB7cn0KYnlfY291bnRyeV9sbSAlPiUgCiAgZmlsdGVyKGNvdW50cnk9PSJJbmRpYSIpICU+JSAKICB1bm5lc3QoZGF0YSkgJT4lIAogIGdncGxvdChhZXMoeCA9IHllYXIxOTUwLCB5ID0gbGlmZUV4cCkpICsKICBnZW9tX2xpbmUoKQpgYGAKCiMjIyBMaW5lYXIgbW9kZWwgZm9yIEluZGlhCmBgYHtyfQpieV9jb3VudHJ5X2xtJT4lIAogIGZpbHRlcihjb3VudHJ5ID09ICJJbmRpYSIpICU+JSAKICBwbHVjaygibW9kZWwiLCAxKSAlPiUgCiAgc3VtbWFyeSgpCmBgYAoKIyMjIExpbmVhciBtb2RlbCBmb3IgUndhbmRhCmBgYHtyfQpieV9jb3VudHJ5X2xtJT4lIAogIGZpbHRlcihjb3VudHJ5ID09ICJSd2FuZGEiKSAlPiUgCiAgcGx1Y2soIm1vZGVsIiwgMSkgJT4lIAogIHN1bW1hcnkoKQpgYGAKCiMjIyBWZXJpZnkgbW9kZWwgcmVzdWx0cyBieSBwbG90dGluZyBmb3IgUndhbmRhCmBgYHtyfQpieV9jb3VudHJ5X2xtICU+JSAKICBmaWx0ZXIoY291bnRyeT09IlJ3YW5kYSIpICU+JSAKICB1bm5lc3QoZGF0YSkgJT4lIAogIGdncGxvdChhZXMoeCA9IHllYXIxOTUwLCB5ID0gbGlmZUV4cCkpICsKICBnZW9tX2xpbmUoKQpgYGAKCiMjIyBDbGVhbmluZyBtb2RlbCByZXN1bHRzIHdpdGggYGJyb29tYAoKYGBge3J9CmdtX21vZGVscyA8LSBieV9jb3VudHJ5X2xtICU+JSAKICBtdXRhdGUoZ2xhbmNlPSBtYXAobW9kZWwsIGdsYW5jZSksCiAgICAgICAgIHRpZHkgPSBtYXAobW9kZWwsIHRpZHkpLAogICAgICAgICBhdWdtZW50ID0gbWFwKG1vZGVsLCBhdWdtZW50KSwKICAgICAgICAgcnNxID0gbWFwX2RibChnbGFuY2UsICJyLnNxdWFyZWQiKSkKZ21fbW9kZWxzCmBgYAoKIyMjIFZpc3VhbGl6aW5nIGFsbCBtb2RlbHMKYGBge3J9CmdtX21vZGVscyAlPiUKICB1bm5lc3QodGlkeSkgJT4lCiAgc2VsZWN0KGNvbnRpbmVudCwgY291bnRyeSwgcnNxLCB0ZXJtLCBlc3RpbWF0ZSkgJT4lCiAgcGl2b3Rfd2lkZXIobmFtZXNfZnJvbSA9IHRlcm0sIHZhbHVlc19mcm9tID0gZXN0aW1hdGUpICU+JQogIGdncGxvdChhZXMoeCA9IGAoSW50ZXJjZXB0KWAsIHkgPSB5ZWFyMTk1MCkpICsKICBnZW9tX3BvaW50KGFlcyhjb2xvdXIgPSBjb250aW5lbnQsIHNpemUgPSByc3EpKSArCiAgZ2VvbV9zbW9vdGgoc2UgPSBGQUxTRSwgbWV0aG9kID0gImxvZXNzIikgKwogIHNjYWxlX3NpemVfYXJlYSgpICsgbGFicyh4ID0gIkxpZmUgZXhwZWN0YW5jeSAoMTk1MCkiLCB5ID0gIlllYXJseSBpbXByb3ZlbWVudCIp