Code for Quiz 5: more practice with the dplyr functions
drug_cos.csv
into R and assign it to drug_cos
.drug_cos <- read_csv("https://estanny.com/static/week5/drug_cos.csv")
3.Use glimpse()
to get a glimpse of your data.
glimpse(drug_cos)
Rows: 104
Columns: 9
$ ticker <chr> "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "ZTS", "Z...
$ name <chr> "Zoetis Inc", "Zoetis Inc", "Zoetis Inc", "Z...
$ location <chr> "New Jersey; U.S.A", "New Jersey; U.S.A", "N...
$ ebitdamargin <dbl> 0.149, 0.217, 0.222, 0.238, 0.182, 0.335, 0....
$ grossmargin <dbl> 0.610, 0.640, 0.634, 0.641, 0.635, 0.659, 0....
$ netmargin <dbl> 0.058, 0.101, 0.111, 0.122, 0.071, 0.168, 0....
$ ros <dbl> 0.101, 0.171, 0.176, 0.195, 0.140, 0.286, 0....
$ roe <dbl> 0.069, 0.113, 0.612, 0.465, 0.285, 0.587, 0....
$ year <dbl> 2011, 2012, 2013, 2014, 2015, 2016, 2017, 20...
distinct()
to distinct rows.drug_cos %>%
distinct(year)
# A tibble: 8 x 1
year
<dbl>
1 2011
2 2012
3 2013
4 2014
5 2015
6 2016
7 2017
8 2018
count()
to count observations by group.drug_cos %>%
count(year)
# A tibble: 8 x 2
year n
* <dbl> <int>
1 2011 13
2 2012 13
3 2013 13
4 2014 13
5 2015 13
6 2016 13
7 2017 13
8 2018 13
drug_cos %>%
count(name)
# A tibble: 13 x 2
name n
* <chr> <int>
1 AbbVie Inc 8
2 Allergan plc 8
3 Amgen Inc 8
4 Biogen Inc 8
5 Bristol Myers Squibb Co 8
6 ELI LILLY & Co 8
7 Gilead Sciences Inc 8
8 Johnson & Johnson 8
9 Merck & Co Inc 8
10 Mylan NV 8
11 PERRIGO Co plc 8
12 Pfizer Inc 8
13 Zoetis Inc 8
drug_cos %>%
count(ticker, name)
# A tibble: 13 x 3
ticker name n
<chr> <chr> <int>
1 ABBV AbbVie Inc 8
2 AGN Allergan plc 8
3 AMGN Amgen Inc 8
4 BIIB Biogen Inc 8
5 BMY Bristol Myers Squibb Co 8
6 GILD Gilead Sciences Inc 8
7 JNJ Johnson & Johnson 8
8 LLY ELI LILLY & Co 8
9 MRK Merck & Co Inc 8
10 MYL Mylan NV 8
11 PFE Pfizer Inc 8
12 PRGO PERRIGO Co plc 8
13 ZTS Zoetis Inc 8
filter()
to extract rows that meet criteria# A tibble: 26 x 9
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
2 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
3 PRGO PERR~ Ireland 0.236 0.362 0.125 0.19
4 PRGO PERR~ Ireland 0.178 0.387 0.028 0.088
5 PFE Pfiz~ New Yor~ 0.634 0.814 0.427 0.51
6 PFE Pfiz~ New Yor~ 0.34 0.79 0.208 0.221
7 MYL Myla~ United ~ 0.228 0.44 0.09 0.153
8 MYL Myla~ United ~ 0.258 0.35 0.031 0.074
9 MRK Merc~ New Jer~ 0.282 0.615 0.1 0.123
10 MRK Merc~ New Jer~ 0.313 0.681 0.147 0.206
# ... with 16 more rows, and 2 more variables: roe <dbl>, year <dbl>
# A tibble: 52 x 9
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
2 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
3 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
4 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
5 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
6 PRGO PERR~ Ireland 0.157 0.371 0.059 0.104
7 PRGO PERR~ Ireland -0.791 0.389 -0.76 -0.877
8 PRGO PERR~ Ireland 0.178 0.387 0.028 0.088
9 PFE Pfiz~ New Yor~ 0.447 0.82 0.267 0.307
10 PFE Pfiz~ New Yor~ 0.359 0.807 0.184 0.247
# ... with 42 more rows, and 2 more variables: roe <dbl>, year <dbl>
# A tibble: 16 x 9
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 PFE Pfiz~ New Yor~ 0.371 0.795 0.164 0.223
2 PFE Pfiz~ New Yor~ 0.447 0.82 0.267 0.307
3 PFE Pfiz~ New Yor~ 0.634 0.814 0.427 0.51
4 PFE Pfiz~ New Yor~ 0.359 0.807 0.184 0.247
5 PFE Pfiz~ New Yor~ 0.289 0.803 0.142 0.183
6 PFE Pfiz~ New Yor~ 0.267 0.767 0.137 0.158
7 PFE Pfiz~ New Yor~ 0.353 0.786 0.406 0.233
8 PFE Pfiz~ New Yor~ 0.34 0.79 0.208 0.221
9 MYL Myla~ United ~ 0.245 0.418 0.088 0.161
10 MYL Myla~ United ~ 0.244 0.428 0.094 0.163
11 MYL Myla~ United ~ 0.228 0.44 0.09 0.153
12 MYL Myla~ United ~ 0.242 0.457 0.12 0.169
13 MYL Myla~ United ~ 0.243 0.447 0.09 0.133
14 MYL Myla~ United ~ 0.19 0.424 0.043 0.052
15 MYL Myla~ United ~ 0.272 0.402 0.058 0.121
16 MYL Myla~ United ~ 0.258 0.35 0.031 0.074
# ... with 2 more variables: roe <dbl>, year <dbl>
select()
to select, rename, and reorder columnsticker
, name
, and ros
drug_cos %>%
select(ticker, name, ros)
# A tibble: 104 x 3
ticker name ros
<chr> <chr> <dbl>
1 ZTS Zoetis Inc 0.101
2 ZTS Zoetis Inc 0.171
3 ZTS Zoetis Inc 0.176
4 ZTS Zoetis Inc 0.195
5 ZTS Zoetis Inc 0.14
6 ZTS Zoetis Inc 0.286
7 ZTS Zoetis Inc 0.321
8 ZTS Zoetis Inc 0.326
9 PRGO PERRIGO Co plc 0.178
10 PRGO PERRIGO Co plc 0.183
# ... with 94 more rows
select()
to exclude colums ticker
, name
, and ros
drug_cos %>%
select(-ticker, -name, -ros)
# A tibble: 104 x 6
location ebitdamargin grossmargin netmargin roe year
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 New Jersey; U.S.A 0.149 0.61 0.058 0.069 2011
2 New Jersey; U.S.A 0.217 0.64 0.101 0.113 2012
3 New Jersey; U.S.A 0.222 0.634 0.111 0.612 2013
4 New Jersey; U.S.A 0.238 0.641 0.122 0.465 2014
5 New Jersey; U.S.A 0.182 0.635 0.071 0.285 2015
6 New Jersey; U.S.A 0.335 0.659 0.168 0.587 2016
7 New Jersey; U.S.A 0.366 0.666 0.163 0.488 2017
8 New Jersey; U.S.A 0.379 0.672 0.245 0.694 2018
9 Ireland 0.216 0.343 0.123 0.248 2011
10 Ireland 0.226 0.345 0.127 0.236 2012
# ... with 94 more rows
select()
start with drug_cos
THEN
change the name of location
to headquarter
THEN
put the columns in this order: year
, ticker
, headquarter
, netmargin
, roe
drug_cos %>%
select(year, ticker, headquarter =location, netmargin, roe)
# A tibble: 104 x 5
year ticker headquarter netmargin roe
<dbl> <chr> <chr> <dbl> <dbl>
1 2011 ZTS New Jersey; U.S.A 0.058 0.069
2 2012 ZTS New Jersey; U.S.A 0.101 0.113
3 2013 ZTS New Jersey; U.S.A 0.111 0.612
4 2014 ZTS New Jersey; U.S.A 0.122 0.465
5 2015 ZTS New Jersey; U.S.A 0.071 0.285
6 2016 ZTS New Jersey; U.S.A 0.168 0.587
7 2017 ZTS New Jersey; U.S.A 0.163 0.488
8 2018 ZTS New Jersey; U.S.A 0.245 0.694
9 2011 PRGO Ireland 0.123 0.248
10 2012 PRGO Ireland 0.127 0.236
# ... with 94 more rows
Start with drug_cos
THEN extract information for the tickers “AMGN”, “ABBV”, “JNJ” select the variables ticker
, year
, and ros
.
# A tibble: 24 x 3
ticker year ros
<chr> <dbl> <dbl>
1 JNJ 2011 0.199
2 JNJ 2012 0.218
3 JNJ 2013 0.224
4 JNJ 2014 0.284
5 JNJ 2015 0.282
6 JNJ 2016 0.286
7 JNJ 2017 0.243
8 JNJ 2018 0.233
9 AMGN 2011 0.305
10 AMGN 2012 0.351
# ... with 14 more rows
Start with drug_cos
THEN extract information for the tickers “ABBV”, “BMY” THEN select the variables ticker
, netmargin
and roe
change the name of roe
to return_on_equity
drug_cos %>%
filter(ticker %in% c("ABBV", "BMY")) %>%
select(ticker, netmargin, return_on_equity =roe)
# A tibble: 16 x 3
ticker netmargin return_on_equity
<chr> <dbl> <dbl>
1 BMY 0.175 0.229
2 BMY 0.111 0.131
3 BMY 0.156 0.177
4 BMY 0.126 0.132
5 BMY 0.095 0.104
6 BMY 0.229 0.292
7 BMY 0.048 0.072
8 BMY 0.218 0.373
9 ABBV 0.197 0.248
10 ABBV 0.287 0.69
11 ABBV 0.22 1.13
12 ABBV 0.089 0.435
13 ABBV 0.225 1.31
14 ABBV 0.232 1.11
15 ABBV 0.188 0.932
16 ABBV 0.174 -2.03
select
ranges of columns…drug_cos %>%
select(ebitdamargin:netmargin)
# A tibble: 104 x 3
ebitdamargin grossmargin netmargin
<dbl> <dbl> <dbl>
1 0.149 0.61 0.058
2 0.217 0.64 0.101
3 0.222 0.634 0.111
4 0.238 0.641 0.122
5 0.182 0.635 0.071
6 0.335 0.659 0.168
7 0.366 0.666 0.163
8 0.379 0.672 0.245
9 0.216 0.343 0.123
10 0.226 0.345 0.127
# ... with 94 more rows
drug_cos %>%
select(4:6)
# A tibble: 104 x 3
ebitdamargin grossmargin netmargin
<dbl> <dbl> <dbl>
1 0.149 0.61 0.058
2 0.217 0.64 0.101
3 0.222 0.634 0.111
4 0.238 0.641 0.122
5 0.182 0.635 0.071
6 0.335 0.659 0.168
7 0.366 0.666 0.163
8 0.379 0.672 0.245
9 0.216 0.343 0.123
10 0.226 0.345 0.127
# ... with 94 more rows
select
helper functions:starts_with("abc")
matches columns start with “abc”
ends_with("abc")
matches columns end with “abc”
contains("abc")
matches columns that contain “abc”
drug_cos %>%
select(ticker, contains("locat"))
# A tibble: 104 x 2
ticker location
<chr> <chr>
1 ZTS New Jersey; U.S.A
2 ZTS New Jersey; U.S.A
3 ZTS New Jersey; U.S.A
4 ZTS New Jersey; U.S.A
5 ZTS New Jersey; U.S.A
6 ZTS New Jersey; U.S.A
7 ZTS New Jersey; U.S.A
8 ZTS New Jersey; U.S.A
9 PRGO Ireland
10 PRGO Ireland
# ... with 94 more rows
drug_cos %>%
select(ticker, starts_with("r"))
# A tibble: 104 x 3
ticker ros roe
<chr> <dbl> <dbl>
1 ZTS 0.101 0.069
2 ZTS 0.171 0.113
3 ZTS 0.176 0.612
4 ZTS 0.195 0.465
5 ZTS 0.14 0.285
6 ZTS 0.286 0.587
7 ZTS 0.321 0.488
8 ZTS 0.326 0.694
9 PRGO 0.178 0.248
10 PRGO 0.183 0.236
# ... with 94 more rows
drug_cos %>%
select(ticker, ends_with("margin"))
# A tibble: 104 x 4
ticker ebitdamargin grossmargin netmargin
<chr> <dbl> <dbl> <dbl>
1 ZTS 0.149 0.61 0.058
2 ZTS 0.217 0.64 0.101
3 ZTS 0.222 0.634 0.111
4 ZTS 0.238 0.641 0.122
5 ZTS 0.182 0.635 0.071
6 ZTS 0.335 0.659 0.168
7 ZTS 0.366 0.666 0.163
8 ZTS 0.379 0.672 0.245
9 PRGO 0.216 0.343 0.123
10 PRGO 0.226 0.345 0.127
# ... with 94 more rows
group_by
to set up data for operations by groupgroup_by
drug_cos %>%
group_by(ticker)
# A tibble: 104 x 9
# Groups: ticker [13]
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.149 0.61 0.058 0.101
2 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
3 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
4 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
5 ZTS Zoet~ New Jer~ 0.182 0.635 0.071 0.14
6 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
7 ZTS Zoet~ New Jer~ 0.366 0.666 0.163 0.321
8 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
9 PRGO PERR~ Ireland 0.216 0.343 0.123 0.178
10 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
# ... with 94 more rows, and 2 more variables: roe <dbl>, year <dbl>
drug_cos %>%
group_by(year)
# A tibble: 104 x 9
# Groups: year [8]
ticker name location ebitdamargin grossmargin netmargin ros
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 ZTS Zoet~ New Jer~ 0.149 0.61 0.058 0.101
2 ZTS Zoet~ New Jer~ 0.217 0.64 0.101 0.171
3 ZTS Zoet~ New Jer~ 0.222 0.634 0.111 0.176
4 ZTS Zoet~ New Jer~ 0.238 0.641 0.122 0.195
5 ZTS Zoet~ New Jer~ 0.182 0.635 0.071 0.14
6 ZTS Zoet~ New Jer~ 0.335 0.659 0.168 0.286
7 ZTS Zoet~ New Jer~ 0.366 0.666 0.163 0.321
8 ZTS Zoet~ New Jer~ 0.379 0.672 0.245 0.326
9 PRGO PERR~ Ireland 0.216 0.343 0.123 0.178
10 PRGO PERR~ Ireland 0.226 0.345 0.127 0.183
# ... with 94 more rows, and 2 more variables: roe <dbl>, year <dbl>
summarise
to calculate summary statisticsroe
for all companiesdrug_cos %>%
summarise(max_roe = max(roe))
# A tibble: 1 x 1
max_roe
<dbl>
1 1.31
roe
for each year
drug_cos %>%
group_by(year) %>%
summarise(max_roe = max(roe))
# A tibble: 8 x 2
year max_roe
* <dbl> <dbl>
1 2011 0.451
2 2012 0.69
3 2013 1.13
4 2014 0.828
5 2015 1.31
6 2016 1.11
7 2017 0.932
8 2018 0.694
roe
for each ticker
drug_cos %>%
group_by(ticker) %>%
summarise(max_roe = max(roe))
# A tibble: 13 x 2
ticker max_roe
* <chr> <dbl>
1 ABBV 1.31
2 AGN 0.184
3 AMGN 0.585
4 BIIB 0.334
5 BMY 0.373
6 GILD 1.04
7 JNJ 0.244
8 LLY 0.306
9 MRK 0.248
10 MYL 0.283
11 PFE 0.342
12 PRGO 0.248
13 ZTS 0.694
Find the mean ros
for each year
and call variable mean_ros
Extract mean for 2016
# A tibble: 1 x 2
year mean_ros
<dbl> <dbl>
1 2016 0.253
Find the median ros
for each year and call the variable
median_ros
extract the median for 2016
# A tibble: 1 x 2
year median_ros
<dbl> <dbl>
1 2016 0.286
drug_cos %>%
filter(ticker == "JNJ") %>%
ggplot(aes(x = year, y = ros)) +
geom_col() +
scale_y_continuous(labels = scales::percent) +
labs(title = "Comparison of Return on Sales",
subtitle = "for Johnson & Johnson from 2011 to 2018",
x = NULL, y = NULL) +
theme_classic()
ggsave(filename = "preview.png",
path = here::here("_posts", "2021-03-08-data-manipulation"))