forked from dgrtwo/data-screencasts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwomen-workplace.Rmd
111 lines (96 loc) · 3.59 KB
/
women-workplace.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
---
title: "Women in the Workplace"
output: html_document
---
```{r setup, include=FALSE}
# Default chunk option for the whole document: echo each chunk's source code
# in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
library(scales)
# Make ggplot2's light theme the default for every plot below.
theme_set(theme_light())

# TidyTuesday 2019-03-05 data sets, read directly from the GitHub repository.
jobs_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/jobs_gender.csv"
)
earnings_female <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/earnings_female.csv"
)
employed_gender <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-05/employed_gender.csv"
)
```
```{r}
# Roll occupation-level rows up into one row per group of `tbl` (or a single
# row when `tbl` is ungrouped): worker-weighted average earnings overall and
# by gender, summed worker counts, and female earnings as a share of male
# earnings (`wage_percent_of_male`).
#
# `tbl` must contain the columns total_earnings, total_earnings_male,
# total_earnings_female, total_workers, workers_male and workers_female.
summarize_jobs_gender <- function(tbl) {
  # The outputs reuse the input column names. summarize() evaluates its
  # arguments in order and lets later ones see earlier results, so the worker
  # counts are summed last, after every weighted average that still needs the
  # row-level counts.
  rolled_up <- summarize(
    tbl,
    total_earnings =
      sum(total_earnings * total_workers) / sum(total_workers),
    # Rows with no reported male earnings are left out of both the weighted
    # sum and the weight total.
    total_earnings_male =
      sum(total_earnings_male * workers_male, na.rm = TRUE) /
        sum(workers_male[!is.na(total_earnings_male)]),
    # Same treatment for rows with no reported female earnings.
    total_earnings_female =
      sum(total_earnings_female * workers_female, na.rm = TRUE) /
        sum(workers_female[!is.na(total_earnings_female)]),
    total_workers = sum(total_workers),
    workers_male = sum(workers_male),
    workers_female = sum(workers_female)
  )

  mutate(
    rolled_up,
    wage_percent_of_male = total_earnings_female / total_earnings_male
  )
}
# Worker-weighted earnings summary per year and major occupation category,
# built only from rows that have no missing value in any column.
by_year_major_category <- jobs_gender %>%
  filter(complete.cases(.)) %>%
  group_by(year, major_category) %>%
  summarize_jobs_gender() %>%
  # summarize() peels off only the last grouping level, so the result would
  # still be grouped by `year`. Drop that grouping so later mutate() /
  # fct_reorder() calls see the whole table instead of running year by year.
  ungroup()
# Line chart of weighted average earnings over time, one line per major
# category. The category factor is reordered by negated earnings, which is
# intended to list higher-earning categories first in the legend.
by_year_major_category %>%
  mutate(
    major_category = fct_reorder(major_category, -total_earnings)
  ) %>%
  ggplot(aes(x = year, y = total_earnings, color = major_category)) +
  geom_line() +
  # Make sure the y axis reaches down to zero.
  expand_limits(y = 0)
# Line chart of female earnings as a share of male earnings over time, one
# line per major category, with the category factor reordered by the negated
# ratio.
by_year_major_category %>%
  mutate(
    major_category = fct_reorder(major_category, -wage_percent_of_male)
  ) %>%
  ggplot(aes(x = year, y = wage_percent_of_male, color = major_category)) +
  geom_line()
```
```{r}
# 2016 only: one summary row per (major, minor) occupation category pair,
# returned as an ungrouped table.
by_minor_category_2016 <- jobs_gender %>%
  filter(year == 2016) %>%
  group_by(major_category, minor_category) %>%
  summarize_jobs_gender() %>%
  ungroup()
```
```{r}
# Horizontal bar chart of the female-to-male earnings ratio for each minor
# category in 2016, bars sorted by that ratio and filled by major category.
by_minor_category_2016 %>%
  mutate(
    minor_category = fct_reorder(minor_category, wage_percent_of_male)
  ) %>%
  ggplot(
    aes(x = minor_category, y = wage_percent_of_male, fill = major_category)
  ) +
  geom_col() +
  # Flip the axes so the category names sit on the vertical axis.
  coord_flip()
```
```{r}
library(plotly)
# Interactive scatter plot of 2016 healthcare occupations: share of female
# workers against total earnings, with point size showing the number of
# workers. `label = occupation` is mapped so the occupation is carried into
# the plotly version of the chart.
p <- jobs_gender %>%
  filter(
    year == 2016,
    major_category == "Healthcare Practitioners and Technical"
  ) %>%
  arrange(desc(wage_percent_of_male)) %>%
  ggplot(
    aes(
      x = workers_female / total_workers,
      y = total_earnings,
      size = total_workers,
      label = occupation
    )
  ) +
  geom_point() +
  scale_size_continuous(range = c(1, 10)) +
  labs(
    x = "% of workforce reported as female",
    y = "Median salary in the occupation",
    size = "Total # of workers"
  ) +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = dollar_format()) +
  # Make sure the salary axis reaches down to zero.
  expand_limits(y = 0)

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
```
```{r}
# Interactive scatter plot of 2016 computer, engineering and science
# occupations with at least 20,000 workers: share of female workers against
# the female-to-male earnings ratio, colored by minor category and sized by
# the number of workers.
p <- jobs_gender %>%
  filter(
    year == 2016,
    total_workers >= 20000,
    major_category == "Computer, Engineering, and Science"
  ) %>%
  arrange(desc(wage_percent_of_male)) %>%
  ggplot(
    aes(
      x = workers_female / total_workers,
      y = total_earnings_female / total_earnings_male,
      color = minor_category,
      size = total_workers,
      label = occupation
    )
  ) +
  geom_point() +
  scale_size_continuous(range = c(1, 10)) +
  labs(
    x = "% of workforce reported as female",
    y = "% of median female salary / median male",
    size = "Total # of workers"
  ) +
  scale_x_continuous(labels = percent_format()) +
  scale_y_continuous(labels = percent_format())

# Convert the static ggplot into an interactive plotly widget.
ggplotly(p)
```