forked from dgrtwo/data-screencasts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmedia-franchises.Rmd
131 lines (111 loc) · 3.87 KB
/
media-franchises.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
---
title: "Media Franchise Revenues"
output: html_document
---
```{r setup, include=FALSE}
# Set the default chunk option `echo = TRUE` for the chunks in this document.
knitr::opts_chunk$set(echo = TRUE)
```
```{r}
library(tidyverse)
# Make theme_light() the default ggplot2 theme for the plots below.
theme_set(theme_light())

# Read the media franchises CSV published in the TidyTuesday repository
# (2019-07-02 folder).
media_franchises <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-07-02/media_franchises.csv"
)
```
```{r}
# Number of rows per franchise, most frequent first.
media_franchises %>%
  count(franchise, sort = TRUE)

# Collapse to one row per (franchise, original_media, year_created, creators,
# owners) combination, recording:
#   categories      - number of rows in the group
#   total_revenue   - revenue summed over the group
#   most_profitable - revenue_category of the row with the largest revenue
franchises <- media_franchises %>%
  group_by(franchise, original_media, year_created, creators, owners) %>%
  summarise(
    categories = n(),
    total_revenue = sum(revenue),
    most_profitable = revenue_category[which.max(revenue)]
  ) %>%
  ungroup()

# Print the summarised and the raw tables for inspection.
franchises
media_franchises
```
### Franchise-level
What are the biggest franchises?
```{r}
library(glue)

# The 20 franchises with the largest total revenue (top_n keeps ties), with
# the franchise name rewritten as "name (year created)" for use as axis label.
top_franchises <- franchises %>%
  mutate(franchise = glue("{ franchise } ({ year_created })")) %>%
  top_n(20, total_revenue)

# Stacked horizontal bar chart of revenue by category for those franchises.
media_franchises %>%
  # Build the identical label so rows can be matched against top_franchises.
  mutate(franchise = glue("{ franchise } ({ year_created })")) %>%
  semi_join(top_franchises, by = "franchise") %>%
  # Order bars and fill categories by their summed revenue.
  mutate(franchise = fct_reorder(franchise, revenue, sum),
         revenue_category = fct_reorder(revenue_category, revenue, sum)) %>%
  ggplot(aes(franchise, revenue)) +
  geom_col(aes(fill = revenue_category)) +
  # Write each franchise's total (e.g. "$92B") starting at the end of its bar.
  geom_text(
    aes(y = total_revenue,
        label = paste0(scales::dollar(total_revenue, accuracy = 1), "B")),
    data = top_franchises,
    hjust = 0
  ) +
  scale_y_continuous(labels = scales::dollar) +
  # Extend the revenue axis to include 100; presumably this leaves room for
  # the text labels drawn past the longest bars.
  expand_limits(y = 100) +
  coord_flip() +
  theme(panel.grid.major.y = element_blank()) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # The chart shows revenue, not profit, so the title says "highest-grossing".
  labs(title = "What are the highest-grossing franchises of all time?",
       fill = "Category",
       x = "",
       y = "Revenue (Billions)")
```
```{r}
# Revenue by category for every franchise, faceted by owner, restricted to
# owners that hold more than two distinct franchises.
media_franchises %>%
  group_by(owners) %>%
  filter(n_distinct(franchise) > 2) %>%
  ungroup() %>%
  # Franchises and fill categories are ordered by summed revenue; owners are
  # ordered by summed negative revenue, i.e. largest total revenue first.
  mutate(franchise = fct_reorder(franchise, revenue, sum),
         owners = fct_reorder(owners, -revenue, sum),
         revenue_category = fct_reorder(revenue_category, revenue, sum)) %>%
  ggplot(aes(franchise, revenue, fill = revenue_category)) +
  geom_col() +
  facet_wrap(~ owners, scales = "free_y") +
  # Format the revenue axis as dollar amounts to match its "Revenue" label.
  scale_y_continuous(labels = scales::dollar) +
  guides(fill = guide_legend(reverse = TRUE)) +
  coord_flip() +
  labs(title = "What companies own at least 3 franchises?",
       fill = "Category",
       x = "",
       y = "Revenue (Billions)")
```
```{r}
# Scatter plot of each franchise's total revenue against the year it was
# created; point size repeats total revenue and colour shows original medium.
franchises %>%
  ggplot(aes(year_created, total_revenue)) +
  geom_point(aes(size = total_revenue, color = original_media)) +
  # check_overlap drops labels that would overlap ones already drawn.
  geom_text(aes(label = franchise),
            check_overlap = TRUE,
            vjust = 1,
            hjust = 1) +
  # Format revenue as dollar amounts rather than bare numbers.
  scale_y_continuous(labels = scales::dollar) +
  # Extend the x axis to include 1910.
  expand_limits(x = 1910) +
  # Readable axis and legend titles instead of raw column names.
  labs(title = "When were the 'great' franchises created?",
       x = "Year created",
       y = "Revenue (Billions)",
       size = "Revenue (Billions)",
       color = "Original media")
```
```{r}
# Total revenue for each original medium.
media_franchises %>%
  group_by(original_media) %>%
  summarise(revenue = sum(revenue))

# Revenue per (original_media, revenue_category) pair, keeping only original
# media whose overall revenue is at least 45. Revenue categories are ordered
# by summed revenue; original media by summed negative revenue (largest first).
original_media_revenue_categories <- media_franchises %>%
  group_by(original_media) %>%
  filter(sum(revenue) >= 45) %>%
  group_by(original_media, revenue_category) %>%
  summarise(revenue = sum(revenue)) %>%
  ungroup() %>%
  mutate(
    revenue_category = fct_reorder(revenue_category, revenue, .fun = sum),
    original_media = fct_reorder(original_media, -revenue, .fun = sum)
  )

# Horizontal bars of revenue per category, one facet per original medium.
original_media_revenue_categories %>%
  ggplot(aes(x = revenue_category, y = revenue)) +
  geom_col() +
  facet_wrap(~ original_media) +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar) +
  labs(
    title = "What kinds of media lead to what types of revenue?",
    x = "Revenue category",
    y = "Revenue (Billions)"
  )
```
```{r}
# Heatmap of revenue for each (revenue_category, original_media) pair.
# Both factors have their level order reversed before plotting.
original_media_revenue_categories %>%
  mutate(
    revenue_category = fct_rev(revenue_category),
    original_media = fct_rev(original_media)
  ) %>%
  ggplot(aes(x = revenue_category, y = original_media, fill = revenue)) +
  geom_tile() +
  # NOTE(review): scale_fill_gradient2 is a diverging scale; if revenue is
  # never negative the `low` colour is unused and white comes from the
  # scale's default midpoint settings - confirm this is intended.
  scale_fill_gradient2(low = "white", high = "red", labels = scales::dollar) +
  labs(fill = "Revenue (billions)") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    panel.grid = element_blank()
  )
```