Getting Wizard of Oz
# Build the word-level corpus for the book from a local HTML copy.
# Alternative source, kept for reference:
#   get_gutenberg_corpus(55, meta_fields = NULL)
oz <- parse_html("gutenberg/55.htm") |>
  # Keep only the parts whose label mentions a chapter.
  filter(str_detect(part, "Chapter")) |>
  identify_by(part) |>
  load_texts(keep_original = TRUE) |>
  mutate(
    # "Chapter IV ..." -> "IV" -> roman numeral -> 4L. The integer conversion
    # matches the <int> chap_num column in the printed result.
    chap_num = str_extract(part, "Chapter [IXV]+") |>
      str_remove_all("Chapter ") |>
      as.roman() |>
      as.integer(),
    # Whatever remains of the label after the "Chapter <numeral>" prefix.
    chap_title = str_remove_all(part, "Chapter [IXV]+") |> trimws(),
    .before = original
  ) |>
  # `part` is now fully represented by chap_num and chap_title.
  select(-part)

# Print the corpus.
oz
# A tibble: 39,225 × 6
doc_id title chap_num chap_title original word
<fct> <chr> <int> <chr> <chr> <chr>
1 1 The Wonderful Wizard of Oz 1 The Cyclone Dorothy dorothy
2 1 The Wonderful Wizard of Oz 1 The Cyclone lived lived
3 1 The Wonderful Wizard of Oz 1 The Cyclone in in
4 1 The Wonderful Wizard of Oz 1 The Cyclone the the
5 1 The Wonderful Wizard of Oz 1 The Cyclone midst midst
6 1 The Wonderful Wizard of Oz 1 The Cyclone of of
7 1 The Wonderful Wizard of Oz 1 The Cyclone the the
8 1 The Wonderful Wizard of Oz 1 The Cyclone great great
9 1 The Wonderful Wizard of Oz 1 The Cyclone Kansas kansas
10 1 The Wonderful Wizard of Oz 1 The Cyclone prairies, prairies
# ℹ 39,215 more rows
# Show the first ten occurrences of "yellow" with their surrounding words.
oz |>
  contextualize("yellow", limit = 1:10)
is paved with yellow brick,” said the
one paved with yellow bricks. Within a
on the hard, yellow road-bed. The
the road of yellow brick. When she
the path of yellow brick for the
stumbled over the yellow bricks, which were
the road of yellow brick. It was
the road of yellow brick, she was
was paved with yellow brick. The Tin
still paved with yellow brick, but these
Adding a dictionary
# Full color listing, read from a local checkout under the home directory.
all_colors <- read_csv("~/GitHub/chromatome/all_colors.csv")
Rows: 3841 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): term, color, group, listing
dbl (6): red, green, blue, hue, saturation, brightness
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Averaged color table (one row per term, judging by the printed column
# spec); this is the table the dictionary below is built from.
avg_colors <- read_csv("~/GitHub/chromatome/average_colors.csv")
Rows: 478 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): term, color, group
dbl (8): sources, red, green, blue, red_diff, hue, saturation, brightness
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Dictionary of color terms: keep only the columns to attach to the corpus.
# NOTE(review): `colors` masks grDevices::colors() for the rest of the
# session; the name is kept because later code may refer to it.
colors <- avg_colors |>
  select(term, color, hue, saturation, brightness)

# Attach the color dictionary to the word-level corpus.
oz_c <- oz |>
  add_dictionary(colors)
Joining with `by = join_by(ngram)`
# Keep only the words that matched a color term.
oz_c |>
  drop_na(color)
# A tibble: 372 × 11
doc_id title chap_num chap_title original word color color_term color_hue
<fct> <chr> <int> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
2 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
3 1 The Won… 1 The Cyclo… green, green #04B… green 0.334
4 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
5 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
6 1 The Won… 1 The Cyclo… gray; gray #A29… gray 0
7 1 The Won… 1 The Cyclo… red red #F80… red 0
8 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
9 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
10 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
# ℹ 362 more rows
# ℹ 2 more variables: color_saturation <dbl>, color_brightness <dbl>
# Stacked bar chart of color-word mentions per chapter. Each segment is
# filled with the color value stored in `color`, via scale_fill_identity().
oz_c_plot <- oz_c |>
  drop_na(color) |>
  # Mentions of each color within each chapter, for the tooltip text.
  mutate(count = n(),
         .by = c(chap_num, color)) |>
  ggplot(aes(x = chap_num,
             fill = color,
             # Not drawn by ggplot2; supplies the tooltip when the plot is
             # rendered through ggplotly(tooltip = "text").
             text = glue::glue("<b>{color_term}</b><br><br>{color}<br>{count} mentions<br>chapter {chap_num}")
  )) +
  geom_bar(show.legend = FALSE) +
  scale_fill_identity() +
  # Bars sit directly on the x axis, with 5% headroom above.
  scale_y_continuous(expand = expansion(mult=c(0,.05))) +
  # Label every second chapter.
  scale_x_continuous(limits = c(0, 24),
                     expand = expansion(add=c(0.01,1.01)),
                     breaks = c(1:12 * 2),
                     labels = c(1:12 * 2)) +
  theme_gray() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.line.x = element_line(color = "black")) +
  labs(x = "chapter")
# Render the chart as an interactive widget whose tooltip shows only the
# `text` aesthetic, with the mode bar and legend hidden.
oz_c_plot |>
  plotly::ggplotly(tooltip = c("text")) |>
  plotly::config(displayModeBar = FALSE) |>
  plotly::layout(showlegend = FALSE)
# Check how "rose" is actually used in the text before counting it as a color.
oz_c |>
  contextualize("rose")
three times and rose slowly through the
round hats that rose to a small
swelled out and rose into the air,
and the balloon rose into the air
and the Monkeys rose into the air
Dropping non-color words
# Same chart as oz_c_plot, after excluding dictionary terms treated as
# non-color words (for example, "rose" appears in the text as a verb).
oz_c_plot2 <- oz_c |>
  drop_na(color) |>
  filter(!color_term %in% c("rose", "straw", "rust", "desert")) |>
  # Mentions of each color within each chapter, for the tooltip text.
  mutate(count = n(),
         .by = c(chap_num, color)) |>
  ggplot(aes(x = chap_num,
             fill = color,
             # Not drawn by ggplot2; supplies the tooltip when the plot is
             # rendered through ggplotly(tooltip = "text").
             text = glue::glue("<b>{color_term}</b><br><br>{color}<br>{count} mentions<br>chapter {chap_num}")
  )) +
  geom_bar(show.legend = FALSE) +
  scale_fill_identity() +
  # Bars sit directly on the x axis, with 5% headroom above.
  scale_y_continuous(expand = expansion(mult=c(0,.05))) +
  # Label every second chapter.
  scale_x_continuous(limits = c(0, 24),
                     expand = expansion(add=c(0.01,1.01)),
                     breaks = c(1:12 * 2),
                     labels = c(1:12 * 2)) +
  theme_gray() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.line.x = element_line(color = "black")) +
  labs(x = "chapter")
# Render the filtered chart as an interactive widget whose tooltip shows only
# the `text` aesthetic, with the mode bar and legend hidden.
# The conversion/config/layout steps are applied once: repeating them on an
# object that is already a plotly widget does not change it.
oz_c_plot2 |>
  plotly::ggplotly(tooltip = c("text")) |>
  plotly::config(displayModeBar = FALSE) |>
  plotly::layout(showlegend = FALSE)
Showing saturation
oz_c drop_na(color_saturation) |>
summarize(saturation = median(color_saturation),
.by = doc_id) |>
ggplot(aes(doc_id, saturation)) +
