library(tidyverse)
library(tmtyro)
Getting Wizard of Oz
# Build the word-level table for the novel, keeping only chapter sections.
# NOTE(review): `parse_html()` is not defined in the visible part of this
# file; presumably it is declared earlier -- confirm.
oz <-
  # Alternative source, left disabled:
  # get_gutenberg_corpus(55, meta_fields = NULL) |>
  parse_html("gutenberg/55.htm") |>
  filter(str_detect(part, "Chapter")) |>
  identify_by(part) |>
  load_texts(keep_original = TRUE) |>
  mutate(
    # "Chapter XIV ..." -> "XIV" -> 14L (Roman numeral to integer)
    chap_num = str_extract(part, "Chapter [IXV]+") |>
      str_remove_all("Chapter ") |>
      as.roman() |>
      as.integer(),
    # Whatever remains of `part` once the "Chapter <numeral>" prefix is removed
    chap_title = str_remove_all(part, "Chapter [IXV]+") |> trimws(),
    .before = original
  ) |>
  select(-part) |>
  # Re-key the documents by chapter number now that `part` is gone
  identify_by(chap_num)

# Print the resulting tibble
oz
# A tibble: 39,225 × 6
doc_id title chap_num chap_title original word
<fct> <chr> <int> <chr> <chr> <chr>
1 1 The Wonderful Wizard of Oz 1 The Cyclone Dorothy dorothy
2 1 The Wonderful Wizard of Oz 1 The Cyclone lived lived
3 1 The Wonderful Wizard of Oz 1 The Cyclone in in
4 1 The Wonderful Wizard of Oz 1 The Cyclone the the
5 1 The Wonderful Wizard of Oz 1 The Cyclone midst midst
6 1 The Wonderful Wizard of Oz 1 The Cyclone of of
7 1 The Wonderful Wizard of Oz 1 The Cyclone the the
8 1 The Wonderful Wizard of Oz 1 The Cyclone great great
9 1 The Wonderful Wizard of Oz 1 The Cyclone Kansas kansas
10 1 The Wonderful Wizard of Oz 1 The Cyclone prairies, prairies
# ℹ 39,215 more rows
# Show occurrences of "yellow" with their surrounding words; `limit = 1:10`
# presumably restricts the display to the first ten matches -- confirm
# against the tmtyro documentation.
oz |>
  contextualize("yellow", limit = 1:10)
is paved with yellow brick,” said the
one paved with yellow bricks. Within a
on the hard, yellow road-bed. The
the road of yellow brick. When she
the path of yellow brick for the
stumbled over the yellow bricks, which were
the road of yellow brick. It was
the road of yellow brick, she was
was paved with yellow brick. The Tin
still paved with yellow brick, but these
Adding a dictionary
# Read the full color listing from a local CSV file
all_colors <- read_csv("~/GitHub/chromatome/all_colors.csv")
Rows: 3841 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): term, color, group, listing
dbl (6): red, green, blue, hue, saturation, brightness
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the averaged color table from a local CSV file
avg_colors <- read_csv("~/GitHub/chromatome/average_colors.csv")
Rows: 478 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): term, color, group
dbl (8): sources, red, green, blue, red_diff, hue, saturation, brightness
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the lookup term and the color attributes, then convert the table
# into a dictionary usable by `add_dictionary()`.
colors <- avg_colors |>
  select(term, color, hue, saturation, brightness) |>
  make_dictionary()

# Annotate the word-level table with the color dictionary; words without a
# dictionary match end up with NA in the added columns.
oz_c <- oz |>
  add_dictionary(colors)
Joining with `by = join_by(ngram)`
# Show only the rows whose word matched a color in the dictionary
oz_c |>
  drop_na(color)
# A tibble: 372 × 11
doc_id title chap_num chap_title original word color color_term color_hue
<fct> <chr> <int> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
2 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
3 1 The Won… 1 The Cyclo… green, green #04B… green 0.334
4 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
5 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
6 1 The Won… 1 The Cyclo… gray; gray #A29… gray 0
7 1 The Won… 1 The Cyclo… red red #F80… red 0
8 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
9 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
10 1 The Won… 1 The Cyclo… gray gray #A29… gray 0
# ℹ 362 more rows
# ℹ 2 more variables: color_saturation <dbl>, color_brightness <dbl>
# Bar chart of color-word mentions per chapter. Each bar segment is filled
# with the matched color itself (`scale_fill_identity()`), and the extra
# `text` aesthetic carries an HTML label meant for an interactive tooltip.
oz_c_plot <-
  oz_c |>
  drop_na(color) |>
  # Per-chapter, per-color tally, used only in the tooltip label
  mutate(count = n(),
         .by = c(chap_num, color)) |>
  ggplot(aes(x = chap_num,
             fill = color,
             text = glue::glue("<b>{color_term}</b><br><br>{color}<br>{count} mentions<br>chapter {chap_num}"))) +
  geom_bar(show.legend = FALSE) +
  scale_fill_identity() +
  # No padding below the bars, 5% above
  scale_y_continuous(expand = expansion(mult = c(0, .05))) +
  # Label every second chapter
  scale_x_continuous(limits = c(0, 24),
                     expand = expansion(add = c(0.01, 1.01)),
                     breaks = c(1:12 * 2),
                     labels = c(1:12 * 2)) +
  theme_gray() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.line.x = element_line(color = "black")) +
  labs(x = "chapter")
# Convert the static chart to an interactive plotly widget that shows only
# the custom `text` aesthetic in its tooltip, with the mode bar and legend
# turned off.
oz_c_plot |>
  plotly::ggplotly(tooltip = c("text")) |>
  plotly::config(displayModeBar = FALSE) |>
  plotly::layout(showlegend = FALSE)
# Inspect how "rose" is used in the text
oz_c |>
  contextualize("rose")
three times and rose slowly through the
round hats that rose to a small
swelled out and rose into the air,
and the balloon rose into the air
and the Monkeys rose into the air
Dropping non-color words
# Same per-chapter color chart as before, but excluding dictionary terms
# that are filtered out here as non-color words ("rose", "straw", "rust",
# "desert").
oz_c_plot2 <-
  oz_c |>
  drop_na(color) |>
  filter(!color_term %in% c("rose", "straw", "rust", "desert")) |>
  # Per-chapter, per-color tally, used only in the tooltip label
  mutate(count = n(),
         .by = c(chap_num, color)) |>
  ggplot(aes(x = chap_num,
             fill = color,
             text = glue::glue("<b>{color_term}</b><br><br>{color}<br>{count} mentions<br>chapter {chap_num}"))) +
  geom_bar(show.legend = FALSE) +
  scale_fill_identity() +
  # No padding below the bars, 5% above
  scale_y_continuous(expand = expansion(mult = c(0, .05))) +
  # Label every second chapter
  scale_x_continuous(limits = c(0, 24),
                     expand = expansion(add = c(0.01, 1.01)),
                     breaks = c(1:12 * 2),
                     labels = c(1:12 * 2)) +
  theme_gray() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.line.x = element_line(color = "black")) +
  labs(x = "chapter")
# Convert the filtered chart to an interactive plotly widget that shows only
# the custom `text` aesthetic in its tooltip, with the mode bar and legend
# turned off. The original repeated the ggplotly/config/layout chain a
# second time; that repetition was redundant, so the chain is applied once.
oz_c_plot2 |>
  plotly::ggplotly(tooltip = c("text")) |>
  plotly::config(displayModeBar = FALSE) |>
  plotly::layout(showlegend = FALSE)
Showing saturation
# Median saturation of the matched color words per document, plotted as
# one point per `doc_id`.
oz_c |>
  drop_na(color_saturation) |>
  summarize(saturation = median(color_saturation),
            .by = doc_id) |>
  ggplot(aes(doc_id, saturation)) +
  geom_point()
References
BBC Three. GIF of RuPaul saying "The library is officially open". https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExbnNpNzc2Zm83MXg5anJ4ZTllbWl1N2g5d3Z4MjcyZGp6d2oweXFneiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/DGw7yzbpkiBtFvpQ4w/giphy.gif.
Johnston, Myfanwy, and David Robinson. gutenbergr: Download and Process Public Domain Works from Project Gutenberg. 2023, https://CRAN.R-project.org/package=gutenbergr.
Pinterest user Jeri P. GIF of Dolly Parton saying "I need the money. It’s amazing how much it costs a person to make a person look so cheap!". https://www.pinterest.com/pin/private-gif--403635185340416625/.
Project Gutenberg. www.gutenberg.org.
Reddit user kemmes7. GIF of Thorgy Thor saying "Love Pink!" after initially calling her a bad seed. https://i.redd.it/ehda7xsro0ab1.gif.
Tenor user BryanWilliam1. GIF of Law Roach saying "You did what needed to be done!". https://tenor.com/view/youdidwhatneededtobedone-you-did-what-legendary-gif-22038131.
Wickham, Hadley. rvest: Easily Harvest (Scrape) Web Pages. 2024, https://CRAN.R-project.org/package=rvest.
Wilde, Oscar. The Picture of Dorian Gray. Project Gutenberg, 1890, https://gutenberg.org/ebooks/4078.
Citation
BibTeX citation:
@misc{clawson2024,
author = {Clawson, James},
title = {Coloring {Kansas}},
date = {2024-07-11},
url = {https://jmclawson.net/posts/coloring-kansas},
langid = {en}
}
For attribution, please cite this work as:
Clawson, James. “Coloring Kansas.” jmclawson.net, 11 July 2024, https://jmclawson.net/posts/coloring-kansas.