library(tidyverse)
library(tmtyro)
Getting Wizard of Oz
# Download Project Gutenberg text 55 and load it as a one-word-per-row table.
# `keep_original = TRUE` retains the untouched token in an `original` column
# next to the normalized `word` column (see the printed tibble below).
oz <- get_gutenberg_corpus(55, meta_fields = NULL) |>
  load_texts(keep_original = TRUE)

# Print the table to inspect its columns.
oz
# A tibble: 39,421 × 4
doc_id part original word
<int> <chr> <chr> <chr>
1 55 by L. Frank Baum This this
2 55 by L. Frank Baum book book
3 55 by L. Frank Baum is is
4 55 by L. Frank Baum dedicated dedicated
5 55 by L. Frank Baum to to
6 55 by L. Frank Baum my my
7 55 by L. Frank Baum good good
8 55 by L. Frank Baum friend friend
9 55 by L. Frank Baum comrade comrade
10 55 by L. Frank Baum My my
# ℹ 39,411 more rows
# Show the first ten occurrences of "yellow" with their surrounding words.
oz |>
  contextualize("yellow", limit = 1:10)
is paved with yellow brick,” said the
one paved with yellow bricks. Within a
on the hard, yellow road-bed. The
the road of yellow brick. When she
the path of yellow brick for the
stumbled over the yellow bricks, which were
the road of yellow brick. It was
the road of yellow brick, she was
was paved with yellow brick. The Tin
still paved with yellow brick, but these
# Keep only rows that belong to a chapter, then split the `part` label into a
# numeric chapter number and a chapter title.
oz <- oz |>
  filter(str_detect(part, "Chapter")) |>
  mutate(
    # "Chapter IV ..." -> "IV" -> 4
    chap_num = str_extract(part, "Chapter [IXV]+") |>
      str_remove_all("Chapter ") |>
      as.roman() |>
      as.integer(),
    # Whatever follows the "Chapter <numeral>" prefix is the title; the
    # leading space left behind by the removal is not trimmed here.
    chap_title = str_remove_all(part, "Chapter [IXV]+"),
    .before = original) |>
  select(-part) |>
  # NOTE(review): presumably re-keys `doc_id` by chapter number -- confirm
  # against the tmtyro documentation.
  identify_by(chap_num)

# Print the reshaped table.
oz
# A tibble: 39,225 × 5
doc_id chap_num chap_title original word
<fct> <int> <chr> <chr> <chr>
1 1 1 " The Cyclone" Dorothy dorothy
2 1 1 " The Cyclone" lived lived
3 1 1 " The Cyclone" in in
4 1 1 " The Cyclone" the the
5 1 1 " The Cyclone" midst midst
6 1 1 " The Cyclone" of of
7 1 1 " The Cyclone" the the
8 1 1 " The Cyclone" great great
9 1 1 " The Cyclone" Kansas kansas
10 1 1 " The Cyclone" prairies, prairies
# ℹ 39,215 more rows
Adding a dictionary
# Read the averaged color table from a local CSV; the per-source table is left
# commented out as an alternative input.
# all_colors <- read_csv("~/GitHub/chromatome/all_colors.csv")
avg_colors <- read_csv("~/GitHub/chromatome/average_colors.csv")
Rows: 478 Columns: 11
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): term, color, group
dbl (8): sources, red, green, blue, red_diff, hue, saturation, brightness
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the term, its hex color, and its hue/saturation/brightness values, then
# convert the table into a dictionary that can be joined to the text.
colors <- avg_colors |>
  select(term, color, hue, saturation, brightness) |>
  make_dictionary()
# Attach the color dictionary to the text; words that match no dictionary term
# get NA in the added color columns.
oz_c <- oz |>
  add_dictionary(colors)
Joining with `by = join_by(ngram)`
# Show only the words that matched a color term.
oz_c |>
  drop_na(color)
# A tibble: 372 × 10
doc_id chap_num chap_title original word color color_term color_hue
<fct> <int> <chr> <chr> <chr> <chr> <chr> <dbl>
1 1 1 " The Cyclone" gray gray #A29F9F gray 0
2 1 1 " The Cyclone" gray gray #A29F9F gray 0
3 1 1 " The Cyclone" green, green #04B905 green 0.334
4 1 1 " The Cyclone" gray gray #A29F9F gray 0
5 1 1 " The Cyclone" gray gray #A29F9F gray 0
6 1 1 " The Cyclone" gray; gray #A29F9F gray 0
7 1 1 " The Cyclone" red red #F80202 red 0
8 1 1 " The Cyclone" gray gray #A29F9F gray 0
9 1 1 " The Cyclone" gray gray #A29F9F gray 0
10 1 1 " The Cyclone" gray gray #A29F9F gray 0
# ℹ 362 more rows
# ℹ 2 more variables: color_saturation <dbl>, color_brightness <dbl>
# Bar chart of color-word mentions per chapter. Each bar segment is filled with
# the matched color itself, and the `text` aesthetic carries an HTML tooltip
# string that is consumed when the plot is converted with plotly.
oz_c_plot <-
  oz_c |>
  drop_na(color) |>
  # Number of mentions of each color within each chapter, for the tooltip.
  mutate(count = n(),
         .by = c(chap_num, color)) |>
  ggplot(aes(x = chap_num,
             fill = color,
             text = glue::glue("<b>{color_term}</b><br><br>{color}<br>{count} mentions<br>chapter {chap_num}")
  )) +
  geom_bar(show.legend = FALSE) +
  # `color` holds hex codes, so use the values directly as fills.
  scale_fill_identity() +
  # No padding below the bars; a little headroom above them.
  scale_y_continuous(expand = expansion(mult=c(0,.05))) +
  # Label every second chapter from 2 to 24.
  scale_x_continuous(limits = c(0, 24),
                     expand = expansion(add=c(0.01,1.01)),
                     breaks = c(1:12 * 2),
                     labels = c(1:12 * 2)) +
  theme_gray() +
  theme(panel.grid.major.x = element_blank(),
        panel.grid.minor.x = element_blank(),
        axis.line.x = element_line(color = "black")) +
  labs(x = "chapter")
# Convert the ggplot to an interactive plotly widget, using only the custom
# `text` aesthetic for tooltips and hiding the mode bar and legend.
oz_c_plot |>
  plotly::ggplotly(tooltip = c("text")) |>
  plotly::config(displayModeBar = FALSE) |>
  plotly::layout(showlegend = FALSE)
# Check how "rose" is used in context; the matches below are all the verb, not
# the color.
oz_c |>
  contextualize("rose")
three times and rose slowly through the
round hats that rose to a small
swelled out and rose into the air,
and the balloon rose into the air
and the Monkeys rose into the air
References
BBC Three. GIF of RuPaul saying "The library is officially open". https://i.giphy.com/media/v1.Y2lkPTc5MGI3NjExbnNpNzc2Zm83MXg5anJ4ZTllbWl1N2g5d3Z4MjcyZGp6d2oweXFneiZlcD12MV9pbnRlcm5hbF9naWZfYnlfaWQmY3Q9Zw/DGw7yzbpkiBtFvpQ4w/giphy.gif.
Johnston, Myfanwy, and David Robinson. gutenbergr: Download and Process Public Domain Works from Project Gutenberg. 2023, https://CRAN.R-project.org/package=gutenbergr.
Pinterest user Jeri P. GIF of Dolly Parton saying "I need the money. It’s amazing how much it costs a person to make a person look so cheap!". https://www.pinterest.com/pin/private-gif--403635185340416625/.
Project Gutenberg. www.gutenberg.org.
Reddit user kemmes7. GIF of Thorgy Thor saying "Love Pink!" after initially calling her a bad seed. https://i.redd.it/ehda7xsro0ab1.gif.
Tenor user BryanWilliam1. GIF of Law Roach saying "You did what needed to be done!". https://tenor.com/view/youdidwhatneededtobedone-you-did-what-legendary-gif-22038131.
Wickham, Hadley. rvest: Easily Harvest (Scrape) Web Pages. 2024, https://CRAN.R-project.org/package=rvest.
Wilde, Oscar. The Picture of Dorian Gray. Project Gutenberg, 1890, https://gutenberg.org/ebooks/4078.
Citation
BibTeX citation:
@misc{clawson2024,
author = {Clawson, James},
title = {Shadows of {Doubt}},
date = {2024-06-20},
url = {https://jmclawson.net/posts/shadows-of-doubt},
langid = {en}
}
For attribution, please cite this work as:
Clawson, James. “Shadows of Doubt.” jmclawson.net, 20 June 2024, https://jmclawson.net/posts/shadows-of-doubt.