Code
library(pacman)
pacman :: p_load(tidytuesdayR, tidyverse, dplyr, janitor, ggtext, showtext, scales, glue, marquee)
tuesdata <- tidytuesdayR::tt_load('2025-11-18')
Ana Luisa Bodevan
November 18, 2025
This week's challenge dataset is on Sherlock Holmes. Check the TidyTuesday GitHub repo for the data.
There are four canonical Sherlock Holmes novels: A Study in Scarlet, The Sign of the Four, The Hound of the Baskervilles, and The Valley of Fear.
# Quick exploratory bar chart: one bar per novel, ordered by publication year.
watson_counts |>
  ggplot(mapping = aes(x = reorder(book, year), y = count)) +
  geom_col() +
  # Print each count just above its bar.
  geom_text(
    mapping = aes(label = count),
    fontface = "bold",
    size = 5,
    vjust = -0.5
  ) +
  labs(
    x = NULL,
    y = "Number of Occurrences",
    title = "Frequency of 'My Dear Watson' in Canonical Sherlock Holmes Novels"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    # Slant the book titles so the long labels do not overlap.
    axis.text.x = element_text(size = 11, angle = 45, hjust = 1)
  )
# Create dot plot (lollipop chart) of the phrase count per novel.
#
# `book` is converted to a factor ordered by descending year *before* it
# reaches ggplot, and every layer maps that same factor. Mapping the raw
# character column in any single layer would train the discrete y scale
# alphabetically and silently discard the chronological ordering.
watson_counts |>
  mutate(book = reorder(book, -year)) |>
  ggplot(aes(x = count, y = book)) +
  # Segment from y-axis to point (y is inherited from the global mapping)
  geom_segment(
    aes(x = 0, xend = count, yend = book),
    color = "#8B4513", linewidth = 1.2, alpha = 0.6
  ) +
  # Dots: a larger dark disc with a smaller lighter disc on top
  geom_point(size = 10, color = "#8B4513", alpha = 0.9) +
  geom_point(size = 7, color = "#D2691E", alpha = 0.8) +
  # Count labels inside dots
  geom_text(
    aes(label = count),
    color = "white", size = 4.5,
    fontface = "bold", family = "lato"
  ) +
  # Year labels, right-aligned just left of the zero line
  geom_text(
    aes(label = paste0("(", year, ")"), x = -0.5),
    hjust = 1, size = 3.5, color = "#6D4C41",
    family = "lato", fontface = "italic"
  ) +
  # Styling: the negative lower limit leaves room for the year labels
  scale_x_continuous(
    breaks = seq(0, 10, 2),
    limits = c(-2, 11),
    expand = c(0, 0)
  ) +
  labs(
    title = "**My Dear Watson...**",
    subtitle = "The famous catchphrase was rarely featured in canonical novels",
    caption = "#TidyTuesday {sherlock}\nGraphic: Ana Bodevan",
    x = NULL,
    y = NULL
  ) +
  theme_void(base_family = "garamond") +
  theme(
    # element_marquee renders the markdown bold markers in the title
    plot.title = element_marquee(
      family = "garamond",
      size = 32,
      color = "#3E2723",
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 18,
      family = "garamond",
      color = "#6D4C41",
      margin = margin(b = 20, t = 2)
    ),
    plot.caption = element_text(size = 10, family = "lato", color = "#6D4C41"),
    plot.background = element_rect(fill = "#FFF8E1", color = NA),
    panel.background = element_rect(fill = "#FFF8E1", color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#E8DCC8", linewidth = 0.4, linetype = "dotted"
    ),
    # theme_void() removes axis text, so the book titles are re-enabled here
    axis.text.y = element_text(
      size = 14, color = "#3E2723",
      hjust = 0, face = "italic"
    ),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
---
title: "Sherlock"
author: "Ana Luisa Bodevan"
date: "2025-11-18"
categories: [dot chart, frequency]
image: "20251118.png"
execute:
warning: false
message: false
eval: true
format:
html:
code-tools: true
code-fold: true
---
This week's challenge dataset is on Sherlock Holmes. Check the [TidyTuesday](https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-11-18/readme.md) GitHub repo for the data.
## 1. SETUP
### 1.1 Load libraries and data
```{r}
#| label: load libraries and data
# Attach pacman, then use p_load() to attach every package the analysis needs.
library(pacman)
pacman::p_load(
  tidytuesdayR, tidyverse, dplyr, janitor, ggtext,
  showtext, scales, glue, marquee
)

# Fetch the TidyTuesday release for this date.
tuesdata <- tidytuesdayR::tt_load("2025-11-18")
```
## 2. DATA WRANGLING
### 2.1 Manually get novels and their release years
There are four canonical Sherlock Holmes novels: A Study in Scarlet, The Sign of the Four, The Hound of the Baskervilles, and The Valley of Fear.
```{r}
# Pull the `holmes` table out of the downloaded TidyTuesday bundle.
holmes <- tuesdata$holmes

# Hand-entered lookup of the four novels and their years.
# NOTE(review): titles are presumably capitalised to match `holmes$book`
# exactly ("In", "Of") -- confirm against the data before editing them.
novels <- tibble(
  book = c(
    "A Study In Scarlet",
    "The Sign of the Four",
    "The Hound of the Baskervilles",
    "The Valley Of Fear"
  ),
  year = c(1887, 1890, 1901, 1914)
)
```
### 2.2 Find "My Dear Watson"
```{r}
# Keep only rows belonging to the four novels and attach each novel's year.
# inner_join() drops every book that is not listed in `novels`, and the join
# key is spelled out so the join can never silently match on additional
# columns that happen to share a name.
lines <-
  holmes |>
  inner_join(novels, by = "book") |>
  drop_na(text) |>
  arrange(year) |>
  # Flag rows whose text contains the phrase; only the leading "M" is
  # case-insensitive. Each row counts at most once.
  # NOTE(review): if `text` holds one printed line per row, a phrase wrapped
  # across two rows would be missed -- confirm against the data.
  mutate(watson = str_detect(text, "[Mm]y dear Watson"))

# One row per novel: number of flagged rows, in chronological order.
watson_counts <-
  lines |>
  group_by(book, year) |>
  summarize(count = sum(watson), .groups = "drop") |>
  arrange(year)
```
```{r}
# Quick exploratory bar chart: one bar per novel, ordered by publication year.
watson_counts |>
  ggplot(mapping = aes(x = reorder(book, year), y = count)) +
  geom_col() +
  # Print each count just above its bar.
  geom_text(
    mapping = aes(label = count),
    fontface = "bold",
    size = 5,
    vjust = -0.5
  ) +
  labs(
    x = NULL,
    y = "Number of Occurrences",
    title = "Frequency of 'My Dear Watson' in Canonical Sherlock Holmes Novels"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    # Slant the book titles so the long labels do not overlap.
    axis.text.x = element_text(size = 11, angle = 45, hjust = 1)
  )
```
## 3. PLOT
### 3.1 Fonts and colors
```{r}
# Register two Google Fonts under the short family names the plot refers to.
font_add_google(name = "EB Garamond", family = "garamond")
font_add_google(name = "Lato", family = "lato")

# Turn on showtext rendering for subsequently opened graphics devices.
showtext_auto()
```
### 3.2 Plot
```{r}
# Create dot plot (lollipop chart) of the phrase count per novel.
#
# `book` is converted to a factor ordered by descending year *before* it
# reaches ggplot, and every layer maps that same factor. Mapping the raw
# character column in any single layer would train the discrete y scale
# alphabetically and silently discard the chronological ordering.
watson_counts |>
  mutate(book = reorder(book, -year)) |>
  ggplot(aes(x = count, y = book)) +
  # Segment from y-axis to point (y is inherited from the global mapping)
  geom_segment(
    aes(x = 0, xend = count, yend = book),
    color = "#8B4513", linewidth = 1.2, alpha = 0.6
  ) +
  # Dots: a larger dark disc with a smaller lighter disc on top
  geom_point(size = 10, color = "#8B4513", alpha = 0.9) +
  geom_point(size = 7, color = "#D2691E", alpha = 0.8) +
  # Count labels inside dots
  geom_text(
    aes(label = count),
    color = "white", size = 4.5,
    fontface = "bold", family = "lato"
  ) +
  # Year labels, right-aligned just left of the zero line
  geom_text(
    aes(label = paste0("(", year, ")"), x = -0.5),
    hjust = 1, size = 3.5, color = "#6D4C41",
    family = "lato", fontface = "italic"
  ) +
  # Styling: the negative lower limit leaves room for the year labels
  scale_x_continuous(
    breaks = seq(0, 10, 2),
    limits = c(-2, 11),
    expand = c(0, 0)
  ) +
  labs(
    title = "**My Dear Watson...**",
    subtitle = "The famous catchphrase was rarely featured in canonical novels",
    caption = "#TidyTuesday {sherlock}\nGraphic: Ana Bodevan",
    x = NULL,
    y = NULL
  ) +
  theme_void(base_family = "garamond") +
  theme(
    # element_marquee renders the markdown bold markers in the title
    plot.title = element_marquee(
      family = "garamond",
      size = 32,
      color = "#3E2723",
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 18,
      family = "garamond",
      color = "#6D4C41",
      margin = margin(b = 20, t = 2)
    ),
    plot.caption = element_text(size = 10, family = "lato", color = "#6D4C41"),
    plot.background = element_rect(fill = "#FFF8E1", color = NA),
    panel.background = element_rect(fill = "#FFF8E1", color = NA),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_line(
      color = "#E8DCC8", linewidth = 0.4, linetype = "dotted"
    ),
    # theme_void() removes axis text, so the book titles are re-enabled here
    axis.text.y = element_text(
      size = 14, color = "#3E2723",
      hjust = 0, face = "italic"
    ),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    plot.margin = margin(20, 20, 20, 20)
  )
```