Code
library(pacman)
pacman :: p_load(tidytuesdayR, tidyverse, dplyr, janitor, ggtext, showtext, scales, glue, cowplot, ggdist)
tuesdata <- tidytuesdayR::tt_load('2025-11-04')Ana Luisa Bodevan
November 4, 2025
This week challenge dataset in on Lead concetration in Flint water samples. Check the TidyTuesday GitHub repo for the data.
my_theme <- function(gridline_x = TRUE, gridline_y = TRUE) {
gridline <- element_line(
linetype = "dashed",
linewidth = 0.15,
color = "#999999"
)
gridline_x <- if (isTRUE(gridline_x)) gridline else element_blank()
gridline_y <- if (isTRUE(gridline_y)) gridline else element_blank()
theme_minimal() +
theme(
plot.title = element_text(size = 18, face = "bold", color = "#333333", margin = margin(b = 10)),
plot.subtitle = element_text(size = 14, color = "#999999", margin = margin(b = 10)),
plot.caption = element_text(size = 13, color = "#777777", margin = margin(t = 15), hjust = 0),
axis.text = element_text(size = 11, color = "#333333"),
plot.title.position = "plot",
plot.caption.position = "plot",
panel.grid.minor = element_blank(),
panel.grid.major.x = gridline_x,
panel.grid.major.y = gridline_y,
axis.ticks.x = element_line(linetype = "solid", linewidth = 0.25, color = "#999999"),
axis.ticks.length.x = unit(4, units = "pt")
)
}flint_mdeq <- flint_mdeq |>
mutate(
excluded = is.na(lead2),
source = "MDEQ"
) |>
select(sample, lead, excluded, notes, source)
flint_vt <- flint_vt |>
reframe(
sample, lead,
excluded = FALSE,
notes = NA_character_,
source = "Virginia Tech"
)
flint_all <- bind_rows(flint_mdeq, flint_vt) |>
mutate(
source = factor(source, levels = c("MDEQ", "Virginia Tech")),
status = if_else(excluded, "Excluded by MDEQ", "Included")
)
summary_table <- flint_all %>%
group_by(source) %>%
summarise(
n = n(),
mean = mean(lead),
median = median(lead),
sd = sd(lead),
pct_above_5 = mean(lead > 5) * 100,
pct_above_15 = mean(lead > 15) * 100
) %>%
arrange(source)
print(summary_table)pal <- c("MDEQ" = "#616247", "Virginia Tech" = "#DAA03D")
font_add_google("Inter", "inter")
showtext_auto()
bg_color <- "#f5f5f5"
title <- "Lead Concentration in Flint Water Samples: Government vs Citizen Testing"
subtitle <- "Testing from Virginia Tech (n = 271) and MDEQ (n = 71) tell very different stories"
caption <- "Data: Loux and Gibson (2018)\nGraphic: Ana Bodevan anabodevan.github.io\n#TidyTuesday 2025 - Week 44"ggplot(flint_all, aes(x = lead, y = source, fill = source)) +
ggdist::stat_halfeye(
adjust = 0.8,
width = 0.6,
justification = -0.2,
point_colour = NA,
interval_alpha = 0,
.width = NA
) +
geom_vline(
data = thresholds,
aes(xintercept = value, color = type, linetype = type),
size = 0.8
) +
scale_color_manual(
name = "EPA threshold",
values = c("EPA Acceptable" = "#420612", "EPA Intervention" = "#420612")
) +
scale_linetype_manual(
name = "EPA threshold",
values = c("EPA Acceptable" = "dashed", "EPA Intervention" = "solid")
) +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 0.5, 1, 5, 10, 15, 50, 100),
labels = c("0", "0.5", "1", "5", "10", "15", "50", "100"),
expand = expansion(mult = c(0.01, 0.02))
) +
scale_fill_manual(values = pal, name = "Source") +
labs(
title = title,
subtitle = subtitle,
x = "Lead (Part per Billion)",
y = NULL,
caption = caption
) +
my_theme(gridline_x = TRUE, gridline_y = FALSE) +
theme(
legend.position = "top",
legend.box = "horizontal",
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 11),
plot.background = element_rect(fill = bg_color, color = NA),
plot.subtitle = element_text(color = "gray20"),
text = element_text(family = "inter"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
plot.margin = margin(t = 10, r = 10, b = 5, l = 10)
) +
coord_cartesian(ylim = c(1.5, 3))---
title: "Lead concetration in Flint water samples"
author: "Ana Luisa Bodevan"
date: "2025-11-04"
categories: [bar graph, social]
image: "20251104.png"
execute:
warning: false
message: false
eval: false
format:
html:
code-tools: true
code-fold: true
---
This week challenge dataset in on Lead concetration in Flint water samples. Check the [TidyTuesday](https://github.com/rfordatascience/tidytuesday/blob/main/data/2025/2025-11-04/readme.md) GitHub repo for the data.
## 1. SETUP
### 1.1 Load libraries and data
```{r}
#| label: load libraries and data
library(pacman)
pacman :: p_load(tidytuesdayR, tidyverse, dplyr, janitor, ggtext, showtext, scales, glue, cowplot, ggdist)
tuesdata <- tidytuesdayR::tt_load('2025-11-04')
```
### 1.2 Set theme
```{r}
#| label: set theme
my_theme <- function(gridline_x = TRUE, gridline_y = TRUE) {
gridline <- element_line(
linetype = "dashed",
linewidth = 0.15,
color = "#999999"
)
gridline_x <- if (isTRUE(gridline_x)) gridline else element_blank()
gridline_y <- if (isTRUE(gridline_y)) gridline else element_blank()
theme_minimal() +
theme(
plot.title = element_text(size = 18, face = "bold", color = "#333333", margin = margin(b = 10)),
plot.subtitle = element_text(size = 14, color = "#999999", margin = margin(b = 10)),
plot.caption = element_text(size = 13, color = "#777777", margin = margin(t = 15), hjust = 0),
axis.text = element_text(size = 11, color = "#333333"),
plot.title.position = "plot",
plot.caption.position = "plot",
panel.grid.minor = element_blank(),
panel.grid.major.x = gridline_x,
panel.grid.major.y = gridline_y,
axis.ticks.x = element_line(linetype = "solid", linewidth = 0.25, color = "#999999"),
axis.ticks.length.x = unit(4, units = "pt")
)
}
```
## 2. DATA WRANGLING
### 2.1 Inital cleaning
```{r}
flint_mdeq <- tuesdata$flint_mdeq |> clean_names()
flint_vt <- tuesdata$flint_vt |> clean_names()
rm(tuesdata)
```
### 2.2 Tidy values and prepare stats table
```{r}
flint_mdeq <- flint_mdeq |>
mutate(
excluded = is.na(lead2),
source = "MDEQ"
) |>
select(sample, lead, excluded, notes, source)
flint_vt <- flint_vt |>
reframe(
sample, lead,
excluded = FALSE,
notes = NA_character_,
source = "Virginia Tech"
)
flint_all <- bind_rows(flint_mdeq, flint_vt) |>
mutate(
source = factor(source, levels = c("MDEQ", "Virginia Tech")),
status = if_else(excluded, "Excluded by MDEQ", "Included")
)
summary_table <- flint_all %>%
group_by(source) %>%
summarise(
n = n(),
mean = mean(lead),
median = median(lead),
sd = sd(lead),
pct_above_5 = mean(lead > 5) * 100,
pct_above_15 = mean(lead > 15) * 100
) %>%
arrange(source)
print(summary_table)
```
### 2.3 Set thresholds
```{r}
thresholds <- tibble(
name = c("EPA Acceptable (5 ppb)", "EPA Intervention (15 ppb)"),
value = c(5, 15),
type = c("EPA Acceptable", "EPA Intervention")
)
```
## 3. Plot
### 3.1 Colors and text
```{r}
pal <- c("MDEQ" = "#616247", "Virginia Tech" = "#DAA03D")
font_add_google("Inter", "inter")
showtext_auto()
bg_color <- "#f5f5f5"
title <- "Lead Concentration in Flint Water Samples: Government vs Citizen Testing"
subtitle <- "Testing from Virginia Tech (n = 271) and MDEQ (n = 71) tell very different stories"
caption <- "Data: Loux and Gibson (2018)\nGraphic: Ana Bodevan anabodevan.github.io\n#TidyTuesday 2025 - Week 44"
```
### 3.2 Plot density
```{r}
ggplot(flint_all, aes(x = lead, y = source, fill = source)) +
ggdist::stat_halfeye(
adjust = 0.8,
width = 0.6,
justification = -0.2,
point_colour = NA,
interval_alpha = 0,
.width = NA
) +
geom_vline(
data = thresholds,
aes(xintercept = value, color = type, linetype = type),
size = 0.8
) +
scale_color_manual(
name = "EPA threshold",
values = c("EPA Acceptable" = "#420612", "EPA Intervention" = "#420612")
) +
scale_linetype_manual(
name = "EPA threshold",
values = c("EPA Acceptable" = "dashed", "EPA Intervention" = "solid")
) +
scale_x_continuous(
trans = scales::pseudo_log_trans(base = 10),
breaks = c(0, 0.5, 1, 5, 10, 15, 50, 100),
labels = c("0", "0.5", "1", "5", "10", "15", "50", "100"),
expand = expansion(mult = c(0.01, 0.02))
) +
scale_fill_manual(values = pal, name = "Source") +
labs(
title = title,
subtitle = subtitle,
x = "Lead (Part per Billion)",
y = NULL,
caption = caption
) +
my_theme(gridline_x = TRUE, gridline_y = FALSE) +
theme(
legend.position = "top",
legend.box = "horizontal",
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 11),
plot.background = element_rect(fill = bg_color, color = NA),
plot.subtitle = element_text(color = "gray20"),
text = element_text(family = "inter"),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
plot.margin = margin(t = 10, r = 10, b = 5, l = 10)
) +
coord_cartesian(ylim = c(1.5, 3))
```