Henley Passport Index

sankey
misc
Author

Ana Luisa Bodevan

Published

September 9, 2025

Code
############## TIDYTUESDAY WEEK 36
############## HENLEY PASSPORT INDEX DATA 

############## 1. SETUP

library(pacman)

  # library(devtools)
  # devtools::install_github("davidsjoberg/ggsankey")

pacman :: p_load(tidyverse, dplyr, showtext, ggtext, ggsankey, scales)

# Download the `rank_by_year.csv` table from the TidyTuesday 2025-09-09 folder.
rank_by_year <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-09-09/rank_by_year.csv"
)

############## 2. DATA WRANGLING 

# 2.1 Create a function to rank
#' Bucket passport ranks into display tiers
#'
#' Vectorised: every rank is mapped to one of five labels using the bands
#' `<= 25`, `(25, 50]`, `(50, 75]`, `(75, 100]` and `> 100`. The last band has
#' no upper cap, so it matches its "Rank 101+" label and ranks above 125 are
#' not turned into `NA`.
#'
#' @param rank Numeric vector of ranks. Missing values stay missing.
#' @return Character vector of tier labels with the same length as `rank`.
create_ranking_tiers <- function(rank) {
  tier_labels <- c(
    "Top 25", "Rank 26-50", "Rank 51-75", "Rank 76-100", "Rank 101+"
  )
  # left.open = TRUE keeps a rank that sits exactly on a break (e.g. 25) in
  # the lower tier, i.e. the same result as a chain of `<=` comparisons.
  tier_index <- findInterval(rank, c(25, 50, 75, 100), left.open = TRUE) + 1L
  # Indexing with NA yields NA_character_, so missing ranks stay missing.
  tier_labels[tier_index]
}

# 2.2 Get regional performance
# One row per (region, rank tier) pair, with the number of matching rows of
# `rank_by_year` stored in `countries`. Pairs with a missing region or tier
# are removed, then the tiers become a factor whose levels run from the best
# tier to the worst so that they can be laid out in that order.
# NOTE(review): count() tallies rows; if `rank_by_year` holds one row per
# country per year, `countries` is a country-year count -- confirm intent.
regional_performance <- rank_by_year %>%
  mutate(rank_tier = create_ranking_tiers(rank)) %>%
  count(region, rank_tier, name = "countries") %>%
  # The count column is never NA, so these two checks cover every column.
  filter(!is.na(region), !is.na(rank_tier)) %>%
  mutate(
    rank_tier = factor(
      rank_tier,
      levels = c(
        "Top 25",
        "Rank 26-50",
        "Rank 51-75",
        "Rank 76-100",
        "Rank 101+"
      )
    )
  )

############## 3. PLOT

# 3.1 Base plot
# Unstyled first pass: a sankey bump chart with one band per region across
# the ranking tiers, stored in `p` and printed for a quick look.

p <- ggplot(
  data = regional_performance,
  mapping = aes(
    x = rank_tier,
    value = countries,
    node = region,
    fill = region,
    label = region
  )
) +
  geom_sankey_bump(width = 0.05, smooth = 8, alpha = 0.8) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "grey90", colour = NA),
    legend.position = "bottom"
  )

p

# 3.2 Fonts and colors

# Register the Google font "Roboto" under the family name "roboto" and
# switch showtext rendering on.
font_add_google(name = "Roboto", family = "roboto")
showtext_auto()

# One fill colour per region: the ColorBrewer "Set3" palette for up to 11
# regions, otherwise a rainbow palette at 70% opacity.
n_regions <- length(unique(regional_performance[["region"]]))
region_colors <- if (n_regions > 11) {
  rainbow(n_regions, alpha = 0.7)
} else {
  RColorBrewer::brewer.pal(n_regions, "Set3")
}

# 3.3 Sankey
# Final chart: same sankey bump layers as the base plot, plus the region
# palette, a padded x axis, a stripped-down theme and the title/caption.

ggplot(
  data = regional_performance,
  mapping = aes(
    x = rank_tier,
    value = countries,
    node = region,
    fill = region,
    label = region
  )
) +
  geom_sankey_bump(width = 0.05, smooth = 8, alpha = 0.8) +

  # Scales: manual region colours and extra room at both ends of the x axis
  scale_x_discrete(expand = c(0.1, 0.1)) +
  scale_fill_manual(values = region_colors) +

  # Text shown around the plot
  labs(
    title = "Regional Distribution of Henley's Passport Index",
    subtitle = "",
    caption = "Data: Henley Passport Index | #tidytuesday | @anabodevan "
  ) +

  # Start from the minimal theme, then override individual elements
  theme_minimal() +
  theme(
    # Title and caption use the registered "roboto" family
    plot.title = element_text(
      family = "roboto",
      face = "bold",
      size = 20,
      hjust = 0.5,
      margin = margin(b = 10)
    ),
    plot.caption = element_text(family = "roboto", size = 10, colour = "grey40"),

    # Flat, near-white canvas without grid lines
    plot.background = element_rect(fill = "grey98", colour = NA),
    panel.background = element_rect(fill = "grey98", colour = NA),
    panel.grid = element_blank(),
    plot.margin = margin(t = 20, r = 20, b = 20, l = 20),

    # Only the tier names on the x axis are kept
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 11, face = "bold", colour = "grey20"),

    # Untitled legend underneath the panel
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.text = element_text(size = 9),
    legend.box.margin = margin(t = 20)
  )