EuroLeague Basketball Analysis

bar plot
sports
Author

Ana Luisa Bodevan

Published

October 7, 2025

1. Setup

Code
library(tidyverse)
library(janitor)
library(ggrepel)
library(patchwork)
library(RColorBrewer)
library(showtext)

# Register the Google font "Roboto" under the family name "roboto" (the name
# the theme and text layers refer to), then let showtext draw all text.
font_add_google(name = "Roboto", family = "roboto")
showtext_auto(enable = TRUE)
Code
# Load data
# The caption on the final figure labels this as TidyTuesday 2025 week 40,
# i.e. Tuesday 2025-10-07. Requesting any other week returns a different
# release whose list has no `euroleague_basketball` element (NULL).
tuesdata <- tidytuesdayR::tt_load("2025-10-07")
euroleague_basketball <- tuesdata$euroleague_basketball

2. Set Theme

Code
# Brand palette: a named character vector of hex colours, looked up by role
# ("primary" / "secondary") elsewhere in the script.
basketball_colors <- c(primary = "#FF6B35", secondary = "#004E89")

# Custom ggplot2 theme used by every plot in this document.
#
# Starts from theme_minimal() and overrides backgrounds, grid lines, text
# styling and legend placement.
#
# base_size   Base font size; axis titles and plot subtitle use base_size + 2,
#             the plot title uses base_size + 4.
# base_family Font family passed to theme_minimal().
#
# Reads the file-level `basketball_colors` vector for the title/subtitle colour.
# Returns a ggplot2 theme object.
theme_basketball <- function(base_size = 14, base_family = "roboto") {
  # Same cream fill for the whole plot and for each panel, no border
  cream_rect <- element_rect(fill = "#fdf6e3", color = NA)
  heading_color <- basketball_colors["secondary"]
  bold_text <- element_text(face = "bold")

  overrides <- theme(
    # Backgrounds
    plot.background = cream_rect,
    panel.background = cream_rect,
    # Grid: light major lines only
    panel.grid.major = element_line(color = "#dcdcdc", linewidth = 0.5),
    panel.grid.minor = element_blank(),
    # Axes
    axis.title = element_text(face = "bold", size = base_size + 2),
    axis.text = element_text(color = "#1E1E1E"),
    # Headings
    plot.title = element_text(
      face = "bold",
      size = base_size + 4,
      color = heading_color
    ),
    plot.subtitle = element_text(
      face = "italic",
      size = base_size + 2,
      color = heading_color
    ),
    # Legend
    legend.position = "bottom",
    legend.background = element_blank(),
    legend.key = element_blank(),
    legend.title = bold_text,
    # Facet strips
    strip.text = bold_text
  )

  theme_minimal(base_size = base_size, base_family = base_family) + overrides
}

# Make the custom theme the session default so every later plot picks it up
theme_basketball() |>
  theme_set()

3. Data Exploration

Code
# Quick look at the raw column names and types before cleaning
euroleague_basketball |>
  glimpse()

4. Data Cleaning

Code
# Build the cleaned analysis table `df` from the raw data.
# Steps run in order inside one mutate(), so later expressions see the
# already-squished text columns.
df <- euroleague_basketball |>
  janitor::clean_names() |>
  mutate(
    # Collapse/trim whitespace in the text columns; str_squish() leaves NA as NA
    across(
      c(
        team, home_city, arena, country, last_season,
        years_of_final_four_appearances, years_of_titles_won
      ),
      \(txt) str_squish(txt)
    ),

    # Numeric versions of text fields (added as new columns)
    capacity_num = parse_number(capacity),
    # last_season may hold non-numeric text such as "(EuroCup)"; those become
    # NA and the resulting parse warnings are silenced
    last_season_num = suppressWarnings(parse_number(last_season)),

    # Counts: coerce to integer and treat missing as zero
    across(
      c(final_four_appearances, titles_won),
      \(n) replace_na(as.integer(n), 0)
    ),

    # Year lists: "None" becomes NA, then comma-separated text is split into
    # a list column (one character vector per team)
    across(
      c(years_of_final_four_appearances, years_of_titles_won),
      \(yrs) str_split(na_if(yrs, "None"), ",\\s*")
    )
  )

5. Create Color Palette

Code
# Map every country in `df` to a fixed colour so all plots agree.
# The base palette is Set2 followed by Dark2 (16 colours). With 16 or fewer
# countries the first n colours are used as-is; with more, extra colours are
# interpolated so no country ends up with an NA colour.
countries <- unique(df$country)
base_palette <- c(
  RColorBrewer::brewer.pal(8, "Set2"),
  RColorBrewer::brewer.pal(8, "Dark2")
)
n_countries <- length(countries)
country_palette <- if (n_countries <= length(base_palette)) {
  base_palette[seq_len(n_countries)]
} else {
  grDevices::colorRampPalette(base_palette)(n_countries)
}
country_colors <- setNames(country_palette, countries)

6. Visualizations

6.1 Country Performance

Code
# Horizontal bar chart: average number of titles per team, by country.
# Only countries with at least one title are shown; bar order comes from
# reorder() in the x aesthetic, so no explicit arrange() is needed.
p1 <- df |>
  group_by(country) |>
  summarise(
    total_titles = sum(titles_won, na.rm = TRUE),
    n_teams = n_distinct(team),
    .groups = "drop"
  ) |>
  mutate(titles_per_team = total_titles / n_teams) |>
  filter(titles_per_team > 0) |>
  ggplot(
    aes(
      x = reorder(country, titles_per_team),
      y = titles_per_team,
      fill = country
    )
  ) +
  geom_col() +
  geom_text(
    aes(label = round(titles_per_team, 2)),
    hjust = -0.2,
    size = 4,
    family = "roboto"
  ) +
  # Labels sit just past the bar ends; widen the upper end of the value axis
  # so the label on the longest bar is not clipped by the panel edge
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() +
  labs(
    title = "EuroLeague Performance by Country",
    subtitle = "Average titles per team",
    x = NULL,
    y = "Average Titles per Team"
  ) +
  scale_fill_manual(values = country_colors) +
  theme(legend.position = "none")

p1

6.2 Team Performance Index

Code
# Horizontal bar chart of a per-team performance index:
# titles won plus half a point per Final Four appearance.
# Teams with an index of zero are dropped; bar order comes from reorder().
p2 <- df |>
  mutate(
    perf_index = titles_won + 0.5 * final_four_appearances
  ) |>
  filter(perf_index > 0) |>
  ggplot(
    aes(
      x = reorder(team, perf_index),
      y = perf_index,
      fill = country
    )
  ) +
  geom_col() +
  geom_text(
    aes(label = round(perf_index, 1)),
    hjust = -0.2,
    size = 3,
    family = "roboto"
  ) +
  # Labels sit just past the bar ends; widen the upper end of the value axis
  # so the label on the longest bar is not clipped by the panel edge
  scale_y_continuous(expand = expansion(mult = c(0.05, 0.15))) +
  coord_flip() +
  labs(
    title = "Team Performance Index",
    subtitle = "Top performers, colored by their country",
    x = NULL,
    y = "Titles + 0.5 × Final Four Appearances"
  ) +
  scale_fill_manual(values = country_colors)

p2

6.3 Arena Capacity vs Titles

Code
# Scatter plot of arena capacity against titles won, restricted to teams
# with at least one title. Points are coloured by country and labelled with
# the team name (labels repelled to avoid overlap).
p3 <- ggplot(
  data = filter(df, titles_won > 0),
  mapping = aes(x = capacity_num, y = titles_won, color = country)
) +
  geom_point(size = 4, alpha = 0.9) +
  geom_text_repel(
    mapping = aes(label = team),
    size = 3,
    family = "roboto"
  ) +
  scale_color_manual(values = country_colors) +
  labs(
    x = "Arena Capacity",
    y = "Titles Won",
    title = "Does Size Matter?",
    subtitle = "Arena Capacity vs Titles Won, at least one Title"
  ) +
  theme(legend.position = "bottom")

p3

6.4 Combined Visualization

Code
# Assemble the final figure with patchwork: p1 stacked above p2 in the left
# column, p3 filling the right column, plus a shared caption.
# `/` binds tighter than `|`, so `p1 / p2 | p3` is `(p1 / p2) | p3`.
plot <- (p1 / p2 | p3) +
  plot_annotation(
    theme = theme(
      plot.background = element_rect(fill = "#fdf6e3", color = NA),
      plot.caption = element_text(size = 10, family = "roboto")
    ),
    caption = "Source: {EuroleagueBasketball} | Viz: @anabodevan\n#TidyTuesday 2025 W40"
  )

plot

7. Save Plot

Code
# Write the combined figure to disk.
# showtext draws text at its own dpi setting (96 by default), independent of
# the dpi passed to ggsave(). Without aligning the two, text in a 300-dpi file
# comes out much smaller than the sizes set in the theme. Set showtext's dpi
# to the output dpi for the save, then put it back to the default.
save_dpi <- 300
showtext_opts(dpi = save_dpi)

ggsave(
  filename = "image.png",
  plot = plot,
  width = 16,
  height = 10,
  dpi = save_dpi,
  bg = "#fdf6e3"
)

showtext_opts(dpi = 96)