Code
library(tidyverse)
library(janitor)
library(ggrepel)
library(patchwork)
library(RColorBrewer)
library(showtext)
# Load fonts
font_add_google("Roboto", "roboto")
showtext_auto()
Ana Luisa Bodevan
October 7, 2025
# Define basketball colors
basketball_colors <- c(
"primary" = "#FF6B35",
"secondary" = "#004E89"
)
# Set theme
theme_basketball <- function(base_size = 14, base_family = "roboto") {
theme_minimal(base_size = base_size, base_family = base_family) +
theme(
plot.background = element_rect(fill = "#fdf6e3", color = NA),
panel.background = element_rect(fill = "#fdf6e3", color = NA),
panel.grid.major = element_line(color = "#dcdcdc", linewidth = 0.5),
panel.grid.minor = element_blank(),
axis.title = element_text(face = "bold", size = base_size + 2),
axis.text = element_text(color = "#1E1E1E"),
plot.title = element_text(
face = "bold",
size = base_size + 4,
color = basketball_colors["secondary"]
),
plot.subtitle = element_text(
face = "italic",
size = base_size + 2,
color = basketball_colors["secondary"]
),
legend.position = "bottom",
legend.background = element_blank(),
legend.key = element_blank(),
legend.title = element_text(face = "bold"),
strip.text = element_text(face = "bold")
)
}
# Set as default theme
theme_set(theme_basketball())
df <- euroleague_basketball |>
janitor::clean_names() |>
# Clean character columns - trim whitespace and handle NAs
mutate(
across(
c(team, home_city, arena, country, last_season,
years_of_final_four_appearances, years_of_titles_won),
~if_else(is.na(.x), NA_character_, str_squish(.x))
)
) |>
# Parse numeric values using parse_number
mutate(
capacity_num = parse_number(capacity),
# Handle last_season: parse_number for ordinals, NA for text like "(EuroCup)"
last_season_num = suppressWarnings(parse_number(last_season)),
final_four_appearances = as.integer(final_four_appearances),
titles_won = as.integer(titles_won)
) |>
# Replace NAs with 0s for appearance and title counts
mutate(
final_four_appearances = replace_na(final_four_appearances, 0),
titles_won = replace_na(titles_won, 0)
) |>
# Handle "None" values in years columns
mutate(
years_of_final_four_appearances = na_if(years_of_final_four_appearances, "None"),
years_of_titles_won = na_if(years_of_titles_won, "None")
) |>
# Parse years into lists
mutate(
years_of_final_four_appearances = str_split(years_of_final_four_appearances, ",\\s*"),
years_of_titles_won = str_split(years_of_titles_won, ",\\s*")
)
p1 <- df |>
group_by(country) |>
summarise(
total_titles = sum(titles_won, na.rm = TRUE),
total_ff = sum(final_four_appearances, na.rm = TRUE),
n_teams = n_distinct(team),
.groups = "drop"
) |>
mutate(
titles_per_team = total_titles / n_teams,
ff_per_team = total_ff / n_teams
) |>
filter(titles_per_team > 0) |>
arrange(desc(titles_per_team)) |>
ggplot(aes(x = reorder(country, titles_per_team), y = titles_per_team, fill = country)) +
geom_col() +
geom_text(
aes(label = round(titles_per_team, 2)),
hjust = -0.2,
size = 4,
family = "roboto"
) +
coord_flip() +
labs(
title = "EuroLeague Performance by Country",
subtitle = "Average titles per team",
x = NULL,
y = "Average Titles per Team"
) +
scale_fill_manual(values = country_colors) +
theme(legend.position = "none")
p1
p2 <- df |>
mutate(
perf_index = titles_won + 0.5 * final_four_appearances
) |>
filter(perf_index > 0) |>
arrange(desc(perf_index)) |>
ggplot(aes(x = reorder(team, perf_index), y = perf_index, fill = country)) +
geom_col() +
geom_text(
aes(label = round(perf_index, 1)),
hjust = -0.2,
size = 3,
family = "roboto"
) +
coord_flip() +
labs(
title = "Team Performance Index",
subtitle = "Top performers, colored by their country",
x = NULL,
y = "Titles + 0.5 × Final Four Appearances"
) +
scale_fill_manual(values = country_colors)
p2
p3 <- df |>
filter(titles_won > 0) |>
ggplot(aes(x = capacity_num, y = titles_won, color = country)) +
geom_point(size = 4, alpha = 0.9) +
geom_text_repel(aes(label = team), size = 3, family = "roboto") +
labs(
title = "Does Size Matter?",
subtitle = "Arena Capacity vs Titles Won, at least one Title",
x = "Arena Capacity",
y = "Titles Won"
) +
scale_color_manual(values = country_colors) +
theme(legend.position = "bottom")
p3
---
title: "EuroLeague Basketball Analysis"
author: "Ana Luisa Bodevan"
date: "2025-10-07"
categories: [bar plot, sports]
image: 20251007.png
page-navigation: true
execute:
warning: false
message: false
eval: false
format:
html:
code-tools: true
code-fold: true
---
## 1. Setup
```{r}
#| label: load-packages
library(tidyverse)
library(janitor)
library(ggrepel)
library(patchwork)
library(RColorBrewer)
library(showtext)
# Register the Google font "Roboto" under the family name "roboto"; that
# family name is what the theme and the text layers further down refer to.
font_add_google("Roboto", "roboto")
# Turn on showtext rendering so the registered font is used when plots are drawn.
showtext_auto()
```
```{r}
#| label: load-data
# Load the EuroLeague Basketball data from TidyTuesday.
# The release date has to match the dataset this post analyses: the post is
# dated 2025-10-07 and the figure caption names "#TidyTuesday 2025 W40".
# The date requested before ('2025-05-27') points at a different release, so
# `tuesdata$euroleague_basketball` would not be found.
# NOTE(review): confirm the date against the TidyTuesday dataset index.
tuesdata <- tidytuesdayR::tt_load("2025-10-07")
euroleague_basketball <- tuesdata$euroleague_basketball
```
## 2. Set Theme
```{r}
#| label: custom-theme
# Named accent colours for the charts; the theme function in this chunk reads
# the "secondary" entry for plot titles and subtitles.
basketball_colors <- c(
  primary = "#FF6B35",
  secondary = "#004E89"
)
# Custom ggplot2 theme for the charts in this post.
#
# Starts from theme_minimal() and layers on a uniform "#fdf6e3" background,
# light major gridlines (minor gridlines removed), bold axis titles and
# headings, and a bottom legend without key or background boxes.
#
# Arguments:
#   base_size   Base font size handed to theme_minimal(). Axis titles and the
#               subtitle are sized base_size + 2, the title base_size + 4.
#   base_family Font family handed to theme_minimal().
#
# Returns theme_minimal() with the overrides added on top.
# Reads the global `basketball_colors` for the title/subtitle colour.
theme_basketball <- function(base_size = 14, base_family = "roboto") {
  # Elements that are reused by more than one theme slot.
  backdrop <- element_rect(fill = "#fdf6e3", color = NA)
  heading_color <- basketball_colors["secondary"]
  bold_text <- element_text(face = "bold")

  overrides <- theme(
    # Backgrounds and grid
    plot.background = backdrop,
    panel.background = backdrop,
    panel.grid.major = element_line(color = "#dcdcdc", linewidth = 0.5),
    panel.grid.minor = element_blank(),
    # Axes
    axis.title = element_text(face = "bold", size = base_size + 2),
    axis.text = element_text(color = "#1E1E1E"),
    # Headings
    plot.title = element_text(
      face = "bold",
      size = base_size + 4,
      color = heading_color
    ),
    plot.subtitle = element_text(
      face = "italic",
      size = base_size + 2,
      color = heading_color
    ),
    # Legend and facet strips
    legend.position = "bottom",
    legend.background = element_blank(),
    legend.key = element_blank(),
    legend.title = bold_text,
    strip.text = bold_text
  )

  theme_minimal(base_size = base_size, base_family = base_family) + overrides
}
# Make theme_basketball() (with its default size and font family) the
# session-wide ggplot2 default, so the plots below do not need to add it
# explicitly.
theme_set(theme_basketball())
```
## 3. Data Exploration
```{r}
#| label: explore-data
# Print a compact column-by-column overview of the raw data before cleaning.
glimpse(euroleague_basketball)
```
## 4. Data Cleaning
```{r}
#| label: clean-data
# Build the cleaned analysis table `df` from the raw TidyTuesday data.
#
# Steps:
#   1. Standardise column names to snake_case.
#   2. Normalise whitespace in the free-text columns.
#   3. Derive numeric columns and make the count columns integer with 0
#      standing in for missing values.
#   4. Turn the two comma-separated "years" columns into list-columns of
#      character vectors ("None" becomes NA first).
df <- euroleague_basketball |>
  janitor::clean_names() |>
  # str_squish() already returns NA for NA input, so no explicit NA guard is
  # needed around it.
  mutate(
    across(
      c(
        team, home_city, arena, country, last_season,
        years_of_final_four_appearances, years_of_titles_won
      ),
      str_squish
    )
  ) |>
  mutate(
    # assumes capacity is stored as text (e.g. with thousands separators),
    # which is what parse_number() expects -- TODO confirm
    capacity_num = parse_number(capacity),
    # last_season: parse_number() extracts the number from ordinals and gives
    # NA (with a warning, silenced here) for text-only values like "(EuroCup)"
    last_season_num = suppressWarnings(parse_number(last_season)),
    # Counts: coerce to integer, then treat missing as zero (0L keeps the
    # columns integer without relying on an implicit cast)
    final_four_appearances = replace_na(as.integer(final_four_appearances), 0L),
    titles_won = replace_na(as.integer(titles_won), 0L)
  ) |>
  # "None" marks teams without any entry; convert it to NA, then split the
  # remaining comma-separated years into one character vector per team.
  mutate(
    across(
      c(years_of_final_four_appearances, years_of_titles_won),
      \(x) str_split(na_if(x, "None"), ",\\s*")
    )
  )
```
## 5. Create Color Palette
```{r}
#| label: color-palette
# One fixed colour per country, shared by every plot so a country keeps the
# same colour across panels. Colours are taken, in order of first appearance
# of each country in `df`, from the 8 "Set2" colours followed by the 8 "Dark2"
# colours.
countries <- unique(df$country)
country_colors <- c(
  RColorBrewer::brewer.pal(8, "Set2"),
  RColorBrewer::brewer.pal(8, "Dark2")
)[seq_along(countries)]
names(country_colors) <- countries
```
## 6. Visualizations
### 6.1 Country Performance
```{r}
#| label: plot-country-performance
#| fig-width: 10
#| fig-height: 6
# Horizontal bar chart: average number of EuroLeague titles per team, by
# country. Countries whose teams have no titles are left out.
#
# Only the columns the chart needs are computed: the earlier Final Four
# totals were never plotted, and a pre-sort with arrange() had no effect
# because reorder() alone decides the bar order.
p1 <- df |>
  group_by(country) |>
  summarise(
    total_titles = sum(titles_won, na.rm = TRUE),
    n_teams = n_distinct(team),
    .groups = "drop"
  ) |>
  mutate(titles_per_team = total_titles / n_teams) |>
  filter(titles_per_team > 0) |>
  ggplot(
    aes(
      x = reorder(country, titles_per_team),
      y = titles_per_team,
      fill = country
    )
  ) +
  geom_col() +
  # Value label just past the end of each bar
  geom_text(
    aes(label = round(titles_per_team, 2)),
    hjust = -0.2,
    size = 4,
    family = "roboto"
  ) +
  coord_flip() +
  labs(
    title = "EuroLeague Performance by Country",
    subtitle = "Average titles per team",
    x = NULL,
    y = "Average Titles per Team"
  ) +
  scale_fill_manual(values = country_colors) +
  # The axis already names each country, so the fill legend is redundant
  theme(legend.position = "none")
p1
```
### 6.2 Team Performance Index
```{r}
#| label: plot-team-performance
#| fig-width: 10
#| fig-height: 8
# Horizontal bar chart of a per-team performance index
# (titles + 0.5 * Final Four appearances), bars filled by country.
# Teams with an index of zero are dropped.
p2 <- local({
  # Score every team, keep those with a positive score, highest first
  scored_teams <- df |>
    mutate(perf_index = titles_won + 0.5 * final_four_appearances) |>
    filter(perf_index > 0) |>
    arrange(desc(perf_index))

  # Value label just past the end of each bar
  bar_labels <- geom_text(
    aes(label = round(perf_index, 1)),
    hjust = -0.2,
    size = 3,
    family = "roboto"
  )

  chart_text <- labs(
    title = "Team Performance Index",
    subtitle = "Top performers, colored by their country",
    x = NULL,
    y = "Titles + 0.5 × Final Four Appearances"
  )

  ggplot(
    scored_teams,
    aes(x = reorder(team, perf_index), y = perf_index, fill = country)
  ) +
    geom_col() +
    bar_labels +
    coord_flip() +
    chart_text +
    scale_fill_manual(values = country_colors)
})
p2
```
### 6.3 Arena Capacity vs Titles
```{r}
#| label: plot-capacity-titles
#| fig-width: 10
#| fig-height: 6
# Scatter plot of arena capacity against titles won, restricted to teams with
# at least one title; points are coloured by country and labelled by team.
p3 <- local({
  title_winners <- filter(df, titles_won > 0)

  ggplot(
    title_winners,
    aes(x = capacity_num, y = titles_won, color = country)
  ) +
    geom_point(size = 4, alpha = 0.9) +
    # Repelled labels so team names do not overlap each other
    geom_text_repel(aes(label = team), size = 3, family = "roboto") +
    labs(
      title = "Does Size Matter?",
      subtitle = "Arena Capacity vs Titles Won, at least one Title",
      x = "Arena Capacity",
      y = "Titles Won"
    ) +
    scale_color_manual(values = country_colors) +
    theme(legend.position = "bottom")
})
p3
```
### 6.4 Combined Visualization
```{r}
#| label: plot-combined
#| fig-width: 16
#| fig-height: 10
# Assemble the final figure with patchwork: p1 stacked above p2 on the left,
# p3 on the right, plus a caption for the whole figure.
# `/` binds tighter than `|`, so `p1 / p2 | p3` groups as (p1 / p2) | p3.
plot <- (p1 / p2 | p3) +
  plot_annotation(
    theme = theme(
      plot.background = element_rect(fill = "#fdf6e3", color = NA),
      plot.caption = element_text(size = 10, family = "roboto")
    ),
    caption = "Source: {EuroleagueBasketball} | Viz: @anabodevan\n#TidyTuesday 2025 W40"
  )
plot
```
## 7. Save Plot
```{r}
#| label: save-plot
#| eval: false
# Save the combined figure as a 16 x 10 inch PNG at 300 dpi.
#
# With showtext_auto() active, text is rasterised at showtext's own dpi
# setting (96 by default). It must match the output dpi, otherwise all text in
# the saved file comes out far too small relative to the rest of the plot.
# NOTE(review): the front matter points `image:` at "20251007.png" while this
# writes "image.png" -- confirm which file name is intended.
showtext_opts(dpi = 300)
ggsave(
  filename = "image.png",
  plot = plot,
  width = 16,
  height = 10,
  dpi = 300,
  bg = "#fdf6e3"
)
# Back to showtext's default (this file never sets another value) so plots
# drawn after saving keep their normal text size.
showtext_opts(dpi = 96)
```