The code:

# Human Freedom Index: violin plot of the 2016 scores by world region ----
#
# Reads "human-freedom-index-data-update.csv" from the current working
# directory, collapses the detailed regions into five broader groups, and
# writes a violin plot of the 2016 human freedom scores to "freedom.png".
#
# NOTE: the original script started with `rm(list = ls())` and `setwd("")`.
# Both were removed: `setwd("")` fails with "cannot change working directory",
# and wiping the global environment is a side effect a script should not have.
# Run the script from the directory that holds the CSV (or from a project
# rooted there) instead.

library(data.table)
library(tidyverse)

# Load the data ----

# reading in the data with fread from data.table
data <- fread("human-freedom-index-data-update.csv")

# looking at the column names
colnames(data)

# looking at the first few column names
colnames(data)[1:10]

# Keep the columns we need ----

# Select the four columns and rename `countries` to `country` in one step.
# This uses data.table's own `[` rather than dplyr::select() followed by
# `colnames<-`, so the table stays a well-formed data.table and the `:=`
# assignments below do not have to repair it with a warning.
data <- data[, .(year, country = countries, region, hf_score)]

# let's look at the data one more time
data

# Combine regions ----

unique(data$region)

# The assignments run in this order on purpose: should a region name ever
# match more than one pattern, the later assignment wins.
data[region %like% "Europe", region_new := "Europe"]
data[region %like% "Africa", region_new := "Africa & Middle East"]

# which regions does the "Asia" pattern pick up?
unique(data[region %like% "Asia", .(region)])

data[region %like% "Asia", region_new := "Asia"]
data[
  region %like% "Ocean" | region %like% "North Amer",
  region_new := "North America & Oceania"
]
data[region %like% "Latin", region_new := "Latin America"]

# check if we got every region (rows printed here were not matched)
data[is.na(region_new)]

# let's look at our new regions
unique(data$region_new)

# transforming new region variable to factor
data[, region_new := factor(region_new)]

# finally we filter for 2016
data <- data[year == 2016]

# Plot ----

# lets start nice and easy: one violin per region
plot <- ggplot(data, aes(x = region_new, y = hf_score, fill = region_new)) +
  geom_violin(trim = FALSE) +
  # now let's add the actual observations, jittered horizontally so that
  # overlapping points stay visible
  geom_jitter(aes(fill = region_new), width = 0.2, shape = 21) +
  # Let's add the mean and the standard deviation (mean +/- 1 sd);
  # mean_sdl needs the Hmisc package to be installed
  stat_summary(
    fun.data = mean_sdl,
    fun.args = list(mult = 1),
    geom = "pointrange",
    shape = 17,
    color = "white"
  ) +
  # Let's add some titles
  ggtitle("Human Freedom Index") +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(x = "Region", y = "Human Freedom Index", fill = "Region")

ggsave(filename = "freedom.png", plot = plot, width = 10, height = 7)