I was poking around data.gov, and found a dataset about the leading causes of death in New York City by Year, Sex, and Ethnicity.

All of the following visualizations were created from that dataset. I broke down my visualizations into three parts (year, sex, and ethnicity), each with three graphs.

Some vocab you might want to keep in mind before you venture down further:

A malignant neoplasm is essentially a cancerous tumor.

Atherosclerosis is plaque build up in the arteries.

And nephritis, like other words with the "neph-" prefix, refers to diseases of the kidneys.

First up we’ll look at Ethnicity, and get a sense for what the main causes of death are in NYC.

Now, let’s look at the same graph with malignant neoplasms and diseases of the heart removed, so we can get a better look at the other causes of death.

Lastly, how do these causes of death break down percentage-wise? (Note that you can use the Total variable to help understand how the percentages for specific diseases differ from those of the whole data set.)

How do causes of death differ between sexes in NYC?

Once again, we’ll remove malignant neoplasms and diseases of the heart.

And percentage-wise.

Lastly, how did cause of death vary between 2007 and 2011?

Some quick thoughts: Diseases of the heart and cancer are far and away the most common causes of death in New York City regardless of gender, ethnicity, and year. Pneumonitis and tuberculosis occur so rarely in New York City that it’s impossible to tell how those causes of death are actually distributed across the traits investigated here, simply because the sample size isn’t large enough.

In case you wanted the general distribution of this population across these traits:

Code for the graphics:

# New York City leading causes of death: stacked bar charts by ethnicity,
# year and sex, plus the overall distribution of deaths across those traits.
#
# The script deliberately keeps the original mix of packages: plyr for the
# aggregation, dplyr for filtering and data.table for the totals and shares.

# Load libraries ----
# plyr is attached before dplyr so that plyr does not mask dplyr's verbs.
library(data.table)
library(ggplot2)
library(plyr)
library(dplyr)
library(RColorBrewer)
library(scales)

# Configuration ----
data_path <- paste0(
  "/Users/nicholasbernstein/Downloads/",
  "New_York_City_Leading_Causes_of_Death.csv"
)
output_dir <- "/Users/nicholasbernstein/Desktop"

# Shared styling ----
# Light grey panel with white major grid lines; legend below the plot.
bar_theme <- theme(
  panel.background = element_rect(fill = "lightgrey"),
  panel.grid.major = element_line(colour = "white", size = .5),
  legend.position = "bottom"
)

# Helpers ----

# Build one stacked bar chart.
#   data         data frame to plot
#   mapping      aes() giving the x and y aesthetics
#   fill_mapping aes() giving the fill aesthetic of the bars
#   x_label      label for the x aesthetic
#   y_label      label for the y aesthetic
#   y_labels     labelling function for the y axis (comma or percent)
#   flip         if TRUE, flip the coordinates so the bars run horizontally
# Returns the ggplot object.
build_bar_plot <- function(data, mapping, fill_mapping, x_label, y_label,
                           y_labels = comma, flip = TRUE) {
  bars <- ggplot(data, mapping) +
    geom_bar(mapping = fill_mapping, stat = "identity") +
    scale_y_continuous(labels = y_labels) +
    bar_theme +
    scale_fill_brewer(palette = "Dark2") +
    xlab(x_label) +
    ylab(y_label)
  if (flip) {
    bars <- bars + coord_flip()
  }
  bars
}

# Write a plot to `output_dir` as a 12 x 7 inch image at 500 dpi.
# Arguments are named explicitly instead of relying on partial matching of
# `file` to `filename`.
save_plot <- function(plot, file_name) {
  ggsave(
    filename = file.path(output_dir, file_name),
    plot = plot,
    dpi = 500,
    width = 12,
    height = 7
  )
  invisible(plot)
}

# Append one "TOTAL" row per level of `group_col` and add the column
# `Percent of Disease`: the share of each cause's deaths that falls in each
# level of `group_col`.
#   counts     data frame with columns `Cause of Death`, <group_col>, Count
#   group_col  name of the grouping column, as a string
# Returns a new data.table; `counts` itself is not modified.
add_totals_and_shares <- function(counts, group_col) {
  counts <- as.data.table(counts)
  totals <- counts[, .(Count = sum(Count)), by = group_col]
  totals[, `Cause of Death` := "TOTAL"]
  # Bind by column name rather than by position.
  combined <- rbindlist(list(counts, totals), use.names = TRUE)
  combined[, `Percent of Disease` := Count / sum(Count),
           by = "Cause of Death"]
  combined[]
}

# Save the three cause-of-death charts for one grouping variable:
# raw counts, counts without the two dominant causes, and shares.
#   counts       data frame with columns `Cause of Death`, <group_col>, Count
#   group_col    name of the grouping column, as a string
#   fill_mapping aes() mapping fill to the grouping column
#   file_names   named character vector with elements "count", "reduced"
#                and "share" giving the output file names
save_cause_plots <- function(counts, group_col, fill_mapping, file_names) {
  # Order the bars by the stacked total (the default of reorder() is the mean).
  count_mapping <- aes(
    x = reorder(`Cause of Death`, Count, FUN = sum),
    y = Count
  )
  share_mapping <- aes(
    x = reorder(`Cause of Death`, Count, FUN = sum),
    y = `Percent of Disease`
  )

  save_plot(
    build_bar_plot(counts, count_mapping, fill_mapping,
                   "Cause of Death", "Death Count"),
    file_names[["count"]]
  )

  # Drop cancer and heart disease so the remaining causes are readable.
  reduced <- filter(
    counts,
    `Cause of Death` != "MALIGNANT NEOPLASMS",
    `Cause of Death` != "DISEASES OF HEART"
  )
  save_plot(
    build_bar_plot(reduced, count_mapping, fill_mapping,
                   "Cause of Death", "Death Count"),
    file_names[["reduced"]]
  )

  # Shares are fractions in [0, 1], so format the axis as percentages.
  shares <- add_totals_and_shares(counts, group_col)
  save_plot(
    build_bar_plot(shares, share_mapping, fill_mapping,
                   "Cause of Death", "Percent of Total Population",
                   y_labels = percent),
    file_names[["share"]]
  )
  invisible(NULL)
}

# Read in data ----
nycDeaths <- fread(data_path)
# Treat the year as a discrete variable so it can be used as a fill colour.
nycDeaths$Year <- factor(nycDeaths$Year)

# Cause of death by ethnicity ----
ethnicity_counts <- ddply(
  nycDeaths, .(`Cause of Death`, Ethnicity),
  plyr::summarise, Count = sum(Count)
)
save_cause_plots(
  ethnicity_counts, "Ethnicity", aes(fill = Ethnicity),
  c(count = "NycDeathEthnicity.jpg",
    reduced = "NycDeathEthnicityNoCancer.jpg",
    share = "NycDeathEthnicityBar.jpg")
)

# Cause of death by year ----
year_counts <- ddply(
  nycDeaths, .(`Cause of Death`, Year),
  plyr::summarise, Count = sum(Count)
)
save_cause_plots(
  year_counts, "Year", aes(fill = Year),
  c(count = "NycDeathYear.jpg",
    reduced = "NycDeathYearNoCancer.jpg",
    share = "NycDeathYearBar.jpg")
)

# Cause of death by sex ----
sex_counts <- ddply(
  nycDeaths, .(`Cause of Death`, Sex),
  plyr::summarise, Count = sum(Count)
)
save_cause_plots(
  sex_counts, "Sex", aes(fill = Sex),
  c(count = "NycDeathSex.jpg",
    reduced = "NycDeathSexNoCancer.jpg",
    share = "NycDeathSexPercent.jpg")
)

# Overall distribution of deaths across each trait ----
save_plot(
  build_bar_plot(
    nycDeaths,
    aes(x = reorder(Sex, Count, FUN = sum), y = Count),
    aes(fill = Sex),
    "Sex", "Death Count",
    flip = FALSE
  ),
  "SexCount.jpg"
)
save_plot(
  build_bar_plot(
    nycDeaths,
    aes(x = reorder(Year, Count, FUN = sum), y = Count),
    aes(fill = Year),
    "Year", "Death Count",
    flip = FALSE
  ),
  "YearCount.jpg"
)
save_plot(
  build_bar_plot(
    nycDeaths,
    aes(x = reorder(Ethnicity, Count, FUN = sum), y = Count),
    aes(fill = Ethnicity),
    "Ethnicity", "Death Count",
    flip = FALSE
  ),
  "EthnicityCount.jpg"
)

# Total deaths per cause ----
# `fill` is set outside aes() so the bars are actually steel blue; inside
# aes() the string is treated as data and coloured by the fill scale.
# NOTE(review): the source never saves this plot; it is only left in EthCOD.
EthCOD <- ggplot(
  nycDeaths,
  aes(x = reorder(`Cause of Death`, Count, FUN = sum), y = Count)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  scale_y_continuous(labels = comma) +
  bar_theme +
  coord_flip() +
  xlab("Cause of Death") +
  ylab("Death Count")