Analysis of Dunkin’ Donuts in Connecticut and New England Andrew Ba Tran

This analysis accompanies the TrendCT.org story: Dunkin’ Donuts owns name rights to Hartford ball park, already owns New England

Here’s the raw data we pulled and cleaned from DunkinDonuts.com.

Let’s start out by mapping out all the Dunkin’ Donuts locations library(leaflet) dunk <- read.csv("dunkindonuts.csv", stringsAsFactors=FALSE) leaflet(dunk) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png', attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> — Map data © <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% setView(-98.557087, 39.810502, zoom = 4) %>% addCircles(~lng, ~lat, weight = 1, radius=3, color="#ffa500", stroke = FALSE, fillOpacity = 0.8)

Let’s look specifically at Connecticut library(dplyr) ## ## Attaching package: 'dplyr' ## ## The following objects are masked from 'package:stats': ## ## filter, lag ## ## The following objects are masked from 'package:base': ## ## intersect, setdiff, setequal, union library(ctnamecleaner) library(stringr) ct <- dunk %>% filter(state=="CT") %>% count(city) ctnamecleaner(city, ct) ## Joining by: "name2" ## [1] "...All names matched. That's a rare thing." ## Congrats. The new dataframe is called CTDATA. ## Don't forget to collapse the duplicate rows and sum/average the numeric columns. ct <- data.frame(tapply(CTDATA$n, CTDATA$real.town.name, sum)) ct$town <- rownames(ct) colnames(ct) <- c("Dunkins", "id") ctpopulator(id, ct) ## [1] "Checking to see if names match..." ## Joining by: "name2" ## [1] "Congrats. The new dataframe is called CTPOPULATED." ct <- CTPOPULATED ct$percapita <- (ct$Dunkins/ct$pop2013)*10000 ct$percapita <- round(ct$percapita, digits=2) ct$id <- str_to_title(ct$id) require(gtools) ## Loading required package: gtools require(ggplot2) ## Loading required package: ggplot2 require(rgdal) ## Loading required package: rgdal ## Loading required package: sp ## rgdal: version: 0.9-3, (SVN revision 530) ## Geospatial Data Abstraction Library extensions to R successfully loaded ## Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10 ## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/gdal ## Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491] ## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.2/Resources/library/rgdal/proj ## Linking to sp version: 1.1-0 require(scales) ## Loading required package: scales require(ggmap) ## Loading required package: ggmap require(Cairo) ## Loading required package: Cairo require(gpclib) ## Loading required package: gpclib ## General Polygon Clipper Library for R (version 1.5-5) ## Type 'class ? gpc.poly' for help require(maptools) ## Loading required package: maptools ## Checking rgeos availability: TRUE require(reshape) ## Loading required package: reshape ## ## Attaching package: 'reshape' ## ## The following object is masked from 'package:dplyr': ## ## rename gpclibPermit() ## Warning in gpclibPermit(): support for gpclib will be withdrawn from ## maptools at the next major release ## [1] TRUE gpclibPermitStatus() ## [1] TRUE towntracts <- readOGR(dsn="townsmap", layer="towns") ## OGR data source with driver: ESRI Shapefile ## Source: "townsmap", layer: "towns" ## with 169 features ## It has 20 fields towntracts <- fortify(towntracts, region="NAME10") Dunk_Data <- left_join(towntracts, ct) ## Joining by: "id" Dunk_Data$Dunkins <- as.numeric(Dunk_Data$Dunkins) Dunk_Data$percapita <- as.numeric(Dunk_Data$percapita) dd <- ggplot() + geom_polygon(data = Dunk_Data, aes(x=long, y=lat, group=group, fill=percapita), color = "black", size=0.2) + coord_map() + scale_fill_distiller(type="seq", palette = "Reds", breaks=pretty_breaks(n=5)) + theme_nothing(legend=TRUE) + labs(title="Dunkin' Donuts stores per 10,000 residents", fill="") dd dd_all <- ggplot() + geom_polygon(data = Dunk_Data, aes(x=long, y=lat, group=group, fill=Dunkins), color = "black", size=0.2) + coord_map() + scale_fill_distiller(type="seq", palette = "Reds", breaks=pretty_breaks(n=5)) + theme_nothing(legend=TRUE) + labs(title="Total Dunkin' Donuts by town", fill="") dd_all

Analysis for Connecticut library(DT) ct <- ct[c("id", "pop2013", "Dunkins", "percapita")] colnames(ct) <- c("Town", "Population", "Dunkins", "Per Capita") datatable(ct)

Expanding scope to New England I had to do this part of the analysis outside of R. I used QGIS and did a spatial join to count the number of stores within town boundaries. I joined that with population counts and brought the spreadsheet into R below. dc <- read.csv("DunkinCount.csv", stringsAsFactors=FALSE) dc_table <- dc[c("Towns", "State", "Population", "Dunkin.Donuts","Per.1.000.residents")] colnames(dc_table) <- c("Town", "State","Population","Dunkins", "Per Capita") datatable(dc_table)

Is there a correlation between a town’s population and number of Dunkin’ Donuts? ddne <- ggplot(data=dc, aes(x=Dunkin.Donuts, y=Population)) + geom_point(aes(text=paste(Towns, State, sep=", ")))+ geom_smooth(method=lm, formula = y ~ poly(x, 2), size=1) + theme_minimal() + ggtitle("Dunkin' Donuts versus town population in New England") + labs(y="Population", x="Dunkin' Donuts") ddne That outlier in the top right is Boston. Will have to readjust by taking out the outlier and trying a polynomial model. dcnotboston <- filter(dc, Towns!="Boston") p <- ggplot(data=dcnotboston, aes(x=Dunkin.Donuts, y=Population)) + geom_point(aes(text=paste(Towns, State, sep=", ")))+ geom_smooth(method=lm, formula = y ~ poly(x, 2), size=1) + theme_minimal() + theme(text=element_text(family="Lato", size=14)) + ggtitle("Dunkin' Donuts versus town population in New England") + labs(y="Population", x="Dunkin' Donuts") p