Dublin Data Andre

This document presents the code needed to plot a Voronoi diagram of the public libraries in Dublin City. Primary and post-primary schools are then added to the analysis. The datasets are available on GitHub (https://github.com/arkeats/Data_Dublin).

Public Library Dataset (GPS coordinates and Opening Hours)

# Read the public-library table (name, GPS coordinates and daily opening
# hours) from the local CSV, keeping text columns as character vectors.
df <- read.csv(
  file = "t_lib.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

## name lat long Mon Tue ## 1 Ballymun Library 53.39 -6.265 10-8 10-8 ## 2 Cabra Library 53.36 -6.300 10-8 10-8 ## 3 Finglas Library 53.39 -6.299 1-8 10-5 ## 4 Coolock Library 53.40 -6.212 10-8 10-8 ## 5 Donaghmede Library 53.39 -6.162 1-8 10-5 ## 6 Marino Library 53.36 -6.232 12.45-4, 4.45-8 10-1, 2-5 ## Wed Thu Fri Sat Sun ## 1 10-8 10-8 10-5 10-5 Closed ## 2 10-8 10-8 10-5 10-5 Closed ## 3 1-8 10-5 10-5 10-5 Closed ## 4 10-8 10-8 10-5 10-5 Closed ## 5 10-5 1-8 10-5 10-5 Closed ## 6 12.45-4, 4.45-8 10-1, 2-5 10-1, 2-5 10-1, 2-5 Closed

R Libraries

## Warning: package 'deldir' was built under R version 3.1.2

## deldir 0.1-7

## Warning: package 'ggmap' was built under R version 3.1.3 ## Warning: package 'sp' was built under R version 3.1.2 ## Warning: package 'rgdal' was built under R version 3.1.1

## rgdal: version: 0.9-1, (SVN revision 518) ## Geospatial Data Abstraction Library extensions to R successfully loaded ## Loaded GDAL runtime: GDAL 1.9.2, released 2012/10/08 ## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/gdal ## Loaded PROJ.4 runtime: Rel. 4.8.0, 6 March 2012, [PJ_VERSION: 480] ## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.1/Resources/library/rgdal/proj ## Checking rgeos availability: TRUE

Voronoi Data: Creating segments

## ## PLEASE NOTE: The components "delsgs" and "summary" of the ## object returned by deldir() are now DATA FRAMES rather than ## matrices (as they were prior to release 0.0-18). ## See help("deldir"). ## ## PLEASE NOTE: The process that deldir() uses for determining ## duplicated points has changed from that used in version ## 0.0-9 of this package (and previously). See help("deldir").

# Fetch the base map for Dublin; the ggmap() layers below are drawn on it.
dub_map <- get_map(
  location = "Dublin",
  zoom = 11
)

## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Dublin&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false ## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=Dublin&sensor=false

# Overlay the libraries (red points) and the Voronoi cell edges (orange
# segments taken from vor$dirsgs) on the Dublin base map.
# NOTE(review): `vor` is not created by any code shown before this point; it
# presumably comes from the unechoed "Voronoi Data" step - confirm.
ggmap(dub_map) +
  geom_point(
    data = df,
    mapping = aes(x = long, y = lat),
    colour = "red",
    size = 3
  ) +
  geom_segment(
    data = vor$dirsgs,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    colour = "#FFB958",
    linetype = 1,
    size = 1
  )

Data Wrangling: Primary Schools

# Primary schools: load the 2013/2014 table, keep the identifying columns,
# the enrolment total and the coordinates, and restrict to County Dublin.
df1 <- read.csv(
  file = "primary_schools_2013_2014.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
dfa <- subset(
  df1[, c("Roll_No", "Off_Name", "County", "T_13_14", "Lat", "Long")],
  County == "Dublin"
)

# Tag every row as a primary school ("P") and switch to the common column
# names shared with the post-primary table.
dfa["type"] <- "P"
names(dfa) <- c("roll_no", "name", "county", "total", "lat", "long", "type")

head(dfa)

## roll_no name county total lat long type ## 753 00752A CENTRAL SENIOR MXD N S Dublin 208 53.35 -6.255 P ## 754 01795A CENTRAL INFS SCHOOL Dublin 164 53.35 -6.255 P ## 755 03917V NAOMH PADRAIG BOYS Dublin 119 53.34 -6.224 P ## 756 04992R SCOIL AN CROI NAOFA Dublin 453 53.38 -6.273 P ## 757 05933G PRESENTATION PRIMARY SCHOOL Dublin 176 53.35 -6.271 P ## 758 07546J GOLDENBRIDGE CONVENT Dublin 267 53.34 -6.318 P

Data Wrangling: Post Primary Schools

# Post-primary schools: load the 2013/2014 table and keep County Dublin rows.
df2 <- read.csv(
  file = "post_primary_schools_2013_2014.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
dfb <- subset(
  df2[, c("Roll_No", "Off_Name", "County", "F_13_14", "M_13_14",
          "xcoord", "ycoord")],
  County == "Dublin"
)

# Total enrolment is the sum of the female and male counts.
dfb$T_13_14 <- dfb$F_13_14 + dfb$M_13_14

# The school positions come as projected x/y coordinates; declare them as
# EPSG:29902 (presumably Irish Grid eastings/northings - confirm) and
# reproject to EPSG:4326 so that x is longitude and y is latitude.
data <- data.frame(x = dfb$xcoord, y = dfb$ycoord)
coordinates(data) <- ~ x + y
proj4string(data) <- CRS("+init=epsg:29902")
latlong <- data.frame(spTransform(data, CRS("+init=epsg:4326")))
dfb$lat <- latlong$y
dfb$long <- latlong$x

# Tag rows as post-primary ("PP") and align columns with the primary table.
dfb["type"] <- "PP"
dfb <- dfb[, c("Roll_No", "Off_Name", "County", "T_13_14", "lat", "long", "type")]
names(dfb) <- c("roll_no", "name", "county", "total", "lat", "long", "type")

head(dfb)

## roll_no name county total lat long type ## 1 60010P Loreto Secondary School Dublin 1 53.61 -6.185 PP ## 2 60021U St Marys Secondary School Dublin 291 53.40 -6.128 PP ## 3 60030V Blackrock College Dublin 999 53.30 -6.190 PP ## 4 60040B Willow Park School Dublin 206 53.31 -6.195 PP ## 5 60041D Col\xe1iste Eoin Dublin 491 53.30 -6.205 PP ## 6 60042F Col\xe1iste \xcdosag\xe1in Dublin 494 53.30 -6.204 PP

Join dfa and dfb

# Stack primary and post-primary schools into one table (same column layout).
dfc <- do.call(rbind, list(dfa, dfb))

Filtering by Area: Points inside a polygon

# Boundary coordinates of the five Dublin City areas, one KML file each.
pol1 <- getKMLcoordinates("Dublin_Central.kml", ignoreAltitude = TRUE)
pol2 <- getKMLcoordinates("Dublin_North_Central.kml", ignoreAltitude = TRUE)
pol3 <- getKMLcoordinates("Dublin_North_West.kml", ignoreAltitude = TRUE)
pol4 <- getKMLcoordinates("Dublin_South_Central.kml", ignoreAltitude = TRUE)
pol5 <- getKMLcoordinates("Dublin_South_East.kml", ignoreAltitude = TRUE)

# Test every school in dfc against the first coordinate matrix of a KML
# boundary (column 1 = x/longitude, column 2 = y/latitude).
in_area <- function(pol) {
  ring <- pol[[1]]
  point.in.polygon(dfc$long, dfc$lat, ring[, 1], ring[, 2])
}
a1 <- in_area(pol1)
a2 <- in_area(pol2)
a3 <- in_area(pol3)
a4 <- in_area(pol4)
a5 <- in_area(pol5)

# Attach the five area indicators to the school table via roll_no, then keep
# only the schools that fall in at least one area (indicator sum > 0).
dff <- data.frame(roll_no = dfc$roll_no, a1, a2, a3, a4, a5)
dff <- merge(dff, dfc, by = "roll_no")
dff$sum <- with(dff, a1 + a2 + a3 + a4 + a5)
dff <- dff[which(dff$sum > 0), , drop = FALSE]

head(dff)

## roll_no a1 a2 a3 a4 a5 name county total lat ## 4 00752A 1 0 0 0 0 CENTRAL SENIOR MXD N S Dublin 208 53.35 ## 6 01795A 1 0 0 0 0 CENTRAL INFS SCHOOL Dublin 164 53.35 ## 9 03917V 0 0 0 0 1 NAOMH PADRAIG BOYS Dublin 119 53.34 ## 10 04992R 0 0 1 0 0 SCOIL AN CROI NAOFA Dublin 453 53.38 ## 12 05933G 1 0 0 0 0 PRESENTATION PRIMARY SCHOOL Dublin 176 53.35 ## 14 07546J 0 0 0 1 0 GOLDENBRIDGE CONVENT Dublin 267 53.34 ## long type sum ## 4 -6.255 P 1 ## 6 -6.255 P 1 ## 9 -6.224 P 1 ## 10 -6.273 P 1 ## 12 -6.271 P 1 ## 14 -6.318 P 1

Defining Voronoi Areas (kml files). In this case, there are 20 public libraries so we create 20 polygons.

# Voronoi tessellation of the library coordinates (x = long, y = lat).
# NOTE(review): the tessellation is computed directly on longitude/latitude
# degrees rather than on projected coordinates - confirm this is acceptable
# for the intended "nearest library" interpretation.
vor <- deldir(df$long, df$lat)
w <- tile.list(vor)

# Turn each tile into a closed ring (first vertex repeated at the end) and
# wrap it in an sp Polygons object whose ID is the tile index.
polys <- lapply(seq_along(w), function(i) {
  pcrds <- cbind(w[[i]]$x, w[[i]]$y)
  pcrds <- rbind(pcrds, pcrds[1, ])
  Polygons(list(Polygon(pcrds)), ID = as.character(i))
})
SP <- SpatialPolygons(polys)

# Attach the library coordinates as attributes, keyed by the polygon IDs.
voro <- SpatialPolygonsDataFrame(
  SP,
  data = data.frame(
    x = df$long,
    y = df$lat,
    row.names = vapply(
      slot(SP, "polygons"),
      function(p) slot(p, "ID"),
      character(1)
    )
  )
)

# Write one KML file per Voronoi cell: library1.kml, library2.kml, ...
for (i in seq_along(voro)) {
  kmlPolygon(
    voro@polygons[[i]],
    kmlfile = paste0("library", i, ".kml"),
    name = paste0("library", i),
    kmlname = paste0("library_area", i),
    col = NULL,
    border = "blue",
    lwd = 1
  )
}

Defining Voronoi Polygons in a Map

# Collect the vertex coordinates of every Voronoi cell into one long data
# frame (columns V1, V2) and tag each vertex with its library name in ID.
# Each cell is prepended to the accumulated frame, so the last cell ends up
# first in afc.
afc <- Reduce(
  function(acc, i) {
    af <- as.data.frame(SP@polygons[[i]]@Polygons[[1]]@coords[, 1:2])
    af$ID <- df$name[i]
    rbind(af, acc)
  },
  seq_along(voro),
  data.frame()
)

# Base map of Dublin for the polygon plot.
dub_map <- get_map(
  location = "Dublin",
  zoom = 11
)

## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=Dublin&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false

# Map of the library Voronoi cells: libraries as blue points, each cell as a
# translucent polygon filled by library name, and the cell edges in orange.
ggmap(dub_map) +
  geom_point(
    data = df,
    mapping = aes(x = long, y = lat),
    colour = "blue",
    size = 3
  ) +
  geom_polygon(
    data = afc,
    mapping = aes(x = V1, y = V2, group = ID, fill = ID),
    alpha = 0.3
  ) +
  ggtitle("Voronoi Polygons for Public Libraries in Dublin City") +
  geom_segment(
    data = vor$dirsgs,
    mapping = aes(x = x1, y = y1, xend = x2, yend = y2),
    colour = "#FFB958",
    linetype = 1,
    size = 1
  )

Determining which schools fall inside each library's Voronoi area. nst: number of students; nsc: number of schools.

# Libraries
# For every school in dff, flag which library's Voronoi cell contains it
# (columns l1, l2, ...), then total the students (nst) and count the schools
# (nsc) per library and store both on df.
# The number of cells is read from SP instead of being hard-coded to 20, so
# the chunk keeps working when the library list changes.
n_cells <- length(SP@polygons)
if (n_cells != nrow(df)) {
  stop(
    "Expected one Voronoi cell per library: ", n_cells, " cells vs ",
    nrow(df), " libraries",
    call. = FALSE
  )
}

for (i in seq_len(n_cells)) {
  ring <- SP@polygons[[i]]@Polygons[[1]]@coords
  dff[[paste0("l", i)]] <- point.in.polygon(
    dff$long, dff$lat, ring[, 1], ring[, 2]
  )
}

lib_cols <- paste0("l", seq_len(n_cells))

# A school counts for a cell when its indicator is > 0. which() ignores NA
# comparisons, matching the subset() filtering used previously.
nst <- vapply(
  lib_cols,
  function(col) sum(dff$total[which(dff[[col]] > 0)]),
  numeric(1),
  USE.NAMES = FALSE
)
nsc <- vapply(
  lib_cols,
  function(col) length(which(dff[[col]] > 0)),
  integer(1),
  USE.NAMES = FALSE
)

df["nst"] <- nst
df["nsc"] <- nsc

head(df)

## name lat long Mon Tue ## 1 Ballymun Library 53.39 -6.265 10-8 10-8 ## 2 Cabra Library 53.36 -6.300 10-8 10-8 ## 3 Finglas Library 53.39 -6.299 1-8 10-5 ## 4 Coolock Library 53.40 -6.212 10-8 10-8 ## 5 Donaghmede Library 53.39 -6.162 1-8 10-5 ## 6 Marino Library 53.36 -6.232 12.45-4, 4.45-8 10-1, 2-5 ## Wed Thu Fri Sat Sun nst nsc ## 1 10-8 10-8 10-5 10-5 Closed 4819 17 ## 2 10-8 10-8 10-5 10-5 Closed 4071 14 ## 3 1-8 10-5 10-5 10-5 Closed 5283 19 ## 4 10-8 10-8 10-5 10-5 Closed 6263 18 ## 5 10-5 1-8 10-5 10-5 Closed 0 0 ## 6 12.45-4, 4.45-8 10-1, 2-5 10-1, 2-5 10-1, 2-5 Closed 5562 16

Note: Donaghmede Library shows zero students because it lies outside the Dublin City area as defined by the boundary files (North Central KML), so the surrounding schools were filtered out.

# Bar chart of the total number of students (nst) per library area; the fill
# legend is suppressed because the x axis already names each library.
ggplot(data = df, mapping = aes(x = name, y = nst, fill = name)) +
  geom_bar(mapping = aes(fill = name), stat = "identity", width = .7) +
  guides(fill = FALSE) +
  labs(
    x = "Public Libraries Dublin city",
    y = "Total Number of Students",
    title = "Total Students in Dublin by Library area"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Bar chart of the number of schools (nsc) per library area; the fill legend
# is suppressed because the x axis already names each library.
ggplot(data = df, mapping = aes(x = name, y = nsc, fill = name)) +
  geom_bar(mapping = aes(fill = name), stat = "identity", width = .7) +
  guides(fill = FALSE) +
  labs(
    x = "Public Libraries Dublin city",
    y = "Number of Schools",
    title = "Number of Schools in Dublin by Library area"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Same for City area

# Per-area totals: students (nst) and schools (nsc) for each Dublin City
# area, using the indicator columns a1, a2, ... of dff. The column list is
# derived from `area` rather than a hard-coded 1:5.
area <- c(
  "Dublin_Central", "Dublin_North_Central", "Dublin_North_West",
  "Dublin_South_Central", "Dublin_South_East"
)
area_cols <- paste0("a", seq_along(area))

# A school counts for an area when its indicator is > 0. which() ignores NA
# comparisons, matching the subset() filtering used previously.
nst <- vapply(
  area_cols,
  function(col) sum(dff$total[which(dff[[col]] > 0)]),
  numeric(1),
  USE.NAMES = FALSE
)
nsc <- vapply(
  area_cols,
  function(col) length(which(dff[[col]] > 0)),
  integer(1),
  USE.NAMES = FALSE
)

# NOTE: this reuses the name `dfa`, replacing the primary-school table built
# earlier; the following chunks read the per-area table under this name.
dfa <- data.frame(area, nst, nsc)

ggplot(data = dfa, aes(x = area, y = nst, fill = area)) +
  geom_bar(aes(fill = area), width = .7, stat = "identity") +
  guides(fill = FALSE) +
  xlab("Area Dublin") +
  ylab("Total Number of Students") +
  ggtitle("Total Students in Dublin (Primary + Post-Primary)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Dividing by Primary and Post-Primary school

# Split the per-area student totals by school type: "P" (primary) and "PP"
# (post-primary). One indicator column (a1, a2, ...) is expected per row of
# dfa, in the same order, instead of a hard-coded 1:5.
area_cols <- paste0("a", seq_len(nrow(dfa)))

# Sum `total` over the schools of the given type whose area indicator is > 0.
# which() ignores NA comparisons, matching the subset() filtering used before.
students_in_area <- function(col, school_type) {
  hit <- which(dff[[col]] > 0 & dff$type == school_type)
  sum(dff$total[hit])
}

nst_p <- vapply(
  area_cols, students_in_area, numeric(1),
  school_type = "P", USE.NAMES = FALSE
)
nst_pp <- vapply(
  area_cols, students_in_area, numeric(1),
  school_type = "PP", USE.NAMES = FALSE
)

dfa["nst_p"] <- nst_p
dfa["nst_pp"] <- nst_pp

# Keep the area label plus the two per-type totals and reshape to long form
# (one row per area and school type) for the dodged bar chart.
dfa1 <- dfa[, c("area", "nst_p", "nst_pp")]
m <- melt(dfa1, id.vars = "area")
head(m)

## area variable value ## 1 Dublin_Central nst_p 8751 ## 2 Dublin_North_Central nst_p 8670 ## 3 Dublin_North_West nst_p 6406 ## 4 Dublin_South_Central nst_p 10176 ## 5 Dublin_South_East nst_p 5825 ## 6 Dublin_Central nst_pp 5550

# Side-by-side bars of primary vs post-primary students for each city area.
ggplot(data = m, mapping = aes(x = area, y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Dublin City",
    y = "Number of Students",
    title = "Primary and Post-Primary Students in Dublin"
  ) +
  scale_fill_manual(
    name = "Type",
    values = c("#E69F00", "#56B4E9"),
    breaks = c("nst_p", "nst_pp"),
    labels = c("Primary School", "Post Primary School")
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Density of Schools in Dublin city

# Black-and-white base map centred on the given longitude/latitude.
dublin_map <- get_map(
  location = c(lon = -6.259722, lat = 53.347778),
  zoom = 11,
  color = "bw"
)

## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=53.347778,-6.259722&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false