Graduate Urban Economics Lecture 11, May 10, 2018
Gentzkow and Shapiro wrote a very helpful document called "Code and Data for the Social Sciences: A Practitioner's Guide"
My Stata trick:
Write a test procedure
Know your data: I have caught countless errors by realizing data changed unexpectedly
library(rgdal) #manipulating shape files (.shp) library(ggplot2) #very advanced plotting/graphics library library(sp) #spatial data utility library(maptools) #mapping utility library(rgeos) #additional geographic utility
## Warning: package 'rgeos' was built under R version 3.3.2
library(raster) #raster data
## Warning: package 'raster' was built under R version 3.3.2
library(spatstat) #spatial statistics #library(ggmap) #interfacing with Google Maps (VPN required) library(OpenStreetMap)
## Warning: package 'OpenStreetMap' was built under R version 3.3.2
#chinaMap<-getData('GADM',country="CHN",level=1) #download data from GADM; getData is part of package raster ChinaRD0<-readRDS("data/CHN_adm0.rds") #Country outline; data folder is local ChinaRD1<-readRDS("data/CHN_adm1.rds") #provinces ChinaRD2<-readRDS("data/CHN_adm2.rds") #prefectures ChinaRD3<-readRDS("data/CHN_adm3.rds") #counties plot(ChinaRD0,main="Mainland China country outline") #Plot Mainland China
ChinaRD1@data[1:5,1:7] #shows first 5 rows 7 cols of ChinaRD1; could also use: slot(ChinaRD1,"data")[1:10,]
## OBJECTID ID_0 ISO NAME_0 ID_1 NAME_1 HASC_1 ## 1 1 49 CHN China 1 Anhui CN.AH ## 2 2 49 CHN China 2 Beijing CN.BJ ## 3 3 49 CHN China 3 Chongqing CN.CQ ## 4 4 49 CHN China 4 Fujian CN.FJ ## 5 5 49 CHN China 5 Gansu CN.GS
names(ChinaRD3) #names of columns in data
## [1] "OBJECTID" "ID_0" "ISO" "NAME_0" "ID_1" ## [6] "NAME_1" "ID_2" "NAME_2" "ID_3" "NAME_3" ## [11] "CCN_3" "CCA_3" "TYPE_3" "ENGTYPE_3" "NL_NAME_3" ## [16] "VARNAME_3"
shanghaiCities<-ChinaRD3[ChinaRD3$NAME_2=="Shanghai",] #Selects only cities in Shanghai municipality
plot(shanghaiCities) plot(shanghaiCities[shanghaiCities$NAME_3=="Shanghai",],border="blue",add=TRUE) #add=T draws on current plot text(coordinates(shanghaiCities), labels=shanghaiCities$NAME_3,col="red",cex=0.7) #Adds names
popData<-read.table("data/china_pop_9293.csv",header=T,sep=",",stringsAsFactors=FALSE) #examine: str(popData) ChinaData2 <- slot(ChinaRD2, "data") names(ChinaData2)[c(6,8)]<-c("province_name","city_name") table(ChinaData2$province_name) #Show frequency table of prefectures by province prefectureData<-merge(ChinaData2, popData, by=c("province_name", "city_name"),all.x=TRUE) #merging by two columns prefectureData$city_name[is.na(prefectureData$pop92)] #not a great way to match names(ChinaRD2)[c(6,8)]<-c("province_name","city_name") #SP package allows merging onto SpatialPolygonsDataFrame popDataSP<-merge(ChinaRD2, popData, by=c("province_name", "city_name"),all.x=TRUE) #rm(ChinaRD2) #Removes old object--not really necessary summary(popDataSP) #shows summary statistics on new object
gg_map_obj<-fortify(ChinaRD2, region= "OBJECTID") china_map<-ggplot() + geom_map(data = slot(popDataSP, "data"), aes(map_id = OBJECTID, fill = pop92), map = gg_map_obj) + expand_limits(x = gg_map_obj$long, y = gg_map_obj$lat) + ggtitle("Population in 1992, Selected Prefectures") china_map
#ggsave("china_pop92.pdf", china_map, width = 10, height = 6)
provinceCentroids<-coordinates(ChinaRD1) #gets coordinates plot(ChinaRD1) points(provinceCentroids,col="red") #adds points to current plot
#plot(ChinaRD3,border="blue",add=T) #draws borders of prefectures
spdf_pts <- SpatialPointsDataFrame(coords=provinceCentroids, data=data.frame(ChinaRD1$NAME_1)) proj4string(spdf_pts) <-proj4string(popDataSP) #Make sure projection is the same (should be since comes from same data source) #proj4string(spdf_pts)<-CRS(" +proj=longlat +datum=WGS84 +ellps=WGS84 +towgs84=0,0,0") #could use this code to adjust pref_pts<-over(spdf_pts,popDataSP) pref_pts$x<-slot(spdf_pts,"coords")[,1] pref_pts$y<-slot(spdf_pts,"coords")[,2] #write.table(pref_pts, file = "centroids_in_cities.CSV", sep = ",", col.names=names(pref_pts),row.names=FALSE,na="") #write CSV
plot(ChinaRD1) points(provinceCentroids,col="red") #adds points to current plot text(pref_pts$x,pref_pts$y, labels=pref_pts$city_name,col="blue",cex=0.7) #Add names
sufe_names="SUFE" sufe_pts<-SpatialPointsDataFrame(coords=cbind(121.5153883,31.2948375), data=data.frame(sufe_names)) sufe_pts$lon<-coordinates(sufe_pts)[1,1] sufe_pts$lat<-coordinates(sufe_pts)[1,2] sufe_map <- invisible(openmap(c(sufe_pts$lat+0.03,sufe_pts$lon-0.03), c(sufe_pts$lat-0.03,sufe_pts$lon+0.03), minNumTiles=10,type="osm")) #invisible suppresses warning messages
plot(sufe_map)