# Load the county-level income / poverty table and give the columns used
# later in the script shorter names.
# file.path() builds the path with a separator that works on every platform;
# the previous hard-coded "\\" separator only resolved on Windows.
Income <- read.csv(file.path("data", "Income.csv"), stringsAsFactors = FALSE)
Income <- Income %>%
  rename(
    # NOTE(review): the source column is called `State...County.Name`, which
    # looks like a name rather than a FIPS code -- confirm the new name.
    county_fips = State...County.Name,
    county_id = County.ID,
    povertyrate = All.Ages.in.Poverty.Percent
  )

# Load the restaurant listings.
Res <- read.csv(file.path("data", "Restaurant.csv"), stringsAsFactors = FALSE)

# Flag restaurants whose `cuisines` text contains "Vegetarian".
a <- Res$cuisines
vege <- str_detect(a, "Vegetarian")

# Store the flag as 0/1 in front of the restaurant columns and keep only the
# flagged rows (subset() also drops rows where the flag is NA).
ResV <- cbind(vege, Res)
ResV$vege <- as.numeric(ResV$vege)
ResVV <- subset(ResV, vege == 1)

# Keep the columns needed downstream, coerce the coordinates to numeric and
# drop every row that still has a missing value.
ResVVV <- ResVV %>%
  dplyr::select(
    id, city, name, latitude, longitude, phones, paymentTypes, postalCode
  ) %>%
  mutate(
    latitude = as.numeric(latitude),
    longitude = as.numeric(longitude)
  ) %>%
  na.omit()
#' Find the county polygon that contains each point
#'
#' Builds one polygon per entry of the `maps` "county" database and uses
#' `over()` to look up which polygon each point falls in.
#'
#' @param pointsDF Coordinates handed unchanged to `SpatialPoints()` and
#'   interpreted as WGS84 longitude/latitude. The caller in this script passes
#'   a data frame with longitude in the first column and latitude in the
#'   second.
#' @return A character vector with one element per point: the ID of the
#'   matching county polygon, indexed by the result of `over()`.
latlong2county <- function(pointsDF) {
  # Both the polygons and the points use the same coordinate reference system.
  wgs84 <- CRS("+proj=longlat +datum=WGS84")

  # Prepare a SpatialPolygons object with one polygon per county.
  # `maps::map` is namespaced on purpose: a bare `map()` is masked by
  # `purrr::map()` whenever purrr / tidyverse is attached after maps.
  counties <- maps::map("county", fill = TRUE, col = "transparent", plot = FALSE)
  # Polygon names may carry a ":"-separated suffix; keep the part before it.
  IDs <- vapply(
    strsplit(counties$names, ":"),
    function(x) x[1],
    character(1)
  )
  counties_sp <- map2SpatialPolygons(counties, IDs = IDs, proj4string = wgs84)

  # Convert pointsDF to a SpatialPoints object.
  pointsSP <- SpatialPoints(pointsDF, proj4string = wgs84)

  # Use 'over' to get _indices_ of the polygon containing each point.
  indices <- over(pointsSP, counties_sp)

  # Return the IDs of the polygons containing each point.
  countyNames <- vapply(
    counties_sp@polygons,
    function(x) x@ID,
    character(1)
  )
  countyNames[indices]
}
# Look up the county for every vegetarian restaurant
# (x = longitude, y = latitude).
testPoints <- data.frame(x = ResVVV$longitude, y = ResVVV$latitude)
county_list <- latlong2county(testPoints)
county_list_data <- as.data.frame(county_list)

# Attach the county label to the restaurant rows and drop every row with a
# missing value (presumably the points that matched no county -- confirm).
VRes <- cbind(ResVVV, county_list_data)
VRes <- VRes %>%
  na.omit()
# unique(VRes$county_list)

county_name <- as.character(VRes$county_list)
# Remove everything before and up to the last ",".
county_name2 <- gsub(".*,", "", county_name)
data <- cbind(county_name2, VRes)
data$county_list <- NULL
# unique(data$county_name2)

# Read the county-level income / poverty table and shorten the column names.
# file.path() keeps the path portable; the previous hard-coded "\\" separator
# only resolved on Windows.
Income <- read.csv(file.path("data", "Income.csv"), stringsAsFactors = FALSE)
Income <- Income %>%
  rename(
    county_fips = State...County.Name,
    county_id = County.ID,
    povertyrate = All.Ages.in.Poverty.Percent
  )
# The county id is used as a join key further down, so store it as character.
# NOTE(review): if the ids are read as numbers, leading zeros are already
# lost at this point -- confirm this is fine for the counties being mapped.
Income$county_id <- as.character(Income$county_id)
How are vegetarian restaurants distributed across New York State, and is the poverty rate related to that distribution? To explore these questions, we combine two datasets for visualization: NY restaurant listings from Datafiniti and a U.S. income and poverty-rate dataset.
# County polygons as an sf object, restricted to New York State.
counties_sf <- get_urbn_map("counties", sf = TRUE)
counties_sf <- filter(counties_sf, state_name == "New York")

# Attach the income / poverty columns to each county polygon.
spatial_data <- counties_sf %>%
  left_join(Income, by = c("county_fips" = "county_id"))

# Static choropleth of the poverty rate per county.
ggplot() +
  geom_sf(
    data = spatial_data,
    mapping = aes(fill = povertyrate),
    color = "#ffffff",
    size = 0.25
  ) +
  labs(fill = "povertyrate")

# Turn the restaurant table into point geometries (longitude / latitude,
# EPSG:4326).
data1 <- st_as_sf(
  data,
  coords = c("longitude", "latitude"),
  crs = 4326,
  agr = "constant"
)

# Poverty-rate choropleth with the restaurants drawn on top as dark red
# diamonds.
g <- ggplot(data = spatial_data) +
  geom_sf(mapping = aes(fill = povertyrate)) +
  geom_sf(data = data1, size = 4, shape = 23, fill = "darkred") +
  theme_map() +
  theme(legend.position = "right")

# Interactive version of the map with highlighting on hover.
g1 <- highlight(
  ggplotly(g),
  on = "plotly_hover",
  selected = attrs_selected(line = list(color = "black"))
)
g1
Data Visualization (QMSS Spring 2020) Group F: Vegan
Please do not hesitate to give us your feedback ❤
Source files and process book.