How to prevent information on my ggplot2 timeline chart from being cut off - r

I am drawing a timeline chart with ggplot and it's plotting just fine, the problem comes in when the plot cuts off the names of the last organisations on my chart.
I changed the names of the organisations in my reproducible example but i have tried to retain the number of characters.
I tried making it a plotly graph so i can specify the margins but the names are still cut off.
Any help is really appreciated.
An image showing the graph is attached time line chart
library(scales)
library(ggplot2)
library(lubridate)
library(readxl)
library(plotly)
mydata<- "Jurisdiction Organisations Years.Start Years.End
Pan-African hfgvdbxvbdxncvnbx 1998 2018
International AfrimenRis 2006 2018
International AVSG 1984 2018
Local BOSCOUGYTRtruhjhjhgpp 2007 2018
International CarhIntemmnatoponal 1998 2018
International Caropkg 1980 2018
Local ChrjslignCounselling 2002 2018
Local GWWD-GIO 2004 2018
Local Hmgngnfhfhjdhfvhg 1994 2018
International bsbbjsdvvsnvfncvsjvbsdvvnbvcndbcv 1998 2018
International gkhvhdvfjvbvccvnbdvjbv 2006 2018
Local jhfdjhfgjhseghdfhjsgdjhgfjb 1998 2018
International bjhdbfvjhbjhgdbfvjhvsd 1998 2018
International vdcxnbvndbxcvbnvnbx 2006 2018
Local ACNEVTsvdcxbnvdjxbvfn 2007 2018
International ghjbgjxbdfngvcbdjfhcgbv 1986 2016"
usedata <- read.table(text=mydata, header = TRUE)
usedata$date<-with(usedata, ymd(sprintf("%04d%02d%02d", Years.Start, 1, 1)))
usedata$date2<-with(usedata, ymd(sprintf("%04d%02d%02d", Years.End, 1, 1)))
usedata<-usedata[with(usedata, order(date)),]
jurisdiction_level<-c("International", "Local", "Pan-African")
jurisdiction_colors <- c("#0070C0", "#00B050", "#FFC000")
positions <- c(0.5, -0.5, 1.0, -1.0, 1.5, -1.5)
directions <- c(1, -1)
line_pos <- data.frame(
"date"=unique(usedata$date),
"position"=rep(positions, length.out=length(unique(usedata$date))),
"direction"=rep(directions, length.out=length(unique(usedata$date)))
)
usedata<- merge(x=usedata, y=line_pos, by="date", all = TRUE)
usedata<-usedata[with(usedata, order(date, Jurisdiction)), ]
text_offset <- 0.2
usedata$year_count <- ave(usedata$date==usedata$date, usedata$date, FUN=cumsum)
usedata$text_position <- (usedata$year_count * text_offset * usedata$direction) + usedata$position
##############displaying all years
year_date_range <- as.Date(seq(min(usedata$date) , max(usedata$date) , by='year'), origin = "1970-01-01")
year_format <- format(year_date_range, '%Y')
year_df <- data.frame(year_date_range, year_format)
#png(file="timeline.png",width=1000,height=700,res=70)
####################################PLOT#####################################
timeline_plot<-ggplot(usedata,aes(x=date,y=0, col=Jurisdiction, label=Organisations))
timeline_plot<-timeline_plot+labs(col="Organisations")
timeline_plot<-timeline_plot+scale_color_manual(values=jurisdiction_colors, labels=jurisdiction_level, drop = FALSE)
timeline_plot<-timeline_plot+theme_classic()
########### Plot horizontal black line for timeline
timeline_plot<-timeline_plot+geom_hline(yintercept=0,
color = "black", size=0.3)
# Plot vertical segment lines for milestones
timeline_plot<-timeline_plot+geom_segment(data=usedata[usedata$year_count == 1,], aes(y=position,yend=0,xend=date), color='black', size=0.2)
# Plot scatter points at zero and date
timeline_plot<-timeline_plot+geom_point(aes(y=0), size=3)
# Don't show axes, appropriately position legend
timeline_plot<-timeline_plot+theme(axis.line.y=element_blank(),
axis.text.y=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
axis.ticks.y=element_blank(),
axis.text.x =element_blank(),
axis.ticks.x =element_blank(),
axis.line.x =element_blank(),
legend.position = "bottom"
)
# Show year text
timeline_plot<-timeline_plot+geom_text(data=year_df, aes(x=year_date_range,y=-0.2,label=year_format, fontface="bold"),size=2.5, color='black')
# scale_x_date(date_labels = "%Y", breaks=seq(as.Date("1979-12-01"),as.Date("2008-06-01") ,by= "1 year" ))
# Show text for each milestone
timeline_plot<-timeline_plot+geom_text(aes(y=text_position,label=Organisations),size=3)
print(timeline_plot)
#####Making it a plotly graph
timeline_plot1<-ggplotly(timeline_plot) %>% layout(showlegend = TRUE,margin = list(l = 120, b =90) )
print(timeline_plot1)

First, we need to change the limits:
timeline_plot <- timeline_plot() +
xlim(as.Date("1977-01-01"), as.Date("2010-01-01"))
Next, since the horizontal line is now too long, remove the geom_hline call and instead use
timeline_plot <- timeline_plot +
geom_segment(data=NULL,
aes(y=0, yend=0,
x=as.Date("1979-01-01"), xend=as.Date("2008-01-01")),
color="black", size=.3)
Result:

Related

ggplot: multiple time periods on same plot by month

I am trying to plot multiple time-periods on the same time-series graph by month. This is my data: https://pastebin.com/458t2YLg. I was trying to avoid dput() example but I think it would have caused confusion to reduce the sample and still keep the structure of the original data. Here is basically a glimpse of how it looks like:
date fl_all_cumsum
671 2015-11-02 0.785000
672 2015-11-03 1.046667
673 2015-11-04 1.046667
674 2015-11-05 1.099000
675 2015-11-06 1.099000
676 2015-11-07 1.099000
677 2015-11-08 1.151333
Basically, it is daily data that spans several years. My goal is to compare the cumulative snow gliding (fl_all_cumsum) of several winter seasons (
It is very similar to this: ggplot: Multiple years on same plot by month however, there are some differences, such as: 1) the time periods are not years but winter seasons (1.10.xxxx - 6.30.xxxx+1); 2) Because I care only about the winter periods I would like the x-axis to go only from October to end of June the following year; 3) the data is not consistent (there are a lot of NA gaps during the months).
I managed to produce this:
library(zoo)
library(lubridate)
library(ggplot2)
library(scales)
library(patchwork)
library(dplyr)
library(data.table)
startTime <- as.Date("2016-10-01")
endTime <- as.Date("2017-06-30")
start_end <- c(startTime,endTime)
ggplot(data = master_dataset, aes(x = date, y = fl_all_cumsum))+
geom_line(size = 1, na.rm=TRUE)+
ggtitle("Cumulative Seasonal Gliding Distance")+
labs(color = "")+
xlab("Month")+
ylab("Accumulated Distance [mm]")+
scale_x_date(limits=start_end,breaks=date_breaks("1 month"),labels=date_format("%d %b"))+
theme(axis.text.x = element_text(angle = 50, size = 10 , vjust = 0.5),
axis.text.y = element_text(size = 10, vjust = 0.5),
panel.background = element_rect(fill = "gray100"),
plot.background = element_rect(fill = "gray100"),
panel.grid.major = element_line(colour = "lightblue"),
plot.margin = unit(c(1, 1, 1, 1), "cm"),
plot.title = element_text(hjust = 0.5, size = 22))
This actually works good visually as the x axis goes from October to June as desired; however, I did it by setting limits,
startTime <- as.Date("2016-10-01")
endTime <- as.Date("2017-06-30")
start_end <- c(startTime,endTime)
and then setting breaks of 1 month.
scale_x_date(limits=start_end,breaks=date_breaks("1 month"),labels=date_format("%d %b"))+
It is needless to say that this technique will not work if I would like to include other winter seasons and a legend.
I also tried to assign a season to certain time periods and then use them as a factor:
master_dataset <- master_dataset %>%
mutate(season = case_when(date>=as.Date('2015-11-02')&date<=as.Date('2016-06-30')~"season 2015-16",
date>=as.Date('2016-11-02')&date<=as.Date('2017-06-30')~"season 2016-17",
date>=as.Date('2017-10-13')&date<=as.Date('2018-06-30')~"season 2017-18",
date>=as.Date('2018-10-18')&date<=as.Date('2019-06-30')~"season 2018-19"))
ggplot(master_dataset, aes(month(date, label=TRUE, abbr=TRUE), fl_all_cumsum, group=factor(season),colour=factor(season)))+
geom_line()+
labs(x="Month", colour="Season")+
theme_classic()
As you can see, I managed to include the other seasons in the graph but there are several issues now:
grouped by month it aggregates the daily values and I lose the daily dynamic in the graph (look how it is based on monthly steps)
the x-axis goes in chronological order which messes up my visualization (remember I care for the winter season development so I need the x-axis to go from October-End of June; see the first graph I produced)
Not big of an issue but because the data has NA gaps, the legend also shows a factor "NA"
I am not a programmer so I can't wrap my mind around on how to code for such an issue. In a perfect world, I would like to have something like the first graph I produced but with all winter seasons included and a legend. Does someone have a solution for this? Thanks in advance.
Zorin
This is indeed kind of a pain and rather fiddly. I create "fake dates" that are the same as your date column, but the year is set to 2015/2016 (using 2016 for the dates that will fall in February so leap days are not lost). Then we plot all the data, telling ggplot that it's all 2015-2016 so it gets plotted on the same axis, but we don't label the year. (The season labels are used and are not "fake".)
## Configure some constants:
start_month = 10 # first month on x-axis
end_month = 6 # last month on x-axis
fake_year_start = 2015 # year we'll use for start_month-December
fake_year_end = fake_year_start + 1 # year we'll use for January-end_month
fake_limits = c( # x-axis limits for plot
ymd(paste(fake_year_start, start_month, "01", sep = "-")),
ceiling_date(ymd(paste(fake_year_end, end_month, "01", sep = "-")), unit = "month")
)
df = df %>%
mutate(
## add (real) year and month columns
year = year(date),
month = month(date),
## add the year for the season start and end
season_start = ifelse(month >= start_month, year, year - 1),
season_end = season_start + 1,
## create season label
season = paste(season_start, substr(season_end, 3, 4), sep = "-"),
## add the appropriate fake year
fake_year = ifelse(month >= start_month, fake_year_start, fake_year_end),
## make a fake_date that is the same as the real date
## except set all the years to the fake_year
fake_date = date,
fake_date = "year<-"(fake_date, fake_year)
) %>%
filter(
## drop irrelevant data
month >= start_month | month <= end_month,
!is.na(fl_all_cumsum)
)
ggplot(df, aes(x = fake_date, y = fl_all_cumsum, group = season,colour= season))+
geom_line()+
labs(x="Month", colour = "Season")+
scale_x_date(
limits = fake_limits,
breaks = scales::date_breaks("1 month"),
labels = scales::date_format("%d %b")
) +
theme_classic()

Plotting points near a coastline with geom_point / ggmap / plot

I'm used to working with matlab and am now trying to learn how to use the tidyverse in R (and specifically ggplot2), so I'm making a map of all the points off the coast of Nova Scotia where I am collecting data for a project. I know I'm plotting the part starting at "map" wrong, but I don't know how to make a plot with ggmap based on latitude/longitude. I assume the next line, "loc_map", then doesn't work because the "map" isn't made within the tidyverse, but I don't know how to fix this!
lat <- loc$Lat
long <- loc$Long
locs <- data.frame(long,lat)
data("coastlineWorldFine")
map <- plot(coastlineWorldFine, col='grey', clong= mean(long),
clat=mean(lat), span=400, projection = "+proj=merc",
main="Sample Sites")
loc_map <- map + geom_point(data=locs, aes(x=long, y=lat), size = 20)
Here's a start point that you can add your geom_point layer to. First, I load the libraries, which are numerous. marmap and oce are required for bathymetry and coastline data, respectively. RColorBrewer is used for the colour palette for the bathymetry, while dplyr is needed for mutate. magrittr provides the compound assignment pipe operator (%<>%), tibble is used when I restructure the bathymetry data, and ggthemes provides theme_tufte.
# Load libraries
library(ggplot2)
library(marmap)
library(oce)
library(RColorBrewer)
library(dplyr)
library(magrittr)
library(tibble)
library(ggthemes)
Here, I get the bathymetry data, restructure it, and bin it into depth intervals.
# Get bathymetry data
bathy <- getNOAA.bathy(lon1 = -68, lon2 = -56,
lat1 = 41, lat2 = 49,
resolution = 1, keep = TRUE)
bathy <- as.tibble(fortify.bathy(bathy))
bathy %<>% mutate(depth_bins = cut(z, breaks = c(Inf, 0, -200, -500, -1000,
-1500, -2000, -2500, -3000, -Inf)))
Next, I get the coastline data and put it into a data frame.
# Get coast line data
data(coastlineWorldFine, package = "ocedata")
coast <- as.data.frame(coastlineWorldFine#data)
Finally, I plot it.
# Plot figure
p <- ggplot()
p <- p + geom_raster(data = bathy, aes(x = x, y = y, fill = depth_bins), interpolate = TRUE, alpha = 0.75)
p <- p + geom_polygon(data = coast, aes(x = longitude, y = latitude))
p <- p + coord_cartesian(ylim = c(42, 47), xlim = c(-67, -57))
p <- p + theme_tufte()
p <- p + theme(axis.text = element_blank(),
axis.title = element_blank(),
axis.line = element_blank(),
axis.ticks = element_blank(),
legend.position = "right",
plot.title = element_text(size = 24),
legend.title = element_text(size = 20),
legend.text = element_text(size = 18))
p <- p + scale_fill_manual(values = rev(c("white", brewer.pal(8, "Blues"))), guide = "none")
print(p)
This gives the following:
Adding a geom_point layer would allow you to plot your field sites.
You didn't give us much to work with, but here goes.
library(sf)
library(lwgeom) # needed only for st_sample
library(tidyverse)
Let's get an Admin01-level Canada shapefile, yank out Nova Scotia and simplify the polygons a bit
# Get a Canada Admin01 shapefile
canada <- st_as_sf(raster::getData("GADM", country = "CAN", level = 1))
# just get Nova Scotia
ns <- filter(canada, NAME_1 == "Nova Scotia")
# simplify the polygons a bit (tweak `0.01` as you need)
ns <- st_simplify(ns, preserveTopology = TRUE, 0.01)
Now, we'll generate some point data since you didn't provide any. These will not all be on the coastline:
set.seed(2018-11-23)
some_random_points <- as_data_frame(st_coordinates(st_sample(ns, 20)))
some_random_points
## # A tibble: 18 x 2
## X Y
## <dbl> <dbl>
## 1 -63.4 44.7
## 2 -63.9 45.1
## 3 -64.2 44.7
## 4 -60.8 46.8
## 5 -65.0 44.3
## 6 -63.8 45.4
## 7 -62.7 45.3
## 8 -66.1 44.3
## 9 -64.8 44.1
## 10 -64.5 44.8
## 11 -63.8 44.5
## 12 -64.7 44.8
## 13 -63.1 44.9
## 14 -65.5 43.9
## 15 -64.6 44.4
## 16 -60.4 45.9
## 17 -63.9 44.6
## 18 -62.4 45.6
Now, do some modern gg_cartography:
ggplot() +
geom_sf(data = ns, fill = "gray90", color = "#2b2b2b", size=0.125) +
geom_point(data = some_random_points, aes(X, Y)) +
theme_bw()

Different colour palettes for different series ggplot

This is a continuation of this problem: Plot data with different length
I have data that looks like this in a df I call df_Filtered:
Country Region Sales Year
Germany Berlin 2000 2000
Germany Hamburg 1500 2001
Germany Kiel 2150 2002
UK London 1200 2000
UK York 1300 2001
UK Leeds 2000 2002
Japan Tokyo 500 2000
Japan Kyoto 750 2001
I want to plot the data for each region and year wrt the sales value:
ggplot(df_Filtered, aes(x = Year, y = Sales, colour = Country,
scale_y_continuous(breaks = 1), size = mysize)) +
geom_line() +
labs(x = paste("Sales per country"), y = "Sales per country", title = NULL) +
scale_x_continuous(breaks = c(2000, 2001, 2002)) +
scale_size(range = c(1, 4), guide = "none") +
theme(panel.background = element_blank())
ggsave(paste("Output/", "Sales", ".png", sep = ""), width = 20, height = 11,
limitsize = FALSE)
Note that I also have a vector for the size of the lines that i want to use in the real data. In the real code the lines representing the average country value is thicker, but to make it simple:
Size = 2
df_Filtered$mysize <- rep(Size, nrow(df_Filtered))
My question now is how do I assign a colour pattern to each country? That is, when I plot the Japanese regions I want the lines for the regions to get different red colours, when I plot the German once I want it to be black etc. I also want one line for each series (the one that is thicker) to be in the middle of the palett. For Japan that means that one line should be red and the other once dark and light red.
Is there any way to do this without manually having to assign each region a colour value manually?

How plot timing graph with specific options

I have this data.table which has 3 columns. the first one is about MonthlySalesMean , the second is the year and then the month.
> data[,MonthlySalesMean:=mean(StoreMean),by=c("DateMonth","DateYear")][,c("MonthlySalesMean","DateYear","DateMonth")]
MonthlySalesMean DateYear DateMonth
1: 6839.340 2015 7
2: 6839.340 2015 7
3: 6839.340 2015 7
4: 6839.340 2015 7
5: 6839.340 2015 7
---
641938: 6852.171 2013 1
641939: 6852.171 2013 1
641940: 6852.171 2013 1
641941: 6852.171 2013 1
641942: 6852.171 2013 1
I need to plot a graph of three lines because I have 3 years:
> unique(data[,DateYear])
[1] 2015 2014 2013
>
And For each year or each line, it should be plotted across all months of a year the MonthlySalesMean values. In another word it should be like this graph:
How can I do this, please?
thank you for advance!
Without a reproducible example, I can't test with your data, but here's the idea. You plot a path, with aesthetics of sales (y) against month (x) grouped by year (color)
library(tidyverse)
example_data <- tibble(
MonthlySalesMean = rnorm(36, 100, 20),
DateYear = c(rep(2013, 12), rep(2014, 12), rep(2015, 12)),
DateMonth = c(1:12, 1:12, 1:12)
)
ggplot(example_data, aes(x = DateMonth, y = MonthlySalesMean, color = as.factor(DateYear))) +
geom_path() +
geom_point(size = 2) +
geom_text(aes(label = DateYear),
data = filter(example_data, DateMonth == 1),
nudge_x = -0.5) + # plot year numbers
scale_x_continuous(breaks = 1:12, labels = month.abb) +
scale_colour_manual(guide = FALSE, # hides legend
values = c("red", "green", "blue")) + # custom colors
expand_limits(x = 0.5) + # adds a space before January
labs(x = "Month", y = "Sales") +
theme_bw() +
theme(panel.grid = element_blank()) # removes gridlines

Combining choropleth made in ggplot and ggmap

Created a choropleth using ggplot2. Here's the ggplot code
okc <- ggplot() +
geom_polygon(data = mapdata, aes(x = long, y = lat, group = group,
fill = B19013_001), color = "black", size = 0.5)+
scale_fill_distiller(palette = "Reds", labels = comma,
breaks = pretty_breaks(n = 10), values = c(1,0)) +
guides(fill = guide_legend(reverse = TRUE)) +
theme_nothing(legend = TRUE) +
ggtitle('Map of 40109')
Here's a sample of the data from mapdata:
long lat order hole piece group id
1 -97.54285 35.51951 1 FALSE 1 40109100100.1 40109100100
2 -97.54282 35.51954 2 FALSE 1 40109100100.1 40109100100
3 -97.54280 35.51963 3 FALSE 1 40109100100.1 40109100100
4 -97.54276 35.51976 4 FALSE 1 40109100100.1 40109100100
5 -97.54270 35.51993 5 FALSE 1 40109100100.1 40109100100
6 -97.54266 35.52016 6 FALSE 1 40109100100.1 40109100100
NAME state county tract B19013_001
1 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
2 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
3 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
4 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
5 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
6 Census Tract 1001, Oklahoma County, Oklahoma 40 109 100100 33440
It produced this plot.
I also created a roadway map using ggmap. Here's the code:
map <- get_map(location = c(lon = mean(mapdata$lon), lat = mean(mapdata$lat))
, zoom = 10
, maptype = "roadmap"
, color = "bw")
p <- ggmap(map) +
scale_x_continuous(limits = c(min(mapdata$lon), max(mapdata$lon)), expand = c(0, 0)) +
scale_y_continuous(limits = c(min(mapdata$lat), max(mapdata$lat)), expand = c(0, 0))
p
And here's the map it produces.
When I try to combine them though I get an error. Here's the code I use to combine them and the error:
okc <- okc + p
Error in p + o : non-numeric argument to binary operator
In addition: Warning message:
Incompatible methods ("+.gg", "Ops.data.frame") for "+"
I'm not sure why I'm getting this error. Is it because the maps are not scaled the same? I could not figure how else to scale ggmap other than using the very imprecise zoom function.
If anyone has any ideas about how to layer the choropleth on top of the ggmap I will be very thankful.
Here's the rest of the code to recreate the ggplot choropleth.
library(acs)
library(ggplot2)
library(ggmap)
library(UScensus2010)
library(RColorBrewer)
library(dplyr)
library(scales)
#http://api.census.gov/data/key_signup.html
api.key.install(key="c369cd6ed053a84332caa62301eb8afe98bed825")
# Load in Shape File (You'll need to download this file from the census)
#ftp://ftp2.census.gov/geo/tiger/TIGER2013/TRACT/tl_2013_40_tract.zip
## load, subset shapefile
geodat<-readShapePoly("insert shapefile here", proj4string=CRS('+proj=longlat +datum=NAD83'))
geodat<-geodat[geodat$COUNTYFP==109,]
## fortify for ggplot digestion
geodat.f<-fortify(geodat,region="GEOID")
# American Community Survey Data: Median HH Income for OK Census Tracts
ok.counties=geo.make(state="OK", county="Oklahoma", tract="*")
ok.income<-acs.fetch(geography=ok.counties, table.number="B19013", endyear=2013)
# Merge Data Sets
geo_dat<-geography(ok.income)
var_dat<-as.data.frame(estimate(ok.income))
acs_data<-cbind(geo_dat,var_dat)
acs_data$id<- paste("40109", acs_data$tract, sep = "")
## from dplyr
mapdata<-left_join(geodat.f,acs_data)
okc <- ggplot() +
geom_polygon(data = mapdata, aes(x = long, y = lat, group = group,
fill = B19013_001), color = "black", size = 0.5)+
scale_fill_distiller(palette = "Reds", labels = comma,
breaks = pretty_breaks(n = 10), values = c(1,0)) +
guides(fill = guide_legend(reverse = TRUE)) +
theme_nothing(legend = TRUE) +
ggtitle('Map of OKC')
This is actually much better done in Leaflet. It looks better aesthetically and also code wise it is more intuitive.
library(leaflet)
library(rgdal)
library(RColorBrewer)
pal <- colorNumeric("OrRd", domain = new$pct_minority_popn)
leaflet(mapdata) %>%
addTiles %>%
addPolygons(stroke=T, fillOpacity=.5, smoothFactor=.5, color=~pal(B19013_001)) %>%
addLegend("bottomright", pal=pal, values=~B19013_001, title="Legend Title", opacity=.8)
You can change the bottom map by replacing the addTiles command with something like addProviderTiles("CartoDB.Positron"). You can see the rest of the options and more info on leaflet at: https://rstudio.github.io/leaflet/basemaps.html

Resources