I'm trying to plot my data and got quite far with it (as I am a bloody beginner with R and ggplot). Now I'm stuck in making the legend:
I would like to have separate legend entries for line and point layers, which means something like
-- female
-- male
-- Plot A
-- Plot B
O Start
O End
O Year 1
Any suggestions how to solve that problem?
# Example data: six subjects with a random start date between 2015-01-01 and
# 2016-01-01 and a random end date between 2016-01-01 and 2016-10-01.
test<-data.frame(id=1:6, sex=rep(c("female", "male"),times=3), plot=rep(c("A", "B"), times=3), start=sample(seq(as.Date('2015/01/01'), as.Date('2016/01/01'), by="day"), 6), end=sample(seq(as.Date('2016/01/01'), as.Date('2016/10/01'), by="day"),6))
# Observation length in days.
test$duration <- difftime(test$end, test$start, units="days")
# "Year 1" marker: 366 days after the start date ...
test$Year1 <- as.Date(test$start+366)
# ... blanked out when it is not before today or the subject was observed for less than 365 days.
test$Year1[test$Year1>=Sys.Date() | test$duration<365] <-NA
# x-axis limits: from 2015-01-01 up to today.
startTime<- as.Date("2015-01-01")
endTime <- Sys.Date()
start.end <-c(startTime, endTime)
# One horizontal segment per subject; colour encodes sex, linetype encodes plot.
ggplot(test, aes(x=start, y=id, color=sex, linetype=plot))+
geom_segment(aes(x=start, xend=end, y=id, yend=id), size=.75)+
# The three point layers below set shape as a fixed constant outside aes(),
# so no layer maps the shape aesthetic to a variable.
geom_point(aes(Year1), na.rm=TRUE, shape=16, size=3)+
geom_point(aes(start), shape=1, size=3)+
# End marker is only drawn for subjects whose end date is not today.
geom_point(data=subset(test, end!= Sys.Date()), aes(end), shape=13, size=3)+
guides(color=guide_legend(title=NULL))+
scale_x_date(date_breaks="6 months", date_minor_breaks = "1 month", date_labels="%m/%Y", name="duration", limits=start.end)+
scale_color_discrete(name="", breaks=c("female", "male"), labels=c("f", "m"))+
scale_linetype_manual(name="", breaks=c("A", "B"), labels=c("Plot A", "Plot B"), values=c("dotdash","solid"))+
# NOTE(review): because shape is never mapped inside aes(), this scale has no
# data to work with and no shape legend is drawn - this is the problem being asked about.
# The values are also given as character strings rather than numeric shape codes.
scale_shape_manual(name="", guide='legend', breaks=c("Year1", "start", "end"), labels=c("Year1", "start", "end"), values=c("16", "1", "13"))
enter image description here
You have to put the start, end and Year1 dates into one common variable (long format) and map the shape aesthetic to the variable that identifies which kind of date each row holds. This should work:
library(ggplot2)
library(tidyr)
library(dplyr)
# Example data: six subjects with a random start date between 2015-01-01 and
# 2016-01-01 and a random end date between 2016-01-01 and 2016-10-01.
test <- data.frame(
  id = 1:6,
  sex = rep(c("female", "male"), times = 3),
  plot = rep(c("A", "B"), times = 3),
  start = sample(seq(as.Date("2015/01/01"), as.Date("2016/01/01"), by = "day"), 6),
  end = sample(seq(as.Date("2016/01/01"), as.Date("2016/10/01"), by = "day"), 6)
)
test$duration <- difftime(test$end, test$start, units = "days")

# "Year 1" marker: 366 days after the start date, set to NA when it is not
# before today or when the subject was observed for less than 365 days.
test$Year1 <- as.Date(test$start + 366)
test$Year1[test$Year1 >= Sys.Date() | test$duration < 365] <- NA

# x-axis limits: from 2015-01-01 up to today.
startTime <- as.Date("2015-01-01")
endTime <- Sys.Date()
start.end <- c(startTime, endTime)

# Long format for the point layer: one row per subject and event, with the
# event name in `type` and its date in `value`. pivot_longer() replaces the
# superseded gather().
test_melt <- test %>%
  select(id, sex, start, end, Year1) %>%
  pivot_longer(c(start, end, Year1), names_to = "type", values_to = "value")

ggplot(test) +
  geom_segment(
    aes(x = start, xend = end, y = id, yend = id, color = sex, linetype = plot),
    size = .75
  ) +
  # Mapping shape to `type` inside aes() is what produces the shape legend.
  # na.rm = TRUE silently drops the rows whose Year1 date was set to NA above.
  geom_point(
    aes(x = value, y = id, color = sex, shape = type),
    data = test_melt, size = 3, na.rm = TRUE
  ) +
  guides(color = guide_legend(title = NULL)) +
  scale_x_date(
    date_breaks = "6 months", date_minor_breaks = "1 month",
    date_labels = "%m/%Y", name = "duration", limits = start.end
  ) +
  scale_color_discrete(name = "", breaks = c("female", "male"), labels = c("f", "m")) +
  # Named value vectors pin each level to its line type / shape explicitly,
  # instead of relying on positional matching against limits or breaks.
  scale_linetype_manual(
    name = "", breaks = c("A", "B"), labels = c("Plot A", "Plot B"),
    values = c(A = "dotdash", B = "solid")
  ) +
  # Legend entries in the order requested in the question: Start, End, Year 1.
  scale_shape_manual(
    name = "", guide = "legend",
    breaks = c("start", "end", "Year1"),
    labels = c("Start", "End", "Year 1"),
    values = c(start = 1, end = 13, Year1 = 16)
  )
Related
For a long time I have been struggling to get the x-axis labels right in my plot (ggplot2).
The challenge is that I have two geom_paths, each fetching the data from a different dataframe - I'm sure this will become a bit clearer in the code:
# Plot with two spline paths. dx, ps, pg and the Nutzer* objects are defined outside this snippet.
# The default mapping takes x = year and y = en.x from dx.
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
# Each path layer replaces both the data (ps / pg) and the x/y mapping (that frame's own x and y
# columns), so no layer in this plot actually draws from dx$year.
# The constant strings "Person 1" / "Person 2" inside aes() create the legend keys.
geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
# Manual scales translate the two legend keys into the colours / line types and labels
# held in the Nutzer* variables.
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
The goal is to label the x-axis with the years from the data frame "dx", as given in the aes() call. And it works! But only if you disable the geom_paths, as shown below:
# Same plot with both path layers commented out: only the default data (dx) and its
# x = year / y = en.x mapping remain, and no layer overrides them.
ggplot(data=dx, aes(x = year, y=en.x ))+
scale_y_continuous(breaks = scales::pretty_breaks(n = 2))+
#geom_path(data=ps, aes(x, y, color = "Person 1", linetype="Person 1"), size=0.5)+
#geom_path(data=pg, aes(x , y, color = "Person 2", linetype="Person 2"), size=0.5)+
# The manual colour / linetype scales are still added, although no remaining layer uses them.
scale_color_manual("",labels = c(Nutzer1, Nutzer2), values = c("Person 1" = Nutzer1Farbe, "Person 2" = Nutzer2Farbe)) +
scale_linetype_manual("",labels = c(Nutzer1, Nutzer2), values=c("Person 1"=Nutzer1Format, "Person 2"=Nutzer2Format))
I can't really understand why the paths destroy the labeling like this - it must be the aes parameters.
If someone has a solution for this, I would be extremely grateful!
This could be achieved like so:
Convert your original month variable to a datetime before calling xspline. This way the interpolated date values can easily be converted back to datetimes via e.g. lubridate::as_datetime.
Besides that, you could row-bind your datasets, which makes plotting a bit easier.
library(ggplot2)
library(tidyr)
library(dplyr)
# Store month as POSIXct so the interpolated x values can be turned back into
# datetimes after the spline step.
datengesamt <- mutate(datengesamt, month = as.POSIXct(month))

# Dummy plot drawn before the xspline() calls.
# NOTE(review): presumably needed so the base-graphics xspline() has an open
# plot to compute against - confirm.
plot(1, 1)

# Spline coordinates for column 2 (ps) and column 3 (pg) against column 1;
# draw = FALSE returns the points instead of drawing them.
ps <- xspline(datengesamt[, 1], datengesamt[, 2], shape = 1, draw = FALSE)
pg <- xspline(datengesamt[, 1], datengesamt[, 3], shape = 1, draw = FALSE)

# Stack both splines into one data frame; `id` records which person a row belongs to.
pp <- bind_rows(
  list("Person 1" = data.frame(ps), "Person 2" = data.frame(pg)),
  .id = "id"
)
# Convert the interpolated x values back to datetimes.
pp <- mutate(pp, x = lubridate::as_datetime(x))

# One path per person, labelled with the year on the x-axis.
ggplot(pp, aes(x, y, color = id, linetype = id)) +
  geom_path(size = 0.5) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 2)) +
  scale_x_datetime(date_labels = "%Y")
I am trying to produce a Gantt chart out of a table with different task (each having a start date and end date).
library(tidyverse)
# Sample data: six tasks, each with a start date, an end date and a responsible group.
df1 <- data.frame(
  from = c("2020-01-01", "2020-02-02", "2020-05-04", "2020-02-01", "2020-01-20", "2020-02-10"),
  to = c("2020-03-30", "2020-03-15", "2020-05-20", "2020-04-05", "2020-03-05", "2020-04-13"),
  task = c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
  group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)

# Gantt chart: parse the dates, reshape to one row per task endpoint, then draw
# a thick line between the two endpoints of every task.
df1 %>%
  mutate(
    from = as.Date(from),
    to = as.Date(to)
  ) %>%
  pivot_longer(cols = c(from, to), values_to = "date") %>%
  ggplot(aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  # Diamond markers on both endpoints; colour is inherited from the plot mapping.
  geom_point(alpha = .5, shape = 18, size = 5) +
  # Weekly breaks labelled with the week number.
  scale_x_date(position = "bottom", date_breaks = "1 week", date_labels = "%U") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "Milestones", x = "", y = "", colour = "Responsible")
So far so good, but now I have a major problem and a minor problem:
major problem:
How can I re-order the categories on the y-axis as they appear in the data (not alphabetically)? From top to bottom it should be: Finance, Research, Other. Additionally, within each category (finance, research, other) the lines should be ordered by starting date (i.e. the task starting first should be on top)
minor problem:
on the x-axis how can I plot a thicker line for each month and a thinner line for each week?
Thanks for help!
This could be achieved like so:
As @RuiBarrades mentioned in his comments, to get the right order you have to convert to a factor and set the levels in the right order. First, set the levels for the groups. Second, to get the tasks in the desired order, I rearrange the dataset by group and start date and use forcats::fct_inorder to set the levels of the tasks in that order.
If I got you right, you want different grid lines for weeks and months? This can be achieved by setting date_minor_breaks = "1 month" and styling the grid lines via theme() and panel.grid.minor.x / panel.grid.major.x. Here I opted for a "black" color, but if you prefer different line widths you can set them via size.
library(tidyverse)
library(ggplot2)
# Sample data: six tasks, each with a start date, an end date and a responsible group.
df1 <- data.frame(
  from = c("2020-01-01", "2020-02-02", "2020-05-04", "2020-02-01", "2020-01-20", "2020-02-10"),
  to = c("2020-03-30", "2020-03-15", "2020-05-20", "2020-04-05", "2020-03-05", "2020-04-13"),
  task = c("Task 1", "Task 2", "Task 3", "Task 4", "Task 5", "Task 6"),
  group = c("Finance", "Finance", "Research", "Research", "Other", "Other")
)

# Gantt chart with tasks ordered by group and, within each group, by start date.
df1 %>%
  mutate(
    from = as.Date(from),
    to = as.Date(to),
    # Fix the group order explicitly instead of the alphabetical default.
    group = factor(group, levels = c("Finance", "Research", "Other"))
  ) %>%
  # Sort in descending order: the first factor level sits at the bottom of a
  # discrete y-axis, so the first group / earliest task ends up on top.
  arrange(desc(group), desc(from)) %>%
  mutate(task = forcats::fct_inorder(task)) %>%
  pivot_longer(cols = c(from, to), values_to = "date") %>%
  ggplot(aes(x = date, y = task, colour = group)) +
  geom_line(lwd = 3) +
  # Diamond markers on both endpoints; colour is inherited from the plot mapping.
  geom_point(alpha = .5, shape = 18, size = 5) +
  # Major breaks every week (labelled with the week number), minor breaks every month.
  scale_x_date(
    position = "bottom",
    date_breaks = "1 week",
    date_minor_breaks = "1 month",
    date_labels = "%U"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    # Monthly (minor) grid lines are drawn in black to set them apart.
    panel.grid.minor.x = element_line(color = "black")
  ) +
  labs(title = "Milestones", x = "", y = "", colour = "Responsible")
I want to make a seemingly trivial adjustment to the chart pictured below:
I would like the labels along the x-axis to be even years, rather than odd years. So instead of going from 2009 -> 2011 -> 2013, they should go from 2008 -> 2010 -> 2012, and so forth...
How do I go about doing this?
Here is the code:
# Read the weekly 10-year yield files and drop columns 3 to 6, keeping the
# date and yield columns.
germany_yields <- read.csv(
  file = "Germany 10-Year Yield Weekly (2007-2020).csv",
  stringsAsFactors = FALSE
)[, -(3:6)]
italy_yields <- read.csv(
  file = "Italy 10-Year Yield Weekly (2007-2020).csv",
  stringsAsFactors = FALSE
)[, -(3:6)]

# Give the first two columns of each table a common date name and a
# country-specific yield name.
colnames(germany_yields)[1:2] <- c("Date", "Germany.Yield")
colnames(italy_yields)[1:2] <- c("Date", "Italy.Yield")

# Match the two series on Date and drop incomplete rows.
combined <- na.omit(join(germany_yields, italy_yields, by = "Date"))

# Dates are written like "January 05, 2020".
# NOTE(review): %B depends on the session locale - confirm it matches the files.
combined$Date <- as.Date(combined$Date, format = "%B %d, %Y")
combined$Spread <- combined$Italy.Yield - combined$Germany.Yield

# Axis limits: the last and the first date in the table, in that order.
fl_dates <- c(
  tail(combined$Date, n = 1),
  head(combined$Date, n = 1)
)

ggplot(data = combined, aes(x = Date, y = Spread)) +
  geom_line() +
  scale_x_date(
    limits = fl_dates,
    expand = c(0, 0),
    date_breaks = "2 years",
    date_labels = "%Y"
  )
A -- not very elegant -- way would be to put these arguments in your scale_x_date() :
# Place a break on 1 January of every even year that occurs in the data.
# The years are taken from combined$Date (the date column of the plotted
# data); ymd(..., truncated = 2L) turns a bare year into that year's 1 January.
scale_x_date(date_labels = "%Y",
             breaks = ymd(unique(year(combined$Date)[year(combined$Date) %% 2 == 0]), truncated = 2L))
(we define breaks manually, by subsetting the whole range of dates and keeping the even years)
That's actually fairly simple. Just set the lower limit to an even year, and set the upper limit to NA. As you haven't provided a reproducible example, here is an example on some fake data.
library(tidyverse)
# Fake data: 100 quarterly dates starting 2007-01-01 with random values.
mydates <- seq(as.Date("2007/1/1"), by = "3 months", length.out = 100)
# The dates are stored in the tibble itself, so aes(x = mydates) below
# resolves to a column of df rather than to a separate global vector.
df <- tibble(
  mydates = mydates,
  myvalue = rnorm(length(mydates))
)

# without limits argument
ggplot(df) +
  aes(x = mydates, y = myvalue) +
  geom_line(size = 1L, colour = "#0c4c8a") +
  scale_x_date(date_breaks = "2 years",
               date_labels = "%Y")

# with limits argument: an even-year lower limit, and NA to keep the
# data-driven upper limit
ggplot(df) +
  aes(x = mydates, y = myvalue) +
  geom_line(size = 1L, colour = "#0c4c8a") +
  scale_x_date(date_breaks = "2 years",
               date_labels = "%Y",
               limits = c(as.Date("2006/1/1"), NA))
Created on 2020-04-29 by the reprex package (v0.3.0)
I have the following code:
# Hourly hydrometric readings for Saskatchewan stations.
file <- "http://dd.weather.gc.ca/hydrometric/csv/SK/hourly/SK_hourly_hydrometric.csv"
skdat <- read.csv(file, header = TRUE, sep = ",", dec = ".", stringsAsFactors = FALSE)
colnames(skdat) <- c("ID", "Date", "WaterLevel", "Grade1", "Symbol1",
                     "QAQC-1", "DischargeDebit", "Grade2", "Symbol2",
                     "QAQC-2")

# Keep a single station.
subds <- subset(skdat, ID == "05AH050")

# Timestamp converted to POSIXct and then to a plain number.
subds$datetime1 <- as.numeric(as.POSIXct(subds$Date))
# Inspect the new column of subds (not of the base function `data`).
class(subds$datetime1)
subds[1:10, ]

# Map y to the WaterLevel column itself, not to the constant string "WaterLevel".
ggplot(aes(x = datetime1, y = WaterLevel), data = subds) + geom_line()
Is there a way I can show just the time in 2 hr intervals on the Y axis?
It is unclear what you mean by "time in 2 hr intervals on the Y axis", because time is on the x-axis. Here is an example that changes the breaks to every 2 hours on the x-axis. The datetime1 column needs to be of class POSIXct.
library(ggplot2)
library(scales)
# Keep the timestamp as POSIXct so a datetime scale can be used on the x-axis.
# NOTE(review): assumes the Date strings are in a format as.POSIXct() parses
# including the time of day - confirm against the file.
subds$datetime1 <- as.POSIXct(subds$Date)

ggplot(aes(x = datetime1, y = WaterLevel), data = subds) +
  geom_line() +
  # A break every two hours, labelled as hours:minutes. date_labels formats the
  # labels in the scale's own time zone, whereas scales::date_format() defaults
  # to tz = "UTC" and would shift the labelled hours for non-UTC data.
  scale_x_datetime(date_breaks = "2 hours", date_labels = "%H:%M")
I have a 18x3 data.frame. I would like a similar plot as in
ggplot: arranging boxplots of multiple y-variables for each group of a continuous x
but I want the x-axis divided into two variables (y0 & y1, y2 & y3, y4 & y5)
My best attempt was to add an additional factor column for the facet_wrap function, but it maintains all 6 variables in each plot.
require (ggplot2)
require (plyr)
library(reshape2)
# Six samples of three random values each.
y.0 <- rnorm(3)
y.1 <- rnorm(3)
y.2 <- rnorm(3)
y.3 <- rnorm(3)
y.4 <- rnorm(3)
y.5 <- rnorm(3)

# Wide table with one column per sample, then long format with one row per value.
df <- data.frame(y.0, y.1, y.2, y.3, y.4, y.5)
dfm <- melt(df, measure.vars = 1:6)

# Rows come in runs of three per sample: label the samples alternately
# "no PM" / "PM", and assign consecutive pairs of samples to one facet.
dfm$colour <- rep(rep(c("no PM", "PM"), each = 3), times = 3)
dfm$facet <- rep(c("lys", "lystrp", "trplys"), each = 6)

# Box plot per sample, filled by PM status, one panel per facet label.
ggplot(dfm, aes(x = variable, y = value, fill = colour)) +
  geom_boxplot() +
  ggtitle("Protease assay") +
  xlab("Sample type") +
  ylab("peptide count") +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~facet)
Any help would be appreciated. Thanks.