ggplot time series: messed up x axis - 2 - r

This is a modified version of this question.
I need to create time series plot for 2 lines for the following data:
# Creating data set: 12 observations, four for each of the years 2018-2020.
# avg_dlt_standard is the constant 9 repeated for every row.
year <- c(rep(2018,4), rep(2019,4), rep(2020,4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
avg_dlt_standard <- c(rep(9,12))
data_to_plot <- data.frame(cbind(year,month_1,avg_dlt_calc,avg_dlt_standard ))
# Convert month_1 to a factor whose levels follow order of first appearance
# in the data (2, 3, 7, 8, 6, 10, 11, 12, 5) rather than numeric order.
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
# Plot both series against month, one facet per year; the colour legend is
# driven by the constant strings mapped inside aes().
ggplot(data_to_plot,aes(x = as.factor(month_1))) +
geom_line(aes(y = avg_dlt_calc, group = year, colour = "DLT Calculated"), size = 0.5) +
geom_line(aes(y = avg_dlt_standard, group = year, colour = "DLT standard"), size = 0.5) +
geom_point(aes(y = avg_dlt_calc, colour = "DLT Calculated")) +
# limits receives the whole month_1 column (12 values, including repeats).
scale_x_discrete(name = "months", limits = data_to_plot$month_1) +
facet_grid(~year, scales = "free")+
scale_color_manual(name="",
labels = c("DLT Calculated",
"DLT standard"),
values = c( "blue",
"red")) +
theme(legend.position="top",
legend.text = element_text(size = 8))
# NOTE(review): the two lines below look like a stray, duplicated fragment of
# the scale_x_discrete()/facet_grid() calls above; they do not parse as R on
# their own - confirm and drop.
s = data_to_plot$month_1) +
facet_grid(~year, scales = "free")+
But x-axis looks wrong:
If I plot the data without this line:
data_to_plot$month_1 <- factor(data_to_plot$month_1, levels=unique(data_to_plot$month_1))
Then x-axis will still be messy:
I am setting limits for x-axis, but looks like it is not working.
How can I fix it?

I've skipped some lines and features of your plot, but in essence, this is what needs to be changed:
# Plot month as a numeric (continuous) variable so the axis runs 1..12 in
# order. This assumes month_1 is still numeric, i.e. the factor() conversion
# from the question has not been applied to data_to_plot.
ggplot(data_to_plot, aes(x = month_1)) + # no as.factor
  geom_point(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_calc)) +
  geom_line(aes(y = avg_dlt_standard), colour = "red") +
  # do *not* use scale_x_discrete, your x-axis is *continuous*;
  # use the breaks argument to set the ticks.
  # note, limits should only have 2 values - lower and upper limit.
  # c(1, 12) covers every month; a narrower range such as c(1, 2) would
  # discard all observations after month 2.
  scale_x_continuous(breaks = 1:12, limits = c(1, 12)) +
  facet_grid(~year)
In your code, you used limits = data_to_plot$month_1, but ggplot2 only used the first 2 elements of month_1 - it did not interpret the vector as a set of acceptable values.

Related

Axes will not start at 0 in ggplot

I have just created my first bubble chart in ggplot and, despite trying just about every 'solution' I could find on the internet for the past few hours, I have not been able to force the axes to start at 0. The x and y axes both start at 10, but I would like them to start at 0. I have tried using various combinations of xlim, ylim, expand_limits, scale_y_continuous and scale_x_continuous with no luck.
Here's the code I use to create the graph:
# Bubble chart of marked against sample, with point size mapped to pop_est.
ggplot(
  data = mean_diffs,
  mapping = aes(x = sample, y = marked, size = pop_est)
) +
  geom_point(alpha = 0.7) +
  scale_size(
    name = "Absolute error",
    range = c(2, 10),
    breaks = c(5, 10, 20, 40)
  )
It's not clear what exactly you tried, but the following seems to work well, assuming your data are all numeric:
# Same bubble chart, with both axes pinned to the range 0-60 and a tick
# every 10 units so the origin is included.
ggplot(
  data = mean_diffs,
  mapping = aes(x = sample, y = marked, size = pop_est)
) +
  geom_point(alpha = 0.7) +
  scale_size(
    name = "Absolute error",
    range = c(2, 10),
    breaks = c(5, 10, 20, 40)
  ) +
  scale_x_continuous(breaks = seq(0, 60, by = 10), limits = c(0, 60)) +
  scale_y_continuous(breaks = seq(0, 60, by = 10), limits = c(0, 60))
Reproducible data set, inferred from plot in question
# 3 x 3 grid of (sample, marked) positions: sample varies slowest, marked
# cycles fastest; pop_est supplies one value per grid cell.
mean_diffs <- data.frame(
  sample = rep(c(10, 30, 50), each = 3),
  marked = rep(c(10, 30, 50), times = 3),
  pop_est = c(
    30, 20, 10,
    40, 20, 3,
    30, 7, 3
  )
)

ggplot time series: messed up x axis for data with missing values

I am creating time series plot for the following data:
# Creating data set: four observations for each of the years 2018-2020.
# All three vectors are numeric, so cbind() yields a numeric matrix and every
# column of data_to_plot (including month_1) is numeric.
year <- c(rep(2018,4), rep(2019,4), rep(2020,4))
month_1 <- c(2, 3, 7, 8, 6, 10, 11, 12, 5, 7, 8, 12)
avg_dlt_calc <- c(10, 20, 11, 21, 13, 7, 10, 15, 9, 14, 16, 32)
data_to_plot <- data.frame(cbind(year,month_1,avg_dlt_calc ))
# Line plot of avg_dlt_calc against month, one facet per year.
ggplot(data_to_plot, aes(x = month_1)) +
geom_line(aes(y = avg_dlt_calc), size = 0.5) +
# NOTE(review): limits refers to data_with_avg, which is not defined in this
# snippet (the data frame built above is data_to_plot) - confirm.
# A discrete x scale is requested here although month_1 is numeric.
scale_x_discrete(name = "months", limits = data_with_avg$month_1) +
facet_grid(~year, scales = "free")
I am ok with the plot itself, but x-axis labels are messed up:
How can I fix it?
It is ok not to have labels for missing months (for example, for 2018 it will be only 2,3,7,8 - so it will be clear, that there is data only for those months).
A remedy is to coerce month_1 to a factor and group the observations by year like so:
# Treat month as a discrete variable and connect the points within each year;
# every facet gets its own (free) axes.
ggplot(
  data = data_to_plot,
  mapping = aes(x = as.factor(month_1), y = avg_dlt_calc, group = year)
) +
  geom_line(size = 0.5) +
  scale_x_discrete(name = "months") +
  facet_grid(. ~ year, scales = "free")
Note that I've moved y = avg_dlt_calc inside aes() in ggplot() which is more idiomatic than your approach. You may use the breaks argument in scale_x_discrete() to set breaks manually, see ?scale_x_discrete.
I think a fixed x-axis and adding points is more suitable for conveying the information that data is only available for some periods:
# Same discrete-month plot, with points added at each observation and only
# the y scale left free ("free_y").
ggplot(
  data = data_to_plot,
  mapping = aes(x = as.factor(month_1), y = avg_dlt_calc, group = year)
) +
  geom_line(size = 0.5) +
  geom_point() +
  scale_x_discrete(name = "months") +
  facet_grid(. ~ year, scales = "free_y")

show range in a log scale

I'd like to include the range of possible values for y_val where the upper and lower bands are available. I tried using geom_errorbar which works for the continuous scale but not in log scale. How can I fix this?
# Example data: a central value (y_val) with an upper and a lower bound for
# each of five x positions.
x_val <- c(2, 3, 6, 12, 24)
y_val<-c(1,3,15,25,30)
y_upper<- c(1.2,3.2, 16, 28,40)
y_lower <-c(0.8,2.9, 12, 22,25)
df <- data.frame(x_val=x_val,y_val=y_val,y_upper=y_upper,
y_lower=y_lower)
# Line + points with error bars, both axes on a log10 scale.
ggplot(data=df,aes(x=x_val,y=y_val))+
geom_line()+
geom_point()+
# NOTE(review): ymin/ymax are passed as fixed parameters outside aes(), so
# they come from the global vectors rather than from df; presumably they are
# therefore not transformed by scale_y_log10() the way the mapped y values
# are - confirm.
geom_errorbar(ymin = y_lower, ymax = y_upper)+
scale_y_log10()+
scale_x_log10()
Per this answer, you need coord_trans(y = "log10") rather than scale_y_log10()
library(ggplot2)

# Example data: a central value (y_val) with an upper and a lower bound for
# each of five x positions.
x_val <- c(2, 3, 6, 12, 24)
y_val <- c(1, 3, 15, 25, 30)
y_upper <- c(1.2, 3.2, 16, 28, 40)
y_lower <- c(0.8, 2.9, 12, 22, 25)
df <- data.frame(
  x_val = x_val,
  y_val = y_val,
  y_upper = y_upper,
  y_lower = y_lower
)

# The log10 transformation is applied by the coordinate system
# (coord_trans) instead of by the scales (scale_*_log10).
ggplot(data = df, aes(x = x_val, y = y_val)) +
  geom_line() +
  geom_point() +
  # Map the bounds through aes() so they are read from df's columns and stay
  # aligned with the plotted rows, instead of relying on same-named global
  # vectors.
  geom_errorbar(aes(ymin = y_lower, ymax = y_upper)) +
  # ylim spans the full range of the bounds so no error bar is cut off.
  coord_trans(
    x = "log10",
    y = "log10",
    ylim = range(c(df$y_upper, df$y_lower))
  )
Created on 2021-03-16 by the reprex package (v1.0.0)
A trick could be to use geom_segment with arrow settings produced by a call to arrow().
library(ggplot2)

# Draw each lower-upper range as a vertical segment whose arrow heads are
# flattened to 90 degrees at both ends, so the segments look like error bars
# while still being mapped through aes() on the log10 scales.
ggplot(df, aes(x = x_val, y = y_val)) +
  geom_line() +
  geom_point() +
  geom_segment(
    mapping = aes(xend = x_val, y = y_lower, yend = y_upper),
    arrow = arrow(ends = "both", angle = 90)
  ) +
  scale_x_log10() +
  scale_y_log10()

How to replicate this excel 3d histogram in R

I am looking to reproduce roughly the following 3d histogram (made in Excel) in R. The trick is that the labels should fall between the Justice names, as the bars are meant to delineate the number of times certain justices voted against other justices.
You can use the following code to generate the data:
# One count per justice, listed in the same order as justice_names.
cutpoints <- c(0, 22, 16, 12, 13, 7, 16, 13, 20)
justice_names <- c(
  "Peckham", "Brewer", "Shiras",
  "Fuller", "Gray", "Brown",
  "McKenna", "White", "Harlan"
)
Here's a rough reproduction, omitting the 3D.
Plotting the x-axis as numeric and applying labels after allows you to shift the labels to the right by half a mark.
library(ggplot2)

# Bar heights and their labels, in display order.
cutpoints <- c(0, 22, 16, 12, 13, 7, 16, 13, 20)
justice_names <- c(
  "Peckham", "Brewer", "Shiras", "Fuller", "Gray",
  "Brown", "McKenna", "White", "Harlan"
)

# justice_names becomes a factor whose levels keep the order given above;
# order is the numeric x position (1..9) of each bar.
hist_data <- data.frame(
  justice_names = factor(justice_names, levels = justice_names),
  cutpoints = cutpoints,
  order = seq_along(justice_names)
)

# Bars sit on the integer positions; the axis breaks (and with them the name
# labels and dashed major grid lines) are placed half a unit to the right.
ggplot(hist_data, aes(x = order, y = cutpoints)) +
  geom_col(width = 0.5) +
  geom_text(aes(label = cutpoints), vjust = 0, nudge_y = 0.25) +
  scale_x_continuous(
    breaks = hist_data$order + 0.5, # Shift labels by 0.5
    labels = hist_data$justice_names
  ) +
  labs(
    title = "Fig A-1. Number of Cutpoints, 1899-1901",
    x = "Justice",
    y = "Number"
  ) +
  theme(
    panel.grid.major.x = element_line(linetype = "dashed"),
    panel.grid.minor = element_blank()
  )

My graph's legend is not showing the line graph part in ggplot (R)

I have created a graph using this code:
# Wide data: one row per month, three total columns.
df.1 <- data.frame(
Month = c("Dec-17", "Jan-18", "Feb-18", "Mar-18", "Apr-18", "May-18"),
Total_1 = c(25, 14, 8, 16, 137, 170),
Total_2 = c(3, 2, 3, 2, 18, 27),
Total_3 = c(5, 4, 3, 2, 16, 54)
)
# Reshape to long format with Month as the id column.
# NOTE(review): melt() is not base R and no library() call is shown -
# presumably reshape2::melt, yielding columns Month/variable/value; confirm.
df.1 <- melt(df.1,id.vars = "Month")
# Reorder the Month levels so they are chronological instead of alphabetical.
# NOTE(review): levels() presumes Month is a factor with alphabetically sorted
# levels; since R 4.0 data.frame() keeps strings as character by default, so
# this line needs checking on current R versions.
df.1$Month <- factor(df.1$Month, levels(df.1$Month)[c(2,4,3,5,1,6)])
# Partition the data for the two layers: rows 7-18 feed the bars and rows 1-6
# feed the line (presumably Total_2/Total_3 and Total_1 respectively, assuming
# melt() stacks the variables in column order - confirm).
df.1.1 <- df.1[7:18,]
df.1.2 <- df.1[1:6,]
# Dodged bars filled by variable, plus a line layer drawn from df.1.2.
ggplot(data = df.1.1, aes(x = Month, y = value)) +
geom_bar(aes(fill = variable), position = position_dodge(),stat = 'identity') +
# The line's color is set as a constant outside aes(), i.e. it is not mapped
# to any scale.
geom_line(data = df.1.2, aes(x=Month, y=value, group=1), size =1.25, color = "#380B61") +
theme(axis.title.x=element_blank(), axis.title.y = element_blank(), legend.position="bottom", legend.direction="horizontal")
Which created this graph:
Example Graph
As you can see, only the bar chart is showing in the legend. How can I get the line part (Total_1) to show in the legend as well?
EDIT: To be clear I want the finished chart to look as close to this as possible:
Example Graph

Resources