Below is my code. I am trying to add a line (with data from a different CSV file) on top of a stacked bar plot, but it won't work; the error says "object variable not found". Without the geom_line layer the stacked bar plot works, so I assume the line is what creates the issue. Any ideas on how I can fix this?
# Read the bar data and, from a second file, the data for the line.
a <- read.csv("data.csv", header=TRUE, sep=",")
line1 <- read.csv("data1.csv", header=TRUE, sep=",")
line2 <- data.frame(line1)
library(reshape2)
# Reshape the bar data to long format, keeping `day` as the identifier.
c <- melt(a, id.var="day")
library(ggplot2)
# NOTE(review): `fill=variable` is set in the plot-level aes(), so every layer
# inherits it, including geom_line(), whose data (`line2`) is not shown to
# contain a `variable` column. This is the likely source of the reported error.
a <- ggplot(c, aes(x=day, y=value, fill=variable)) +
geom_bar(stat="identity", aes(x=day, y=value), width=0.7) +
geom_line(data=line2, aes(x=day, y=value), color="black", stat="identity")
# NOTE(review): the `+` below starts a new line, so the expression above is
# already complete; the two scales that follow are not added to the plot
# assigned to `a`.
+
scale_fill_manual(values = c("black", "grey47", "grey")) +
# NOTE(review): `m` is not defined anywhere in this snippet; the melted data
# is named `c`.
scale_x_continuous(breaks = round(seq(min(m$day), max(m$day), by = 1),0))
print(a)
The following is a complete code example to produce the graph below.
I have changed your variables' names, in order to make them more consistent. You had named both the data.frame in file "data.csv" and the result of your ggplot instruction a.
library(reshape2)
library(ggplot2)
# Read the wide bar data (one column per category) and the line data.
a <- read.csv("~/data.csv")
line1 <- read.csv("~/data2.csv")

# Reshape the bar data to long format with `day` as the identifier; the
# category ends up in `variable` and its count in `value`.
# `id.vars` is spelled out in full rather than relying on partial matching.
long <- melt(a, id.vars = "day")

# `fill` is mapped only in the bar layer, so the line layer (drawn from
# `line1`) does not inherit it. `x` and `y` are inherited from ggplot().
g <- ggplot(long, aes(x = day, y = value)) +
  # geom_col() is the direct spelling of geom_bar(stat = "identity").
  geom_col(aes(fill = variable), width = 0.7) +
  geom_line(
    data = line1,
    aes(x = day, y = value),
    color = "black"
  ) +
  scale_fill_manual(values = c("black", "grey47", "grey")) +
  # One axis break per day.
  scale_x_continuous(breaks = min(long$day):max(long$day))
print(g)
Data in dput format.
a <-
structure(list(day = 1:31, emigration = c(6L, 6L, 6L, 6L, 5L,
3L, 1L, 9L, 8L, 7L, 6L, 4L, 3L, 1L, 2L, 4L, 5L, 6L, 8L, 7L, 5L,
4L, 1L, 2L, 4L, 9L, 8L, 7L, 6L, 4L, 3L), security = c(5L, 5L,
5L, 5L, 6L, 6L, 8L, 9L, 9L, 9L, 8L, 8L, 5L, 7L, 7L, 6L, 5L, 5L,
4L, 3L, 2L, 2L, 2L, 2L, 4L, 9L, 7L, 6L, 4L, 3L, 2L), checkin = c(4,
6, 9, 1, 3, 5, 7, 9, 8, 6, 4, 2, 1, 3, 4, 5, 6, 7, 8, 8, 2, 1,
2, 3, 4, 5, 7, 8, 9, 1, 1)), class = "data.frame",
row.names = c(NA, -31L))
line1 <-
structure(list(day = 1:31, value = c(12, 11, 10, 8, 7, 6, 6,
6, 7, 8, 14, 6, 6, 6, 8, 8, 10, 10, 12, 12, 12, 13, 13, 14, 15,
15, 10, 10, 10, 10, 12)), class = "data.frame",
row.names = c(NA, -31L))
Based on your comments of your data structure, I suppose it might help joining your dataframes first and then building the plot using one dataset. You can try:
library(dplyr)
# Attach the line values to `c` by day. The `value` column of `line2` is
# renamed to `value_line2` before the join.
c <- left_join(
  c,
  rename(line2, value_line2 = value),
  by = "day"
)
Then adjust geom_line():
# Line layer drawn from the joined data, using the renamed line column.
# stat = "identity" is geom_line()'s default, so it is not repeated.
geom_line(mapping = aes(x = day, y = value_line2), data = c, colour = "black")
This might help. Please tell me if joining the data doesn't work as intended.
In case it wasn't clear, this is what I meant in my comment above:
library(ggplot2)
# The fill mapping lives in the bar layer only, so the line layer built from
# `line2` does not inherit it. The bar layer inherits x and y from ggplot().
a <- ggplot(data = c, mapping = aes(x = day, y = value)) +
  geom_bar(
    mapping = aes(fill = variable),
    stat = "identity",
    width = 0.7
  ) +
  geom_line(
    mapping = aes(x = day, y = value),
    data = line2,
    colour = "black"
  )
Related
I am wishing to show multiple geom_points in order of "season_pts" from each group "drafted_qbs". The issue though is that I'm not sure what to assign the other variable. I have a "team" column which is just the row number of each group but that will only order the first grouping "2".
Any way of laying out each group's "fantasy_pts" on the same graph (I am not interested in faceting), in order of points, would be helpful.
Data
structure(list(team = c(1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L,
1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L, 1L, 2L, 3L, 4L, 5L),
season_pts = c(447.44, 381.62, 416.6, 367.96, 419.92, 490.78,
501.66, 458.56, 484.48, 458.36, 518, 495.7, 511.34, 499.68,
536.42, 522.92, 536.92, 518.46, 538.06, 525.96, 541.84, 523.26,
542.98, 527.4, 527), drafted_qbs = c(2, 2, 2, 2, 2, 3, 3,
3, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -25L))
It usually helps to ask yourself "what is it I am trying to show with this plot?"
If you are trying to show that an increased number of drafted QBs tends to produce an increased number of points, then you can do something like this:
# Season points against number of drafted QBs: one point per row, coloured by
# team, with a dashed smoother over all points.
ggplot(data = df, mapping = aes(x = drafted_qbs, y = season_pts)) +
  geom_point(mapping = aes(colour = factor(team)), size = 4) +
  geom_smooth(colour = 'gray20', linetype = 2, size = 0.5, alpha = 0.15) +
  scale_colour_brewer(palette = 'Set1') +
  # theme() must follow theme_light() so the grid tweak is not overwritten.
  theme_light(base_size = 16) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(x = 'Drafted QBs', y = 'Season Points', colour = 'Team')
If you want to show that not all teams are affected equally by this effect, then something like this might be preferable:
# Season points per team, with the number of drafted QBs shown as a
# red-to-blue colour gradient.
ggplot(
  data = df,
  mapping = aes(x = team, y = season_pts, colour = drafted_qbs)
) +
  geom_point(alpha = 0.5, size = 4) +
  scale_colour_gradient(low = 'red3', high = 'blue3') +
  # theme() must follow theme_light() so the grid tweak is not overwritten.
  theme_light(base_size = 16) +
  theme(panel.grid.minor.x = element_blank()) +
  labs(x = 'Team', y = 'Season Points', colour = 'Drafted QBs')
The example data:
nltt <- structure(list(time = c(0, 1.02579504946471, 1.66430039972234,
1.67435173974229, 1.82009140034063, 1.95574135900067, 2.06963147976273,
2.64869249896209, 3.10438864450297, 0, 0.56927280073675, 1.94864234867127,
3.40490224199942, 0, 0.444318793403606, 1.34697160089298, 5.86547288923207,
0, 1.10824151737219, 1.77801220982399, 1.82246583876729, 2.18034182214015,
2.33051663760657, 3.01615794541527, 0, 0.101501512884473, 0.98261402255534,
1.04492141817475, 1.16554239437694, 1.25441082400256, 1.25777371029976,
1.62464049949719, 1.87253384965378, 1.91118229908154, 1.94105777022533,
2.17755553127212, 2.37899716574036, 2.85003451051712, 3.16711386665322
), num = c(2, 3, 4, 5, 4, 5, 6, 5, 6, 2, 3, 4, 5, 2, 3, 2, 3,
2, 3, 4, 5, 6, 7, 6, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 10, 11,
12, 13, 14), rep = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L)), row.names = c(NA,
-39L), class = "data.frame")
The example code:
# One line per replicate (`rep`), coloured by replicate, with the view limited
# to x in [0, 3] via coord_cartesian().
# aes() is namespace-qualified like every other call here, so the snippet
# also runs when ggplot2 is not attached.
ggplot2::ggplot(
  nltt,
  ggplot2::aes(time, num, group = as.factor(rep), color = as.factor(rep))
) +
  ggplot2::geom_line() +
  ggplot2::coord_cartesian(xlim = c(0, 3)) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::xlab("age")
The example plot:
I would like each line in the plot to stop precisely at x = 3, but adding coord_cartesian(xlim = c(0, 3)) does not achieve my goal because the lines continue into the right padding area. How can I limit the lines to the range [0, 3] without truncating my raw data?
Up front: two answers, the first removes the margins and requires no change to the data; the second preserves the margins, which requires one to modify the data in-place.
Remove the margin(s)
The default behavior is to expand the margins a little. While there is the ggplot2::expansion to control the multiplicative and additive components of the expansion, it can only be used in scale_continuous which, as you know, will result in loss (NA) of out-of-bound points.
If you can accept losing the boundary on the left as well, though, you can add expand=FALSE to your coord_cartesian and get your desired results:
# Same plot with axis expansion switched off, so the panel ends exactly at the
# requested limits on every side.
# aes() is namespace-qualified like every other call here, so the snippet
# also runs when ggplot2 is not attached.
ggplot2::ggplot(
  nltt,
  ggplot2::aes(time, num, group = as.factor(rep), color = as.factor(rep))
) +
  ggplot2::geom_line() +
  ggplot2::coord_cartesian(xlim = c(0, 3), expand = FALSE) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::xlab("age")
If you want to retain the left margin, though, you can force it by adjusting the xlim=, realizing that the default is around expansion(mult=0.05, add=0):
# Expansion is off, but the lower x limit is moved to -0.15 by hand to keep a
# margin on the left while the right edge stays exactly at 3.
# aes() is namespace-qualified like every other call here, so the snippet
# also runs when ggplot2 is not attached.
ggplot2::ggplot(
  nltt,
  ggplot2::aes(time, num, group = as.factor(rep), color = as.factor(rep))
) +
  ggplot2::geom_line() +
  ggplot2::coord_cartesian(xlim = c(-0.15, 3), expand = FALSE) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::xlab("age")
Interpolate and truncate, external to ggplot
ggplot2::scale_x_continuous(..., oob=) supports several mechanisms for dealing with out-of-bounds data, including:
the default censor (replaces with NA), which doesn't work since we don't have data at time=3
scales::squish that will take (for example) x=4 and squish it back to x=3; the unfortunate side-effect of this is that it is univariate (it does not attempt to change the corresponding y= value), so the slopes of the squished line segments will be steeper, and (in my mind at least) this corrupts the data and vis;
a user-defined function that is passed the values and the associated limits; unfortunately, it is also univariate, so we're stuck with the same data/vis slope-corruption as the previous bullet.
This brings me to the suggestion to interpolate the data yourself before passing to ggplot. I'll demo with dplyr but it can be done easily with base R or other dialects as well.
library(dplyr)
# Add an interpolated point at time = 3 to every replicate, drop everything
# beyond 3, and plot the result.
# ggplot() and aes() are namespace-qualified like the other ggplot2 calls,
# because only dplyr is attached in this snippet.
group_by(nltt, rep) %>%
  ## step 1: interpolate, returns *just* time=3 data, nothing more
  summarize(
    as.data.frame(
      setNames(approx(time, num, xout = 3), c("time", "num"))
    )
  ) %>%
  ## step 2: combine with the original data
  bind_rows(nltt) %>%
  ## step 3: remove data over 3
  dplyr::filter(time <= 3) %>%
  ggplot2::ggplot(
    ggplot2::aes(time, num, group = as.factor(rep), color = as.factor(rep))
  ) +
  ggplot2::geom_line() +
  ggplot2::coord_cartesian(xlim = c(0, 3)) +
  ggplot2::theme(legend.position = "none") +
  ggplot2::xlab("age")
Similar to this question and also this, my preferred output is to have the legend for geom_text to be the text label, instead of 'aaa'. (ie the 'aaa' inside the circle of the mchild legend should be 0,1,2,3,4. Please see the attached picture)
Here is my data:
data.frame(
ID = c(1L, 3L, 5L, 11L, 13L, 18L, 20L, 24L, 33L, 34L, 36L),
sum_hrs = c(12, 8, 68, 44, 16, 12, 36, 16, 4, 20, 4),
avg_workload = c(263.1615,275.312,269.462444444444,
268.867666666667,276.686,257.3605,267.8695,268.3355,
239.409,260.230333333333,330.061),
avg_achv = c(92.5,98,94.2222222222222,
94.8333333333333,92,98,94.25,93.5,98,95.3333333333333,100),
mchild = c(1L, 0L, 1L, 2L, 3L, 0L, 4L, 0L, 2L, 0L, 1L),
drinker = as.factor(c("No","Yes","Yes",
"Yes","Yes","No","Yes","Yes","No","No",
"Yes"))
)
I have tried the answer suggestions in the related SO questions, but could not get it to work. Most probably my grobs indexing is incorrect. So I have read the documentation but still could not quite get it. Here is what I have done so far:
# Bubble plot: point size from mchild, colour from drinker, transparency from
# sum_hrs, with each point labelled by its mchild value.
p2 <- ggplot(data=sum_ua, aes(x=avg_achv, y=sum_hrs, size=mchild, color=drinker, alpha=sum_hrs)) +
geom_point() +
geom_text(data=sum_ua, aes(x=avg_achv, y=sum_hrs, label=mchild), color='#A9A9A9') +
scale_size(range=c(4,20)) +
scale_alpha_binned(range = c(0.01, 1), guide = 'none') +
new_scale_color()
# Convert the plot to its grob table so individual grobs can be edited.
g <- ggplotGrob(p2)
# Distinct mchild values in ascending order, used as replacement labels.
lbls <- unique(sort(sum_ua$mchild))
# NOTE(review): the indices below (15, then 1, 6, 8) are hard-coded positions
# in the grob tree. Whether they point at the legend's text grobs depends on
# the plot layout; confirm by inspecting g$grobs.
g$grobs[[15]][[1]][[1]]$grobs[[1]]$label <- lbls[1]
g$grobs[[15]][[1]][[1]]$grobs[[6]]$label <- lbls[2]
g$grobs[[15]][[1]][[1]]$grobs[[8]]$label <- lbls[3]
# Print the grob whose layout name matches "guide" (presumably the legend
# container) for inspection.
g$grobs[[grep("guide", g$layout$name)]]
# Render the modified grob table.
grid.draw(g)
Here is my output:
Sorry for the basic question and thank you for your help! :)
As an alternative to messing around in the gtable of the plot, you can make a custom scale_discrete_identity() for the label aesthetic. You just need to make sure that it has a legend and that all information is similar to the size legend (title, breaks, labels etc.).
library(ggplot2)
library(ggnewscale)
sum_ua <- data.frame(
ID = c(1L, 3L, 5L, 11L, 13L, 18L, 20L, 24L, 33L, 34L, 36L),
sum_hrs = c(12, 8, 68, 44, 16, 12, 36, 16, 4, 20, 4),
avg_workload = c(263.1615,275.312,269.462444444444,
268.867666666667,276.686,257.3605,267.8695,268.3355,
239.409,260.230333333333,330.061),
avg_achv = c(92.5,98,94.2222222222222,
94.8333333333333,92,98,94.25,93.5,98,95.3333333333333,100),
mchild = c(1L, 0L, 1L, 2L, 3L, 0L, 4L, 0L, 2L, 0L, 1L),
drinker = as.factor(c("No","Yes","Yes",
"Yes","Yes","No","Yes","Yes","No","No",
"Yes"))
)
# Bubble plot with a legend for the text labels: the label aesthetic gets an
# identity scale with a legend, titled "mchild".
ggplot(
  data = sum_ua,
  mapping = aes(
    x = avg_achv,
    y = sum_hrs,
    size = mchild,
    colour = drinker,
    alpha = sum_hrs
  )
) +
  geom_point() +
  # Data, x and y are inherited from ggplot(); only the label is layer-specific.
  geom_text(
    mapping = aes(label = as.character(mchild)),
    colour = '#A9A9A9'
  ) +
  scale_size(range = c(4, 20)) +
  scale_alpha_binned(range = c(0.01, 1), guide = 'none') +
  scale_discrete_identity(
    aesthetics = "label",
    name = "mchild",
    guide = "legend"
  ) +
  new_scale_color()
Created on 2021-02-01 by the reprex package (v0.3.0)
For your case this seems to work
# Rebuild the grob table from the plot, then overwrite the labels of five
# grobs with the values 0 to 4 and draw the result.
g <- ggplotGrob(p2)
# NOTE(review): 15 and 5/8/11/14/17 are hard-coded positions in the grob tree,
# presumably the text grobs of the size legend for this specific plot. They
# may differ for another plot or ggplot2 version; confirm before reuse.
g$grobs[[15]][[1]][[1]]$grobs[[5]]$label <- 0
g$grobs[[15]][[1]][[1]]$grobs[[8]]$label <- 1
g$grobs[[15]][[1]][[1]]$grobs[[11]]$label <- 2
g$grobs[[15]][[1]][[1]]$grobs[[14]]$label <- 3
g$grobs[[15]][[1]][[1]]$grobs[[17]]$label <- 4
grid::grid.draw(g)
Or to avoid hardcoding and repeating the same code again we can use :
# Replacement labels, assigned in order to the text grobs found below.
labels <- 0:4

# Positions of the text grobs inside the nested grob list.
# vapply() + inherits() always yields one logical per grob. The previous
# sapply(..., class) form only produced a matrix (as colSums() requires) when
# every grob had the same number of classes.
inds <- which(vapply(
  g$grobs[[15]][[1]][[1]]$grobs,
  function(grob) inherits(grob, "text"),
  logical(1)
))

# Overwrite the label of each text grob with the matching entry of `labels`.
g$grobs[[15]][[1]][[1]]$grobs[inds] <- Map(
  function(x, y) {
    x$label <- y
    x
  },
  g$grobs[[15]][[1]][[1]]$grobs[inds],
  labels
)
grid::grid.draw(g)
First of all, thanks^13 to tidyverse. I want the bars in the chart below to follow the same factor levels reordered by forcats::fct_reorder (). Surprisingly, I see different order of levels in the data set when View ()ed as when they are displayed in the chart (see below). The chart should illustrate the number of failed students before and after the bonus marks (I want to sort the bars based on the number of failed students before the bonus).
MWE
# Dodged bars of FailNo per subject, filled by Bonus (level order reversed);
# subjects are reordered on the x axis by FailNo, descending.
ggplot(data = df) +
  geom_bar(
    mapping = aes(
      x = forcats::fct_reorder(subject, FailNo, .desc = TRUE),
      y = FailNo,
      fill = forcats::fct_rev(Bonus)
    ),
    stat = 'identity',
    position = 'dodge'
  ) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 1.5,
      hjust = 1.5,
      size = rel(1.2)
    )
  )
Data output of dput (df)
structure(list(subject = structure(c(1L, 2L, 5L, 6L, 3L, 7L,
4L, 9L, 10L, 8L, 12L, 11L, 1L, 2L, 5L, 6L, 3L, 7L, 4L, 9L, 10L,
8L, 12L, 11L), .Label = c("CAB_1", "DEM_1", "SSR_2", "RRG_1",
"TTP_1", "TTP_2", "IMM_1", "RRG_2", "DEM_2", "VRR_2", "PRS_2",
"COM_2", "MEB_2", "PHH_1", "PHH_2"), class = "factor"), Bonus = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("After", "Before"), class = "factor"),
FailNo = c(29, 28, 20, 18, 15, 13, 12, 8, 5, 4, 4, 2, 21,
16, 16, 14, 7, 10, 10, 5, 3, 4, 4, 1)), .Names = c("subject",
"Bonus", "FailNo"), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-24L))
Bar chart
The issue
According to the table above, SSR_2 should come in fifth place and IMM_1 in sixth; however, in the chart these two subjects have swapped positions. How can I sort the bars correctly with the tidyverse in this case?
Use factor with unique levels for your x -axis.
# Same dodged bar chart, but the x-axis factor is rebuilt with its levels
# taken from unique(subject), i.e. in order of first appearance in the data.
ggplot(data = df) +
  geom_bar(
    mapping = aes(
      x = factor(
        forcats::fct_reorder(subject, FailNo, .desc = TRUE),
        levels = unique(subject)
      ),
      y = FailNo,
      fill = forcats::fct_rev(Bonus)
    ),
    stat = 'identity',
    position = 'dodge'
  ) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 1.5,
      hjust = 1.5,
      size = rel(1.2)
    )
  )
Edited: #dotorate comment
Sort failNo before the bonus
library(dplyr)
# Rows from before the bonus only, sorted by number of failures (descending).
df_before_bonus <- arrange(filter(df, Bonus == "Before"), desc(FailNo))
Use FailNo before the bonus to create the factor
# Re-level subject so its (ordered) levels follow the before-bonus ranking.
df[["subject"]] <- factor(
  df[["subject"]],
  levels = df_before_bonus[["subject"]],
  ordered = TRUE
)
Updated plot
# Dodged bars of FailNo per subject, filled by Bonus; the x-axis order comes
# from the factor levels of `subject`.
ggplot(data = df) +
  geom_bar(
    mapping = aes(x = subject, y = FailNo, fill = as.factor(Bonus)),
    stat = 'identity',
    position = 'dodge'
  ) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 1.5,
      hjust = 1.5,
      size = rel(1.2)
    )
  )
I have been trying the whole day to arrange two factor levels called "type" and "name" by a numeric value called "score", and plot by category type (with color determined by type) ordered by score. I am also trying to get the group called "ALL" on top so it is separated by the other 3 categories in "type". My attempts until now have been very unsuccessful, I don't get why I can't even get the reordering correctly. Any help is very appreciated.
This is my data:
df = structure(list(score = c(12, 12.2, 12.5, 12.3, 12.2, 12.4, 12.5, 12.7, 12.1, 12.8, 12.4, 12.3, 12.2, 12.6, 12.8, 12.1, 12.5), range1 = c(0.003356, 1.20497, -0.128138, -42.6093, -41.1975, -44.706, -20, -46.4245, -0.543379, 2.09828, -20, -20, -44.2262, -46.6559, -20, -20, 2.37709), point = c(1.56805, 2.11176, 0.1502, -22.6093, -21.1975, -24.706, -0.491829, -26.4245, 2.49973, 2.94457, 0.0443572, 0.0208999, -24.2262, -26.6559, 2.69408, 3.22951, 3.33255), range2 = c(2.3767, 2.73239, 0.430373, 4.34247, 4.96875, 3.78027, 1.91331, 4.07937, 3.54538, 3.5491, 1.87162, 2.41067, 5.26578, 4.50965, 4.55967, 5.05772, 3.97742), type = structure(c(1L, 1L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("ALL", "A", "B", "C"), class = "factor"), name = structure(c(13L, 14L, 15L, 1L, 4L, 5L, 6L, 8L, 12L, 17L, 2L, 3L, 7L, 9L, 10L, 11L, 16L), .Label = c("B_vision1", "C_vision2", "C_vision3", "B_vision4", "B_vision5", "A_vision2", "C_vision4", "B_vision6", "C_vision6", "C_vision5", "C_vision1", "B_vision7", "B_ALL", "C_ALL", "A", "C_vision7", "B_vision3"), class = "factor")), .Names = c("score", "range1", "point", "range2", "type", "name"), row.names = c(NA, -17L), class = "data.frame")
I have tried all these options:
# Attempt 1: a copy of `name` whose levels are ordered by descending score.
df$name2 <- reorder(df$name, -df$score)
# df$name <- reorder(df$name, -df$score)

# Attempt 2: a combined "name type" factor whose levels are ordered by score.
df <- transform(df, category2 = reorder(factor(paste(name, type)), score))
#library(plyr)
#df = arrange(df,type, name)

# Horizontal point-range plot by name, coloured by type.
ggplot(
  data = df,
  mapping = aes(
    x = name,
    y = point,
    ymin = range1,
    ymax = range2,
    colour = type
  )
) +
  geom_pointrange() +
  coord_flip()
or
# Horizontal point-range plot by the combined category, coloured by type.
ggplot(
  data = df,
  mapping = aes(
    x = category2,
    y = point,
    ymin = range1,
    ymax = range2,
    colour = type
  )
) +
  geom_pointrange() +
  coord_flip()
I am trying to get something similar to the grouped forest plot on this question but with each group defined by names and reordered by score.
I think I've interpreted what you're trying to do correctly, but I might have got it wrong.
The names (and scores) can be ordered by the sorted list of scores as
# Names and scores (both as character), arranged by ascending score.
ordered.names <- as.character(df$name[order(df$score)])
ordered.scores <- as.character(df$score[order(df$score)])
Re-ordering the name levels (with the score annotated) is then
# Re-level `name` in score order and relabel each level as "name ( score )".
df[["name"]] <- factor(
  x = df[["name"]],
  levels = ordered.names,
  labels = paste(ordered.names, "(", ordered.scores, ")", sep = " ")
)
Plotting these with ggplot:
# The package is called ggplot2; library(ggplot) fails because no package of
# that name is installed.
library(ggplot2)

# Point-range plot by name (levels already ordered by score), grouped and
# coloured by type, with the x-axis labels rotated to vertical.
ggplot(
  df,
  aes(
    x = name,
    y = point,
    ymin = range1,
    ymax = range2,
    group = type,
    color = type
  )
) +
  geom_pointrange() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0))
produces
If you want this split up by type as well, you can facet the plot
# Same point-range plot, split into one panel per type with an independent
# x axis in each panel.
ggplot(
  df,
  aes(
    x = name,
    y = point,
    ymin = range1,
    ymax = range2,
    group = type,
    color = type
  )
) +
  geom_pointrange() +
  theme(axis.text.x = element_text(angle = 90, hjust = 0)) +
  # The argument is `scales`; it is spelled out instead of relying on partial
  # matching of `scale`.
  facet_wrap(~type, ncol = 4, scales = "free_x")