I have prepare a dataframe and use a ggplot on him. But the initial order is not respected. How i can respect this order ?
Patient Nb_peptides Type_affinite
1 22 563 a
2 22 1040 b
3 22 11139 c
4 24 489 a
5 24 1120 b
6 24 11779 c
7 13 467 a
8 13 1239 b
9 13 14600 c
g_plot <- ggplot(data = nb_peptides_type,
aes(x = reorder(Patient, True_order),
y = Nb_peptides,
fill = Type_affinite)) +
geom_bar(stat = "identity")
print(g_plot)
Please provide stand-alone code to make it easier.
I would use levels outside of your plot to reorder factor levels : Is it what you're looking for ?
## fake dataframe
df <- data.frame(patient = as.factor(rep((21:30), each=3)),
nb = rpois(30, 1000),
type=sample(letters[1:3], replace =T, size =30))
## initial plot
ggplot(data = df,
aes(x = patient,
y = nb,
fill = type)) +
geom_bar(stat = "identity")
## adjust factors levels
True_order <- sample((21:30), 10)
levels(df$patient) <- True_order
## re-plot
ggplot(data = df,
aes(x = patient,
y = nb,
fill = type)) +
geom_bar(stat = "identity")
Related
I am trying to use color to highlight differences between and within factor levels. For example, with these reproducible data:
set.seed(123)
dat <- data.frame(
Factor = sample(c("AAA", "BBB", "CCC"), 50, replace = T),
ColorValue = sample(1:4, 50 , replace = T),
x = sample(1:50, 50, replace =T),
y = sample(1:50, 50, replace =T))
head(dat)
Factor ColorValue x y
1 AAA 1 30 43
2 CCC 2 17 25
3 BBB 4 25 20
4 CCC 1 48 13
5 CCC 3 25 6
6 AAA 1 45 20
I want to have a different color for each Factor. Then, within each factor I am trying to use ColorValue as a continuous coloring variable to show intensity.
In the plot below, each facet would have different shades of red, green, and blue that reflect the ColorValue, ideally with a single intensity (i.e. ColorValue) legend for all three factor levels.
ggplot(dat, aes(x = x, y = y, color = Factor)) +
geom_point(size = 3) +
facet_wrap(~Factor) +
theme_bw()
ggplot(dat, aes(x = x, y = y, color = Factor, alpha = ColorValue)) +
geom_point(size = 3) +
facet_wrap(~Factor) +
theme_bw()
I have drawn the attached funnel plot in ggplot, But I have 2 questions:
Is there any way to make the coloured green dot bigger (only that one);
is there any way to colour the upper and lower part of the confidence intervals?
This is what I am able to make so far:
Thank you!
The data set I am working on:
df <-
read.table(text = "
school_id year sdq_emotional
1060 7 4
1060 7 5
1060 7 7
1060 7 6
1060 7 4
1060 7 7
1060 7 8
1115 7 5
1115 7 9
1115 7 3
1136 7 1
1136 7 8
1136 7 5
1136 7 9
1135 7 4
1139 7 7
1139 7 3
2371 7 6
2371 7 3
2372 7 4
2372 7 1
2378 7 6
2378 7 7
2378 7 5", header=TRUE)
My code as follows:
# Format the data
df1 <- plyr::count(df, c('school_id'))
df2 <- merge(df,df1, by= c("school_id"))
df <- df2
M3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),mean,na.rm=T)
S3 <- aggregate(df$sdq_emotional[df$freq > 10], by=list(df$school_id[df$freq > 10]),nona)
CG_PLOT1 <- merge(M3,S3,by="Group.1")
names(CG_PLOT1) <- c("School","Mean","Size")
LINE3 <- data.frame(M3=rep(mean(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
SD3=rep(sd(df$sdq_emotional,na.rm=T),max(CG_PLOT1$Size)+25),
N3=sqrt(1:(max(CG_PLOT1$Size)+25)))
ID <- 1060
filling3 <- rep("white",nrow(CG_PLOT1))
filling3[CG_PLOT1$School ==ID]<-"green"
# Build the graph
ggplot(data = CG_PLOT1) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 + qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_line(data = LINE3, aes(x = 1:(max(CG_PLOT1$Size) + 25),
y = M3 - qnorm(0.975) * SD3 / N3), size = 1, colour = "steelblue2",
linetype = 5) +
geom_segment(xend = max(CG_PLOT1$Size)+25,yend=mean(LINE3$M3,na.rm=T)),
aes(x = 1, y = mean(LINE3$M3,na.rm=T), size=1, colour="steelblue2") +
geom_point(data = CG_PLOT1, aes(x = Size, y = Mean), size = 2,
colour = "black", shape = 21,fill = filling3) +
ylim(0, 8)
thank you very much!
As you didn't provide a reproducible example, I have used this question as a template for your problem:
Creating a dataset here:
library(ggplot2)
set.seed(101)
x <- runif(100, min=1, max=10)
y <- rnorm(length(x), mean=5, sd=0.1*x)
df <- data.frame(x=x*70, y=y)
m <- lm(y ~ x, data=df)
fit95 <- predict(m, interval="conf", level=.95)
fit99 <- predict(m, interval="conf", level=.999)
df <- cbind.data.frame(df,
lwr95=fit95[,"lwr"], upr95=fit95[,"upr"],
lwr99=fit99[,"lwr"], upr99=fit99[,"upr"])
To add a colour background to the funnel plot, we can use the geom_ribbon function within ggplot to fill the area between a ymin and ymax. In this case, we will use the data used to construct each of the lines:
ggplot(df, aes(x, y)) +
# Add background
geom_ribbon(ymin= df$upr99, ymax = Inf, fill = "#e2a49a", alpha = 0.5) +
geom_ribbon(ymin = df$lwr99, ymax = df$upr99, fill = "#e0ba9d", alpha = 0.5 ) +
geom_ribbon(ymin = 0, ymax = df$lwr99, fill = "#8fd6c9", alpha = 0.5 ) +
# Overlay points and lines
geom_point() +
geom_smooth(method="lm", colour="black", lwd=1.1, se=FALSE) +
geom_line(aes(y = upr95), color="black", linetype=2) +
geom_line(aes(y = lwr95), color="black", linetype=2) +
geom_line(aes(y = upr99), color="red", linetype=3) +
geom_line(aes(y = lwr99), color="red", linetype=3)
labs(x="No. admissions...", y="Percentage of patients...")
As for changing the size of one point, you can check out the answer here. I would recommend subsetting the data to extract the one point, and then add another layer for the geom_point and then changing the size and colour argument of the new layer`
This question already has answers here:
ggplot bar plot with facet-dependent order of categories
(4 answers)
Closed 5 years ago.
In the df below, I want to reorder bars from highest to lowest in each facet
I tried
df <- df %>% tidyr::gather("var", "value", 2:4)
ggplot(df, aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity")+facet_wrap(~var, ncol =3)
It gave me
It didn't order the bars from highest to lowest in each facet.
I figured out another way to get what I want. I had to plot each variable at a time, then combine all plots using grid.arrange()
#I got this function from #eipi10's answer
#http://stackoverflow.com/questions/38637261/perfectly-align-several-plots/38640937#38640937
#Function to extract legend
# https://github.com/hadley/ggplot2/wiki/Share-a-legend-between-two-ggplot2-graphs
g_legend<-function(a.gplot) {
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
return(legend)
}
p1 <- ggplot(df[df$var== "A", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)
fin_legend <- g_legend(p1)
p1 <- p1 + guides(fill= F)
p2 <- ggplot(df[df$var== "B", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)+guides(fill=FALSE)
p3 <- ggplot(df[df$var== "C", ], aes (x = reorder(id, -value), y = value, fill = id))+
geom_bar(stat="identity") + facet_wrap(~var, ncol =3)+guides(fill=FALSE)
grid.arrange(p1, p2, p3, fin_legend, ncol =4, widths = c(1.5, 1.5, 1.5, 0.5))
The result is what I want
I wonder if there is a straightforward way that can help me order the bars from highest to lowest in all facets without having to plot each variable separtely and then combine them. Any suggestions will be much appreciated.
DATA
df <- read.table(text = c("
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35"), header = T)
The approach below uses a specially prepared variable for the x-axis with facet_wrap() but uses the labels parameter to scale_x_discrete() to display the correct x-axis labels:
Prepare data
I'm more fluent in data.table, so this is used here. Feel free to use what ever package you prefer for data manipulation.
Edit: Removed second dummy variable, only ord is required
library(data.table)
# reshape from wide to long
molten <- melt(setDT(df), id.vars = "id")
# create dummy var which reflects order when sorted alphabetically
molten[, ord := sprintf("%02i", frank(molten, variable, -value, ties.method = "first"))]
molten
# id variable value ord
# 1: site1 A 10 05
# 2: site2 A 20 04
# 3: site3 A 30 03
# 4: site4 A 40 02
# 5: site5 A 50 01
# 6: site1 B 15 09
# 7: site2 B 10 10
# 8: site3 B 20 08
# 9: site4 B 35 06
#10: site5 B 30 07
#11: site1 C 20 15
#12: site2 C 30 13
#13: site3 C 25 14
#14: site4 C 40 11
#15: site5 C 35 12
Create plot
library(ggplot2)
# `ord` is plotted on x-axis instead of `id`
ggplot(molten, aes(x = ord, y = value, fill = id)) +
# geom_col() is replacement for geom_bar(stat = "identity")
geom_col() +
# independent x-axis scale in each facet,
# drop absent factor levels (not the case here)
facet_wrap(~ variable, scales = "free_x", drop = TRUE) +
# use named character vector to replace x-axis labels
scale_x_discrete(labels = molten[, setNames(as.character(id), ord)]) +
# replace x-axis title
xlab("id")
Data
df <- read.table(text = "
id A B C
site1 10 15 20
site2 20 10 30
site3 30 20 25
site4 40 35 40
site5 50 30 35", header = T)
If you're willing to lose the X axis labels, you can do this by using the actual y values as the x aesthetic, then dropping unused factor levels in each facet:
ggplot(df, aes (x = factor(-value), y = value, fill = id))+
geom_bar(stat="identity", na.rm = TRUE)+
facet_wrap(~var, ncol =3, scales = "free_x", drop = TRUE) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank()
)
Result:
The loss of x-axis labels is probably not too bad here as you still have the colours to go on (and the x-axis is confusing anyway since it's not consistent across facets).
I have the following generated data frame called Raw_Data:
Time Velocity Type
1 10 1 a
2 20 2 a
3 30 3 a
4 40 4 a
5 50 5 a
6 10 2 b
7 20 4 b
8 30 6 b
9 40 8 b
10 50 9 b
11 10 3 c
12 20 6 c
13 30 9 c
14 40 11 c
15 50 13 c
I plotted this data with ggplot2:
ggplot(Raw_Data, aes(x=Time, y=Velocity))+geom_point() + facet_grid(Type ~.)
I have the objects: Regression_a, Regression_b, Regression_c. These are the linear regression equations for each plot. Each plot should display the corresponding equation.
Using annotate displays the particular equation on each plot:
annotate("text", x = 1.78, y = 5, label = Regression_a, color="black", size = 5, parse=FALSE)
I tried to overcome the issue with the following code:
Regression_a_eq <- data.frame(x = 1.78, y = 1,label = Regression_a,
Type = "a")
p <- x + geom_text(data = Raw_Data,label = Regression_a)
This did not solve the problem. Each plot still showed Regression_a, rather than just plot a
You can put the expressions as character values in a new dataframe with the same unique Type's as in your data-dataframe and add them with geom_text:
regrDF <- data.frame(Type = c('a','b','c'), lbl = c('Regression_a', 'Regression_b', 'Regression_c'))
ggplot(Raw_Data, aes(x = Time, y = Velocity)) +
geom_point() +
geom_text(data = regrDF, aes(x = 10, y = 10, label = lbl), hjust = 0) +
facet_grid(Type ~.)
which gives:
You can replace the text values in regrDF$lbl with the appropriate expressions.
Just a supplementary for the adopted answer if we have facets in both horizontal and vertical directions.
regrDF <- data.frame(Type1 = c('a','a','b','b'),
Type2 = c('c','d','c','d'),
lbl = c('Regression_ac', 'Regression_ad', 'Regression_bc', 'Regression_bd'))
ggplot(Raw_Data, aes(x = Time, y = Velocity)) +
geom_point() +
geom_text(data = regrDF, aes(x = 10, y = 10, label = lbl), hjust = 0) +
facet_grid(Type1 ~ Type2)
The answer is good but still imperfect as I do not know how to incorporate math expressions and newline simultaneously (Adding a newline in a substitute() expression).
I have a data frame which I generated using the following piece of code,
x <- c(1:10)
y <- x^3
z <- y-20
s <- z/3
t <- s*6
q <- s*y
x1 <- cbind(x,y,z,s,t,q)
x1 <- data.frame(x1)
The data frame x1 thus has the following data,
x y z s t q
1 1 1 -19 -6.333333 -38 -6.333333
2 2 8 -12 -4.000000 -24 -32.000000
3 3 27 7 2.333333 14 63.000000
4 4 64 44 14.666667 88 938.666667
5 5 125 105 35.000000 210 4375.000000
6 6 216 196 65.333333 392 14112.000000
7 7 343 323 107.666667 646 36929.666667
8 8 512 492 164.000000 984 83968.000000
9 9 729 709 236.333333 1418 172287.000000
10 10 1000 980 326.666667 1960 326666.666667
Now I want to plot columns x vs y, z vs s and t vs q in the same plot, so for this I use the following code,
p <- ggplot() +
geom_line(data = x1, aes(x = x1[,1], y = x1[,2], color = "red")) +
geom_line(data = x1, aes(x = x1[,3], y = x1[,4], color = "blue")) +
geom_line(data = x1, aes(x = x1[,5], y = x1[,6], color = "green")) +
xlab('x') +
ylab('y')
While the above piece of code works fine for a data frame of just 6 columns, I would like to perform the same operation for a data frame with many number of columns. For example if there are 20 columns in a data frame, there should be one single plot generated containing plot of col 1 vs 2, col 3 vs 4, col 5 vs 6 and so on until col 19 vs 20. To do this I use this following piece of code,
p <- ggplot() + geom_line(data = x1, aes(x = x1[,1], y = x1[,2], color = "red")) + xlab('x') + ylab('y')
ctr <- 1
for (iz in seq(3, ncol(x1), by = 2))
{
p$ctr <- p + geom_line(data = x1, aes(x = x1[,iz], y = x1[,iz+1], color = "green"))
ctr <- ctr+1
}
So the plots should be layered incrementally and the last object should contain the entire plot. Using the above code the plot gets overwritten every time when the loop runs, could some one point out how to capture the full data. I would like to display a legend for each of the plot as well.
Thanks
You don't need a loop if you put your data into the right format. You can create a long data frame based on your original data frame.
x1_long <- data.frame(x = unlist(x1[c(TRUE, FALSE)]),
y = unlist(x1[c(FALSE, TRUE)]),
ind = gl(ncol(x1) / 2, nrow(x1)))
Now, a single geom_line command is sufficient:
library(ggplot2)
ggplot(x1_long) +
geom_line(aes(x = x, y = y, colour = ind))
(Note. The red line is plotted too but its values are quite small.)
How about this?
ggplot() +
lapply(seq(1,ncol(x1),2), # every second col index
function(x){ # return the geom_line calls in a list
geom_line(aes_string(x=x1[x], # remember to use aes_string for x
y=x1[x+1]), # and y
color=factor(x), # then color
size=2) # and size
}) +
xlab('x') + ylab('y')