geom_crossbar produces a plot with inappropriate y scale - r

It is difficult to describe my problem without attaching an image of the plot. I have two groups of data, one with two observations having mean around 1 and error around 1.5; the other has two observations with mean around 30 and error around 2.
But in the plot the bars overlap and the y-axis tick marks are out of order:
0; 0.1; 1; 1.7; 2; 27.8; 29.3; 29.8; 3.2; 31.3; 31.8; 33.3
Data and code (dataframe my.data):
my.data <- structure(list(factor1 = structure(c(1L, 1L, 2L, 2L), .Label = c("oil1", "oil2"), class = "factor"), factor2 = structure(c(1L, 2L, 1L, 2L), .Label = c("prod1", "prod2"), class = "factor"), value = c(1.7, 1, 29.8, 31.3), err = c(1.5, 1, 2, 2), min = c(0.2, 0, 27.8, 29.3), max = c(3.2, 2, 31.8, 33.3)), .Names = c("factor1", "factor2", "value", "err", "min", "max"), class = "data.frame", row.names = c("1", "2", "3", "4"))
# Plots
p1 <- ggplot(data=my.data, aes(x=factor2, fill=factor1))
p2 <- p1 + geom_crossbar(aes(y=value, ymin=min, ymax=max), position = position_dodge(width = 0.66), width=0.6)
p2
I would greatly apreciate help on this, I have been stuck on it for two days now. Thanks in advance.

Using your data and your script I get:

Related

Control order across factors in ggplot2

I have a plot that looks like below. I want to change the order so that the larger value comes first (so cyan would precede red). But I can't seem to do this. What am I doing wrong?
This is my current code block so far:
ggplot(df, aes(x = Gene.Set.Size, y = OR, label =P.value, color = Method, group = Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
> dput(df)
structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
You must set the factor order.
library(ggplot2)
df <- structure(list(Method = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("MAGMA",
"Pairwise"), class = "factor"), P.value = c(8.74e-28, 1.33e-56,
5.57e-92, 1.63e-44, 4.23e-71, 2.78e-95), OR = c(1.39, 1.424668,
1.4, 1.513, 1.478208, 1.409563), UpperCI = c(1.481491, 1.487065,
1.446039, 1.601557, 1.417117, 1.455425), LowerCI = c(1.316829,
1.364601, 1.356358, 1.42, 1.541768, 1.365056), Gene.Set.Size = structure(c(1L,
2L, 3L, 1L, 2L, 3L), .Label = c("500", "1000", "2000"), class = "factor")), row.names = c(NA,
-6L), class = "data.frame")
#reorder Factor
df$Method = factor(df$Method, levels=c("Pairwise", "MAGMA"))
ggplot(df, aes(x=Gene.Set.Size, y=OR, label=P.value,
group= Method, color=Method)) +
geom_point(position=position_dodge(width=0.5)) +
ggrepel::geom_text_repel(size = 6, box.padding = 1, segment.angle = 20, position=position_dodge(width=0.5))+
geom_pointrange(aes(ymax = UpperCI, ymin = LowerCI),position=position_dodge(width=0.5)) +
theme_bw() +
theme(text=element_text(size=25),axis.text.x = element_text(angle = 45, hjust = 1)) +
ylab("Odds ratio") +
xlab("Gene set size") +
theme(plot.margin = unit(c(2,2,2,2), "cm"))
df %>% mutate(Method = fct_relevel(Method, 'Pairwise')) %>% <<your ggplot2 code>
should do the job, assuming you have imported the tidyverse pipe operator %>% and the forcats package, which you can do with require(tidyverse)
You can simply reverse the ordering of the Method factor with forcats::fct_rev.
df$Method <- fct_rev(df$Method)
Alternatively, you can specify the first level when you initially converted that column to a factor.

ggraph edges are connecting wrong?

I am working on generating a hierarchical edge plot where the edge's color/transparency/thickness varies by the column (pvalue) in my connect dataframe, however the color/transparency/thickness of the edges in the plot I generated don't always map to the values in column (pvalue). For example, subgroup1 and subgroup4 should have the strongest thickest connection (pvalue is E-280), when in fact they don't, rather the connection between subgroup3 and subgroup4 looks to be strongest.
This data generates a reproducible example:
> dput(vertices)
structure(list(name = structure(c(3L, 1L, 2L, 4L, 5L, 6L, 7L), .Label = c("gp1",
"gp2", "origin", "subgroup1", "subgroup2", "subgroup3", "subgroup4"
), class = "factor"), id = c(NA, NA, NA, 1L, 2L, 3L, 4L), angle = c(NA,
NA, NA, 0, -90, 0, -90), hjust = c(NA, NA, NA, 1, 1, 1, 1)), row.names = c(NA,
-7L), class = "data.frame")
> dput(hierarchy)
structure(list(from = structure(c(3L, 3L, 1L, 1L, 2L, 2L), .Label = c("gp1",
"gp2", "origin"), class = "factor"), to = structure(1:6, .Label = c("gp1",
"gp2", "subgroup1", "subgroup2", "subgroup3", "subgroup4"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
> dput(connect)
structure(list(from = structure(c(1L, 1L, 2L, 3L, 1L, 2L, 3L,
1L), .Label = c("subgroup1", "subgroup2", "subgroup3"), class = "factor"),
to = structure(c(1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L), .Label = c("subgroup2",
"subgroup3", "subgroup4"), class = "factor"), pvalue = c(1.68e-204,
1.59e-121, 9.32e-73, 9.32e-73, 1.59e-21, 9.32e-50, 9.32e-40,
9.32e-280)), class = "data.frame", row.names = c(NA, -8L))
and this is the code I used to make this example plot:
from <- match( connect$from, vertices$name)
to <- match( connect$to, vertices$name)
col <- connect$pvalue
#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id <- NA
myleaves <- which(is.na( match(vertices$name, hierarchy$from) ))
nleaves <- length(myleaves)
vertices$id[ myleaves ] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves
# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust <- ifelse( vertices$id < 41, 1, 0)
# flip angle BY to make them readable
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05), size = 2, alpha = 0.8) +
geom_conn_bundle(data = get_con(from = from, to = to, col = col), aes(colour=col, alpha = col, width = col)) +
geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=3.5, alpha=0.6) +scale_edge_color_continuous(trans = "log",low="red", high="yellow")+ scale_edge_alpha_continuous(trans = "log",range = c(1, 0.1)) +scale_edge_width_continuous(trans = "log", range = c(4, 1))+
theme_void()
I think there is wrong mapping somewhere but I can't figure out where. Thank you so much for your input!
I believe there is a bug in this library. Rearranging the input data by the column of choice (pvalue in my case) in an ascending order helped but did not solve the issue.
connect_new <- arrange(connect, pvalue)
and I found the solution in a github issue submitted by another user. The subgroups within each group need to be ordered alphabetically in the hierarchy and vertices file. In addition, in the connect dataframe, the subgroups need to be ordered following the same order in the hierarchy and vertices file. Thanks to zhuxr11

Creating multiple graphs based upon the column names

This is my first question on stackoverlow, please correct me if I am not following correct question protocols.
I am trying to create some graphs for data that has been collected over three time points (time 1, time 2, time 3) which equates to X1..., X2... and X3... at the beginning of column names. The graphs are also separated by the column $Group from the data frame.
I have no problem creating the graphs, I just have many variables (~170) and am wanting to compare time 1 vs time 2, time 2 vs time 3, etc. so am trying to work a shortcut to be running this kind of code rather than having to type out each one individually.
As indicated above, I have created variable names like X1... X2... which indicate the time that the variable was recorded i.e. X1BCSTCAT = time 1; X2BCSTCAT = time 2; X3BCSTCAT = time 3. Here is a small sample of what my data looks like:
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
Here is some working code to create one graph using ggplot for time 1 vs time 2 data on one variable:
library(ggplot2)
p <- ggplot(df, aes(x=df$X1BCSTCAT, y=df$X2BCSTCAT, shape = df$Group, color = df$Group)) +
geom_point() + geom_smooth(method=lm, aes(fill=df$Group), fullrange = TRUE) +
labs(title="BCSTCAT", x="Time 1", y = "Time 2") +
scale_color_manual(name = "Group",labels = c("C8","TC"),values = c("blue", "red")) +
scale_shape_manual(name = "Group",labels = c("C8","TC"),values = c(16, 17)) +
scale_fill_manual(name = "Group",labels = c("C8", "TC"),values = c("light blue", "pink"))
So I am really trying to create some kind of a shortcut where R will cycle through and match up variable names X1... vs X2... and so on and create the graphs. I assume there must be some way to plot either based upon matching column numbers e.g. df[,7] vs df[,10] and iterating through this process or plotting by actually matching the names (where the only difference in variable names is the number which indicates time).
I have previously cycled through creating individual graphs using the lapply function, but have no idea where to even start with trying to do this one.
A solution using tidyeval approach. We will need ggplot2 v3.0.0 (remember to restart your R session)
install.packages("ggplot2", dependencies = TRUE)
First we build a function that takes column and group names as inputs. Note the use of rlang::sym, rlang::quo_name & !!.
Then create 2 name vectors for x- & y- values so that we can loop through them simultaneously using purrr::map2.
library(rlang)
library(tidyverse)
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
# define a function that accept strings as input
pair_plot <- function(x_var, y_var, group_var) {
# convert strings to symbols
x_var <- rlang::sym(x_var)
y_var <- rlang::sym(y_var)
group_var <- rlang::sym(group_var)
# unquote symbols using !!
ggplot(df, aes(x = !! x_var, y = !! y_var, shape = !! group_var, color = !! group_var)) +
geom_point() + geom_smooth(method = lm, aes(fill = !! group_var), fullrange = TRUE) +
labs(title = "BCSTCAT", x = rlang::quo_name(x_var), y = rlang::quo_name(y_var)) +
scale_color_manual(name = "Group", labels = c("C8", "TC"), values = c("blue", "red")) +
scale_shape_manual(name = "Group", labels = c("C8", "TC"), values = c(16, 17)) +
scale_fill_manual(name = "Group", labels = c("C8", "TC"), values = c("light blue", "pink")) +
theme_bw()
}
# Test if the new function works
pair_plot("X1BCSTCAT", "X2BCSTCAT", "Group")
# Create 2 parallel lists
list_x <- colnames(df)[-c(1:6, (ncol(df)-2):(ncol(df)))]
list_x
#> [1] "X1BCSTCAT" "X1BCSTCR" "X1BCSTPR" "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR"
list_y <- lead(colnames(df)[-(1:6)], 3)[1:length(list_x)]
list_y
#> [1] "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR" "X3BCSTCAT" "X3BCSTCR" "X3BCSTPR"
# Loop through 2 lists simultaneously
# Supply inputs to pair_plot function using purrr::map2
map2(list_x, list_y, ~ pair_plot(.x, .y, "Group"))
Sample outputs:
#> [[1]]
#>
#> [[2]]
Created on 2018-05-24 by the reprex package (v0.2.0).

Adding different labels to each point on a Taylor diagram

I was trying to plot Taylor diagram to compare original and bias-corrected rainfall for five rain gauge stations using 'openair' package. The plotting is okay, but I want to label each point by their name for same for each panel.
How can I do that? Solutions will be highly appreciated. I am using the below code:
TaylorDiagram(kj, obs = "Gauge", mod = "value", group = c("prod","variable"), type = "station", normalise = T, pch=1)
And the output is:
kj is the dataframe I used. I can share it if needed
Sample of kj: by dput(head(kj))
structure(list(Gauge = c(0, 0, 0, 0, 20, 0), variable = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("A", "B", "C", "D"), class = c("ordered",
"factor")), value = c(0, 0, 0, 0, 0, 0), station = structure(c(1L,
1L, 1L, 1L, 1L, 1L), .Label = c("Sunamganj", "Sheola", "Nakuagaon",
"Brahmanbaria", "Bhairab.Bazar"), class = c("ordered", "factor"
)), prod = c("original GSRPs", "original GSRPs", "original GSRPs",
"original GSRPs", "original GSRPs", "original GSRPs")), .Names = c("Gauge",
"variable", "value", "station", "prod"), row.names = c("1", "2",
"3", "4", "5", "6"), class = "data.frame")

Placing a point on a fitted geom_smooth

I have the following data.
> dput(testdat)
structure(list(Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Saline",
"Compound1"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 6L, 5L), .Label = c(".0032uM", ".016uM", ".08uM",
".4uM", "2uM", "10uM"), class = "factor"), Peak = c(1071.28430020209,
1458.23366806524, 2714.49856342393, 3438.83453920159, 3938.86391759534,
2980.10159109856), Area1 = c(3312.99749863082, 4798.35142770291,
9044.21362002965, 11241.1497514069, 11575.3444645068, 9521.69011119236
), SS1 = c(781.759834505516, 1191.6273298958, 2180.02082601411,
2601.33855989239, 2492.11886600804, 2185.39715502702), Conc = c(0.0032,
0.016, 0.08, 0.4, 10, 2), logconc = c(-2.49485002168009, -1.79588001734408,
-1.09691001300806, -0.397940008672038, 1, 0.301029995663981),
Conc_nm = c(3.2, 16, 80, 400, 10000, 2000), logconc_nm = c(0.505149978319906,
1.20411998265592, 1.90308998699194, 2.60205999132796, 4,
3.30102999566398)), .Names = c("Type", "Treatment", "Peak",
"Area1", "SS1", "Conc", "logconc", "Conc_nm", "logconc_nm"), row.names = 2:7, class = "data.frame")
I've fitted the data (Peak) with a nls regression using the following code:
fit = nls(Peak ~ SSlogis(logconc_nm,Asym,xmid,scal),data=testdat)
This gives me a nice fit and I'm happy with it so I plot the dose response as follows:
m <- coef(fit)
vallog <- as.numeric(format((m[3]),dig=4))
val =round(10^val,2)
ggplot(data = testdat,aes(logconc_nm,Peak))+
geom_point()+
scale_x_log10(breaks=round(testdat$logconc_nm,2))+
geom_smooth(method = 'nls',
formula = y ~ SSfpl(x,A,B,xmid,scal),se=FALSE)+
geom_vline(color='red',xintercept = vallog,alpha=.5)+
geom_text(aes(x=vallog,y=max(Peak),label = paste0('EC50',val,'nM')),color='red')#,angle=90)
My Question is:
How can I add a big ol' red point on the blue line where the blue and red line meet. I'd like to replace the need for the red line with the red dot. I know i have to use geom_point but because it's a fitted line, i can't just say x=vallog can i?

Resources