Placing a point on a fitted geom_smooth - r

I have the following data.
> dput(testdat)
structure(list(Type = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Saline",
"Compound1"), class = "factor"), Treatment = structure(c(1L,
2L, 3L, 4L, 6L, 5L), .Label = c(".0032uM", ".016uM", ".08uM",
".4uM", "2uM", "10uM"), class = "factor"), Peak = c(1071.28430020209,
1458.23366806524, 2714.49856342393, 3438.83453920159, 3938.86391759534,
2980.10159109856), Area1 = c(3312.99749863082, 4798.35142770291,
9044.21362002965, 11241.1497514069, 11575.3444645068, 9521.69011119236
), SS1 = c(781.759834505516, 1191.6273298958, 2180.02082601411,
2601.33855989239, 2492.11886600804, 2185.39715502702), Conc = c(0.0032,
0.016, 0.08, 0.4, 10, 2), logconc = c(-2.49485002168009, -1.79588001734408,
-1.09691001300806, -0.397940008672038, 1, 0.301029995663981),
Conc_nm = c(3.2, 16, 80, 400, 10000, 2000), logconc_nm = c(0.505149978319906,
1.20411998265592, 1.90308998699194, 2.60205999132796, 4,
3.30102999566398)), .Names = c("Type", "Treatment", "Peak",
"Area1", "SS1", "Conc", "logconc", "Conc_nm", "logconc_nm"), row.names = 2:7, class = "data.frame")
I've fitted the data (Peak) with a nls regression using the following code:
fit = nls(Peak ~ SSlogis(logconc_nm,Asym,xmid,scal),data=testdat)
This gives me a nice fit and I'm happy with it so I plot the dose response as follows:
m <- coef(fit)
vallog <- as.numeric(format((m[3]),dig=4))
val =round(10^val,2)
ggplot(data = testdat,aes(logconc_nm,Peak))+
geom_point()+
scale_x_log10(breaks=round(testdat$logconc_nm,2))+
geom_smooth(method = 'nls',
formula = y ~ SSfpl(x,A,B,xmid,scal),se=FALSE)+
geom_vline(color='red',xintercept = vallog,alpha=.5)+
geom_text(aes(x=vallog,y=max(Peak),label = paste0('EC50',val,'nM')),color='red')#,angle=90)
My Question is:
How can I add a big ol' red point on the blue line where the blue and red line meet. I'd like to replace the need for the red line with the red dot. I know i have to use geom_point but because it's a fitted line, i can't just say x=vallog can i?

Related

connect points within position_dodged factor x-axis in ggplot2

I'm trying to add significance annotations to an errorbar plot with a factor x-axis and dodged groups within each level of the x-axis. It is a similar but NOT identical use case to this
My base errorbar plot is:
library(ggplot2)
library(dplyr)
pres_prob_pd = structure(list(x = structure(c(1, 1, 1, 2, 2, 2, 3, 3, 3), labels = c(`1` = 1,
`2` = 2, `3` = 3)), predicted = c(0.571584427222816, 0.712630712634987,
0.156061969566517, 0.0162388386564817, 0.0371877245103279, 0.0165022541901018,
0.131528946944238, 0.35927812866896, 0.0708662221985375), std.error = c(0.355802875027348,
0.471253661425626, 0.457109887762665, 0.352871728451576, 0.442646879181155,
0.425913568532558, 0.376552208691762, 0.48178172708116, 0.451758041335245
), conf.low = c(0.399141779923204, 0.496138837620712, 0.0701919316506831,
0.00819832576725402, 0.0159620304815404, 0.00722904089045731,
0.0675129352870401, 0.17905347369819, 0.030504893442457), conf.high = c(0.728233665534388,
0.861980236164486, 0.311759350126477, 0.031911364587827, 0.0842227723261319,
0.0372248587668487, 0.240584344249407, 0.590437963881823, 0.156035177669385
), group = structure(c(1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain",
"neutral", "uncertain"), class = "factor"), group_col = structure(c(1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), .Label = c("certain", "neutral",
"uncertain"), class = "factor"), language = structure(c(2L, 2L,
2L, 1L, 1L, 1L, 3L, 3L, 3L), .Label = c("english", "dutch", "german"
), class = "factor"), top = c(0.861980236164486, 0.861980236164486,
0.861980236164486, 0.0842227723261319, 0.0842227723261319, 0.0842227723261319,
0.590437963881823, 0.590437963881823, 0.590437963881823)), row.names = c(NA,
-9L), groups = structure(list(language = structure(1:3, .Label = c("english",
"dutch", "german"), class = "factor"), .rows = structure(list(
4:6, 1:3, 7:9), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr", "list"))), row.names = c(NA, 3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
#dodge
pd = position_dodge(.75)
#plot
p = ggplot(pres_prob_pd,aes(x=language,y=predicted,color=group,shape=group)) +
geom_point(position=pd,size=2) +
geom_errorbar(aes(ymax=conf.high,ymin=conf.low),width=.125,position=pd)
p
What I want to do is annotate the plot such that the contrasts between group within each level of language are annotated for significance. I've plotted points representing the relevant contrasts and (toy) sig. annotations as follows:
#bump function
f = function(x){
v = c()
bump=0.025
constant = 0
for(i in x){
v = c(v,i+constant+bump)
bump = bump + 0.075
}
v
}
#create contrasts
combs = data.frame(gtools::combinations(3, 2, v=c("certain", "neutral", "uncertain"), set=F, repeats.allowed=F)) %>%
mutate(contrast=c("cont_1","cont_2","cont_3"))
combs = rbind(combs %>% mutate(language = 'english'),
combs %>% mutate(language='dutch'),
combs %>% mutate(language = "german")) %>%
left_join(select(pres_prob_pd,language:top)%>%distinct(),by='language') %>%
group_by(language)
#long transform and calc y_pos
combs_long = mutate(combs,y_pos=f(top)) %>% gather(long, probability, X1:X2, factor_key=TRUE) %>% mutate(language=factor(language,levels=c("english","dutch","german"))) %>%
arrange(language,contrast)
#back to wide
combs_wide =combs_long %>% spread(long,probability)
combs_wide$p = rep(c('***',"*","ns"),3)
#plot
p +
geom_point(data=combs_long,
aes(x = language,
color=probability,
shape=probability,
y=y_pos),
inherit.aes = T,
position=pd,
size=2) +
geom_text(data=combs_wide,
aes(x=language,
label=p,
y=y_pos+.025,
group=X1),
color='black',
position=position_dodge(.75),
inherit.aes = F)
What I am failing to achieve is plotting a line connecting each of the contrasts of group within each level of language, as is standard when annotating significant group-wise differences. Any help much appreciated!

ggraph edges are connecting wrong?

I am working on generating a hierarchical edge plot where the edge's color/transparency/thickness varies by the column (pvalue) in my connect dataframe, however the color/transparency/thickness of the edges in the plot I generated don't always map to the values in column (pvalue). For example, subgroup1 and subgroup4 should have the strongest thickest connection (pvalue is E-280), when in fact they don't, rather the connection between subgroup3 and subgroup4 looks to be strongest.
This data generates a reproducible example:
> dput(vertices)
structure(list(name = structure(c(3L, 1L, 2L, 4L, 5L, 6L, 7L), .Label = c("gp1",
"gp2", "origin", "subgroup1", "subgroup2", "subgroup3", "subgroup4"
), class = "factor"), id = c(NA, NA, NA, 1L, 2L, 3L, 4L), angle = c(NA,
NA, NA, 0, -90, 0, -90), hjust = c(NA, NA, NA, 1, 1, 1, 1)), row.names = c(NA,
-7L), class = "data.frame")
> dput(hierarchy)
structure(list(from = structure(c(3L, 3L, 1L, 1L, 2L, 2L), .Label = c("gp1",
"gp2", "origin"), class = "factor"), to = structure(1:6, .Label = c("gp1",
"gp2", "subgroup1", "subgroup2", "subgroup3", "subgroup4"), class = "factor")), class = "data.frame", row.names = c(NA,
-6L))
> dput(connect)
structure(list(from = structure(c(1L, 1L, 2L, 3L, 1L, 2L, 3L,
1L), .Label = c("subgroup1", "subgroup2", "subgroup3"), class = "factor"),
to = structure(c(1L, 2L, 2L, 1L, 3L, 3L, 3L, 3L), .Label = c("subgroup2",
"subgroup3", "subgroup4"), class = "factor"), pvalue = c(1.68e-204,
1.59e-121, 9.32e-73, 9.32e-73, 1.59e-21, 9.32e-50, 9.32e-40,
9.32e-280)), class = "data.frame", row.names = c(NA, -8L))
and this is the code I used to make this example plot:
from <- match( connect$from, vertices$name)
to <- match( connect$to, vertices$name)
col <- connect$pvalue
#Let's add information concerning the label we are going to add: angle, horizontal adjustement and potential flip
#calculate the ANGLE of the labels
vertices$id <- NA
myleaves <- which(is.na( match(vertices$name, hierarchy$from) ))
nleaves <- length(myleaves)
vertices$id[ myleaves ] <- seq(1:nleaves)
vertices$angle <- 90 - 360 * vertices$id / nleaves
# calculate the alignment of labels: right or left
# If I am on the left part of the plot, my labels have currently an angle < -90
vertices$hjust <- ifelse( vertices$id < 41, 1, 0)
# flip angle BY to make them readable
vertices$angle <- ifelse(vertices$angle < -90, vertices$angle+180, vertices$angle)
mygraph <- graph_from_data_frame( hierarchy, vertices=vertices )
ggraph(mygraph, layout = 'dendrogram', circular = TRUE) +
geom_node_point(aes(filter = leaf, x = x*1.05, y=y*1.05), size = 2, alpha = 0.8) +
geom_conn_bundle(data = get_con(from = from, to = to, col = col), aes(colour=col, alpha = col, width = col)) +
geom_node_text(aes(x = x*1.1, y=y*1.1, filter = leaf, label=name, angle = angle, hjust=hjust), size=3.5, alpha=0.6) +scale_edge_color_continuous(trans = "log",low="red", high="yellow")+ scale_edge_alpha_continuous(trans = "log",range = c(1, 0.1)) +scale_edge_width_continuous(trans = "log", range = c(4, 1))+
theme_void()
I think there is wrong mapping somewhere but I can't figure out where. Thank you so much for your input!
I believe there is a bug in this library. Rearranging the input data by the column of choice (pvalue in my case) in an ascending order helped but did not solve the issue.
connect_new <- arrange(connect, pvalue)
and I found the solution in a github issue submitted by another user. The subgroups within each group need to be ordered alphabetically in the hierarchy and vertices file. In addition, in the connect dataframe, the subgroups need to be ordered following the same order in the hierarchy and vertices file. Thanks to zhuxr11

Creating multiple graphs based upon the column names

This is my first question on stackoverlow, please correct me if I am not following correct question protocols.
I am trying to create some graphs for data that has been collected over three time points (time 1, time 2, time 3) which equates to X1..., X2... and X3... at the beginning of column names. The graphs are also separated by the column $Group from the data frame.
I have no problem creating the graphs, I just have many variables (~170) and am wanting to compare time 1 vs time 2, time 2 vs time 3, etc. so am trying to work a shortcut to be running this kind of code rather than having to type out each one individually.
As indicated above, I have created variable names like X1... X2... which indicate the time that the variable was recorded i.e. X1BCSTCAT = time 1; X2BCSTCAT = time 2; X3BCSTCAT = time 3. Here is a small sample of what my data looks like:
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
Here is some working code to create one graph using ggplot for time 1 vs time 2 data on one variable:
library(ggplot2)
p <- ggplot(df, aes(x=df$X1BCSTCAT, y=df$X2BCSTCAT, shape = df$Group, color = df$Group)) +
geom_point() + geom_smooth(method=lm, aes(fill=df$Group), fullrange = TRUE) +
labs(title="BCSTCAT", x="Time 1", y = "Time 2") +
scale_color_manual(name = "Group",labels = c("C8","TC"),values = c("blue", "red")) +
scale_shape_manual(name = "Group",labels = c("C8","TC"),values = c(16, 17)) +
scale_fill_manual(name = "Group",labels = c("C8", "TC"),values = c("light blue", "pink"))
So I am really trying to create some kind of a shortcut where R will cycle through and match up variable names X1... vs X2... and so on and create the graphs. I assume there must be some way to plot either based upon matching column numbers e.g. df[,7] vs df[,10] and iterating through this process or plotting by actually matching the names (where the only difference in variable names is the number which indicates time).
I have previously cycled through creating individual graphs using the lapply function, but have no idea where to even start with trying to do this one.
A solution using tidyeval approach. We will need ggplot2 v3.0.0 (remember to restart your R session)
install.packages("ggplot2", dependencies = TRUE)
First we build a function that takes column and group names as inputs. Note the use of rlang::sym, rlang::quo_name & !!.
Then create 2 name vectors for x- & y- values so that we can loop through them simultaneously using purrr::map2.
library(rlang)
library(tidyverse)
df <- structure(list(ID = structure(1:6, .Label = c("101","102","103","118","119","120"), class = "factor"),
Group = structure(c(1L,1L,1L,2L,2L,2L), .Label = c("C8","TC"), class = "factor"),
Wave = structure(c(1L, 2L, 3L, 4L, 1L, 2L), .Label = c("A","B","C","D"), class = "factor"),
Yr = structure(c(1L, 2L, 1L, 2L, 1L, 2L), .Label = c("3","5"), class = c("ordered", "factor")),
Age.Yr. = c(10.936,10.936, 9.311, 10.881, 10.683, 11.244),
Training..hr. = c(10.667,10.333, 10.667, 10.333, 10.333, 10.333),
X1BCSTCAT = c(-0.156,0.637,-1.133,0.637,2.189,1.229),
X1BCSTCR = c(0.484,0.192, -1.309, 0.912, 1.902, 0.484),
X1BCSTPR = c(-1.773,0.859, 0.859, 0.12, -1.111, 0.12),
X2BCSTCAT = c(1.006, -0.379,-1.902, 0.444, 2.074, 1.006),
X2BCSTCR = c(0.405, -0.457,-1.622, 1.368, 1.981, 0.168),
X2BCSTPR = c(-0.511, -0.036,2.189, -0.036, -0.894, 0.949),
X3BCSTCAT = c(1.18, -1.399,-1.399, 1.18, 1.18, 1.18),
X3BCSTCR = c(0.967, -1.622, -1.622,0.967, 0.967, 1.255),
X3BCSTPR = c(-1.282, -1.282, 1.539,1.539, 0.792, 0.792)),
row.names = c(1L, 2L, 3L, 4L, 5L,8L), class = "data.frame")
# define a function that accept strings as input
pair_plot <- function(x_var, y_var, group_var) {
# convert strings to symbols
x_var <- rlang::sym(x_var)
y_var <- rlang::sym(y_var)
group_var <- rlang::sym(group_var)
# unquote symbols using !!
ggplot(df, aes(x = !! x_var, y = !! y_var, shape = !! group_var, color = !! group_var)) +
geom_point() + geom_smooth(method = lm, aes(fill = !! group_var), fullrange = TRUE) +
labs(title = "BCSTCAT", x = rlang::quo_name(x_var), y = rlang::quo_name(y_var)) +
scale_color_manual(name = "Group", labels = c("C8", "TC"), values = c("blue", "red")) +
scale_shape_manual(name = "Group", labels = c("C8", "TC"), values = c(16, 17)) +
scale_fill_manual(name = "Group", labels = c("C8", "TC"), values = c("light blue", "pink")) +
theme_bw()
}
# Test if the new function works
pair_plot("X1BCSTCAT", "X2BCSTCAT", "Group")
# Create 2 parallel lists
list_x <- colnames(df)[-c(1:6, (ncol(df)-2):(ncol(df)))]
list_x
#> [1] "X1BCSTCAT" "X1BCSTCR" "X1BCSTPR" "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR"
list_y <- lead(colnames(df)[-(1:6)], 3)[1:length(list_x)]
list_y
#> [1] "X2BCSTCAT" "X2BCSTCR" "X2BCSTPR" "X3BCSTCAT" "X3BCSTCR" "X3BCSTPR"
# Loop through 2 lists simultaneously
# Supply inputs to pair_plot function using purrr::map2
map2(list_x, list_y, ~ pair_plot(.x, .y, "Group"))
Sample outputs:
#> [[1]]
#>
#> [[2]]
Created on 2018-05-24 by the reprex package (v0.2.0).

Mosaic Plot does not fit

The labels for the mosaic plot don't fit the screen ( they're partially cut) so id like to move/shift the plot to the right so that the labels fully fit -- tried using ''par'' function but to no avail -- any ideas?
structure(list(Road_Type = structure(c(4L, 4L, 4L, 4L, 4L, 4L
), .Label = c("Roundabout", "One way Street", "Dual Carriageway",
"Single carriageway", "Slip Road"), class = "factor"), Accident_Severity_combined = structure(c(2L,
2L, 2L, 2L, 1L, 2L), .Label = c("Serious", "Slight"), class = "factor")), .Names = c("Road_Type",
"Accident_Severity_combined"), row.names = c(NA, 6L), class = "data.frame")
>
mos <- mosaic(~Road_Type + Accident_Severity_combined, data = uk1, shade = TRUE, legend = TRUE,
labeling_args = list(set_varnames = c(Accident_Severity_combined="Gender", Road_Type="survival"),
highlighting_fill = c("darlblue","red")
labeling=labeling_border(
rot_labels = c(90, 0, 90, 0),
just_labels=c("left","left","right","right"),
tl_varnames = FALSE,
gp_labels = gpar(fontsize = 9)))

I'm trying to recreate a specific plot in R (ggplot2)

Despite having tried many types of lines, I just cannot get the same result.
Here is how I need the lines to look:
And this is how I got it so far (and am stuck at):
Here is my code:
myData <- read.csv(file.choose(), header = TRUE)
require(ggplot2)
g <- ggplot(myData, aes(speed, resp))
g + geom_point(aes(color = padlen, shape = padlen)) +
geom_smooth(method = "lm", formula = y ~ splines::bs(x, df = 4, degree = 2), se = FALSE, aes(color = padlen), linetype = "solid", size = 1) +
scale_color_manual(values = c("red", "black")) +
scale_shape_manual(values = c(2, 1))
And here is the database (dput):
myData <- structure(list(resp = c(0, 0.125, 0.583333333, 1, 0.958333333,
1, 0, 0.041666667, 0.25, 0.916666667, 1, 1), padlen = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("big",
"small"), class = "factor"), speed = c(2L, 3L, 4L, 5L, 6L, 7L,
2L, 3L, 4L, 5L, 6L, 7L)), .Names = c("resp", "padlen", "speed"
), class = "data.frame", row.names = c(NA, -12L))
I have also tried all these polynomial models (and others), but none works:
## Quadratic model
lmQuadratic <- lm(formula = y ~ x + I(x^2),
data = fpeg)
## Cubit model
lmCubic <- lm(formula = y ~ x + I(x^2) + I(x^3),
data = fpeg)
## Fractional polynomial model
lmFractional <- lm(formula = y ~ x + I(x^2) + I(x^(1/2)),
data = fpeg)
So, what should I do/not do to get my lines the same as the original ones? Thanks.
Instead of using method = "lm" in the geom_smooth-function use the glm with the binomial family. The glm-smooth gives you only values between 0 and 1 (what you want to have, because you're dealing with proportion).
library(ggplot2)
ggplot(myData, aes(speed, resp)) +
geom_point(aes(color = padlen, shape = padlen)) +
geom_smooth(method = "glm", method.args = list(family = "binomial"),
se = FALSE, aes(color = padlen), linetype = "solid", size = 1) +
scale_color_manual(values = c("red", "black")) +
scale_shape_manual(values = c(2, 1)) +
theme_classic()
Data
myData <-
structure(list(resp = c(0, 0.125, 0.583333333, 1, 0.958333333, 1, 0,
0.041666667, 0.25, 0.916666667, 1, 1),
padlen = c("small", "small", "small", "small", "small",
"small", "big", "big", "big", "big", "big", "big"),
speed = c(2L, 3L, 4L, 5L, 6L, 7L, 2L, 3L, 4L, 5L, 6L, 7L)),
.Names = c("resp", "padlen", "speed"), class = "data.frame",
row.names = c(NA, -12L))

Resources