Related
I have a list of values and their corresponding row position provided as id. Essentially, given a vector of names I want to grab the row position from the list and assign the name it's from as a column. I can achieve the first part, but I cannot assign the names accordingly.
For example
predictors <- c('status', 'verbal')
row_predictor_value <-
lapply(row_predictor_data, function(x)
which(x$name %in% predictors, arr.ind = TRUE) %>% setNames(., predictors))
Produces the following result:
[[1]]
status verbal
2 4
[[2]]
status verbal
2 4
[[3]]
status verbal
2 4
However, this assigns the wrong name from where I got it.
It should produce instead:
[[1]]
status verbal
2 4
[[2]]
verbal status
2 4
[[3]]
status verbal
2 4
Here's some example data:
row_predictor_data <- list(structure(list(id = structure(1:4, .Label = c("1", "2",
"3", "4"), class = "factor"), name = structure(c(1L, 3L, 2L,
4L), .Label = c("(Intercept)", "income", "status", "verbal"), class = "factor")), class = "data.frame", row.names = c(NA,
-4L)), structure(list(id = structure(1:4, .Label = c("1", "2",
"3", "4"), class = "factor"), name = structure(c(1L, 4L, 2L,
3L), .Label = c("(Intercept)", "sex", "status", "verbal"), class = "factor")), class = "data.frame", row.names = c(NA,
-4L)), structure(list(id = structure(1:5, .Label = c("1", "2",
"3", "4", "5"), class = "factor"), name = structure(c(1L, 4L,
2L, 5L, 3L), .Label = c("(Intercept)", "income", "sex", "status",
"verbal"), class = "factor")), class = "data.frame", row.names = c(NA,
-5L)))
The issue with which() and %in% is that they return the matching positions in row order, regardless of the order of 'predictors'. If the order differs, as in the second list element, assigning the fixed 'predictors' vector as names labels the positions incorrectly. Instead, use the 'name' column itself to generate the names of the vector:
# For each data frame, keep the rows whose `name` is one of `predictors` and
# return their ids (as character), named by the matching `name` values.
lapply(row_predictor_data, function(x) {
  keep <- x[["name"]] %in% predictors
  setNames(as.character(x[["id"]][keep]), x[["name"]][keep])
})
NOTE: Here we return a named vector of 'id's
I have the following problem:
I have a table with many empty cells. But the first column does not contain empty cells. Like this:
# Example input: the first column is filled on every row, while the other
# columns hold empty strings on the rows where a label wraps onto extra lines.
df1 <- structure(list(variable = c("variable", "Mean (M)", "Standard",
"deviation", "(SD)", "1. Challenge"), M = c("M", "", "", "",
"", "3.06"), SD = c("SD", "", "", "", "", "1.08"), X1 = c("1",
"3.03", "1.09", "", "", ""), X2 = c("2", "2.19", "1.07", "",
"", "â\210’0.06"), X3 = c("3", "1.93", "1.10", "", "", "0.52***"
), X4 = c("4", "1.86", "1.04", "", "", "â\210’0.14*")), row.names = c(NA,
6L), class = "data.frame")
How it looks like now
Now I would like to paste the entries of the two cells under Standard (= "deviation" and "(SD)") into one cell and delete the rows containing only empty cells. So the output should look like this:
structure(list(variable = structure(c(2L, 3L, 1L), .Label = c("1. Challenge",
"Mean (M)", "Standard deviation SD", "variable"), class = "factor"),
M = structure(c(1L, 1L, 2L), .Label = c("", "3.06", "M"), class = "factor"),
SD = structure(c(1L, 1L, 2L), .Label = c("", "1.08", "SD"
), class = "factor"), `1` = structure(c(4L, 3L, 1L), .Label = c("",
"1", "1.09", "3.03"), class = "factor"), `2` = structure(c(3L,
1L, 4L), .Label = c("1.07", "2", "2.19", "â\210’0.06"), class = "factor"),
`3` = structure(3:1, .Label = c("0.52***", "1.10", "1.93",
"3"), class = "factor"), `4` = structure(c(2L, 1L, 4L), .Label = c("1.04",
"1.86", "4", "â\210’0.14*"), class = "factor")), row.names = 2:4, class = "data.frame")
How I would like it to look like
Can you help?
Thanks!
One option is to create a grouping column based on the occurrence of blanks ("") in all the columns except the first, then paste together the elements of each column when the number of rows in a group is greater than 1, and finally keep the distinct rows:
library(dplyr)
library(stringr)
# Rows whose columns other than the first are all blank continue the label of
# the row above: cumsum() only advances the group id on rows that hold data.
df1 %>%
  group_by(grp = cumsum(rowSums(.[-1] == "") != ncol(.) - 1)) %>%
  # Collapse multi-row groups into one space-separated value per column.
  # across() leaves the grouping column alone and replaces the superseded
  # mutate_at(vars(-group_cols()), ...).
  mutate(across(everything(),
                ~ if (n() > 1) str_c(.x, collapse = " ") else .x)) %>%
  ungroup() %>%
  type.convert(as.is = TRUE) %>%
  select(-grp) %>%
  # Every row of a collapsed group is now identical; keep one of each.
  distinct()
I have produced two different plots based on two different models: model and model1. Please find enclosed My Data below. I have attached the two plots:
Model
Model1
I wish to merge the two plots and keep the confidence bands at the same time. I have tried several solutions, e.g. rbind, but that does not seem to work - please see below.
I have used the following scripts to produce the two plots
model <- cph(Surv(os.neck,mors)~rcs(test),data=n)
model1 <- cph(Surv(os.neck,mors)~rcs(test),data=n1)
j <- ggplot(Predict(model, fun=exp), colfill = "blue")
k <- ggplot(Predict(model1, fun=exp), colfill = "yellow")
I have tried rbind:
e <- Predict(model, fun=exp, conf.int = TRUE)
f <- Predict(model1, fun=exp, conf.int = TRUE)
j <- ggplot(rbind(e,f))
Which gave this:
rbind()
My data:
n <- subset(w, w$stadie %in% 1:2)
n1 <- subset(w, w$stadie %in% 3:5)
The requested dput(out) from the comments
w <- structure(list(model = c("1", "1", "1", "1", "1", "1", "1", "1",
"1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "1", "2",
"2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2", "2",
"2", "2", "2", "2", "2", "2"), test = c(0.0438735177865613, 0.0465676207122569,
0.0492617236379526, 0.0519558265636483, 0.0546499294893439, 0.0573440324150396,
0.0600381353407353, 0.062732238266431, 0.0654263411921266, 0.0681204441178223,
0.070814547043518, 0.0735086499692136, 0.0762027528949093, 0.078896855820605,
0.0815909587463007, 0.0842850616719963, 0.086979164597692, 0.0896732675233877,
0.0923673704490833, 0.095061473374779, 0.05, 0.0530569514237856,
0.0561139028475712, 0.0591708542713568, 0.0622278056951424, 0.065284757118928,
0.0683417085427136, 0.0713986599664992, 0.0744556113902848, 0.0775125628140703,
0.0805695142378559, 0.0836264656616415, 0.0866834170854271, 0.0897403685092127,
0.0927973199329983, 0.0958542713567839, 0.0989112227805695, 0.101968174204355,
0.105025125628141, 0.108082077051926), yhat = c(0.715524721809984,
0.72420520893997, 0.732895287854242, 0.741495950465592, 0.749903690905934,
0.758010700841758, 0.765705214141122, 0.772872009692537, 0.779393079520142,
0.785148467039571, 0.79001727733411, 0.793878857700365, 0.796614142441177,
0.798107151024956, 0.798246668871875, 0.796979824770716, 0.794412433838086,
0.790683064226291, 0.785933397797749, 0.780306386213083, 1.24887346414771,
1.12142387236568, 1.00744333341272, 0.906978784944319, 0.819807522848923,
0.745379660125369, 0.682977886151413, 0.631846830283734, 0.591296955987878,
0.560790614744859, 0.53975355731851, 0.52685030147002, 0.520878199524915,
0.520957917193064, 0.526437601275528, 0.53682068603444, 0.551708849922178,
0.570754454105439, 0.593618741429514, 0.619933518450193), lower = c(0.445870969928758,
0.472487603995491, 0.498645159577579, 0.523317755828918, 0.545270747924011,
0.563214260495099, 0.576107648755599, 0.583517928079882, 0.585795811114823,
0.583918701876133, 0.579131268180072, 0.572630973080174, 0.565412209767786,
0.558237952034289, 0.551671245622871, 0.546072898734981, 0.541548416151744,
0.538098574671309, 0.535672640626991, 0.534183860233478, 0.613882362074539,
0.611611984419279, 0.601234738035742, 0.579326232945668, 0.543582975437934,
0.496000647093785, 0.443637816386947, 0.39437687025085, 0.353159479619957,
0.321944706132161, 0.30083406381699, 0.288326373517578, 0.282948308375769,
0.283624310505754, 0.289563062775844, 0.300128054614955, 0.314709399887597,
0.332603569457389, 0.352917102130059, 0.374528152852913), upper = c(1.14825961332055,
1.11002527943736, 1.07718984661152, 1.05063556210888, 1.03133268706487,
1.02018052967182, 1.01769951541058, 1.02367230657634, 1.03697151956046,
1.05572593121937, 1.07769573631852, 1.10061046351294, 1.12235654089946,
1.14104571750444, 1.1550316414364, 1.16317224781343, 1.16534569433533,
1.16183119131315, 1.15311341092747, 1.13982862772903, 2.54069024589915,
2.05619172538896, 1.68809618910841, 1.4199434956646, 1.23639702655924,
1.1201413566373, 1.05144055745915, 1.01230687460364, 0.990011907755607,
0.976832690818709, 0.968420593537629, 0.962698059052612, 0.958882208194717,
0.956889594556209, 0.957085290437296, 0.96017831230139, 0.967186411308867,
0.979426190201882, 0.998487202942342, 1.02613799355416), .predictor. = c("test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test", "test",
"test", "test", "test", "test", "test", "test", "test"), .set. = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("1", "2"), class = "factor")), .Names = c("model",
"test", "yhat", "lower", "upper", ".predictor.", ".set."), row.names = c("1.1",
"1.2", "1.3", "1.4", "1.5", "1.6", "1.7", "1.8", "1.9", "1.10",
"1.11", "1.12", "1.13", "1.14", "1.15", "1.16", "1.17", "1.18",
"1.19", "1.20", "2.201", "2.202", "2.203", "2.204", "2.205",
"2.206", "2.207", "2.208", "2.209", "2.210", "2.211", "2.212",
"2.213", "2.214", "2.215", "2.216", "2.217", "2.218", "2.219",
"2.220"), class = c("Predict", "data.frame"), info = structure(list(
Design = structure(list(label = structure("Set", .Names = ".set."),
units = structure("", .Names = ".set.")), .Names = c("label",
"units")), varying = ".set.", adjust = structure(list(`1` = NULL,
`2` = NULL), .Names = c("1", "2"))), .Names = c("Design",
"varying", "adjust")))
Thank you in advance,
C.
Here is a basic plot
ggplot(as.data.frame(out), aes(x = test)) +
geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .3) +
geom_line(aes(y = yhat, col = model))
We need as.data.frame(out) because out is of class Predict.
You could add another theme, change the fill and color, or add a meaningful title, subtitle, etc. SO is full of examples.
We can use the JCO palette from the ggsci package
library(ggsci)
ggplot(as.data.frame(out), aes(x = test)) +
geom_ribbon(aes(fill = model, ymin = lower, ymax = upper), alpha = .3) +
geom_line(aes(y = yhat, col = model)) +
scale_color_jco() +
scale_fill_jco()
To change legend labels do
... +
scale_color_jco(labels = c("A", "B")) +
scale_fill_jco(labels = c("A", "B"))
I have two data sets like below
df1<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(177.72258835,
0, 74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187,
25865.25, 28.67291878), x = c("1", "2", "3", "1", "2", "3", "1",
"2", "3"), y = c(177.72258835, 0, 74.438539625, 134.3410045,
48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878)), .Names = c("time",
"place", "key", "value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
df2<- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L,
3L, 3L), .Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"
), class = "factor"), key = c("boy1", "boy2", "boy3", "boy1",
"boy2", "boy3", "boy1", "boy2", "boy3"), value = c(58.852340736,
0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268, 38347.81965, 10.342042262244), x = c("1", "2",
"3", "1", "2", "3", "1", "2", "3"), y = c(58.852340736, 0, 21.291893740908,
42.92051958201, 72521.52726, 16.309811239722, 32.403556124268,
38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y"), row.names = c(NA, -9L), class = "data.frame")
I want to plot them together with df2 as the standard deviation for df1
when I plot df1, I do the following
library(ggplot2)
ggplot(df1, aes(x, y, col = key)) +
geom_point() +
scale_x_discrete(labels=c("first", "second", "third"), limits = c(1, 2,3)) +
facet_grid(time ~ .)
but now I want to have the second df as the standard deviation (i.e., the first y-value in df1 is 177.72259, so its standard deviation is the corresponding y-value in df2, which is 58.85234).
If I understand your question correctly, it sounds like you want to include error bars in your plot. This can be accomplished using only a single data frame, if you just add the standard deviation as an additional variable (here `sd`) like so:
df <- structure(list(time = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L),
.Label = c("24", "48", "72"), class = "factor"), place = structure(c(1L, 1L, 1L,
2L, 2L, 2L, 3L, 3L, 3L), .Label = c("B,C", "D,E", "F,G"), class = "factor"),
key = c("boy1", "boy2", "boy3", "boy1", "boy2", "boy3", "boy1", "boy2", "boy3"),
value = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726,
16.309811239722, 32.403556124268, 38347.81965, 10.342042262244),
x = c("1", "2", "3", "1", "2", "3", "1", "2", "3"), y = c(177.72258835, 0,
74.438539625, 134.3410045, 48915.1, 38.302204425, 97.32286187, 25865.25, 28.67291878),
sd = c(58.852340736, 0, 21.291893740908, 42.92051958201, 72521.52726, 16.309811239722,
32.403556124268,38347.81965, 10.342042262244)), .Names = c("time", "place", "key",
"value", "x", "y", "sd"), row.names = c(NA, -9L), class = "data.frame")
Then you can add error bars to the plot using geom_errorbar(), as follows (I am borrowing the "free-y" scale trick from @jazzurro's answer):
# Points at y with error bars spanning y +/- sd, one panel per `time` value.
ggplot(df, aes(x, y, col = key)) +
  geom_point() +
  geom_errorbar(aes(ymin = y - sd, ymax = y + sd)) +
  # x holds the strings "1", "2", "3", so the discrete limits are strings too.
  scale_x_discrete(labels = c("first", "second", "third"),
                   limits = c("1", "2", "3")) +
  # A single facet specification with a free y-scale per panel; `scales` is
  # the full argument name (no reliance on partial matching).
  facet_grid(time ~ ., scales = "free_y")
Unfortunately your data is a little skewed, in that some measurements are way larger in magnitude than others (especially at time=48 and time=72); you may want to consider a log transformation so that the error bars for the smaller observations do not appear so negligible.
Here is one way for you. I changed the shape of the sd in the second geom_point(). Since the y-scale has a wide range for two of the plots, you see points overlapping.
# Overlay df1 (default points) and df2 (shape 22, semi-transparent) in one
# panel per `time` value, each panel with its own y-scale.
ggplot() +
  geom_point(data = df1, aes(x, y, col = key)) +
  geom_point(data = df2, aes(x, y, col = key), shape = 22, alpha = 0.3) +
  # x holds the strings "1", "2", "3", so the discrete limits are strings too.
  scale_x_discrete(labels = c("first", "second", "third"),
                   limits = c("1", "2", "3")) +
  # `scales` is the full argument name (no reliance on partial matching).
  facet_grid(time ~ ., scales = "free_y")
I'm trying to plot against dates in R. I've run into trouble trying to create vertical lines against a plot that I already have. All of the different formats that I try either result in nothing showing up on the plot, or a line at 1970 (the default date). The year-data is in the form yyyy-mm-dd. For example, "1914-07-01".
I've also tried inputting these dates in a data.frame, but got the same problem.
I've been trying to make a reproducible example, but I haven't seen any example datasets to do so with, and got frustrated trying to create one... sorry about that. Here's the relevant code:
ggplot(M,aes(x=date,color=origin,y=value)) +
geom_point() +
geom_line() +
facet_grid(topic~origin) +
geom_vline(xintercept=as.numeric(as.Date("1914-07-01")))
Everything plots correctly without the addition of the final line.
Edit: here's the result of dput(head(M)):
structure(list(topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24",
"25"), class = "factor"), date = structure(c(-1767196800, -1765987200,
-1764518400, -1763308800, -1762099200, -1760889600), class = c("POSIXct",
"POSIXt"), tzone = ""), origin = structure(c(2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Blast", "The_Egoist"), class = "factor"),
value = c(6.69960398194253e-07, 7.48757156068349e-07, 7.04834977806836e-07,
7.10226526475778e-07, 6.8295233938925e-07, 6.16466066169137e-07
)), .Names = c("topic", "date", "origin", "value"), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -6L), vars = list(
topic, date), drop = TRUE, indices = list(0L, 1L, 2L, 3L,
4L, 5L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "24", "25"), class = "factor"), date = structure(c(-1767196800,
-1765987200, -1764518400, -1763308800, -1762099200, -1760889600
), class = c("POSIXct", "POSIXt"), tzone = "")), class = "data.frame", row.names = c(NA,
-6L), .Names = c("topic", "date"), vars = list(topic, date)))
You were very close. The problem is that your data is stored as POSIXct, while you were converting the intercept to Date. To fix it, convert to POSIXct instead:
ggplot(M,aes(x=date,color=origin,y=value)) +
geom_point() +
geom_line() +
facet_grid(topic~origin) +
geom_vline(xintercept=as.numeric(as.POSIXct("1914-07-01")))
You can see the difference in the calls:
as.numeric(as.POSIXct("1914-07-01"))
[1] -1751569200
as.numeric(as.Date("1914-07-01"))
[1] -20273
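This explains why the intercept was so close to 1970 (the zero point for both classes): the Date value is a count of days, which is tiny when interpreted on the POSIXct scale of seconds.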