Fixing the data order in facets in ggplot - r

I have the a problem in ploting the data with ggplot. I couldn't make the data inside each facet to be ordered correctly. my sample data is:
data <- structure(list(Parameter = c("{0.1, 0.7, 0.0, 0.2}", "{0.2, 0.7, 0.0, 0.1}",
"{0.3, 0.7, 0.0, 0.0}", "{0.0, 0.7, 0.0, 0.3}", "{0.0, 0.6, 0.0, 0.4}",
"{0.1, 0.6, 0.0, 0.3}", "{0.2, 0.6, 0.0, 0.2}", "{0.3, 0.6, 0.0, 0.1}",
"{0.4, 0.6, 0.0, 0.0}", "{0.1, 0.3, 0.2, 0.4}", "{0.1, 0.7, 0.0, 0.2}",
"{0.2, 0.7, 0.0, 0.1}", "{0.3, 0.7, 0.0, 0.0}", "{0.0, 0.7, 0.0, 0.3}",
"{0.1, 0.3, 0.2, 0.4}", "{0.1, 0.5, 0.1, 0.3}", "{0.2, 0.5, 0.1, 0.2}",
"{0.3, 0.3, 0.2, 0.2}", "{0.4, 0.3, 0.2, 0.1}", "{0.5, 0.3, 0.2, 0.0}",
"{0.1, 0.6, 0.0, 0.3}", "{0.2, 0.6, 0.0, 0.2}", "{0.4, 0.6, 0.0, 0.0}",
"{0.3, 0.6, 0.0, 0.1}", "{0.0, 0.6, 0.0, 0.4}", "{0.1, 0.7, 0.0, 0.2}",
"{0.2, 0.7, 0.0, 0.1}", "{0.3, 0.7, 0.0, 0.0}", "{0.0, 0.7, 0.0, 0.3}",
"{0.1, 0.4, 0.1, 0.4}"), Map = c(0.19608779, 0.19608779, 0.19581,
0.19490847, 0.18973944, 0.18943608, 0.18943608, 0.18915829, 0.18915829,
0.18856215, 0.20025444, 0.20025444, 0.19997665, 0.19907513, 0.19272882,
0.19250154, 0.19250154, 0.19249977, 0.19249977, 0.19249977, 0.19608779,
0.19608779, 0.19585875, 0.19581, 0.1952106, 0.19046278, 0.19046278,
0.19023374, 0.18928346, 0.18833488), Sigma = c("Sigma = 370",
"Sigma = 370", "Sigma = 370", "Sigma = 370", "Sigma = 370", "Sigma = 370",
"Sigma = 370", "Sigma = 370", "Sigma = 370", "Sigma = 370", "Sigma = 380",
"Sigma = 380", "Sigma = 380", "Sigma = 380", "Sigma = 380", "Sigma = 380",
"Sigma = 380", "Sigma = 380", "Sigma = 380", "Sigma = 380", "Sigma = 390",
"Sigma = 390", "Sigma = 390", "Sigma = 390", "Sigma = 390", "Sigma = 390",
"Sigma = 390", "Sigma = 390", "Sigma = 390", "Sigma = 390")), .Names = c("Parameter",
"Map", "Sigma"), class = "data.frame", row.names = c(NA, -30L
))
data
Parameter Map Sigma
1 {0.1, 0.7, 0.0, 0.2} 0.1960878 Sigma = 370
2 {0.2, 0.7, 0.0, 0.1} 0.1960878 Sigma = 370
3 {0.3, 0.7, 0.0, 0.0} 0.1958100 Sigma = 370
4 {0.0, 0.7, 0.0, 0.3} 0.1949085 Sigma = 370
5 {0.0, 0.6, 0.0, 0.4} 0.1897394 Sigma = 370
6 {0.1, 0.6, 0.0, 0.3} 0.1894361 Sigma = 370
7 {0.2, 0.6, 0.0, 0.2} 0.1894361 Sigma = 370
8 {0.3, 0.6, 0.0, 0.1} 0.1891583 Sigma = 370
9 {0.4, 0.6, 0.0, 0.0} 0.1891583 Sigma = 370
10 {0.1, 0.3, 0.2, 0.4} 0.1885622 Sigma = 370
11 {0.1, 0.7, 0.0, 0.2} 0.2002544 Sigma = 380
12 {0.2, 0.7, 0.0, 0.1} 0.2002544 Sigma = 380
13 {0.3, 0.7, 0.0, 0.0} 0.1999767 Sigma = 380
14 {0.0, 0.7, 0.0, 0.3} 0.1990751 Sigma = 380
15 {0.1, 0.3, 0.2, 0.4} 0.1927288 Sigma = 380
16 {0.1, 0.5, 0.1, 0.3} 0.1925015 Sigma = 380
17 {0.2, 0.5, 0.1, 0.2} 0.1925015 Sigma = 380
18 {0.3, 0.3, 0.2, 0.2} 0.1924998 Sigma = 380
19 {0.4, 0.3, 0.2, 0.1} 0.1924998 Sigma = 380
20 {0.5, 0.3, 0.2, 0.0} 0.1924998 Sigma = 380
21 {0.1, 0.6, 0.0, 0.3} 0.1960878 Sigma = 390
22 {0.2, 0.6, 0.0, 0.2} 0.1960878 Sigma = 390
23 {0.4, 0.6, 0.0, 0.0} 0.1958587 Sigma = 390
24 {0.3, 0.6, 0.0, 0.1} 0.1958100 Sigma = 390
25 {0.0, 0.6, 0.0, 0.4} 0.1952106 Sigma = 390
26 {0.1, 0.7, 0.0, 0.2} 0.1904628 Sigma = 390
27 {0.2, 0.7, 0.0, 0.1} 0.1904628 Sigma = 390
28 {0.3, 0.7, 0.0, 0.0} 0.1902337 Sigma = 390
29 {0.0, 0.7, 0.0, 0.3} 0.1892835 Sigma = 390
30 {0.1, 0.4, 0.1, 0.4} 0.1883349 Sigma = 390
And this is my R code:
ggplot (data, aes(x=Map, y=reorder(Parameter, Map))) +
geom_segment(aes(yend=Parameter), xend=0, colour="grey50") +
geom_point(size=3) + xlab("") + ylab("") +
facet_grid(Sigma ~ ., scales="free_y", space="free_y")+
theme(axis.text.x=element_text(angle = -70, hjust = 0))+
scale_x_continuous(breaks =Br,expand = waiver())
the gives me the following plot:
as you can see inside each facet the data is not ordered in the right way. Any help is very much appreciated.

A not so elegant way could be
library(ggplot2)
library(grid)
library(gridExtra)
Br <- seq(0.19, 0.20, by = 0.0025)
df1 <- split(data, data$Sigma)
df2 <- lapply(df1, function(x) x[order(x$Map), ])
grphs <- lapply(df2, function(dfx){
ggplot (dfx, aes(x=Map, y=reorder(Parameter, Map))) +
geom_segment(aes(yend = Parameter), xend =0, colour = "grey50") +
geom_point(size = 3) + xlab("") +
ylab("") +
facet_grid(Sigma ~ ., scales="free_y", space="free_y") +
theme(axis.text.x=element_text(angle = -70, hjust = 0)) +
scale_x_continuous(breaks = Br, expand = waiver(),
limits=c(0.1880, 0.2005))
}
)
grid.arrange(grphs[[1]], grphs[[2]], grphs[[3]], ncol = 1)

Related

Iterate over character vector in dplyR summarise and use it to assign new column names

I have a character vector with the name of my variables:
variables -> c("w", "x", "y", "z")
I need to create a function that calculates the mean of every variable for a specified parameter (as below for alpha). However, it doesn't rename the columns with the iterating variable names and does not reduce the alpha columns down to one on the left.
calc <- function(df,
parameter,
iteration,
variables){
variable <- sym(variables[iteration])
mean <- df %>% group_by(.dots = parameter) %>%
summarise(variable = mean(!!variable),sd_variable = sd(!!variable))
return(mean)
}
means <- map_dfc(1:length(variables), ~calc(df = input,
parameter = "alpha",
iteration = .,
variables = variables))
Ideally the output df (means) would look like this:
alpha | w | sd_w | x | sd_x | y | sd_y | z | sd_z |
Here is what the input df looks like:
structure(list(time = c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 999.5, 999.6,
999.7, 999.8, 999.9, 1000), w = c(10, 10.0057192322758, 10.0198266325956,
10.040096099625, 10.0637654242843, 10.087779652849, 0.889708853982268,
0.890916575744663, 0.892121389863897, 0.89332329218135, 0.894522278550115,
0.895718344834999), x = c(10, 11.0467963604334, 12.1782709261765,
13.3728962503142, 14.6035317074526, 15.8398164069251, 62.6631746231113,
62.6583134156356, 62.6534565303638, 62.648604016965, 62.6437559251575,
62.6389123047088), y = c(10, 9.89605687874935, 9.59253574727296,
9.11222320249057, 8.48917353431654, 7.76447036695841, 0.00833796964522317,
0.00835876233547079, 0.00837957883570158, 0.00840041916631544,
0.00842128334742553, 0.00844217139885453), z = c(10, 9.05439359565339,
8.21533762023494, 7.48379901688836, 6.85562632179817, 6.3231517466183,
-7.50539460838544, -7.48234149534558, -7.45927733670319, -7.43620225192078,
-7.41311636057114, -7.39001978233681), alpha = c(0.1, 0.1, 0.1,
0.1, 0.1, 0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), beta = c(0.1, 0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), eta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), zeta = c(0.1,
0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), lambda = c(0.95,
0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95
), phi = c(5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5), kappa = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), ode_outputs..iteration.. = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA)), row.names = c("1",
"1.1", "1.2", "1.3", "1.4", "1.5", "3.9995", "3.9996", "3.9997",
"3.9998", "3.9999", "3.10000"), class = "data.frame")
Ideally the function would use dplyr and/or baseR.
If I understand you correctly, there's no need to iterate over columns. It can all be done directly in dplyr...
library(tidyverse)
df %>%
group_by(alpha) %>%
summarise(
across(
c(w, x, y, z),
list(mean=mean, sd=sd)
),
.groups="drop"
) %>%
rename_with(function(x) str_sub(x,1,1), ends_with("mean"))
# A tibble: 2 x 9
alpha w w_sd x x_sd y y_sd z z_sd
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 0.1 10.0 0.0345 12.8 2.20 9.14 0.875 7.99 1.38
2 0.5 0.893 0.00225 62.7 0.00908 0.00839 0.0000390 -7.45 0.0432

Counterintuitive results for dplyr::between() when using vectors

When using dplyr::between(), I assumed that it would compare each element. However it seems like that is not the case, as shown in the below example.
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)
dplyr::between(x=x, left=y, right=z)
# [1] TRUE TRUE TRUE FALSE FALSE FALSE
For example, in the 3rd element, 0.2 is not between 0.0 and 0.0, but TRUE is returned.
In the 4th element, 0.5 is between 0.052 and 0.85, but FALSE is returned.
Any ideas on what causes this behavior?
dplyr::between only accepts single value in left and right, it cannot work with vector of values.
The behaviour that you expect is present in data.table::between :
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)
data.table::between(x=x, lower=y, upper=z)
#[1] TRUE TRUE FALSE TRUE TRUE TRUE
In base R, this is easier with comparison operators and is flexible
x >= y & x <= z
#[1] TRUE TRUE FALSE TRUE TRUE TRUE
data
x <- c(0.2, 0.2, 0.2, 0.5, 0.5, 0.5)
y <- c(0.0, 0.0, 0.0, 0.1, 0.052, -0.3)
z <- c(0.43, 0.52, 0.0, 0.76, 0.85, 0.83)

qplot: Only graphing nodes below a threshold

I am trying to make a visual graph of a dissimilarity matrix. Using this site, I ran into the qgraph function from the package qgraph. Using the threshold flag, I am able to remove edges from my network above the supplied numerical value. This works beautifully, however, what if I only want to plot values below a certain threshold, not above?
For this, I came back to this site and read here: How to plot near-zero values with qgraph? to use the cut flag for this purpose. However, as the answer states, this flag will only "adjust the saturation so that everything above the cut point has the strongest color intensity, anything below the cut point, the saturation gets weaker."
What I would like to do is to plot only lines between the nodes that are below my cut value (or threshold), not anything else.
Here is some reproducible data:
Dist <- data.frame(Sample_1 = c(0.0, 0.245, 0.191, 0.78, 0.5),
Sample_2 = c(0.3, 0.0, 0.2, 0.99, 0.6),
Sample_3 = c(0.65, 0.45, 0.0, 0.05, 0.8),
Sample_4 = c(0.45, 0.06, 0.88, 0.0, 0.7),
Sample_5 = c(0.11, 0.79, 0.66, 0.37, 0.0),
row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"))
Plotting the graph:
qgraph(Dist, layout = "circle", vsize = 5, color = c("cyan", "yellow", "pink", "green3", "gray"), labels = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"), label.cex = 3, cut = 0.2)
As you can see, anything above the cut = 0.2 is also plotted and darker.
I would like only values below the 0.2 threshold to be plotted. Is there any way to do this?
Thanks.
qgraph does not seems to have the ability to cut below a threshold, so we have to manipulate the input data.
Replacing values above the threshold to 0 or NA should do it. Using NA result in the same output but with a warning.
Dist <- data.frame(
Sample_1 = c(0.0, 0.245, 0.191, 0.78, 0.5),
Sample_2 = c(0.3, 0.0, 0.2, 0.99, 0.6),
Sample_3 = c(0.65, 0.45, 0.0, 0.05, 0.8),
Sample_4 = c(0.45, 0.06, 0.88, 0.0, 0.7),
Sample_5 = c(0.11, 0.79, 0.66, 0.37, 0.0),
row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5")
)
library(qgraph)
qgraph(
replace(Dist, Dist > 0.2, 0),
layout = "circle",
vsize = 5,
color = c("cyan", "yellow", "pink", "green3", "gray"),
labels = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"),
label.cex = 3
)
Created on 2020-04-06 by the reprex package (v0.3.0)

R - display ≥ / more or equal on a forestplot

I prepared the code for forestplot, however I have problem with exporting plot with "≥" / more or equal sign.
library(forestplot)
names6 <- c("Variable",
"A ≥ 4000***",
"B ≥ 50***",
"C**",
"D",
"E***",
"F",
"G*",
"H**",
"I*",
"J***")
coef6 <- c(0.42, 1.58, 1.35, 0.49,
0.46, 0.66, 0.62, 1.34, 0.52, 0.72)
low6 <- c(0.34, 1.29, 1.08, 0.21,
0.33, 0.44, 0.43, 1.08, 0.29, 0.61)
high6 <- c(0.51, 1.93, 1.69, 1.21,
0.64, 0.99, 0.91, 1.66, 0.92, 0.86)
boxsize6 <-c(0.2, 0.2, 0.2,0.2,
0.2,0.2, 0.2,0.2,
0.2, 0.2)
test_data <- data.frame(coef=coef6,
low=low6,
high=high6,
boxsize=boxsize6)
row_names <- cbind(names6,
c("OR",test_data$coef), c("CI-95%", test_data$low), c("CI+95%", test_data$high) )
test_data <- rbind(NA, test_data)
#####FIGURE
forestplot(labeltext = row_names,
mean = test_data$coef, upper = test_data$high,
lower = test_data$low,
is.summary=c(TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE),
boxsize = test_data$boxsize,
zero = 1,
xlog = FALSE,
xlab = "OR (95% CI)",
col = fpColors(lines="black", box="black"),
ci.vertices = TRUE,
xticks = c(0,0.5, 1, 1.5, 2.0),
colgap = unit(0.03,'npc'),
lineheight = unit(1.1,"cm"),
txt_gp=fpTxtGp(label = gpar(cex = 0.8),
title = gpar(cex = 1),
ticks = gpar(cex = 0.6),
xlab = gpar(cex = 0.7)))
However I cannot export plot as a .pdf file with present "≥" / more or equal.
Instead of this, I got sign "=" / equal.
What should I change to get this sign on plot?
Edit:
\u2265 do not work...
This solution might be system-specific. Here is something that works on windows:
library("forestplot")
library("withr")
names6 <- c("Variable",
"A \u2265 4000***",
"B \u2265 50***",
"C**",
"D",
"E***",
"F",
"G*",
"H**",
"I*",
"J***")
### Data & params code here....
with_cairo_pdf('forestplot.pdf',
### Forest plot code here....
forestplot(labeltext = row_names,
....)
)

Element wise mean from data frame [closed]

Closed. This question needs details or clarity. It is not currently accepting answers.
Want to improve this question? Add details and clarify the problem by editing this post.
Closed 7 years ago.
Improve this question
I have a data frame with 3 columns and 16 rows. Each element has values like row1 values are (0.9, 0.9, 1.0), (0.7,0.9, 1.0), (0.9, 0.9, 1.0). I want element wise mean e.g., (0.9+0.7+0.9/3), (0.9+0.9+0.9/3), (1.0+1.0+1.0/3) and store the result as new column. Any suggestions?
SHO1 SHO2 SHO3
1 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.7, 0.9, 1.0
2 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.7, 0.9, 1.0
3 0.0, 0.0, 0.1 0.9, 0.9, 1.0 0.0, 0.0, 0.1
expected out for row1:
0.7+0.9+0.7/3, 0.9+0.9+0.9/3, 1.0+1.0+1.0/3
Based on the dput output by the OP (in the comments), we found that the columns in 'df1' are not 'strings'. Infact each element of each column is a list. So, instead of doing strsplit (as I suggested earlier), we loop through the columns with lapply and rbind the list elements (do.call(rbind). The output 'list' contains 'matrix' as list elements.
We can use Reduce to take the elementwise sum (Reduce('+', ..), and divide by the length of the list i.e. 3.
The matrix output ('m1') can be pasted together rowwise (do.call(paste) after converting to 'data.frame' and create a new column in the original dataset ('df1').
m1 <- Reduce('+', lapply(df1, function(x) do.call(rbind, x)))/ncol(df1)
df1$newCol <- do.call(paste, c(as.data.frame(m1), sep=", "))
df1
# SHO1 SHO2 SHO3
#1 0.9, 0.9, 1.0 0.7, 0.9, 1.0 0.9, 0.9, 1.0
#2 0.9, 0.9, 1.0 0.7, 0.9, 1.0 0.9, 0.9, 1.0
#3 0.3, 0.5, 0.7 0.7, 0.9, 1.0 0.3, 0.5, 0.7
#4 0.7, 0.9, 1.0 0.9, 0.9, 1.0 0.9, 0.9, 1.0
# newCol
#1 0.833333333333333, 0.9, 1
#2 0.833333333333333, 0.9, 1
#3 0.433333333333333, 0.633333333333333, 0.8
#4 0.833333333333333, 0.9, 1
data
df1 <- structure(list(SHO1 = structure(list(VH = c(0.9, 0.9, 1),
VH = c(0.9,
0.9, 1), M = c(0.3, 0.5, 0.7), H = c(0.7, 0.9, 1)), .Names = c("VH",
"VH", "M", "H")), SHO2 = structure(list(H = c(0.7, 0.9, 1), H = c(0.7,
0.9, 1), H = c(0.7, 0.9, 1), VH = c(0.9, 0.9, 1)), .Names = c("H",
"H", "H", "VH")), SHO3 = structure(list(VH = c(0.9, 0.9, 1),
VH = c(0.9, 0.9, 1), M = c(0.3, 0.5, 0.7), VH = c(0.9, 0.9,
1)), .Names = c("VH", "VH", "M", "VH"))), .Names = c("SHO1",
"SHO2", "SHO3"), row.names = c(NA, 4L), class = "data.frame")

Resources