Map straight shaded error onto a coord_polar plot - r

I'd wondering if someone could please offer me a suggestion on how to sort out my shaded error for each line in this radar plot? I've tried several different approaches but am not getting what I want.
library(ggplot2)
library(scales)
# Make some data
Group.no <- 3
Group.names <- c("1","2","3")
Metric.no <- 4
Metriclist <- c("M1", "M4", "M6","M8")
Metric <- c(rep(c(Metriclist), each = Group.no))
Group <- c(rep(c(Group.names), times = Metric.no))
Mg <- c(87.7, 93.8, 72.5, 190.3, 170.9, 138.4, 283.2, 248.7, 196.5, 340.6, 307.9, 240.9)
d <- data.frame(Metric, Group, Mg)
d$lowCI <- Mg-8
d$highCI <- Mg+8
# Plot data
Plot <- ggplot(d, aes(x = Metric, y = Mg, group = Group)) +
geom_polygon(aes(group = Group, colour = Group), fill = NA, size = 1.1) +
geom_ribbon(aes(x=Metric,y=Mg,ymin=lowCI,ymax=highCI, group = Group, fill=Group), alpha=.3) +
coord_polar(start = -((180/Metric.no)*(pi/180)))+
theme_light()
Plot
Example plot
As you can see, the geom_ribbon isnt plotting the lower and upper CI's in line with geom_polgyon. I am currently at the peak of my knowledge on this. Is anyone able to offer a suitable fix so that the shaded CI's track the polygon please so that both are straight lines?
Thanks in advance for any solutions!

With a bit of fiddling with the data, you can just about make it work. Create a separate dataframe that has the confidence intervals (note that we need to repeat the first point to make the intervals work correctly between M8 and M1) with:
i <- c(seq_along(d[[1]]), which(d$Metric=="M1"))
dCI <- rbind(data.frame(d[i,1:2], CI=d$lowCI[i], type="low") ,
data.frame(d[i,1:2], CI=d$highCI[i], type="high"))
Then we can add the confidence intervals as a polygon with:
Plot <- ggplot(d, aes(x = Metric, y = Mg, group = Group)) +
geom_polygon(aes(group = Group, colour = Group), fill = NA, size = 1.1) +
geom_polygon(aes(x=Metric,y=CI, group = Group, fill=Group), data=dCI, alpha=.3) +
coord_polar(start = -((180/Metric.no)*(pi/180)))+
theme_light()
Plot
This produces the output:

Related

Stacked boxplot and scatter plot - group BOTH by same variable

I am trying to create a scatter plot stacked on a boxplot. Similar dummy data below. The boxplot behaves well, as I want one boxplot for each of the three "exp" variables both "before" AND "after" (as seen in graph below, 6 box plots).
The problem however is that I also want the scatter plot data to lie on top of the correct plot (divided by before/after). Now, the points are just in between the two box plots, as you can see.
exp <- rep(c("smile", "neutral", "depressor"), each=5, times=2)
time <- rep(c("before", "after"), each = 15)
result <- rnorm(15, mean=50, sd=4)
result <- append(result, c(rnorm(15, mean=47, sd=3)))
data <- data.frame(exp, time, result)
ggplot(data, aes(exp, result, fill=time)) +
geom_boxplot() +
geom_point()
I would really appreciate some input, thanks in advance!
Is this solving your issue?
Here you add the time group in geom_point.
ggplot(data, aes(exp, result, fill=time)) +
stat_boxplot(width=0.5, position = position_dodge(1)) +
geom_boxplot(position = position_dodge(1), outlier.shape = NA)+
geom_point(aes(fill = time, group = time), color="black",
position = position_jitterdodge(jitter.width = .1, dodge.width = 1))
Before
After
You could use geom_jitter like this:
exp <- rep(c("smile", "neutral", "depressor"), each=5, times=2)
time <- rep(c("before", "after"), each = 15)
result <- rnorm(15, mean=50, sd=4)
result <- append(result, c(rnorm(15, mean=47, sd=3)))
data <- data.frame(exp, time, result)
library(ggplot2)
ggplot(data, aes(exp, result, fill=time)) +
geom_boxplot() +
geom_jitter()
Created on 2022-08-30 with reprex v2.0.2

How do you plot multiple columns of a data frame all within the same boxplot in r (using ggplot2)?

I have a data frame that looks like this:
Train_Table_Time_Power <- data.frame(
Mean = runif(100),
STD = runif(100),
Kurt = runif(100),
Skew = runif(100),
TI = sample(c("0.05", "0.10", "0.15", "0.20"), 10, replace = TRUE)
)
I then created a box for the Skew Feature using the code below:
Skew_BoxPlot <- ggplot(Train_Table_Time_Power, aes(x = TI, y = Skew, color = TI)) +
geom_boxplot(notch = T, id=TRUE) +
stat_summary(fun = mean, geom="point", shape=19, color="red", size=2) +
geom_jitter(shape=16, position = position_jitter(0.2), size = 0.3) +
labs(title = "Crest_Time", x = "TI", y = "Normalized Magnitude") +
theme_minimal() + theme_Publication()
The above box plot displays the different distributions of the Skew feature as the TI feature varies. However, I now want to create a new box plot that shows the distributions of all of the features (Mean, STD, Kurt, and Skew) for just one value of TI, say TI = 0.05, and I would like the figure to plot all of the box plot distributions on the same graph horizontally, next to each other. Can anyone direct me on how best to go about doing this?
You can convert your data into a long table and then plot. Using tidyverse this can be easily done
library(tidyverse)
Train_Table_Time_Power %>% filter(TI == 0.05) %>%
pivot_longer( cols=1:4) %>%
ggplot(aes(x=name, y=value)) + geom_boxplot()
You can change TI == 0.05 to any value that you want or you can do all TI values and used facet_grid() to split out individual plots
Train_Table_Time_Power %>% pivot_longer( cols=1:4) %>%
ggplot(aes(x=name, y=value)) + geom_boxplot() +facet_grid(~TI)

r ggplot when two colors overlap

I have some codes to generate a plot,the only problem I have is there're many overlapping colors.
When two colors overlap, how do I specify the dominant color?
For example, there're 4 black points when indicator = threshold. They are at 4 x-axis correspondingly. However, the black points at "Wire" and "ACH" scales do not show up because it is overlap with blue points. The black point at "RDFI" scale barely shows up. How can I make black as the dominant color when two colors overlap? Thanks ahead!
ggplot(df, aes(a-axis, y-axis), color=indicator)) +
geom_quasirandom(groupOnX=TRUE, na.rm = TRUE) +
labs(title= 'chart', x='x-axis', y= 'y-axis') +
scale_color_manual(name = 'indicator', values=c("#99ccff","#000000" ))
for specify the dominant color you should use the function new_scale () and its aliases new_scale_color () and new_scale_fill ().
As an example, lets overlay some measurements over a contour map of topography using the beloed volcano
library(ggplot2)
library(ggnewscale)
# Equivalent to melt(volcano)
topography <- expand.grid(x = 1:nrow(volcano),
y = 1:ncol(volcano))
topography$z <- c(volcano)
# point measurements of something at a few locations
set.seed(42)
measurements <- data.frame(x = runif(30, 1, 80),
y = runif(30, 1, 60),
thing = rnorm(30))
dominant point:
ggplot(mapping = aes(x, y)) +
geom_contour(data = topography, aes(z = z, color = stat(level))) +
# Color scale for topography
scale_color_viridis_c(option = "D") +
# geoms below will use another color scale
new_scale_color() +
geom_point(data = measurements, size = 3, aes(color = thing)) +
# Color scale applied to geoms added after new_scale_color()
scale_color_viridis_c(option = "A")
dominant contour:
ggplot(mapping = aes(x, y)) +
geom_point(data = measurements, size = 3, aes(color = thing)) +
scale_color_viridis_c(option = "A")+
new_scale_color() +
geom_contour(data = topography, aes(z = z, color = stat(level))) +
scale_color_viridis_c(option = "D")
Your problem may not lie with what color is dominant. You have selected colors that will show up often. You may be losing the bottom of your Y axis. The code you have in your example can not have possibly produced that plot it has errors.
Here is a simple example that show's one way to overcome your problem by simply overplottting the threshold points after you have plotted the beeswarm.
library(dplyr)
library(ggbeeswarm)
distro <- data.frame(
'variable'=rep(c('runif','rnorm'),each=1000),
'value'=c(runif(2000, min=-3, max=3))
)
distro$indicator <- "NA"
distro[3,3] <- "Threshhold"
distro[163,3] <- "Threshhold"
ggplot2::ggplot(distro,aes(variable, value, color=indicator)) +
geom_quasirandom(groupOnX=TRUE, na.rm = TRUE, width=0.1) +
scale_color_manual(name = 'indicator', values=c("#99ccff","#000000")) +
geom_point(data = distro %>% filter(indicator == "Threshhold"))
You sort your data based on the color variable (your indicator).
Basically you want your black dots to be plotted last = on top of the other ones.
df$indicator <- sort(df$indicator, decreasing=T)
#Tidyverse solution
df <- df %>% arrange(desc(indicator))
Dependent on your levels you may have to reverse sort or not.
Then you just plot.
pd <- tibble(x=rnorm(1000), y=1, indicator=sample(c("A","B"), replace=T, size = 1000))
ggplot(pd, aes(x=x,y=y,color=indicator)) + geom_point()
pd <- pd %>% arrange(indicator)
ggplot(pd, aes(x=x,y=y,color=indicator)) + geom_point()
pd <- pd %>% arrange(desc(indicator))
ggplot(pd, aes(x=x,y=y,color=indicator)) + geom_point()

Plot legend for multiple histograms plotted on top of each other ggplot

I've made this multiple histogram plot in ggplot and now I want to add a legend for both the light purple part and the dark purple part. I know the conventional way is to to it with aes, but I can't seem to figure out how I integrate this feature as one into my multiple histogram plot.
I don't shy manual labour, but more sophisticated solutions are preferred. Anyone help me out?
#dataframe
set.seed(20)
df <- data.frame(expl = rbinom(n=100, size = 1, prob=0.08),
resp = sample(50:100, size = 100, replace = T))
#graph
graph <- ggplot(data = df, aes(x = resp))
graph +
geom_histogram(fill = "#BEBADA", alpha = 0.5, bins = 10) +
geom_histogram(data = subset(df, expl == '1'), fill = "#BEBADA", bins = 10)
Your data is already in the long format that is well suited for ggplot; you just need to map expl to alpha. In general, if you find yourself making multiples of the same geom, you probably want to rethink either the shape of your data or your approach for feeding it into geoms.
library(tidyverse)
set.seed(20)
df <- data.frame(expl = rbinom(n=100, size = 1, prob=0.08),
resp = sample(50:100, size = 100, replace = T))
To map expl onto alpha, make it a factor, and then assign that to alpha inside your aes. Then you can set the alpha scale to values of 0.5 and 1.
ggplot(df, aes(x = resp, alpha = as.factor(expl))) +
geom_histogram(fill = "#bebada", bins = 10) +
scale_alpha_manual(values = c(0.5, 1))
However, differentiating by alpha is a little awkward. You could instead map to fill and use light and dark purples:
ggplot(df, aes(x = resp, fill = as.factor(expl))) +
geom_histogram(bins = 10) +
scale_fill_manual(values = c("0" = "mediumpurple1", "1" = "mediumpurple4"))
Note also that you can adjust the position of the histogram bars if you need to, by assigning geom_histogram(position = ...), where you could fill in with something such as "dodge" if that's what you'd like.
If you want a legend on the alpha value, the idea is to include it as an aesthetic rather than as a direct argument as you tried. In order to do this, a simple solution is to enrich the data frame used by ggplot:
df2 <- rbind(
cbind(df, filter="all lines"),
cbind(subset(df, expl == '1'), filter="expl==1")
)
df2 corresponds to df after appending the lines from your subset of interest (with a field filter telling from which copy each record comes)
Then, this solves your problem
ggplot(df2, aes(resp, alpha=filter)) +
geom_histogram(fill="#BEBADA", bins=10, position="identity") +
scale_alpha_discrete(range=c(.5,1))

ggplot2 shading envelope of time series

I am plotting the results of 50 - 100 experiments.
Each experiment results in a time series.
I can plot a spaghetti plot of all time series, but
what I'd like to have is sort of a density map for the time series plume.
(something similar to the gray shading in the lower panel
in this figure: http://www.ipcc.ch/graphics/ar4-wg1/jpg/fig-6-14.jpg)
I can 'sort of' do this with 2d binning or binhex but the result could be prettier (see example below).
Here is a code that reproduces a plume plot for mock data (uses ggplot2 and reshape2).
# mock data: random walk plus a sinus curve.
# two envelopes for added contrast.
tt=10*sin(c(1:100)/(3*pi))
rr=apply(matrix(rnorm(5000),100,50),2,cumsum) +tt
rr2=apply(matrix(rnorm(5000),100,50),2,cumsum)/1.5 +tt
# stuff data into a dataframe and melt it.
df=data.frame(c(1:100),cbind(rr,rr2) )
names(df)=c("step",paste("ser",c(1:100),sep=""))
dfm=melt(df,id.vars = 1)
# ensemble average
ensemble_av=data.frame(step=df[,1],ensav=apply(df[,-1],1,mean))
ensemble_av$variable=as.factor("Mean")
ggplot(dfm,aes(step,value,group=variable))+
stat_binhex(alpha=0.2) + geom_line(alpha=0.2) +
geom_line(data=ensemble_av,aes(step,ensav,size=2))+
theme(legend.position="none")
Does anyone know of a nice way do get a shaded envelope with gradients. I have also tried geom_ribbon but that did not give any indication of density changes along the plume. binhex does that, but not with aesthetically pleasing results.
Compute quantiles:
qs = data.frame(
do.call(
rbind,
tapply(
dfm$value, dfm$step, function(i){quantile(i)})),
t=1:100)
head(qs)
X0. X25. X50. X75. X100. t
1 -0.8514179 0.4197579 0.7681517 1.396382 2.883903 1
2 -0.6506662 1.2019163 1.6889073 2.480807 5.614209 2
3 -0.3182652 2.0480082 2.6206045 4.205954 6.485394 3
4 -0.1357976 2.8956990 4.2082762 5.138747 8.860838 4
5 0.8988975 3.5289219 5.0621513 6.075937 10.253379 5
6 2.0027973 4.5398120 5.9713921 7.015491 11.494183 6
Plot ribbons:
ggplot() +
geom_ribbon(data=qs, aes(x=t, ymin=X0., ymax=X100.),fill="gray30", alpha=0.2) +
geom_ribbon(data=qs, aes(x=t, ymin=X25., ymax=X75.),fill="gray30", alpha=0.2)
This is for two quantile intervals, (0-100) and (25-75). You'll need more args to quantile and more ribbon layers for more quantiles, and need to adjust the colours too.
Based on the idea of Spacedman, I found a way to add more intervals in an automatic way: I first compute the quantiles for each step, group them by pairs of symmetric values and then use geom_ribbon in the right order...
library(tidyr)
library(dplyr)
condquant <- dfm %>% group_by(step) %>%
do(quant = quantile(.$value, probs = seq(0,1,.05)), probs = seq(0,1,.05)) %>%
unnest() %>%
mutate(delta = 2*round(abs(.5-probs)*100)) %>%
group_by(step, delta) %>%
summarize(quantmin = min(quant), quantmax= max(quant))
ggplot() +
geom_ribbon(data = condquant, aes(x = step, ymin = quantmin, ymax = quantmax,
group = reorder(delta, -delta), fill = as.numeric(delta)),
alpha = .5) +
scale_fill_gradient(low = "grey10", high = "grey95") +
geom_line(data = dfm, aes(x = step, y = value, group=variable), alpha=0.2) +
geom_line(data=ensemble_av,aes(step,ensav),size=2)+
theme(legend.position="none")
Thanks Erwan and Spacedman.
Avoiding 'tidyr' ('dplyr' and 'magrittr') my version of Erwans answer becomes
probs=c(0:10)/10 # use fewer quantiles than Erwan
arr=t(apply(df[,-1],1,quantile,prob=probs))
dfq=data.frame(step=df[,1],arr)
names(dfq)=c("step",colnames(arr))
dfqm=melt(dfq,id.vars=c(1))
# add inter-quantile (per) range as delta
dfqm$delta=dfqm$variable
levels(dfqm$delta)=abs(probs-rev(probs))*100
dfplot=ddply(dfqm,.(step,delta),summarize,
quantmin=min(value),
quantmax=max(value) )
ggplot() +
geom_ribbon(data = dfplot, aes(x = step, ymin = quantmin,
ymax =quantmax,group=rev(delta),
fill = as.numeric(delta)),
alpha = .5) +
scale_fill_gradient(low = "grey25", high = "grey75") +
geom_line(data=ensemble_av,aes(step,ensav),size=2) +
theme(legend.position="none")

Resources