boxplot data ggplot2 package - r

I'm new to R and hope you can help me. I want to draw multiple boxplots in one graph, but I can't get output like this:
Here is my own data:
I used this command:
library(tidyverse)
library(readxl)
library(ggplot2)
# Read the spreadsheet and preview its first rows.
marte <- read_xlsx("marterstudio.xlsx")
head(marte)
# Reshape to long format: the listed columns are stacked into an `ID` column
# (the former column name) and a `value` column (the cell content).
# NOTE(review): the column list goes from "M" straight to "O" - confirm that
# there is no "N" column in the spreadsheet.
marte <- gather(marte, "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "O", "P", key="ID", value="value")
# Store the grouping column and the reshaped column names as factors.
marte$Group <- as.factor(marte$Group)
marte$ID <- as.factor(marte$ID)
# Box plot with `value` mapped to x and `ID` mapped to y, coloured by `ID`.
ggplot(marte, aes(x = value, y = ID, color = ID)) +
geom_boxplot()
This is the result:
Can you help me?

What you need is the coord_flip() function, rather than swapping the x and y aesthetics as you did.
# Only the data goes into ggplot(); the aesthetics are declared inside the
# geom so that each geom can carry its own mapping if more layers are added.
ggplot(marte) +
  geom_boxplot(mapping = aes(x = ID, y = value, colour = ID)) +
  # Swap the axes so the boxes are drawn horizontally.
  coord_flip()

Making a boxplot with only 1 or 2 values per group is probably misleading about the true variance in each population. But just for the sake of demonstrating the code, you could do something like:
# load necessary packages
library(tidyverse)
# Fix the RNG so the same rows are sampled on every run
set.seed(1)
# Box plot of two sampled rows per Species/measurement pair
# (for illustration only; not recommended for samples this small)
iris %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  pivot_longer(cols = -Species, names_to = "name", values_to = "value") %>%
  group_by(Species, name) %>%
  sample_n(size = 2, replace = FALSE) %>%
  ggplot(mapping = aes(x = name, y = value, fill = Species)) +
  geom_boxplot() +
  coord_flip()
Which produces this plot:
In practice, when sample size is fairly small (e.g. n < 10), it is more informative to show the individual data points, perhaps with some summary statistic such as the mean or median. Here's how I would be more inclined to represent data with a sample size = 2:
# Fix the RNG so the same rows are sampled on every run
set.seed(1)
# Bars show the per-group mean; the individual sampled values are overlaid
# as points (the recommended display for small samples)
iris %>%
  select(Species, Sepal.Length, Sepal.Width) %>%
  pivot_longer(cols = -Species, names_to = "name", values_to = "value") %>%
  group_by(Species, name) %>%
  sample_n(size = 2, replace = FALSE) %>%
  ggplot(mapping = aes(x = name, y = value, fill = Species)) +
  stat_summary(geom = "bar", fun = mean, position = "dodge") +
  geom_point(position = position_dodge(width = 0.9), shape = 21, size = 3) +
  coord_flip()
Which gives this plot:

Related

Why pies are flat in geom_scatterpie in R?

Why are the pies flat?
# Six rows of example data: Var1/Var2 give each pie's position and the
# columns A-D give its slices. Var1 ranges from 172 to 208, whereas Var2
# ranges from 109 to 2505264.
df<- data.frame(
Day=(1:6),
Var1=c(172,186,191,201,205,208),
Var2= c(109,483,64010,161992,801775,2505264), A=c(10,2,3,4.5,16.5,39.6), B=c(10,3,0,1.4,4.8,11.9), C=c(2,5,2,0.1,0.5,1.2), D=c(0,0,0,0,0.1,0.2))
# One pie per row at (Var1, Var2), on the original (unscaled) axes.
ggplot() +
geom_scatterpie(data = df, aes(x = Var1 , y = Var2, group = Var1), cols = c("A", "B", "C", "D"))
I have tried using coord_fixed() and does not work either.
The problem seems to be the scales of the x- and y-axes. If you rescale both of them to have zero mean and unit variance, the plot works. So, one thing you could do is plot the rescaled values, but transform the labels back into the original scale. To do this, you would have to do the following:
Make the data:
# Example data: one row per pie, positioned by Var1/Var2 and sliced by A-D.
df <- data.frame(
  Day = 1:6,
  Var1 = c(172, 186, 191, 201, 205, 208),
  Var2 = c(109, 483, 64010, 161992, 801775, 2505264),
  A = c(10, 2, 3, 4.5, 16.5, 39.6),
  B = c(10, 3, 0, 1.4, 4.8, 11.9),
  C = c(2, 5, 2, 0.1, 0.5, 1.2),
  D = c(0, 0, 0, 0, 0.1, 0.2)
)
Rescale the variables
# Standardise both position variables. scale() returns a one-column matrix,
# so as.vector() is used to store each result as a plain numeric column.
df <- df %>%
  mutate(
    x = as.vector(scale(Var1)),
    y = as.vector(scale(Var2))
  )
Find the linear map that transforms the rescaled values back into their original values. Then, you can use the coefficients from the model to make a function that will transform the rescaled values back into the original ones.
# Regress each original variable on its rescaled version; the fitted
# intercept and slope define the map from the rescaled values back to the
# original ones.
m1 <- lm(Var1 ~ x, data = df)
m2 <- lm(Var2 ~ y, data = df)

# Convert rescaled x positions to original Var1 units, rounded for labelling.
trans_x <- function(x) {
  beta <- coef(m1)
  round(beta[1] + beta[2] * x)
}

# Convert rescaled y positions to original Var2 units, rounded for labelling.
trans_y <- function(x) {
  beta <- coef(m2)
  round(beta[1] + beta[2] * x)
}
Make the plot, using the transformation functions as the call to labels in the scale_[xy]_continuous() functions
# Draw the pies at the rescaled positions, but label both axes in the
# original units by passing the back-transformation functions as `labels`.
ggplot() +
  geom_scatterpie(
    mapping = aes(x = x, y = y),
    data = df,
    cols = c("A", "B", "C", "D")
  ) +
  scale_x_continuous(labels = trans_x) +
  scale_y_continuous(labels = trans_y) +
  coord_fixed()
There may be an easier way than this, but it wasn't apparent to me.
The range on the y-axis is so large it's compressing the disks to lines. Change the y-axis to a log scale, and you can see the shapes. Adding coord_fixed() to keep the pies circular:
# Same pies on the original variables, with a log10 y-axis and a fixed
# aspect ratio.
ggplot() +
  geom_scatterpie(
    mapping = aes(x = Var1, y = Var2, group = Var1),
    data = df,
    cols = c("A", "B", "C", "D")
  ) +
  scale_y_log10() +
  coord_fixed()

Z-Score Plot of multiple series as one plot/histogram

I am trying to make Zscores graph as given below.
My data is based on NetLogo experiments and have repeated columns of same variables(say 10 times). It consists of different scenarios. I am trying to make a one representative graph to understand data. I am trying bit by bit. For the data given below lets say for Adaption scenario;
# Example data for a single scenario: three variables, ten observations each.
df <- data.frame(
  Biomass = as.numeric(1:10),
  Livestock = seq(from = 2, to = 20, by = 2),
  Totalpeople = seq(from = 10, to = 100, by = 10)
)
I suppose that the Z-score will be a series derived from the data. How can a graph like the one above be made?
I am using this code:
# Attempted z-score pipeline from the question; as written it stops with the
# error quoted right after it.
# NOTE(review): the plot maps `Scenario`, `value` and `name`, none of which
# are columns of the example `df` (Biomass, Livestock, Totalpeople).
df %>% (.funs = function(x) {(x - mean(x)) / sd(x)}) %>%
ggplot(aes(x = Scenario, y= value, fill = name)) +
geom_col(colour = 'black', position = position_dodge()) +
theme_classic() +
scale_fill_viridis_d() + ylab("Z-scores")
and gives error
Error in is.data.frame(x) :
'list' object cannot be coerced to type 'double'
Please help!
Are you looking for a solution like this? I just created my own Scenario column, as Limey did.
# Attach a made-up scenario label to every row, reshape the three variables
# to long format, then compute z-scores of `value` within each scenario and
# draw them as dodged bars.
df %>%
  as_tibble() %>%
  add_column(
    Scenario_TarJae = factor(rep(c("A", "B", "C"), times = c(3, 3, 4)))
  ) %>%
  pivot_longer(cols = 1:3, names_to = "group", values_to = "value") %>%
  group_by(Scenario_TarJae) %>%
  # scale() returns a one-column matrix, hence the [, 1] in the mapping below.
  mutate(z_score = scale(value)) %>%
  ggplot(mapping = aes(x = Scenario_TarJae, y = z_score[, 1], fill = group)) +
  geom_col(colour = "black", position = position_dodge()) +
  theme_classic() +
  scale_fill_viridis_d() +
  ylab("Z-scores")
You've not given us values for Scenario in your MWE, so it's not clear if you want to standardise within or between scenarios. This code standardises within scenario: that is, it calculates the effect size for each column.
As always, it helps to tidy your data before you start: that is the effect of the call to pivot_longer. Obviously, when you have your own Scenario, you can remove the add_column call.
# Standardise every variable within each scenario and plot the z-scores.
df %>%
  pivot_longer(everything(), values_to = "value") %>%
  # Placeholder scenario label; drop this step once the data has its own.
  add_column(Scenario = "ADAPTATION") %>%
  group_by(Scenario, name) %>%
  # z = (value - mean) / sd. The subtraction must be parenthesised: without
  # the parentheses only mean(value) is divided by sd(value), so the values
  # are merely shifted rather than standardised.
  # mutate() is used because the result has one z-score per input row.
  mutate(value = (value - mean(value)) / sd(value)) %>%
  ungroup() %>%
  ggplot(aes(x = Scenario, y = value, fill = name)) +
  geom_col(colour = "black", position = position_dodge()) +
  theme_classic() +
  scale_fill_viridis_d() +
  ylab("Z-scores")

Two ggplot with subset in pipe

I would like to plot two lines in one plot (both have the same axes), but one of the lines uses only a subset of the values from the data frame.
I tried this
DF%>% ggplot(subset(., Cars == "A"), aes(Dates, sold_A)) +geom_line()+ ggplot(., (Dates, sold_ALL))
but this error occurred
object '.' not found
(1) You can't add a ggplot object to a ggplot object:
(2) Try taking the subset out of the call to ggplot.
# The subset (Cars == "A") becomes the plot's default data for the first
# line; the second line is given the full DF and its own mapping.
DF %>%
  subset(Cars == "A") %>%
  ggplot(mapping = aes(x = Dates, y = sold_A)) +
  geom_line() +
  geom_line(mapping = aes(x = Dates, y = sold_ALL), data = DF)
I think you are misunderstanding how ggplot works. If we are attempting to do it your way, we could do:
# The whole plot is wrapped in braces so that `.` (the piped DF) can be
# referenced in more than one place: once per subset.
DF %>% {
  ggplot(subset(., Cars == "A"), aes(x = Dates, y = sold_A)) +
    geom_line(colour = "red") +
    geom_line(data = subset(., Cars == "B"), colour = "blue") +
    lims(y = c(0, 60))
}
But it would be easier and better to map the variable Cars to the colour aesthetic, so your plot would be as simple as:
# Map Cars to the colour aesthetic so each car gets its own line.
DF %>%
  ggplot(mapping = aes(x = Dates, y = sold_A, colour = Cars)) +
  geom_line() +
  lims(y = c(0, 60))
Note that as well as being simpler code, we get the legend for free.
Data
Obviously, we didn't have your data for this question, but here is a constructed data set with the same name and same column variables:
# Reproducible stand-in for the question's data: 20 consecutive days for each
# of two cars, with Poisson-distributed sales (mean 20 for "A", 40 for "B").
set.seed(1)
# `length.out` is spelled out in full instead of relying on partial matching
# of `length`.
Dates <- rep(seq(as.Date("2020-01-01"), by = "day", length.out = 20), 2)
Cars <- rep(c("A", "B"), each = 20)
sold_A <- rpois(40, rep(c(20, 40), each = 20))
DF <- data.frame(Dates, Cars, sold_A)
If you want only one plot, you would need to remove ggplot(., aes(Dates, sold_ALL)) and wrap it directly into a structure like geom_line(data=., aes(Dates, sold_ALL)). Then, use the sage advice from @MrFlick. Here is an example using the iris data:
library(ggplot2)
library(dplyr)
#Example
# The first layer uses only the setosa rows (the plot's default data); the
# second layer is given the full piped data frame and its own aesthetics.
iris %>% {
  ggplot(
    subset(., Species == "setosa"),
    aes(x = Sepal.Length, y = Sepal.Width)
  ) +
    geom_point() +
    geom_point(
      mapping = aes(x = Petal.Length, y = Petal.Width),
      data = .,
      colour = "blue"
    )
}
Output:
The ggplot(., aes(Dates, sold_ALL)) is creating a new canvas and the new plot.

Individual legends for separate geom_line aesthetics in the same ggplot

I'm new to R and I'm trying to create a single plot with data from 2 melted dataframes.
Ideally I would have a legend for each of the dataframes with their respective titles; however, I get only a single legend with the title of the first aesthetic.
My starting point is:
# Reshape both wide tables to long format, keeping `Distance` as the id
# column; the former column names are stored in `Aerobic` / `Anaerobic`.
aerobic_melt <- melt(aerobic, id.vars = 'Distance', variable.name = 'Aerobic')
anaerobic_melt <- melt(anaerobic, id.vars = 'Distance', variable.name = 'Anaerobic')
# Two line layers, one per data frame. Both map their category column to the
# same `col` aesthetic.
plot <- ggplot() +
geom_line(data = aerobic_melt, aes(Distance, value, col=Aerobic)) +
geom_line(data = anaerobic_melt, aes(Distance, value, col= Anaerobic)) +
xlim(0, 125) +
ylab('Energy (J/kg )') +
xlab('Distance (m)')
Which results in
I've searched, but with my limited ability I haven't been able to find a way to do it.
My question is:
How do I create separate legends with titles 'Aerobic' and 'Anaerobic' which should respectively refer to A,B,C,F,G,L and E,H,I,J,K?
Any help is appreciated.
Obviously we don't have your data, but I have created some sample data that should have the same names and structure as your own data frames, since it works with your own plot code. See the end of the answer for the data used here.
You can use the package ggnewscale if you want two color scales on the same plot. Just add in a new_scale_color() call between your geom_line calls. I have left the rest of your code as-is.
library(ggplot2)
library(ggnewscale)
# Same plot as in the question, with one added step between the two layers.
plot <- ggplot() +
# First layer: coloured by `Aerobic`.
geom_line(data = aerobic_melt, aes(Distance, value, col=Aerobic)) +
# Start a fresh colour scale so the next layer gets its own legend.
new_scale_color() +
# Second layer: coloured by `Anaerobic`.
geom_line(data = anaerobic_melt, aes(Distance, value, col= Anaerobic)) +
xlim(0, 125) +
ylab('Energy (J/kg )') +
xlab('Distance (m)')
# Print the assembled plot.
plot
Data
# Simulated stand-ins for the melted data: one random-walk series of 120
# points per category, indexed by Distance 1..120.
set.seed(1)
aerobic_melt <- data.frame(
  Aerobic = rep(c("A", "B", "C", "F", "G", "L"), each = 120),
  value = as.vector(replicate(6, cumsum(rnorm(120)))),
  Distance = rep(seq_len(120), times = 6)
)
anaerobic_melt <- data.frame(
  Anaerobic = rep(c("E", "H", "I", "J", "K"), each = 120),
  value = as.vector(replicate(5, cumsum(rnorm(120)))),
  Distance = rep(seq_len(120), times = 5)
)

Some bars don't reorder in ggplot

My dataframe:
# Example data: commodities "A" and "C" each appear in two rows.
data <- data.frame(
  commodity = rep(c("A", "B", "C", "D"), times = c(2, 1, 2, 1)),
  cost = c(1809065, 348456, 203686, 5966690, 172805, 3176424)
)
data
commodity cost
1 A 1809065
2 A 348456
3 B 203686
4 C 5966690
5 C 172805
6 D 3176424
Next I plot a barplot with reorder:
library(tidyverse)
# Bar plot of cost per commodity, with the x-axis levels ordered by
# reorder(). No summary function is passed, so reorder() uses its default.
data %>%
ggplot(aes(x = reorder(factor(commodity), cost), y = cost)) +
geom_bar(stat = "identity", fill = "steelblue3")
What happens next is that most bars are ordered just like I want, but a few aren't. Here's an image of my problematic plot:
you can try
library(tidyverse)
# Order the commodities by their summed cost (third argument of reorder()).
data %>%
  ggplot(mapping = aes(x = reorder(commodity, cost, sum), y = cost)) +
  geom_col(fill = "steelblue3")
Change the default mean function of reorder to sum. Then the order is in line with the bar function of ggplot. Of note, using geom_col is preferred over geom_bar when using stat="identity". If you need a decreasing order, try rev(reorder(commodity, cost, sum)) or create a function yourself, like function(x) -sum(x).
Reorder will by default reorder by the mean value for each group, as explained in the help page. Jimbou's solution is better but you could also do this in a different way by aggregating the data before plotting and using geom_col instead:
# Alternative: collapse the data to one total per commodity before plotting,
# then order the bars by that total.
data %>%
  group_by(commodity) %>%
  summarise(cost = sum(cost)) %>%
  ggplot(mapping = aes(x = reorder(factor(commodity), cost), y = cost)) +
  geom_col(fill = "steelblue3")

Resources