How to rearrange the data to produce a barplot - r

I created a small example here for my big data set with more than 400000 records, I was able to plot the point_plot, here the code:
Data1 <- data.frame(State=rep('SC',24),ID=rep(11,24),Month=c(rep(1,times=9),
rep(2,times=6),rep(3,times=9)),Day=c(rep(1:3,each=3),rep(1:2,each=3),
rep(1:3,each=3)),Group=rep(1:3,8),Value=rep(10.1:20.9,length.out=24))
Data2 <- Data1[rep(1:nrow(Data1),4),]
Data <- data.frame(State=c(rep('SC',48),rep('NC',48)),ID=c(rep(11:14,each=24)),
Month=Data2$Month,Day=Data2$Day,Group=rep(1:3,32),Value=rep(10:20,length.out=96))
states = unique(Data$State)
for(j in 1:length(states)) {
jpeg(file=paste("Pic", j, ".jpeg", sep=""))
data <- subset(Data,State==states[j])
plot(data$Month, data$Value, type="p", xlab="Months", ylab="Value")
colors <- rainbow(3)
for (i in 1:3) { # add lines
group <- subset(data, Group==i)
lines(group$Month, group$Value, type="p", col=colors[i])
}
title(paste(unique(data$State),"Value",sep=' ')) # add a title and subtitle
leg.txt <- c("G1","G2","G3") # add a legend
legend("topleft", legend=leg.txt, fill=colors, bty="o")
dev.off()
}
But now I need to plot the bar_plot with the 3 groups side by side for each month, I tried with the following two, but was not able to get it right:
1)ggplot(data, aes(factor(data$Month), data$Value, fill = factor(data$Group))) +
geom_bar(position = "dodge", width = 0.5)
scale_x_discrete(labels = data$Month)
2) barplot(data$Value,beside=T,names.arg=factor(data$Month))
Any help would be greatly appreciated!

You need the argument stat = "identity" for geom_bar:
ggplot(data, aes(as.factor(Month), Value, fill = as.factor(Group))) +
geom_bar(position = "dodge", width = 0.5, stat = "identity") +
scale_fill_discrete("Group", labels = c("N", "E", "L"))

ggplot(Data, aes(x=Month, y=Value, fill=as.factor(Group))) +
geom_bar(stat="identity", position="dodge", color="black")

Related

modify horizontal barplots for combination (tight design)

I have the following sample data:
library(tidyverse)
df <- data.frame(col=rep(c("A_B", "A_C", "A_D",
"B_A", "C_A", "D_A",
"B_C", "B_D",
"C_B", "D_B",
"C_D", "D_C"), 2),
level=c(rep("lower_level", 12), rep("higher_level", 12)),
value=abs(rnorm(24, mean=5, sd=2)))%>% tibble()
df[c('origin', 'target')] <- str_split_fixed(df$col, '_', 2)
df <- df %>% select(c(origin, target, level, value))
I now want to create horizontal stacked barplots for each target (df %>% filter(target=="A")). I do this using the following code:
# plot
p1 <- ggplot(data = df %>% filter(target=="A"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
Since I want to combine multiple such plots later (s. below), I would like to
remove the empty space between y-axis and the bars (or manipulate it to value X)
have the fill label displayed on the right side
have one value on the left, saying "target: A"
and have fill legend and y axis shared between all plots.
See annotated plot:
For reference, I create additional plots with this code:
p2 <- ggplot(data = df %>% filter(target=="B"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
p3 <- ggplot(data = df %>% filter(target=="C"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
p4 <- ggplot(data = df %>% filter(target=="D"),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_bar(stat="identity", position="fill", width = .1) +
scale_fill_manual(values = c("A"="yellow", "B" = "green", "C"="red", "D"="blue")) +
coord_flip()
And combine them with this code (but happy to use other ways of combining them if needed).
library("gridExtra")
grid.arrange(p1, p2, p3, p4, ncol = 1, nrow = 4)
It sounds very much as though you simply want to facet by target. No need for stitching multiple plots here.
ggplot(data = df %>% mutate(target = paste('Target:', target)),
aes(x = factor(level), y = value, fill = factor(origin)))+
geom_col(position = "fill", width = 0.9) +
scale_fill_manual(values = c("A"="yellow", "B" = "green",
"C"="red", "D"="blue"), name = 'origin') +
facet_grid(target~., switch = 'y') +
coord_flip() +
theme(strip.placement = 'outside',
strip.background = element_blank(),
axis.title.y = element_blank())
two suggestions_
to remove the offset between axis and bar, set the axis expansion to zero
scale_x_continuous(..., expand = c(0,0))
instead of tediously subsetting the data frame, use the facet_wrap or facet_grid option of ggplot:
ggplot(data = df,
aes(x = factor(level), y = value, fill = factor(origin))) +
## other plot instructions
facet_wrap( ~target)
see ?facet_wrap for various layout options like number of plot columns
3. the vertical spacing between bars will be adjusted to the output dimensions (here: figure height) anyway

How to add legend of boxplot and points in ggplot2?

I have the following to plot a boxplot of some data "Samples" and add points of the "Baseline" and "Theoretical" data.
library(reshape2)
library(ggplot2)
meltshear <- melt(Shear)
samples <- rep(c("Samples"), each = 10)
baseline <- c("Baseline",samples)
method <- rep(baseline, 4)
xlab <- rep(c("EXT.Single","EXT.Multi","INT.Single","INT.Multi"), each = 11)
plotshear <- data.frame(Source = c(method,"theoretical","theoretical","theoretical"),
Shear = c(xlab,"EXT.Multi","INT.Single","INT.Multi"),
LLDF = c(meltshear[,2],0.825,0.720,0.884))
data <- subset(plotshear, Source %in% c("Samples"))
baseline <- subset(plotshear, Source %in% c("Baseline"))
theoretical <- subset(plotshear, Source %in% c("theoretical"))
ggplot(data = data, aes(x = Shear, y = LLDF)) + geom_boxplot(outlier.shape = NA) +
stat_summary(fun = mean, geom="point", shape=23, size=3) +
stat_boxplot(geom='errorbar', linetype=1, width=0.5) +
geom_jitter(data = baseline, colour = "green4") +
geom_jitter(data = theoretical, colour = "red")
I get the following plot but I cannot add the legend to the plot. I want to have the legend showing labels = c("Samples","Baseline","Theoretical") for the boxplot shape, green dot, and red dot respectively.
You could try to add fill into aes.
ggplot(data = data, aes(x = Shear, y = LLDF, fill = Shear))
Or you can see this resource, maybe it is useful http://www.cookbook-r.com/Graphs/

is it possible to create a ggMarginal plot without desaggredating the data?

I have a data frame with some points and their frequency of occurrence and I want to plot points (balls) using their frequency to represent their size. But I also want to use ggMarginal to create the marginal plots. The code bellow creates the marginal without taking in account their frequencies.
library(ggplot2)
df <- data.frame("x" = 1:5, "y" = c(5,8,8,12,10), "f" = c(4,5,8,8,5))
p <- ggplot(df, aes(x=x, y=y, size=f)) + geom_point() + theme_bw()
ggExtra::ggMarginal(p, data=df, type = "histogram")
I don't want to create another data frame with disaggregated data. But it would lead to the right marginals. As presented bellow:
# disaggregated data
df2 <- df[ rep(1:nrow(df), df$f), c("x", "y") ]
p <- ggplot(df2, aes(x=x, y=y)) + geom_point() + theme_bw()
ggExtra::ggMarginal(p, data=df2, type = "histogram")
But even if I try to use both data frames, the resulting marginals still go wrong.
p <- ggplot(df, aes(x=x, y=y, size=f)) + geom_point() + theme_bw()
ggExtra::ggMarginal(p, data=df2, type = "histogram")
Is it possible to create the marginals with disaggregating the data? How?
If 1. is not possible, how to do it anyway, since none of the examples above provided the desired plot?
It can be done with cowplot package.
library(tidyverse)
library(cowplot)
df <- data.frame("x" = 1:5,
"y" = c(5,8,8,12,10),
"f" = c(4,5,8,8,5))
df2 <- df[rep(1:nrow(df), df$f), c("x", "y") ]
p <-
ggplot(df, aes(x=x, y=y, size=f)) +
geom_count() +
theme_bw()
xhist <-
axis_canvas(p, axis = "x") +
geom_histogram(data = df2, aes(x = x), color = 'lightgray')
yhist <-
axis_canvas(p, axis = "y", coord_flip = TRUE) +
geom_histogram(data = df2, aes(x = y), color = 'lightgray') +
coord_flip()
p %>%
insert_xaxis_grob(xhist, grid::unit(1, "in"), position = "top") %>%
insert_yaxis_grob(yhist, grid::unit(1, "in"), position = "right") %>%
ggdraw()

ggplot2 confusion matrix geom_text labeling

I've plotted a confusion matrix (predicting 5 outcomes) in R using ggplot and scales for geom_text labeling.
The way geom_text(aes(label = percent(Freq/sum(Freq))) is written in code, it's showing Frequency of each box divided by sum of all observations, but what I want to do is get Frequency of each box divided by sum Frequency for each Reference.
In other words, instead of A,A = 15.8%,
it should be A,A = 15.8%/(0.0%+0.0%+0.0%+0.0%+15.8%%) = 100.0%
library(ggplot2)
library(scales)
valid_actual <- as.factor(c("A","B","B","C","C","C","E","E","D","D","A","A","A","E","E","D","D","C","B"))
valid_pred <- as.factor(c("A","B","C","C","E","C","E","E","D","B","A","B","A","E","D","E","D","C","B"))
cfm <- confusionMatrix(valid_actual, valid_pred)
ggplotConfusionMatrix <- function(m){
mytitle <- paste("Accuracy", percent_format()(m$overall[1]),
"Kappa", percent_format()(m$overall[2]))
p <-
ggplot(data = as.data.frame(m$table) ,
aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = log(Freq)), colour = "white") +
scale_fill_gradient(low = "white", high = "green") +
geom_text(aes(x = Reference, y = Prediction, label = percent(Freq/sum(Freq)))) +
theme(legend.position = "none") +
ggtitle(mytitle)
return(p)
}
ggplotConfusionMatrix(cfm)
The problem is that, as far as I know, ggplot is not able to do group calculation. See this recent post for similar question.
To solve your problem you should take advantage of the dplyrpackage.
This should work
library(ggplot2)
library(scales)
library(caret)
library(dplyr)
valid_actual <- as.factor(c("A","B","B","C","C","C","E","E","D","D","A","A","A","E","E","D","D","C","B"))
valid_pred <- as.factor(c("A","B","C","C","E","C","E","E","D","B","A","B","A","E","D","E","D","C","B"))
cfm <- confusionMatrix(valid_actual, valid_pred)
ggplotConfusionMatrix <- function(m){
mytitle <- paste("Accuracy", percent_format()(m$overall[1]),
"Kappa", percent_format()(m$overall[2]))
data_c <- mutate(group_by(as.data.frame(m$table), Reference ), percentage =
percent(Freq/sum(Freq)))
p <-
ggplot(data = data_c,
aes(x = Reference, y = Prediction)) +
geom_tile(aes(fill = log(Freq)), colour = "white") +
scale_fill_gradient(low = "white", high = "green") +
geom_text(aes(x = Reference, y = Prediction, label = percentage)) +
theme(legend.position = "none") +
ggtitle(mytitle)
return(p)
}
ggplotConfusionMatrix(cfm)
And the result:

ggplot combining linetype and fill in legend

Do you how the way to combine linetype and fill in legend??
Here is my dataset:
values <- runif(1200, 1, 100)
ind <- as.factor(rep(c(1:6), each=200))
inout <- as.factor(rep(c(1:2), each =600))
df <- data.frame(values,ind,inout)
ggplot(df) +
geom_density(aes(x=values, y=..density..*100, group=interaction(ind,inout), linetype=factor(inout), colour=ind), size =1, alpha=1,na.rm = TRUE) +
geom_density(aes(x=values, y=..density..*100, group=inout, linetype=factor(inout), fill=factor(inout)), alpha=.4)
The original plot:
I would like to combine the legend "factor(inout)" and legend "NA".
Thanks for your help.
Use manual scales and make sure they both have identical names and labels (similar idea here):
ggplot(df, aes(x=values, y=..density..*100, linetype=factor(inout))) +
geom_density(aes(group=interaction(ind, inout), colour=ind),
size=1, alpha=1, na.rm=TRUE) +
geom_density(aes(group=inout, fill=factor(inout)), alpha=.4) +
scale_fill_manual(name = "fancy curves", labels = 1:2, values = c("red", "blue")) +
scale_linetype_manual(name = "fancy curves", labels = 1:2, values = 1:2)

Resources