Looking to plot grouped bar plots
data:
structure(list(Main = c(0.468893939007605, 0.0629924918425918,
0.561410474480681), Total = c(0.388090040532888, -0.0706047151157143,
0.483298239353565)), class = "data.frame", row.names = c(NA,
-3L))
intended output should look like this:
My current plot code which does not make sense to me:
barplot(main_total$Main, main_total$Total)
ggplot would be preferred, but I have trouble coding it. Any help will be appreciated. Thank you.
It's because barplot prefers transposed matrices.
m <- as.matrix(main_total)
Use t to transpose the matrix.
# Round the y limits outward to the next 0.1 so that no bar is clipped.
# round() can round the maximum down (e.g. 0.54 -> 0.5) or the minimum up.
y_limits <- c(floor(min(m) * 10), ceiling(max(m) * 10)) / 10
# Draw one cluster of bars per row of `m` (i.e. per column of t(m)).
# `b` receives the x midpoints of the bars, one column per cluster.
b <- barplot(t(m), beside = TRUE, ylab = "Value", ylim = y_limits, col = 3:4)
# Label each cluster at the mean x position of its bars.
axis(1, at = colMeans(b), labels = c("H", "M", "S"))
legend("topleft", legend = c("Main", "Total"), fill = 3:4)
box()
Gives
Since you didn't include the grouping variable in your example data, I have made one up below, but you'll get the idea.
df <- structure(list(Main = c(0.468893939007605, 0.0629924918425918,
0.561410474480681), Total = c(0.388090040532888, -0.0706047151157143,
0.483298239353565)), class = "data.frame", row.names = c(NA,
-3L))
# Label each row with the group it belongs to.
df$Group <- c("H", "M", "S")

# ggplot2 needs long-format data: one row per (Group, variable) pair.
library(reshape2)
long_df <- melt(df, id.vars = "Group")
long_df

# Dodged bars: one bar per variable, side by side within each group.
library(ggplot2)
ggplot(long_df, aes(x = Group, y = value)) +
  geom_bar(aes(fill = variable), stat = "identity", position = "dodge")
Related
I was trying to use ggplot to make a plot with following needs.
Use points to specify subjects.
Use color to specify models. I have 6 models, so each subject should appear 6 times on the plot.
The plot is expected to look something like this:
I can use color to specify models but cannot find a way to specify subjects as point shapes.
Example data
structure(list(subject = c("S1", "S8", "S3", "S9"), alphamean = c(0.224104019995071,
0.195354811041001, 0.5675953626788, 0.491972414993715), lambdamean = c(0.35985383877637,
0.268124038994992, 0.92122181060701, 0.43561465728315), model = c("a",
"b", "c", "d")), row.names = c(NA, -4L), class = c("data.table",
"data.frame"))
My attempts
data %>%
ggplot(aes(x = alphamean, y = lambdamean)) +
geom_point(aes(color=model,shape=subject)) +
scale_shape_manual(values = paste0('S',1:40))
You could use geom_text instead of geom_point:
library(ggplot2)
library(magrittr)
data %>%
ggplot(aes(x = alphamean, y = lambdamean)) +
geom_text(aes(color=model,label=subject))
Hi, I have a series of time intervals stored in a data frame df.
replicateID timeA timeB mean
1 60 80 70
2 10 70 40
3 25 35 30
I am trying to plot a dumbbell:
library(ggplot2)
devtools::install_github("hrbrmstr/ggalt")
library(ggalt)
library(dplyr)
df <- arrange(df, timeA)
#calculate mean middle point between two values
df$mean <- rowMeans(df[2:3])
#add factor levels
df <- mutate(df, rep=factor(replicateID, levels=rev(replicateID)))
gg <- ggplot(df, aes(x=timeA, xend=timeB, y=rep))
gg <- gg + geom_dumbbell(colour="#a3c4dc",
point.colour.l="#0e668b",
point.colour.r="#0000ff",
point.size.l=2.5,
point.size.r=2.5)
gg <- gg + geom_point(aes(y = df$mean), color = "red", linetype = "dotted")
The dumbbell plot gets plotted correctly till a certain point, however, I would like to have the middle point of each pair of values displayed on the graph too and connect all the middle values with a line.
I tried to do that by adding geom_point but this doesn't work.
Any suggestion?
First of all, the parameter/aesthetic names have changed, so don't get confused. If you don't update you'll have to use your parameter names. But for geom_segment and geom_point it will be like below:
#data
df=structure(list(replicateID = c(2, 3, 1), timeA = c(10, 25, 60
), timeB = c(70, 35, 80), mean = c(40, 30, 70), rep = structure(3:1, .Label = c("1",
"3", "2"), class = "factor"), time_mean = c(40, 30, 70)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -3L), groups = structure(list(
rep = structure(1:3, .Label = c("1", "3", "2"), class = "factor"),
.rows = list(3L, 2L, 1L)), row.names = c(NA, -3L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE))
# Add the dumbbell layer using the newer ggalt argument names
# (colour_x / colour_xend, size_x / size_xend).
# NOTE(review): `gg` is assumed to be the base plot created earlier with
# ggplot(df, aes(x=timeA, xend=timeB, y=rep)) -- confirm before running.
gg <- gg + geom_dumbbell(colour="#a3c4dc",
colour_x = "#0e668b",
colour_xend="#0000ff",
size_x=2.5,
size_xend=2.5,
linetype = "dotted")
# Overlay the midpoint of each interval: x is overridden with the `mean`
# column, while y is taken from the plot-level aesthetics.
gg <- gg + geom_point(aes(x = mean), color = "red") #draws the points
# group=1 puts every row in a single group so that one path joins all the
# midpoints instead of one path per y level.
gg+geom_path(aes(x=mean,group=1)) #draws the line
You could also try to use geom_line with the same parameters. Here you would get a connection from ID 1-3 which might not be what you're looking for. I'm not sure from your question.
PS: in the future please consider posting the output from dput(df) as this is easier for others to read into an r-session
Using the dumbbell R package
##Load some libraries
library(tidyverse)
library(ggplot2)
library(rlang)
library(utils)
library(data.table)
library(dumbbell)
## Reformat the data: add a constant "key" column ("Time") and a "diff"
## column holding timeA - timeB.
df2<-df %>% mutate("key"="Time") %>% mutate("diff"=timeA-timeB)
## Sort by descending timeA; this order is only used to define factor levels.
df3<-df2%>% arrange(desc(timeA))
## Turn replicateID into a factor whose levels follow the descending-timeA order.
df2$replicateID<-factor(df2$replicateID, df3$replicateID)
## Plot: draw the dumbbells, then overlay the mean of each pair as red points
## and join them with a single path (group=1).
## NOTE(review): the meaning of pt_val, delt and textsize is defined by the
## dumbbell package and is not visible here -- check its documentation.
dumbbell::dumbbell(df2,key="key",id="replicateID", column1 = "timeA", column2="timeB", lab1 = "timeA", lab2="timeB", pt_val = 1, delt=1, textsize = 3) + geom_point(aes(x = df2$mean, y=df2$replicateID), color = "red") +
geom_path(aes(x=df2$mean,y=df2$replicateID,group=1))
I don't have enough reputation points to embed the image, so here is a link to the plot:
dumbbell R package
I have an issue with the following data frame, although I followed standard examples for line plots.
As shown in the plot, the dummy time courses of two of the three subjects are plotted jointly instead of being plotted separately.
library(tidyverse)
blub <- structure(list(time = seq(0,10,by=1),
sub1 = seq(10,20,by=1),
sub2 = seq(20,30,by=1),
sub3 = seq(30,40,by=1)),
row.names = c(NA, -11L),
class = "data.frame")
bluba <- gather(data = blub, key=subjects, value=value, 2:ncol(blub))
basic <- c("N","P","P")
statusArray <- rep(basic,each=11)
bluba$status <- statusArray
print(ggplot(data=bluba,
aes(x=time,y=value, color=status)) +
geom_line())
Any comments would be appreciated!
Add group to aes to specify the connection of line
print(ggplot(data=bluba,
aes(x=time,y=value, color=status, group = subjects)) +
geom_line())
You can use linetype = subjects
bluba %>%
ggplot(aes(x=time,y=value, color=status, linetype=subjects)) +
geom_line()
I was attempting to overlay two plots using ggplot2, I can graph them individually, but I want to overlay them to show a comparison. They have the same y axis. The y axis is a score from 0 to 100, the x axis is a specific date in the month (from a range of 3 weeks)
Here is what I have tried:
data <- read.table(text = Level5avg, header = TRUE)
data2 <- read.table(text = Level6avg, header = TRUE)
colnames(data) = c("x","y")
colnames(data2) = c("x","y")
ggplot(rbind(data.frame(data2, group="a"), data.frame(data, group="b")), aes(x=x,y=y)) +
stat_density2d(geom="tile", aes(fill = group, alpha=..density..), contour=FALSE) + scale_fill_manual(values=c("b"="#FF0000", "a"="#00FF00")) + geom_point() + theme_minimal()
When I do this, I get a strange graph that has several dots, but I'm not sure if my code is right, since I can't distinguish the data. I want to add 3 more (small) datasets to the plot, if it is possible. If it is possible, how do I make it into a line graph in order to distinguish the datasets?
Note: I was under the impression ggplot would work for my purposes because of this post (and several other posts on this site advised using ggplot as opposed to Lattice). I'm not sure if what I want is possible, so I came here.
Data sets:
dput(data) structure(list(x = structure(1:6, .Label = c("10/27/2015",
"10/28/2015",
"10/29/2015", "10/30/2015", "10/31/2015", "11/1/2015"), class = "factor"),
y = c(0, 12.5, 0, 0, 11, 43)), .Names = c("x", "y"), class = "data.frame",
row.names = c(NA, -6L))
dput(data2) structure(list(x = structure(1:3, .Label
=c("10/28/2015","10/31/2015",
"11/1/2015"), class = "factor"), y = c(0, 0, 41.5)), .Names = c("x",
"y"), class = "data.frame", row.names = c(NA, -3L))
I've now managed to get my overlay, but is there a way to organize the horizontal axis? The dates have no order.
It seems to me that the answer that you are basing your plots on uses density plots that are not useful for your data. If you are just looking for some line plots with points, you could do the following (note I created a dataframe outside of the ggplot() call to make it look a little cleaner):
# Tag each data set with its group label, then stack them (data2 first).
data2$group <- "a"
data$group <- "b"
df <- rbind(data2, data)

# Parse the month/day/year labels into Date objects so that the x axis is
# ordered chronologically.
df$x <- as.Date(df$x, format = "%m/%d/%Y")

# One line plus points per group, coloured by group.
ggplot(df, aes(x = x, y = y, group = group, color = group)) +
  geom_line() +
  geom_point() +
  theme_minimal()
Note that by converting the date, the dates end up in the right order all on their own.
I have data frame (df) with 3 columns e.g.
NUMERIC1: NUMERIC2: GROUP(CHARACTER):
100 1 A
200 2 B
300 3 C
400 4 A
I want to group NUMERIC1 by GROUP(CHARACTER), and then calculate mean for each group.
Something like that:
mean(NUMERIC1): GROUP(CHARACTER):
250 A
200 B
300 C
Finally, I'd like to draw a bar chart using ggplot2, with GROUP(CHARACTER) on the x axis and mean(NUMERIC1) on the y axis.
It should look like:
I used
mean <- tapply(df$NUMERIC1, df$GROUP(CHARACTER), FUN=mean)
but I'm not sure if it's OK, and even if it is, I don't know what I'm supposed to do next.
This is what stat_summary(...) is designed for:
# Give the columns short, syntactic names so they can be used inside aes().
colnames(df) <- c("N1", "N2", "GROUP")
library(ggplot2)
# stat_summary() computes mean(N1) within each GROUP and draws it as a bar.
# `fun` replaces the `fun.y` argument, which was deprecated in ggplot2 3.3.0.
ggplot(df) +
  stat_summary(aes(x = GROUP, y = N1), fun = mean, geom = "bar",
               fill = "lightblue", colour = "grey50")
Try something like:
res <- aggregate(NUMERIC1 ~ GROUP, data = df, FUN = mean)
ggplot(res, aes(x = GROUP, y = NUMERIC1)) + geom_bar(stat = "identity")
data
df <- structure(list(NUMERIC1 = c(100L, 200L, 300L, 400L), NUMERIC2 = 1:4,
GROUP = structure(c(1L, 2L, 3L, 1L), .Label = c("A", "B",
"C"), class = "factor")), .Names = c("NUMERIC1", "NUMERIC2",
"GROUP"), class = "data.frame", row.names = c(NA, -4L))
I'd suggest something like:
# Imports: data.table, which makes it convenient to apply a function to each
# part of a data frame, split by unique value, and ggplot2 for plotting.
library(data.table)
library(ggplot2)
# Convert df to a data.table. It remains a data.frame, so any function that
# works on a data.frame still works here.
data <- as.data.table(df)
# For each unique value of GROUP, compute the mean of the NUMERIC1 values in
# that subset. The result is a data.table with the columns GROUP and mean_value.
data <- data[, list(mean_value = mean(NUMERIC1)), by = "GROUP"]
# mean_value already holds the bar heights, so stat = "identity" is required:
# the default stat = "count" rejects a y aesthetic and fails.
plot <- ggplot(data, aes(GROUP, mean_value)) + geom_bar(stat = "identity")
# The chart is stored in `plot`; print it (or type `plot`) to display it.
Here's a solution using dplyr to create the summary. In this case, the summary is created on the fly within ggplot, but you can also create a separate summary data frame first and then feed that to ggplot.
library(dplyr)
library(ggplot2)
ggplot(df %>% group_by(GROUP) %>%
summarise(`Mean NUMERIC1`=mean(NUMERIC1)),
aes(GROUP, `Mean NUMERIC1`)) +
geom_bar(stat="identity", fill=hcl(195,100,65))
Since you're plotting means rather than counts, it might make more sense to use points rather than bars. For example:
ggplot(df %>% group_by(GROUP) %>%
summarise(`Mean NUMERIC1`=mean(NUMERIC1)),
aes(GROUP, `Mean NUMERIC1`)) +
geom_point(pch=21, size=5, fill="blue") +
coord_cartesian(ylim=c(0,310))
Why ggplot when you could do the same with your own code and barplot:
barplot(tapply(df$NUMERIC1, df$GROUP, FUN=mean))