I'm trying to plot bar plot with data points on top in base R.
I'm using base R because it's impossible to create in a simple way texture fill in ggplot (e.g. see here, and ggtexture doesn't allow complex editing).
Using barplot() function and points(), I can do this:
library(tidyverse)
#Sample data
data <- iris %>%
group_by(Species) %>%
summarise(m = mean(Sepal.Length),
se = sd(Sepal.Length)/
sqrt(sum(!is.na(Sepal.Length)))) %>%
ungroup()
chart <- barplot(height=data$m, names=data$Species,
density = c(5, 5, 5),
angle = c(0,45,90),
col = "brown",
width = c(0.1,0.1,0.1),
font.axis = 2,
border = c("black"),
lwd = 2)
points(x = chart,
y = data$m)
However, I would like to create something similar to the below:
iris %>%
group_by(Species) %>%
summarise(m = mean(Sepal.Length),
se = sd(Sepal.Length)/
sqrt(sum(!is.na(Sepal.Length)))) %>%
ungroup() %>%
ggplot(aes(Species, m,
group = Species,
color = Species,
shape = Species)) +
geom_bar(stat = "identity", fill="white",
color="black") +
geom_jitter(
aes(Species, Sepal.Length),
data = iris)
Converting Species to factor using barplot output as labels. When then converted back to numeric using the as.numeric(as.character(x))) approach, the points appear at the right places for each group.
# op <- par(xpd=TRUE)
b <- barplot(with(iris, tapply(Sepal.Length, Species, mean)), density=c(5, 5, 5),
angle=c(0, 45, 90),
col="brown",
width=c(0.1, 0.1, 0.1),
font.axis=2,
border=c("black"),
lwd=2,
ylim=c(0, max(jitter(iris$Sepal.Length)) * 1.15) ## better use dynamic ylim
)
iris$Species.f <- factor(iris$Species, labels=b)
with(iris, points(jitter(as.numeric(as.character(iris$Species.f))),
jitter(Sepal.Length), pch=as.numeric(Species) + 14,
col=as.numeric(Species) + 1, cex=.8))
legend("topleft", title="Species", legend=levels(iris$Species),
pch=seq(levels(iris$Species)) + 14, col=seq(levels(iris$Species)) + 1,
horiz=TRUE, cex=.8)
box(lwd=2)
# par(op)
You can use the jitter function here.
chart <- barplot(height=data$m, names=data$Species,
density = c(5, 5, 5),
angle = c(0,45,90),
col = "brown",
width = c(0.1,0.1,0.1),
font.axis = 2,
border = c("black"),
lwd = 2, las=1,
ylim=c(0,8))
points(x = lapply(rep(chart, each=50), jitter, amount=0.05),
y = iris$Sepal.Length,
col=iris$Species, pch=20)
Related
I try to plot labels above bars with the stat_summary function and a custom function that I wrote. There are three bars and each should be labeled with the letters a:c, respectively. However, instead of putting one label per bar, all three labels are placed on top of each other:
codes <- c ("a", "b", "c")
simple_y <- function(x) {
return (data.frame (y = mean (x) + 1, label = codes))
}
ggplot (iris, mapping = aes (x = Species, y = Sepal.Length)) +
geom_bar (stat = "summary", fun.y = "mean", fill = "blue", width = 0.7, colour = "black", size = 0.7) +
stat_summary (fun.data = simple_y, geom = "text", size = 10)
I do understand why this is not working: each time the simply_y-function is recycled, it sees the whole codes - vector. However, I have no clue how to tell R to separate the three labels. Is it possible to tell R to subsequently use the n_th element of an input-vector when recycling a function?
Does anybody have a good hint?
I would consider doing something like this:
labels <-
tibble(
Species = factor(c("setosa", "versicolor", "virginica")),
codes = c("a", "b", "c")
)
iris %>%
group_by(Species) %>%
summarize(Mean = mean(Sepal.Length)) %>%
ungroup() %>%
left_join(labels, by = "Species") %>%
ggplot(aes(x = Species, y = Mean)) +
geom_col(fill = "blue", width = 0.7, color = "black", size = 0.7) +
geom_text(aes(y = Mean + 0.3, label = codes), size = 6, show.legend = FALSE)
First, you can generate the data frame with means separately, avoiding the need for geom_bar and stat_summary. Then after joining the manual labels/codes to that summarized data frame, it's pretty straightforward to add them with geom_text.
Utilizing the example package code in ggpubr, the ggdotchart function does not create separate segments as is shown in the example, instead there is only a single segment, though the dots seem to be placed in the correct orientation. Does anyone have any tips on what the problem may be? I've thought it may be due to factors, tibbles vs. df, but I haven't been able to determine the problem.
Code:
df <- diamonds %>%
filter(color %in% c("J", "D")) %>%
group_by(cut, color) %>%
summarise(counts = n())
ggdotchart(df, x = "cut", y ="counts",
color = "color", palette = "jco", size = 3,
add = "segment",
add.params = list(color = "lightgray", size = 1.5),
position = position_dodge(0.3),
ggtheme = theme_pubclean()
)
With the expected output of:
But instead I am getting:
Here is a way to get your desired plot without ggpubr::ggdotchart. The issue seems to be that geom_segment does not allow dodging, as discussed here: R - ggplot dodging geom_lines and here: how to jitter/dodge geom_segments so they remain parallel?.
# your data
df <- diamonds %>%
filter(color %in% c("J", "D")) %>%
group_by(cut, color) %>%
summarise(counts = n())
The first step is to expand your data. We will need this when we call geom_line which allows for dodging. I took this idea from #Stibu's answer. We create a copy of df and change the counts column to be 0 in df2. Finally we use bind_rows to create a single data frame from df and df2.
df2 <- df
df2$counts <- 0
df_out <- purrr::bind_rows(df, df2)
df_out
Then I use ggplot to create / replicate your desired output.
ggplot(df_out, aes(x = cut, y = counts)) +
geom_line(
aes(col = color), # needed for dodging, we'll later change colors to "lightgrey"
position = position_dodge(width = 0.3),
show.legend = FALSE,
size = 1.5
) +
geom_point(
aes(fill = color),
data = subset(df_out, counts > 0),
col = "transparent",
shape = 21,
size = 3,
position = position_dodge(width = 0.3)
) +
scale_color_manual(values = c("lightgray", "lightgray")) + #change line colors
ggpubr::fill_palette(palette = "jco") +
ggpubr::theme_pubclean()
There is an extra "group" argument you need!
df <- diamonds %>%
dplyr::filter(color %in% c("J", "D")) %>%
dplyr::group_by(cut, color) %>%
dplyr::summarise(counts = n())
ggdotchart(df, x = "cut", y ="counts",
color = "color", group="color", # here it is
palette = "jco", size = 3,
add = "segment",
add.params = list(color = "lightgray", size = 1.5),
position = position_dodge(0.3),
ggtheme = theme_pubclean()
)
I am new to R and I am trying to generate a series of figures for my clustering algorithm. Right now I am using the following code:
ggplot(df,aes(x=V1,y=V2)) +
geom_point(aes(colour = factor(cluster)),alpha=0.7) +
scale_colour_manual(values=c("purple", "green","orange","black")) +
ggtitle("Visualizing users and their K-Means Euclidean Clusters")
As you can see I have four clusters which are results of k-means. Now I want to show some text over my plot. For example in the following image:
I need the mean of each cluster (or any text like cluster labels) shown over it in this figure (for example 0.5 over the green area). I guess I should geom_text for this purpose but unfortunately I have no idea how. Any help is much appreciated.
Thanks
Try this
library(ggplot2)
cl <- kmeans(iris[, 1:2], 3, nstart = 25)
ggplot(transform(iris[, 1:2], cl = factor(cl$cluster)),
aes(x = Sepal.Length, y = Sepal.Width, colour = cl)) +
geom_point() +
scale_colour_manual(values=c("purple", "green","orange")) +
annotate("point", x = cl$centers[, 1], y = cl$centers[, 2], size = 5, colour = c("purple", "green","orange")) +
annotate("text", x = cl$centers[, 1], y = cl$centers[, 2], font = 2, size = 10,
label = apply(cl$centers, 1, function(x) paste(sprintf('%02.2f', x), collapse = ",") ),
colour = c("purple", "green","orange") )
library(dplyr);library(purrr);library(ggplot2)
data.plot %>%
group_by(Class) %>%
do(model = kmeans(.[c('x', 'y')], 1)) %>%
ungroup() %>% group_by(Class) %>% do(map_df(.$model, broom::tidy)) %>% ungroup() %>%
select(Class,x,y ) %>% data.frame() %>% dplyr::rename(x.center=x,y.center=y,Class=Class) ->label.data
say I have the means of two datasets that I want to plot as barplots with error bars next to each other in ggplot2, or base
Each dataset consists of a matrix of numbers
10 20 12
10 20 12
10 20 12
which is then transformed into a mean vector of for example 3 elements
10 20 12
What I want to do is to take both mean vectors and plot them as a bar plot where the first element of one is besides the first element of the other
Dataset1Element1Bar-Dataset2Element1Bar Dataset1Element2Bar-Dataset2Element2Bar etc
Give each bar an error bar, say of standard deviation. I know I can calculate it through sd but I'm not sure how to stick it into the graph in the proper form
And lastly color them by their element number (ie Element 1)
I have the code to do one dataset but I'm not sure where to go from there.
result<-barplot(bardata, main="Mean Coverage", names.arg=namePosTargetGroup, ylab="mean Magnitude", cex.names=.4,col=c("red","blue","green"))
legend(10,legend=c("Group1","Group2","Group3"),fill = c("red","blue","green"))
A lot of what I look up gives the answer for one thing or another but its difficult to figure out how to combine them together.
I would generally not recommend plotting just a bar chart with error bars. There are many other ways to plot your data, which reveal the data and its structure a lot better.
Especially if you just have very few cases, plotting means with bars is not good. A good explanation can be found here: Beyond Bar and Line Graphs: Time for a New Data Presentation Paradigm
I find it difficult to give you a good solution, since I don't know your research-question. Knowing what you actually want to show or emphasis would make things easier.
I will give you two suggestions, one for a small dataset, one for a bigger one. All of them are created with ggplot2. I'm not coloring them by their "element number" but by their origin ("dataset 1/2"), since I find it easier to accomplish a proper graphic this way.
Small Dataset
Use geom_jitter to display all your cases, avoiding overplotting.
# import hadleyverse
library(magrittr)
library(dplyr)
library(tidyr)
library(ggplot2)
# generate small amount of data
set.seed(1234)
df1 <- data.frame(v1 = rnorm(5, 4, 1),
v2 = rnorm(5, 5, 1),
v3 = rnorm(5, 6, 1),
origin = rep(factor("df1", levels = c("df1", "df2")), 5))
df2 <- data.frame(v1 = rnorm(5, 4.5, 1),
v2 = rnorm(5, 5.5, 1),
v3 = rnorm(5, 6.5, 1),
origin = rep(factor("df2", levels = c("df1", "df2")), 5))
# merge dataframes and gather in long format
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin)
# plot data
ggplot(pdata, aes(x = id, y = variable, fill = origin, colour = origin)) +
stat_summary(fun.y = mean, geom = "point", position = position_dodge(width = .5),
size = 30, shape = "-", show_guide = F, alpha = .7) + # plot mean as "-"
geom_jitter(position = position_jitterdodge(jitter.width = .3, jitter.height = .1,
dodge.width = .5),
size = 4, alpha = .85) +
labs(x = "Variable", y = NULL) + # adjust legend
theme_light() # nicer theme
"Big" Dataset
If you have more datapoints, you can use geom_violin to summarise them.
set.seed(12345)
df1 <- data.frame(v1 = rnorm(50, 4, 1),
v2 = rnorm(50, 5, 1),
v3 = rnorm(50, 6, 1),
origin = rep(factor("df1", levels = c("df1", "df2")), 50))
df2 <- data.frame(v1 = rnorm(50, 4.5, 1),
v2 = rnorm(50, 5.5, 1),
v3 = rnorm(50, 6.5, 1),
origin = rep(factor("df2", levels = c("df1", "df2")), 50))
# merge dataframes
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin)
# plot with violin plot
ggplot(pdata, aes(x = id, y = variable, fill = origin)) +
geom_violin(adjust = .6) +
stat_summary(fun.y = mean, geom = "point", position = position_dodge(width = .9),
size = 6, shape = 4, show_guide = F) +
guides(fill = guide_legend(override.aes = list(colour = NULL))) +
labs(x = "Variable", y = NULL) +
theme_light()
Version with mean and sd
If you insist on plotting the mean with standard deviation, here is how it could be done.
# merge dataframes and compute limits for sd
pdata <- bind_rows(df1, df2) %>%
gather(id, variable, -origin) %>%
group_by(origin, id) %>% # group data for limit calculation
mutate(upper = mean(variable) + sd(variable), # upper limit for error bar
lower = mean(variable) - sd(variable)) # lower limit for error bar
# plot
ggplot(pdata, aes(x = id, y = variable, fill = origin)) +
stat_summary(fun.y = mean, geom = "bar", position = position_dodge(width = .9),
size = 3) +
geom_errorbar(aes(ymin = lower, ymax = upper),
width = .2, # Width of the error bars
position = position_dodge(.9))
I am trying hard to figure out to add color gridient label to my plot ( link to previous question). Sorry for keep asking but this is maximum I could push me forward.
#data 1:
lab1 <- 1:10
group <- rep(1:3, each = length (lab1))
label <- rep(lab1, 3)
avar <- rep(c(0, 1, 4, 5, 6, 8, 10, 11, 12, 13), 3)
myd <- data.frame (group, label, avar)
# data 2
fillcol <- rep(rnorm(length(lab1)-1, 0.5, 0.2), 3)
group1 <- rep(1:3, each = length(fillcol)/3)
# this variable will be used to fill color in bars
filld <- data.frame(group1, fillcol)
colbarplot <- function(group) {
myd1 <- myd[myd$group == group,]
filld1 <- filld[filld$group1 == group,]
blues <- colorRampPalette(c("yellow", "blue"))
barplot(as.matrix(diff(myd1$avar)), horiz=T,
col=blues(10)[10* filld1$fillcol],
axes=F, xlab="Mark")
axis(1, labels=myd$label, at=myd$avar)
axis(3, labels=myd$avar, at=myd$avar)
}
par(mfrow = c(4, 1))
par(mar = c(2.5, 1, 2.5, 1))
sapply(unique(myd$group),function(x) colbarplot(x))
Now I am struggling to add legend, sorry for this new user.
blues <- colorRampPalette(c("yellow", "blue"))
colors <- blues(10)
count <- length(colors)
m <- matrix(1:count, count, 1)
m1 <- m
image(m, col=colors, ylab="", axes=FALSE)
I produced color scale that is not what I am expecting, I am trying plot a smaller legend, less in width and height, along with original scale use in color coding.
Here are some unsuccessful trials for labeling:
colab <- c(round (min(filld$fillcol), 2), round(max(filld$fillcol), 2))
colpos <- c(0.33 * max(mapdat$position),0.66 * max(mapdat$position))
axis(1, labels=colab, at=colpos)
Getting a decent legend is much easier with ggplot2
library(plyr)
myd$group <- factor(myd$group)
gData <- ddply(myd, .(group), function(x){
data.frame(delta = diff(x$avar), label = paste(head(x$label, -1), tail(x$label, -1), sep = "-"))
})
gData$FillCol <- rnorm(nrow(gData))
ggplot(gData, aes(x = group, y = delta, fill = FillCol, label = label)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_gradient(low = "blue", high = "yellow") + geom_text(position = "stack")