ggplot changing colors of bar plot - r

I came across this R script that uses ggplot:
# Example data: four rows (read as row names 1-4) by seven types A-G.
dat <- read.table(text = "A B C D E F G
1 480 780 431 295 670 360 190
2 720 350 377 255 340 615 345
3 460 480 179 560 60 735 1260
4 220 240 876 789 820 100 75", header = TRUE)
library(reshape2)
# Keep the row index as a column so melt() can use it as the id variable.
dat$row <- seq_len(nrow(dat))
dat2 <- melt(dat, id.vars = "row")
library(ggplot2)
# Stacked bars: one bar per type, one segment per original row.
ggplot(dat2, aes(x = variable, y = value, fill = row)) +
  geom_bar(stat = "identity") +
  xlab("\nType") +
  ylab("Time\n") +
  # guides(fill = FALSE) is deprecated; "none" hides the fill legend.
  guides(fill = "none") +
  theme_bw()
That was what I've been looking for. However, I could not:
change the default colours (for example, I tried to use the "RdYlGn"
palette)
convert the raw values to frequencies.
Any suggestions?

You could try this:
library(reshape2)
library(dplyr)
library(ggplot2)
# Assumes `dat` already carries the `row` id column
# (dat$row <- seq_len(nrow(dat))), as in the question's script.
dat %>%
  melt(id.vars = "row", variable.name = "grp") %>%
  # Convert each raw value to its share of the group (bar) total.
  group_by(grp) %>%
  mutate(tot = sum(value), fq = value / tot) %>%
  # Drop the grouping once the per-group totals are computed.
  ungroup() %>%
  ggplot(aes(x = grp, y = fq, fill = row,
             label = sprintf("%.2f%%", fq * 100))) +
  geom_bar(stat = "identity") +
  # Centre each percentage label inside its stacked segment.
  geom_text(size = 3, position = position_stack(vjust = 0.5)) +
  xlab("\nType") +
  ylab("Time\n") +
  # guides(fill = FALSE) is deprecated; "none" hides the fill legend.
  guides(fill = "none") +
  scale_fill_distiller(palette = "RdYlGn") +
  theme_bw()

Related

Density Plot in R for 5 variables

I am trying to plot 5 variables. However, I only see one colour, and I am not sure how to display a different colour for each variable.
my data looks like
> data
Lead_1 Lead_2 Lead_3 Lead_4 Lead_5
1 138 135 128 125 130
2 126 130 133 131 128
3 120 121 126 130 129
4 129 126 121 115 110
5 142 153 160 167 179
6 305 299 294 291 283
dim(data)
[1] 8517 5
enter image description here
enter image description here
# Read the lead columns from the CSV file and print them.
data <- read.table("5leads.csv", sep = ",", header = TRUE)
data
# stack() reshapes to long form; the plot uses its `values` and `ind` columns.
dat <- stack(data)
ggplot(dat, aes(x = values, fill = ind)) +
  geom_density(alpha = 0.25)
Try this approach using both color and fill, as mentioned in the comments by @Punintended:
library(ggplot2)
# Long format: `values` holds the measurements, `ind` the source column.
dat <- stack(data)
# Map `ind` to both the outline and the fill so every density gets its own colour.
ggplot(dat, aes(x = values, colour = ind, fill = ind)) +
  geom_density(alpha = 0.15)
Output:
Or this:
#Code 2
dat <- stack(data)
# alpha is an opacity in [0, 1]; the previous value of 1.5 was out of range,
# so use fully opaque fills instead.
ggplot(dat, aes(x = values, fill = ind, color = ind)) +
  geom_density(alpha = 1)
Output:
I tried to use
ggplot(dat, aes(x = values, colour = ind, fill = ind)) +
  geom_density(alpha = 0.15)
and this is what I am getting
enter image description here

ggplot2: geom_bar(); how to alternate order of fill so bars are not lost inside a bar with a higher value?

I am trying to position two bars at the same position on the x-axis, separated by colour (almost as if stacking).
However, instead of stacking, I want one bar simply drawn inside the other, with the bar with the smallest Y-value visible inside the bar with the highest Y-value.
I can get this to work to some extent - but the issue is that one Y-value is not consistently higher across one of the two factors. This leads to bars being 'lost' within a bar with a higher Y-value.
Here is a subset of my dataset and the current ggplot code:
condition hours expression freq_genes
1 tofde 9 up 27
2 tofde 12 up 92
3 tofde 15 up 628
17 tofde 9 down 0
18 tofde 12 down 1
19 tofde 15 down 0
33 tofp 9 up 2462
34 tofp 12 up 786
35 tofp 15 up 298
49 tofp 9 down 651
50 tofp 12 down 982
51 tofp 15 down 1034
65 tos 0 up 27
66 tos 3 up 123
67 tos 6 up 752
81 tos 0 down 1
82 tos 3 down 98
83 tos 6 down 594
# Base plot: gene counts over time, filled by direction of expression.
sf_plot <- ggplot(data = gene_freq,
                  aes(x = hours,
                      y = freq_genes,
                      group = condition,
                      fill = factor(expression,
                                    labels = c("Down",
                                               "Up"))))
sf_plot <- sf_plot + labs(fill = "Expression")
sf_plot <- sf_plot + geom_bar(stat = "identity",
                              width = 2.5,
                              position = "dodge")
sf_plot <- sf_plot + scale_fill_manual(values = c("#9ecae1",
                                                  "#3182bd"))
sf_plot <- sf_plot + xlab("Time (Hours)")
# Breaks belong to the x variable (hours). They were previously derived from
# freq_genes, which only produced usable ticks by coincidence.
sf_plot <- sf_plot + scale_x_continuous(breaks =
                                          seq(min(gene_freq$hours),
                                              max(gene_freq$hours),
                                              by = 3))
sf_plot <- sf_plot + ylab("Gene Frequency")
sf_plot <- sf_plot + facet_grid(. ~ condition, scales = "free")
sf_plot <- sf_plot + theme_bw()
sf_plot <- sf_plot + theme(panel.grid.major = element_blank(),
                           panel.grid.minor = element_blank())
sf_plot <- sf_plot + theme(axis.text.x = element_text(angle = 90))
# Print plot
sf_plot
You can add alpha = 0.5 to your geom_bar() statement to make the bars transparent. This will allow both bars to be seen. Adding that alpha statement and nothing else will produce what you're looking for, to make both overlaid bars visible. The colors, however, make seeing the two different bars challenging.
Another (and maybe better) option is to change the order in which the plot is created. If I recall correctly, ggplot will plot the bars in alphabetical or numeric or factor-level order. Here, your expression values are c("Down", "Up") and "Down" is being plotted first. If you force "Up" to be plotted first, you could resolve this, too.
library(dplyr)
library(ggplot2)
# Rebuild the question's data; the first column of each data row is read as
# row names because the header line has one field fewer than the rows.
dat <-
  read.table(text = "condition hours expression freq_genes
1 tofde 9 up 27
2 tofde 12 up 92
3 tofde 15 up 628
17 tofde 9 down 0
18 tofde 12 down 1
19 tofde 15 down 0
33 tofp 9 up 2462
34 tofp 12 up 786
35 tofp 15 up 298
49 tofp 9 down 651
50 tofp 12 down 982
51 tofp 15 down 1034
65 tos 0 up 27
66 tos 3 up 123
67 tos 6 up 752
81 tos 0 down 1
82 tos 3 down 98
83 tos 6 down 594") %>%
  # Numeric code that puts "up" first, so "Up" becomes the first factor level.
  mutate(expression2 = ifelse(expression == "up", 1, 2))
dat %>%
  ggplot(aes(x = hours, y = freq_genes, group = condition,
             fill = factor(expression2, labels = c("Up", "Down")))) +
  labs(fill = "Expression") +
  # alpha = 0.5 keeps the overlaid bar visible through the one drawn on top.
  geom_bar(stat = "identity", position = "dodge", width = 2.5, alpha = 0.5) +
  scale_fill_manual(values = c("#9ecae1", "#3182bd")) +
  xlab("Time (Hours)") +
  # Breaks belong to the x variable (hours). They were previously derived from
  # freq_genes, which only produced usable ticks by coincidence.
  scale_x_continuous(breaks = seq(min(dat$hours),
                                  max(dat$hours),
                                  by = 3)) +
  ylab("Gene Frequency") +
  facet_grid(. ~ condition, scales = "free") +
  theme_bw() +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.position = "bottom",
        axis.text.x = element_text(angle = 90))
Here, I've created a new column called expression2 that is just a numeric version of expression. I changed the fill variable in aes() to match with those new labels. I left the colors in scale_fill_manual() the same as in your original statement and kept the alpha value. "Down" is being plotted on top of "Up" but in keeping the same colors with the alpha value, both bars are easier to see. You can play with the legend to display "Down" before "Up" if that's necessary.
Note that providing machine readable data goes a long way in allowing others to help you out. Consider using dput() to output your data next time rather than pasting it in. Also note that you can "chain" together ggplot() statements with a +. This makes code much more compact and easier to read.

Add a label or geom_text to scatter plot - R/ggplot2

How can I add a label/geom_text to every point that will include its dataframe index and the xvar and yvar values (for example, the label for the first one will be "Point 1: 500,570")?
Code:
# The stray closing parenthesis after library(ggplot2) was a syntax error.
library(ggplot2)
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
dat <- data.frame(xvar, yvar)
# Open circles (shape 1) at each x/y pair.
ggplot(dat, aes(x = xvar, y = yvar)) +
  geom_point(shape = 1)
Create a variable label and plot it to geom_text():
# Console prompts removed so the snippet can be pasted and run as a script;
# printed output is kept as `#>` comments.
library(ggplot2)
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
dat <- data.frame(xvar,
                  yvar)
# "Point <index>" for every row, numbered in vector order.
dat$point <- sprintf("Point %s",
                     seq_along(xvar))
# Full label text: "Point <index>: <x>, <y>".
dat$labvar <- sprintf("%s: %s, %s",
                      dat$point,
                      dat$xvar,
                      dat$yvar)
dat
#> xvar yvar point labvar
#> 1 500 570 Point 1 Point 1: 500, 570
#> 2 450 600 Point 2 Point 2: 450, 600
#> 3 490 650 Point 3 Point 3: 490, 650
#> 4 560 670 Point 4 Point 4: 560, 670
#> 5 618 660 Point 5 Point 5: 618, 660
#> 6 660 650 Point 6 Point 6: 660, 650
#> 7 700 630 Point 7 Point 7: 700, 630
#> 8 650 580 Point 8 Point 8: 650, 580
#> 9 590 570 Point 9 Point 9: 590, 570
#> 10 550 550 Point 10 Point 10: 550, 550
ggplot(dat, aes(x = xvar,
                y = yvar)) +
  # Small hjust/vjust offsets so the text does not sit on top of the point.
  geom_text(aes(label = labvar),
            hjust = -0.05,
            vjust = 0.05) +
  geom_point(shape = 1)
library(ggplot2)
library(ggrepel)
library(dplyr)
xvar <- c(500, 450, 490, 560, 618, 660, 700, 650, 590, 550)
yvar <- c(570, 600, 650, 670, 660, 650, 630, 580, 570, 550)
# Record each point's position in the vector, then build its label text.
dat <- data.frame(xvar, yvar, xindex = seq_along(xvar))
dat <- dplyr::mutate(
  dat,
  label = paste0("x: ", xvar, ", y:", yvar, ", index: ", xindex)
)
# geom_text_repel() places the labels for the plotted points.
ggplot(dat, aes(x = xvar, y = yvar)) +
  geom_point(shape = 1) +
  geom_text_repel(aes(label = label))

plotting subset of grouped data in ggplot2

I am trying to make a plot that has mean (+/- SD) number (ID = total count per row) of Explorations on the y-axis and then grouped by both pp and type on the x-axis.
That is, I want to generate something that looks like this (hand-drawn and made up graph):
Here is how the dataframe is structured (available here).
pp crossingtype km type ID
0 Complete 80.0 DCC 10
1 Complete 80.0 DCC 4
0 Exploration 80.0 DCC 49
1 Exploration 80.0 DCC 4
0 Complete 144.0 DWC 235
1 Complete 144.0 DWC 22
0 Exploration 144.0 DWC 238
1 Exploration 144.0 DWC 18
1 Exploration 84.0 PC 40
0 Complete 107.0 PC 43
1 Complete 107.0 PC 22
0 Exploration 107.0 PC 389
I want to use ggplot2 and have tried this code:
# Each `+` must end its line: a line that starts with `+` is parsed as a
# separate expression, so the layers would never be added to the plot.
ggplot(expMean, aes(x = as.factor(pp), y = crossingtype,
                    color = factor(type), group = factor(type))) +
  geom_point(shape = 16, cex = 3) +
  geom_smooth(method = lm) +
  facet_grid(. ~ type)
But it gives me this figure (which is not what I am trying to make).
How can I use ggplot2 to make the first plot?
You can do the statistical transformations within ggplot(), but my preference is to process the data first, then plot the results.
library(tidyverse)
# Summarise first, plot second: mean and SD of ID for the Exploration rows
# of every type/pp combination, drawn as a point with a +/- SD range.
expMean %>%
  filter(crossingtype == "Exploration") %>%
  group_by(type, pp) %>%
  summarise(
    Mean = mean(ID),
    SD = sd(ID)
  ) %>%
  ggplot(aes(x = factor(pp), y = Mean)) +
  geom_pointrange(aes(ymin = Mean - SD, ymax = Mean + SD)) +
  facet_wrap(~type) +
  theme_bw()
Is this what you want? This filters the data to only include Exploration, uses ID as the y variable, groups by pp and facets on type
# read_table2() is deprecated as of readr 2.0; read_table() is its replacement
# for whitespace-separated input.
tbl <- read_table(
  "pp crossingtype km type ID
0 Complete 80.0 DCC 10
1 Complete 80.0 DCC 4
0 Exploration 80.0 DCC 49
1 Exploration 80.0 DCC 4
0 Complete 144.0 DWC 235
1 Complete 144.0 DWC 22
0 Exploration 144.0 DWC 238
1 Exploration 144.0 DWC 18
1 Exploration 84.0 PC 40
0 Complete 107.0 PC 43
1 Complete 107.0 PC 22
0 Exploration 107.0 PC 389"
) %>%
  # pp is a category, so make it a factor to get a discrete x-axis.
  mutate(pp = factor(pp))
# Box plots of ID by pp for the Exploration rows, one panel per type.
ggplot(data = tbl %>% filter(crossingtype == "Exploration")) +
  geom_boxplot(aes(x = pp, y = ID)) +
  facet_wrap(~type)
I ran this code on the linked dataset to produce this:
Here's the approach I used. Utilised a colour instead of the double valued x-axis.
Note that I downloaded the data to my working directory, so the read.table command may need to be modified
library(dplyr)
library(ggplot2)
dat <- read.table("figshare.txt")
# Keep only the Exploration crossings and drop factor levels no longer in use.
dat <- droplevels(filter(dat, crossingtype == "Exploration"))
# Mean and standard deviation of ID for every pp/type combination.
dat <- dat %>%
  group_by(pp, type) %>%
  summarise(val = mean(ID),
            SD = sd(ID))
# Points and error bars share the same dodge width so they stay aligned.
ggplot(dat, aes(x = type, y = val, colour = as.factor(pp),
                group = as.factor(pp))) +
  geom_point(size = 3, position = position_dodge(width = 0.2)) +
  geom_errorbar(aes(ymax = val + SD, ymin = val - SD),
                position = position_dodge(width = 0.2), width = 0.2) +
  # The closing parenthesis was missing from the y-axis label.
  labs(y = "Mean # of explorations (+/- SD)", colour = "pp")

Issue with a drawing a vertical line in ggplot for categorical variable x-axis in R

I have the following table. I want to plot a vertical line using the "st_date_wk" column for each county. Please see my code below but it DOES NOT draw the vertical line using the "st_date_wk" column. Cannot figure out what I am doing wrong here.
Any help is appreciated.
Thanks.
dfx1:
YEAR Week Area acc_sum percentage COUNTY st_date_wk
1998 10-1 250 250 12.4 133 10-4
1998 10-2 300 550 29.0 133 10-4
1998 10-3 50 600 58.0 133 10-4
1998 10-4 100 700 75.9 133 10-4
1998 10-5 100 800 100.0 133 10-4
1999 9-3 75 75 22.0 205 10-2
1999 10-1 250 250 12.4 205 10-2
1999 10-2 300 550 29.0 205 10-2
1999 10-3 50 600 58.0 205 10-2
1999 10-4 100 700 75.9 205 10-2
1999 10-5 100 800 100.0 205 10-2
.
.
# Question's original attempt, kept verbatim because it reproduces the
# reported problem (the vertical line is not drawn).
# YEAR and COUNTY become factors; percentage and acc_sum become numeric.
dfx1$YEAR <- as.factor(dfx1$YEAR)
dfx1$COUNTY <- as.factor(dfx1$COUNTY)
dfx1$percentage <- as.numeric(dfx1$percentage)
dfx1$acc_sum <- as.numeric(dfx1$acc_sum)
# Week and st_date_wk are made ordered factors without explicit levels.
dfx1$Week <- factor(dfx1$Week, ordered = T)
dfx1$st_date_wk <- factor(dfx1$st_date_wk,ordered = T)
# Week is then rebuilt with an explicit level order.
# NOTE(review): st_date_wk is not given these levels here.
dfx1$Week <- factor(dfx1$Week, levels=c("6-1","6-2","6-3","6-4","6-5","7-1","7-2","7-3","7-4","7-5","8-1","8-2","8-3","8-4","8-5","9-1","9-2","9-3","9-4","9-5","10-1","10-2","10-3","10-4","10-5","11-1","11-2","11-3","11-4","11-5","12-1","12-2","12-3","12-4","12-5"))
# One line per YEAR of percentage against Week, one panel per COUNTY.
gg <- ggplot(dfx1, aes(Week,percentage, col=YEAR, group = YEAR))
gg <- gg + geom_line()
gg <- gg + facet_wrap(~COUNTY, 2, scales = "fixed")
gg <- gg + theme(text = element_text(size=15), axis.text.x = element_text(angle=90, hjust=1))
# The vline layer references the column as dfx1$st_date_wk inside aes(), and a
# second facet_wrap(~COUNTY) is added on the same line.
gg <- gg + geom_vline(data=dfx1, aes(xintercept = dfx1$st_date_wk), color = "blue", linetype = "dashed", size = 1.0)+ facet_wrap(~COUNTY)
plot(gg)
1: In Ops.ordered(x, from[1]) : '-' is not meaningful for ordered factors
It is a very interesting issue, and I haven't quite figured out why it does not work. However, there is a fix for it.
First, This is the data that is used in the answer:
# Small reproducible sample of dfx1: six rows, all in COUNTY 133, with the
# first line of the text used as column names (header = TRUE).
dfx1 <- read.table(text =
"YEAR Week Area acc_sum percentage COUNTY st_date_wk
1998 10-1 250 250 12.4 133 10-4
1998 10-2 300 550 29.0 133 10-4
1998 10-3 50 600 58.0 133 10-4
1998 10-4 100 700 75.9 133 10-4
1998 10-5 100 800 100.0 133 10-4
1999 9-3 75 75 22.0 133 10-1",
header = TRUE)
Convert types of Year, COUNTY, percentage, and acc_sum:
# YEAR and COUNTY are categorical; percentage and acc_sum are numeric.
factor_cols <- c("YEAR", "COUNTY")
numeric_cols <- c("percentage", "acc_sum")
dfx1[factor_cols] <- lapply(dfx1[factor_cols], as.factor)
dfx1[numeric_cols] <- lapply(dfx1[numeric_cols], as.numeric)
rm(factor_cols, numeric_cols)
Create a vector with the week_levels (more reader-friendly):
# Week labels "<month>-<week>" for months 6 to 12, five weeks each, in
# calendar order ("6-1", "6-2", ..., "12-5").
week_levels <- paste(rep(6:12, each = 5), 1:5, sep = "-")
Transform Week and st_date_wk to an ordered factor with the same levels:
# Give both columns the same ordered levels so their integer codes refer to
# the same position on the week axis.
dfx1$Week <- ordered(dfx1$Week, levels = week_levels)
dfx1$st_date_wk <- ordered(dfx1$st_date_wk, levels = week_levels)
Create labels for scale_x_continuous (a named vector where the names correspond to the breaks of the x-axis):
# Week names, named by their position (1, 2, ...) in week_levels.
labels <- setNames(week_levels, seq_along(week_levels))
Create the visualisation, but instead of using the factors on the x-axis, use their numeric positions. In geom_vline(), use which() to get the number that corresponds to a Week on the x-axis. Then use scale_x_continuous() to label the axis with the weeks.
library(ggplot2)
# Plot Week by its integer position so a numeric xintercept can be used,
# then relabel the axis ticks with the week names.
ggplot(dfx1, aes(x = as.numeric(Week), y = percentage,
                 colour = YEAR, group = YEAR)) +
  geom_line() +
  geom_vline(
    xintercept = which(levels(dfx1$Week) %in% dfx1$st_date_wk),
    linetype = "dashed",
    colour = "blue"
  ) +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  facet_wrap(~COUNTY, 2, scales = "fixed")
This will give you:
EDIT AFTER COMMENT:
library(dplyr)
# For every COUNTY / st_date_wk pair, look up the position of that start week
# among the factor levels and attach it to dfx1 as column `x`.
# The former `[COUNTY == COUNTY]` subset was always TRUE and has been removed;
# group_by() already restricts st_date_wk to the current group.
dfx1 <- merge(dfx1,
              (dfx1 %>%
                 group_by(COUNTY, st_date_wk) %>%
                 summarise(x = which(levels(st_date_wk) %in% st_date_wk),
                           .groups = "drop")),
              by = c("COUNTY", "st_date_wk"), all.x = TRUE
)
ggplot(dfx1, aes(x = as.numeric(Week), y = percentage, col = YEAR, group = YEAR)) +
  geom_line() +
  # xintercept comes from the data, so each facet gets its own county's line.
  geom_vline(data = dfx1, aes(xintercept = x), color = "blue", linetype = "dashed") +
  scale_x_continuous(breaks = seq_along(labels), labels = labels) +
  theme(text = element_text(size = 15), axis.text.x = element_text(angle = 90, hjust = 1)) +
  facet_wrap(~COUNTY, 2, scales = "fixed")
You just have to change the aes in the geom_vline
aes(xintercept = as.numeric(st_date_wk))

Resources