ggplot2 barplot - r

I have small data.frame which I managed to plot in ggpot. Since ggplot does not support patterns , I graph the data with colors. I would appreciate a better presentation than the one I did in terms of coloring and design or even black and white. Also, I couldn't change the legend title
My data:
structure(list(Type = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
9L, 8L), .Label = c("Type A+Mod ", "Type B+C+D", "Type E+A",
"Type G+W", "Type H & Mod C", "Type Operation", "Type Production",
"Type Sales", "X, T, S"), class = "factor"), X2011 = structure(c(7L,
4L, 6L, 5L, 9L, 8L, 3L, 1L, 2L), .Label = c("$1,517.00", "$1,579.00",
"$1,727.00", "$105,352.00", "$126,787.00", "$141,647.00", "$187,506.00",
"$24,968", "$30,397.00"), class = "factor"), X2012 = structure(c(7L,
6L, 5L, 4L, 8L, 9L, 3L, 2L, 1L), .Label = c("$1,232.00", "$1,406.00",
"$1,963.00", "$109,533.00", "$125,795.00", "$166,251.00", "$172,238.00",
"$18,040.00", "$23,541.00"), class = "factor"), X2013 = structure(c(8L,
4L, 3L, 2L, 7L, 6L, 5L, 1L, 9L), .Label = c("$1,324.00", "$102,216.00",
"$125,101.00", "$198,769.00", "$2,088.00", "$20,070.00", "$21,094.00",
"$243.91", "$997.00"), class = "factor")), .Names = c("Type",
"X2011", "X2012", "X2013"), class = "data.frame", row.names = c(NA,
-9L))
The code:
colnames(DF)<-c("Type","2011","2012","2013")
dfMelt<-melt(DF, id.var="Type")
graph<- ggplot(dfMelt,aes(x=Type, y=value))+
geom_bar(aes(fill=variable),stat="identity", position="dodge",linetype=1,colour="red")+
#Tried this for black and white-Seems not working
#scale_colour_grey(start = 0, end = .9) +
theme_bw()+
theme(panel.background = element_rect(fill="grey98"))+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
theme(axis.title.x=element_text(size=14,face="bold",vjust=-0.2),
axis.title.y=element_text(size=14,face="bold",vjust=0.15))+
theme(axis.ticks.x = element_line(size = 2))+
scale_x_discrete(expand=c(0.01,0))+
scale_y_discrete(expand=c(0.004,0.5))
print(graph)

Your values are being treated as factors rather than numbers, so the chart doesn't make sense. So first you want to convert them to numeric values:
DF <- cbind(DF[1],sapply(DF[-1], function(x) as.numeric(gsub("[$,]","",x))))
Then you can proceed as before, but obviously changing the discrete scale expansion on the y axis to a continuous one which also formats the values as dollars and using the Blues Brewer palette with scale_fill_brewer which works well in black and white and in colour. You can set the legend title when setting the palette here too.
dfMelt<-melt(DF, id.var="Type")
graph<- ggplot(dfMelt,aes(x=Type, y=value))+
geom_bar(aes(fill=variable),stat="identity", position="dodge",linetype=1,colour="red")+
theme_bw()+
theme(panel.background = element_rect(fill="grey98"))+
theme(axis.text.x = element_text(angle = 45, hjust = 1))+
theme(axis.title.x=element_text(size=14,face="bold",vjust=-0.2),
axis.title.y=element_text(size=14,face="bold",vjust=0.15))+
theme(axis.ticks.x = element_line(size = 2))+
scale_x_discrete(expand=c(0.01,0))+
scale_y_continuous("Price",labels=dollar)+
scale_fill_brewer("Year", palette="Blues")
Which gives:

First of all, your data is not in the correct format. Now it's a factor-variable and it needs to be numeric. Moreover remove the comma's (for the thousands) and the $ valuta-sign. I also cleaned up the ggplot code.
DF <- cbind(DF[1],sapply(DF[-1], function(x) as.numeric(gsub("[$,]","",x)))) # copied from James
colnames(DF)<-c("Type","2011","2012","2013")
dfMelt <- melt(DF, id.var="Type")
ggplot(dfMelt,aes(x=Type, y=value)) +
geom_bar(aes(fill=variable),stat="identity", position="dodge") +
theme_bw() +
theme(panel.background = element_rect(fill="grey98"),
axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.x=element_text(size=14,face="bold",vjust=-0.2),
axis.title.y=element_text(size=14,face="bold",vjust=0.15),
axis.ticks.x = element_line(size = 2)) +
scale_y_continuous("Price (in dollars)") +
scale_fill_discrete("Year")
The result:

Related

coord_polar: moving labels only for the outer ring to the outside

I can only find a solution for this in relation to single-level pie charts. I have the chart below and some of the labels on the outer ring don't fit well.
I'd like to keep the labels for the inner ring where they are but move the labels for the second ring to the outside (or at least the ones that don't fit).
Here is my code
ggplot(usage.may, aes(x = Level, y = Percent, fill = Subcategory, label = Label)) +
geom_bar(stat = "identity", color='white', show.legend = FALSE) +
geom_text(aes(label = paste0(Label, "\n", Value, " (", Per_label, ")")),
size = 2.5,
colour = "white",
check_overlap = TRUE,
position = position_stack(vjust = 0.5)) +
coord_polar('y') +
scale_fill_manual(values = c("C01" = "#404688FF",
"C011" = "#3B528BFF","C012" = "#3B528BFF","C013" = "#3B528BFF","C014" = "#3B528BFF",
"C02" = "#287C8EFF",
"C021" = "#287C8EFF",
"C03" = "#27AD81FF",
"C031" = "#35B779FF","C032" = "#35B779FF","C033" = "#35B779FF",
"C04" = "#8FD744FF",
"C041" = "#8FD744FF","C042" = "#8FD744FF")) +
labs(title = "Electricity Usage May 2022") + ylab("") + xlab("") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
axis.text= element_blank(),
axis.ticks= element_blank())
And sample data
structure(list(Level = structure(c(2L, 3L, 3L, 3L, 3L, 2L, 3L,
2L, 3L, 3L, 3L, 2L, 3L, 3L, 1L), levels = c("0", "1", "2"), class = "factor"),
Category = structure(c(2L, 2L, 2L, 2L, 2L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 1L), levels = c("C00", "C01", "C02",
"C03", "C04"), class = "factor"), Subcategory = structure(c(2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
1L), levels = c("C00", "C01", "C011", "C012", "C013", "C014",
"C02", "C021", "C03", "C031", "C032", "C033", "C04", "C041",
"C042"), class = "factor"), Colour = structure(c(2L, 3L,
3L, 3L, 3L, 4L, 5L, 6L, 7L, 7L, 7L, 8L, 9L, 9L, 1L), levels = c("0",
"1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"),
Label = c("Cafe (R1 & R2) ", "Non-checked ", "Spider Monkey ",
"Signing-in Cabin", "Solar (cafe)", "Vet Room", "Non-checked",
"Butchery", "Non-checked", "Solar (lynx)", "Solar (butchery)",
"Tiger Block", "Farm", "Non-checked", ""), Value = c(5323L,
921L, 2611L, 34L, 1791L, 534L, 534L, 8479L, 6689L, 1371L,
419L, 3596L, 87L, 3247L, 0L), Percent = c(30L, 5L, 15L, 0L,
10L, 3L, 3L, 47L, 37L, 8L, 2L, 20L, 2L, 18L, 0L), Per_label = c("30%",
"5%", "15%", "0%", "10%", "3%", "3%", "47%", "37%", "8%",
"2%", "20%", "2%", "18%", "0%")), row.names = c(NA, -15L), class = "data.frame")
Thanks in advance
One option would be to use an ifelse to shift the x position of the labels for the outer ring. Additionally I use an ifelse + scale_color_identity to conditionally set the font color of the labels:
library(ggplot2)
ggplot(usage.may, aes(x = Level, y = Percent, fill = Subcategory, label = Label)) +
geom_bar(stat = "identity", color='white', show.legend = FALSE) +
geom_text(aes(label = paste0(Label, "\n", Value, " (", Per_label, ")"),
x = as.numeric(Level) + ifelse(Level == 2, 1, 0),
color = ifelse(Level == 2, "black", "white")),
size = 2.5,
check_overlap = TRUE,
position = position_stack(vjust = 0.5)) +
coord_polar('y') +
scale_fill_manual(values = c("C01" = "#404688FF",
"C011" = "#3B528BFF","C012" = "#3B528BFF","C013" = "#3B528BFF","C014" = "#3B528BFF",
"C02" = "#287C8EFF",
"C021" = "#287C8EFF",
"C03" = "#27AD81FF",
"C031" = "#35B779FF","C032" = "#35B779FF","C033" = "#35B779FF",
"C04" = "#8FD744FF",
"C041" = "#8FD744FF","C042" = "#8FD744FF")) +
scale_color_identity() +
labs(title = "Electricity Usage May 2022") + ylab("") + xlab("") +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
plot.title = element_text(hjust = 0.5),
panel.border = element_blank(),
panel.background = element_blank(),
strip.background = element_blank(),
axis.text= element_blank(),
axis.ticks= element_blank())

Removing "False"-condition scales::dollar labels on ifelse within geom_label

trying to establish individual bar data labels ONLY if the value is negative. I was able to do it fine for a variable that comprised simple integers, but for a variable that needs to be formatted as dollar with the thousands separator, I can't seem to get rid of the "NA" label.
DolSumPlot <- ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(aes(label=scales::dollar(ifelse(DolSums$x < 0, DolSums$x,NA)),
y = DolSums$x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")
Data:
DolSums = structure(list(Group.1 = c((names)), Group.2 = structure(c(4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Radio",
"Video", "Engineering", "800Mhz", "PSSRP", "Other"), class = "factor"),
x = c(4646, 16008.5, 48793.1, 4040, 14468.25, 13332, 1565.5,
6060, 6549.85, 2929, 4444, 3257.25, 5904, 2029.5, 3321, 6767,
8105.25, 8105.25, 8130.5, 3131, 5075.25, 3383.5, 4418.75,
23381.5, 1363.5, -2323, 29133.45, 2550.25, 505, 26042.85,
35203.55, 35940.85, 1641.25, 45066.2, 37541.7, 606, 45439.9
)), .Names = c("Group.1", "Group.2", "x"), row.names = c(NA,
-37L), class = "data.frame")
You can do this by using the data argument in geom_label and subsetting only rows with negative x. Also note that since you already have DolSums as input, there is no need to write DolSums$x. Instead, use column name to refer to a specific column directly:
library(ggplot2)
ggplot(data = DolSums, aes(x = Group.1, fill = Group.2)) +
geom_bar(aes(weight = x), position = position_stack(reverse = TRUE)) +
coord_flip() +
labs(title = "Dollars Billed by Technician and Shop, Between 02/01/2018 and 05/31/2018",
y = "Dollars Billed", x = "Technician", fill = "Shop") +
scale_y_continuous(limits= c(NA,NA),
labels = scales::dollar,
breaks = seq(0, 50000 + 10000, 5000*2),
minor_breaks = seq(0,50000 + 10000, by = 5000)) +
scale_fill_brewer(palette = "Set1") +
geom_label(data = DolSums[DolSums$x < 0,],
aes(label=scales::dollar(x),
y = x),
show.legend = FALSE, size = 2.6, colour = "white", fontface = "bold")

ggrepel label fill color questions

I'm working with ggplot2 for the first time, and I'm having trouble making the colors of the labels I created with ggrepel change dynamically. Currently, my code looks like this:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
In general, this works pretty well, except I strongly dislike the toothpaste green color it gives me for one of the two factors plotted. I want to dictate the specific colors of that fill = factor(Hemisphere)) line, but I don't know how.
I have already tried using the scale_colours_manual function, but when I include it within the geom_label_repel(.....) paratheses in line 3, the program complains that "ggplot2 doesn't know how to deal with data of class ScaleDiscrete/Scale/ggproto", and when I place the scale_colours_manual line outside of line 3, it has no effect at all, as in this example, which produced an identical plot to the one above:
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
scale_colour_manual(values = c('blue', 'red')) +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere")
I know there has to be a way to do this, but I'm at a loss. Thanks for any help you've got!
EDIT: At request, I've attached a dput() of tstat. Not a big data frame.
structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
You can use scale_fill_manual:
tstat <- structure(list(VarNames = structure(c(4L, 1L, 3L, 2L, 5L, 6L,
4L, 1L, 3L, 2L, 5L, 6L), .Label = c("Dry Deposition", "MEGAN Acetone",
"MEGAN Terpenes", "Monoterpene Yield", "Ocean", "Photolysis"), class = "factor"),
Mu = c(2703.09, 8066.01, 6566.6, 19741.7, 5809.6, 14231.8, 1493.56, 3067.54, 3631.32, 9951.06, 8748.95, 7967.93),
Sigma = c(3478.28, 8883.23, 7276.49, 18454.4, 6218.8, 14989.7, 1925.14, 3410.27, 4017.64, 9289.57, 9354.64, 8403.1),
Hemisphere = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L),
.Label = c("Northern", "Southern"), class = "factor")),
.Names = c("VarNames", "Mu", "Sigma", "Hemisphere"),
class = "data.frame", row.names = c(NA, -12L))
library(ggplot2)
library(ggrepel)
library(ggthemes)
ggplot(tstat) +
geom_point(aes(Mu, Sigma),size = 5, color = 'black') +
geom_label_repel(aes(Mu, Sigma, label = VarNames, fill = factor(Hemisphere)), fontface = 'bold', color = 'white',
box.padding = unit(0.25, 'lines'),point.padding = unit(0.5, 'lines')) +
geom_rangeframe() +
theme_tufte() +
xlab(expression(paste(mu, "*"))) +
ylab(expression(sigma)) +
theme(axis.title.x = element_text(vjust=-0.5), axis.title.y = element_text(vjust=1.5)) +
ggtitle("Model Sensitivity by Hemisphere") +
scale_fill_manual(values = setNames(c("lightblue", "darkgreen"), levels(tstat$Hemisphere)))

With both stacked and dodged bars, how can you remove dodge-bar elements from legend?

Thanks to combine stacked bars and dodged bars, I created the plot below using the data frame shown. But now, since the axis titles name the bars, how can I remove the legend elements other than for the one stacked bar? That is, can the legend show only the segments of the Big8 bar?
> dput(combo)
structure(list(firm = structure(c(12L, 1L, 11L, 13L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("Avg.", "Co", "Firm1",
"Firm2", "Firm3", "Firm4", "Firm5", "Firm6", "Firm7", "Firm8",
"Median", "Q1", "Q3"), class = "factor"), metric = structure(c(5L,
1L, 4L, 6L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Avg.",
"Big8", "Co", "Median", "Q1", "Q3"), class = "factor"), value = c(0.0012,
0.0065, 0.002, 0.0036, 0.0065, 0.000847004466666667, 0.000658907411111111,
0.0002466389, 8.41422555555556e-05, 8.19149222222222e-05, 7.97185555555556e-05,
7.82742555555556e-05, 7.56679888888889e-05), grp = structure(c(1L,
2L, 3L, 6L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), .Label = c("Q1",
"Avg.", "Median", "Co", "Big8", "Q3"), class = "factor")), .Names = c("firm",
"metric", "value", "grp"), row.names = c(NA, -13L), class = "data.frame")
Here is the plotting code.
ggplot(combo, aes(x=grp, y=value, fill=firm)) +
geom_bar(stat="identity") +
labs(x = "", y = "") +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 2))
The plot, which ideally would have a smaller set of elements in the legend.
You can manually set the breaks for scale_fill_discrete:
library(ggplot2)
ggplot(combo, aes(x=grp, y=value, fill=firm)) +
geom_bar(stat="identity") +
labs(x = "", y = "") +
theme(legend.position = "bottom") +
guides(fill = guide_legend(nrow = 2)) +
scale_fill_discrete(breaks = combo$firm[combo$metric=="Big8"])
I'm not 100% sure which labels you want to keep, but a manually entered vector, combo$firm and combo$metric will all work.

ggplot2: axis line at 0 with negative datapoints

I am doing boxplot with positive and negative data and would like to have the axis at y=0.
adding a line afterwards is not elegant since the line would be on to of the boxes and not behind them.
(the goal is to have the line at y=0 black, while the lines at 1 and -1 should be gray)
In addition I would like to have only the axis lines. I therefore used
axis.line=element_line()
,panel.border = element_blank()
in the theme. however the vertical line goes above 1, which does not look good (my data is by definition between -1 and 1).
here is the code:
require (ggplot2)
theme_jack <- function (base_size = 10, base_family = "") {
theme_bw(base_size = base_size, base_family = base_family) %+replace%
theme(
title = element_text(size = 12)
,axis.title.x = element_text(colour = "white", size=0)
,axis.title.y = element_text(size=12, angle=90)
,axis.text.x=element_text(size=8)
,panel.grid.major = element_line(colour = "grey")#,
,axis.line=element_line()
,panel.border = element_blank()
,panel.grid.minor = element_blank()
,panel.grid.major.x = element_blank()
,legend.position = "none"
)
}
theme_set(theme_jack())
datatest2=structure(list(datatest2.genotype = structure(c(1L, 1L, 5L, 5L,
1L, 5L, 5L, 5L, 1L, 5L, 1L, 5L, 1L, 5L, 5L, 1L, 1L, 1L, 5L, 5L,
1L, 1L, 1L, 5L, 1L, 1L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L), .Label = c("CS_d42-chG80", "CS_G_c380cha", "CS_Gd42-chG80",
"PKC_CS", "PKC_d42-chG80", "PKC_G_c380cha", "PKC_Gd42-chG80"), class = "factor"),
datatest2.score = c(0.8882, -0.3775, -0.4053, 0.1962, 0.9982,
0.5627, -0.4865, 0.7267, 0.3276, 0.5017, 0.9731, 0.1525,
0.7857, 0.6121, 0.8508, 0.1311, -0.2457, 0.8848, -0.1254,
0.1047, -0.2715, 0.7189, 0.4115, 0.9704, -0.8328, -0.1301,
0.9756, 0.2317, 0.4297, 0.9967, 0.6423, 0.8516, 0.3386, 0.5208,
0.9148, 0.2539, 0.8581, 0.5621, 0.5969, 0.7435)), .Names = c("genotype",
"score"), row.names = c(NA, 40L), class = "data.frame")
p=ggplot(datatest2, aes(x=factor(genotype),y= score))
plot=p+ geom_boxplot()+ labs(x="genotype",y="PI during final test")+
scale_fill_grey(start = 0.9, end = 0.9)+ ##allow good bw prints
scale_y_continuous(minor_breaks=NULL,breaks = seq(-1 , 1, 1) )
plot
Try the expand argument to `scale_y_continuous. Replace the final section with this and you should get something closer to what you want. You may have to experiment with the values below.
p = ggplot(datatest2, aes(x = factor(genotype), y = score))
plot = p+ geom_boxplot()+ labs(x = "genotype",
y = "PI during final test")+
scale_fill_grey(start = 0.9, end = 0.9)+ ##allow good bw prints
scale_y_continuous(expand = c(0,0.005),
minor_breaks = NULL,
breaks = seq(-1 , 1, 1) ) +
theme()
plot

Resources