ggplot stacked barplot - r

DataLink:
https://www.dropbox.com/s/ql5jw7eng3plrso/GTAP_MacroValueChange.csv
Code:
library(ggplot2)
library(grid)
#Upload data
ccmacrosims2 <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_NewAggDatabase/NewFiles/GTAP_MacroValueChange.csv", header=TRUE, sep=",", na.string="NA", dec=".", strip.white=TRUE)
#Data manipulation for analysis
ccmacrorsts2 <- as.data.frame(ccmacrosims2)
ccmacrorsts2[6:10] <- sapply(ccmacrorsts2[6:10],as.numeric)
ccmacrorsts2 <- droplevels(ccmacrorsts2)
ccmacrorsts2 <- transform(ccmacrorsts2,region=factor(region,levels=unique(region)))
#Selecting data of interest
GDPDecomp1 <- melt(ccmacrorsts2[ccmacrorsts2$region %in% c("TUR","MAR"), ])
GDPDecomp2 <- GDPDecomp1[GDPDecomp1$sres %in% c("AVERAGE"), ]
GDPDecomp.f <- subset(GDPDecomp2, variable !="GDP")
#Ploting
GDPDecompPlot <- ggplot(data = GDPDecomp.f, aes(factor(region),value, fill=variable))
GDPDecompPlot + geom_bar(stat="identity", position="stack") + facet_wrap(~tradlib, scales="free_y") +
theme(axis.text.x = element_text(colour = 'black', angle = 90, size = 12, hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("GDP (Change in $US million)") + theme(axis.text.y = element_text(colour = 'black', size = 12), axis.title.y = element_text(size = 12)) +
theme(strip.text.x = element_text(size = 12, hjust = 0.5, vjust = 0.5, face = 'bold'))
Issue at hand:
The barplot using ggplot (see below)
excel_barplot:
It seems that the plot does not really represent correctly the values of the data. What I am looking for is something like what we get from excel_barplot_sample. For instance, comparing the panel "TRLIBEU" under ggplot with its counterpart using excel, one can clearly notice that the ggplot does not capture the values in the data correctly when stacking.
Any help on how to correct for the discrepancy?
Thanks in advance

If you need to stack bars in ggplot2 with negative values then for better result you should make two new data frames - one for positive values and second for negative values.
GDPDecomp.f.pos<-GDPDecomp.f[GDPDecomp.f$value>0,]
GDPDecomp.f.neg<-GDPDecomp.f[GDPDecomp.f$value<0,]
Then use each data frame in its own geom_bar() call.
ggplot()+
geom_bar(data=GDPDecomp.f.pos,aes(x=factor(region),y=value,fill=variable),stat="identity")+
geom_bar(data=GDPDecomp.f.neg,aes(x=factor(region),y=value,fill=variable),stat="identity")+
facet_wrap(~tradlib, scales="free_y") +
theme(axis.text.x = element_text(colour = 'black', angle = 90, size = 12, hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("GDP (Change in $US million)") + theme(axis.text.y = element_text(colour = 'black', size = 12), axis.title.y = element_text(size = 12)) +
theme(strip.text.x = element_text(size = 12, hjust = 0.5, vjust = 0.5, face = 'bold'))

Related

Increasing thickness of lineplot with ggplot2 [duplicate]

Datalink:
the data used
My code:
ccfsisims <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_NewAggDatabase/NewFiles/GTAP_ConsIndex.csv", header=TRUE, sep=",", na.string="NA", dec=".", strip.white=TRUE)
ccfsirsts <- as.data.frame(ccfsisims)
ccfsirsts[6:24] <- sapply(ccfsirsts[6:24],as.numeric)
ccfsirsts <- droplevels(ccfsirsts)
ccfsirsts <- transform(ccfsirsts,sres=factor(sres,levels=unique(sres)))
library(ggplot2)
#------------------------------------------------------------------------------------------
#### Plot of food security index for Morocco and Turkey by sector
#------------------------------------------------------------------------------------------
#_Code_Begin...
datamortur <- melt(ccfsirsts[ccfsirsts$region %in% c("TUR","MAR"), ]) # Selecting regions of interest
datamortur1 <- datamortur[datamortur$variable %in% c("pFSI2"), ] # Selecting the food security index of interest
datamortur2 <- datamortur1[datamortur1$sector %in% c("wht","gro","VegtFrut","osd","OthCrop","VegtOil","XPrFood"), ] # Selecting food sectors of interest
datamortur3 <- subset(datamortur2, tradlib !="BASEDATA") # Eliminating the "BASEDATA" scenario results
allfsi.f <- datamortur3
fsi.wht <- allfsi.f[allfsi.f$sector %in% c("wht"), ]
Figure29 <- ggplot(data=fsi.wht, aes(x=factor(sres),y=value,colour=factor(tradlib)))
Figure29 + geom_line(aes(group=factor(tradlib),size=2)) + facet_grid(regionsFull~., scales="free_y", labeller=reg_labeller) + scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(colour = 'black', angle = 90, size = 13, hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") + theme(axis.text.y = element_text(colour = 'black', size = 12), axis.title.y = element_text(size = 12, hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5, vjust = 0.5, face = 'bold'))
My result:
Newresult with aes(size=2):
My question:
Is there a way to control for line width more precisely to avoid the result in the second plot? I particularly find it document-unfriendly, and more so for publishing purposes to include the plot with the newly defined line width.
best,
ismail
Whilst #Didzis has the correct answer, I will expand on a few points
Aesthetics can be set or mapped within a ggplot call.
An aesthetic defined within aes(...) is mapped from the data, and a legend created.
An aesthetic may also be set to a single value, by defining it outside aes().
As far as I can tell, what you want is to set size to a single value, not map within the call to aes()
When you call aes(size = 2) it creates a variable called `2` and uses that to create the size, mapping it from a constant value as it is within a call to aes (thus it appears in your legend).
Using size = 1 (and without reg_labeller which is perhaps defined somewhere in your script)
Figure29 +
geom_line(aes(group=factor(tradlib)),size=1) +
facet_grid(regionsFull~., scales="free_y") +
scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(
colour = 'black', angle = 90, size = 13,
hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") +
theme(axis.text.y = element_text(colour = 'black', size = 12),
axis.title.y = element_text(size = 12,
hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5,
vjust = 0.5, face = 'bold'))
and with size = 2
Figure29 +
geom_line(aes(group=factor(tradlib)),size=2) +
facet_grid(regionsFull~., scales="free_y") +
scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(colour = 'black', angle = 90,
size = 13, hjust = 0.5, vjust =
0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") +
theme(axis.text.y = element_text(colour = 'black', size = 12),
axis.title.y = element_text(size = 12,
hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5,
vjust = 0.5, face = 'bold'))
You can now define the size to work appropriately with the final image size and device type.
Line width in ggplot2 can be changed with argument size= in geom_line().
#sample data
df<-data.frame(x=rnorm(100),y=rnorm(100))
ggplot(df,aes(x=x,y=y))+geom_line(size=2)
Line width in ggplot2 can be changed with argument lwd= in geom_line().
geom_line(aes(x=..., y=..., color=...), lwd=1.5)
It also looks like if you just put the size argument in the geom_line() portion but without the aes() it will scale appropriately. At least it works this way with geom_density and I had the same problem.
If you want to modify the line width flexibly you can use "scale_size_manual," this is the same procedure for picking the color, fill, alpha, etc.
library(ggplot2)
library(tidyr)
x = seq(0,10,0.05)
df <- data.frame(A = 2 * x + 10,
B = x**2 - x*6,
C = 30 - x**1.5,
X = x)
df = gather(df,A,B,C,key="Model",value="Y")
ggplot( df, aes (x=X, y=Y, size=Model, colour=Model ))+
geom_line()+
scale_size_manual( values = c(4,2,1) ) +
scale_color_manual( values = c("orange","red","navy") )
Just add the size command outside the aes() function, with any fractional value desired, e.g. size = 1.5
geom_line(data,aes(x=x,y=y), size=1.5)

overlay geom_point with position=dodge and facet grid in ggplot2

Considering the following data, I am able to generate a plot which describes how the risk of a react over a time interval changes.
risk_1 <- c(0.121,0.226,0.333,0.167,0.200,0.273,0.138,0.323,0.394,0.250,0.200,0.545,0.190,0.355,0.515,0.333,0.300,0.818)
risk_minus_SE <- c(0.060,0.114,0.198,0.047,0.057,0.097,0.072,0.186,0.247,0.089,0.057,0.280,0.109,0.211,0.352,0.138,0.108,0.523)
risk_plus_SE <- c(0.229,0.398,0.504,0.448,0.510,0.566,0.249,0.499,0.563,0.532,0.510,0.787,0.309,0.531,0.675,0.609,0.603,0.949)
Status <- rep(c(rep('With placebo',3),rep('With drug',3)),3)
durtn <- rep(c('(3-15]','(15-30]','(30-46]'),6)
react <- c(rep("x\u226516",6),rep("x\u226509",6),rep("x\u226504",6))
df1 <- data.frame(risk_1, risk_minus_SE, risk_plus_SE, Status, durtn, react)
dodge <- position_dodge(width=0.45)
ggplot(df1,aes(colour=react, y=risk_1, x=durtn)) +
geom_point(aes(shape=durtn), shape=16, size = 5, position=dodge) +
geom_errorbar(aes(ymin=risk_minus_SE, ymax=risk_plus_SE), position = dodge, width=0.5, size=1, lty=1) +
scale_colour_manual(values = c('black','red','blue')) +
facet_grid(~Status) +
scale_shape_manual(values = c(8,19))+
theme_bw() +
scale_x_discrete(limits=c('(3-15]','(15-30]','(30-46]')) +
coord_cartesian(ylim = c(0, 0.8)) +
theme(legend.position = c(.1, .85), legend.background = element_rect(colour = "black"),
plot.title = element_text(lineheight=1.5, face="bold", size=rel(1.5), hjust = 0.5),
panel.grid.major.x = element_blank(),
axis.text.x = element_text(vjust=0.5, size=16),
axis.text.y = element_text(vjust=0.5, size=16),
axis.title.y = element_text(size=20),
axis.title.x = element_text(size=20),
legend.text = element_text(size = 16, face = "bold"),
strip.text = element_text(size=25)) +
xlab("\ntime (min)") + ylab("Risk")
What I want to do is overlay a series of points at given x and y coordinates.
That being at With drug & durtn==(3,15], manually insert points at.....
Risk==0.5 for react=x≥04 in black
Risk==0.2 for react=x≥09 in red
Risk==0.0 for react=x≥16 in blue
Such that the desired output should look like
How does one use the geom_point() in combination with a facet_grid and dodge
First, you have to create a separate data frame that contains the data for the additional points.
dat <- data.frame(risk_1 = c(0.5, 0.2, 0),
react = levels(df1$react),
durtn = '(3-15]',
Status = 'With drug')
This new data frame dat can be used with geom_point to add an additional layer to the existing plot.
+ geom_point(data = dat, position = dodge, shape = 4, size = 5, show.legend = FALSE)

How to facet two plots side by side using ggplot2 in R

I have small data frame of statistical values obtained from different method. you can download from here.Dataset is look like this:
I need to facet (two plot side by side with same y axis labels) two plot of RMSE.SD and MB variable using ggplot2 package in R like the following example figure.
I wrote this code for plotting 1 plot for RMSE.SD variable.
library(ggplot2)
comparison_korea <- read.csv("comparison_korea.csv")
ggplot(data=comparison_korea, aes(R,X))+
geom_point(color = "black", pch=17, alpha=1,na.rm=T, size=4)+
labs(title = "", y = "")+
theme(plot.title= element_text(hjust = 0.5,size = 15, vjust = 0.5, face= c("bold")),
axis.ticks.length = unit(0.2,"cm") ,
panel.border = element_rect(colour = "black", fill=NA, size=0.5),
axis.text.x = element_text(angle = 0, vjust = 0.5, size = 14, hjust = 0.5,margin=margin(4,0,0,0), colour = "black"),
axis.text.y = element_text(angle = 0, vjust = 0.5, size = 14, hjust = 1,margin=margin(0,5,0,0), colour = "black"),
plot.margin = unit(c(1, 1.5, 1, 0.5), "lines"))
You should be able to do something like this:
library(ggplot2)
ds <- read.csv("comparison_korea.csv")
dat <- data.frame(labels = as.character(ds$X),
RMSE.SD = ds$RMSE.SD,
MB = ds$MB)
dat <- reshape2::melt(dat)
ggplot(dat, aes(y = labels, x = value)) +
geom_point(shape = "+", size = 5) +
facet_wrap(~variable) +
xlab("value / reference (mm)") +
ylab("") +
theme_bw()

ggplot2 - How to boxplot two variables with different scales on same plot using a primary and secondary y-axis?

Datalink:
Data
Code:
distevyield <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_NewAggDatabase/NewFiles/GTAP_DistEVYield.csv", header=TRUE, sep=",", na.string="NA", dec=".", strip.white=TRUE)
str(distevyield)
distevyield <- as.data.frame(distevyield)
distevyield[5:6] <- sapply(distevyield[5:6],as.numeric)
distevyield <- droplevels(distevyield)
distevyield <- transform(distevyield,region=factor(region,levels=unique(region)))
library(ggplot2)
distevyield.f <- melt(subset(distevyield, region !="World"))
Figure3 <- ggplot(data = distevyield.f, aes(factor(variable), value))
Figure3 + geom_boxplot() +
theme(axis.text.x = element_text(colour = 'black', angle = 90, size = 15, hjust = 1, vjust = 0.5),axis.title.x = element_blank()) +
theme(axis.text.y = element_text(colour = 'black', size = 15, hjust = 0.5, vjust = 0.5), axis.title.y = element_blank()) +
theme(strip.text.x = element_text(size = 14, hjust = 0.5, vjust = 0.5, face = 'bold')) +
facet_wrap(~region, scales="free_y")
Outcome:
Question:
My two variables plotted, yield (%change) and ev (in million US$) have different units. Is there a way to add a secondary y-axis for one of the variable so as to display each variable with a boxplot representing its original units, but within the same plot? Does this feature exist in ggplot2 to begin with?
Thanks!
With ggplot2 you can't make two y axis (see explanation/discussion).
To represent different axis values for ev and yield you have to plot them in separate plots. One way to achieve this would be to use facet_wrap() and combine region and variable values. With facet_wrap() you will get both plots together and you will be able to control number of columns to show.
ggplot(data = distevyield.f, aes(factor(1), value)) +
geom_boxplot() + facet_wrap(region~variable,scales="free",ncol=8)+
theme(axis.text.x=element_blank(),
axis.title.x=element_blank(),
axis.title.y = element_blank())

How to change line width in ggplot?

Datalink:
the data used
My code:
ccfsisims <- read.csv(file = "F:/Purdue University/RA_Position/PhD_ResearchandDissert/PhD_Draft/GTAP-CGE/GTAP_NewAggDatabase/NewFiles/GTAP_ConsIndex.csv", header=TRUE, sep=",", na.string="NA", dec=".", strip.white=TRUE)
ccfsirsts <- as.data.frame(ccfsisims)
ccfsirsts[6:24] <- sapply(ccfsirsts[6:24],as.numeric)
ccfsirsts <- droplevels(ccfsirsts)
ccfsirsts <- transform(ccfsirsts,sres=factor(sres,levels=unique(sres)))
library(ggplot2)
#------------------------------------------------------------------------------------------
#### Plot of food security index for Morocco and Turkey by sector
#------------------------------------------------------------------------------------------
#_Code_Begin...
datamortur <- melt(ccfsirsts[ccfsirsts$region %in% c("TUR","MAR"), ]) # Selecting regions of interest
datamortur1 <- datamortur[datamortur$variable %in% c("pFSI2"), ] # Selecting the food security index of interest
datamortur2 <- datamortur1[datamortur1$sector %in% c("wht","gro","VegtFrut","osd","OthCrop","VegtOil","XPrFood"), ] # Selecting food sectors of interest
datamortur3 <- subset(datamortur2, tradlib !="BASEDATA") # Eliminating the "BASEDATA" scenario results
allfsi.f <- datamortur3
fsi.wht <- allfsi.f[allfsi.f$sector %in% c("wht"), ]
Figure29 <- ggplot(data=fsi.wht, aes(x=factor(sres),y=value,colour=factor(tradlib)))
Figure29 + geom_line(aes(group=factor(tradlib),size=2)) + facet_grid(regionsFull~., scales="free_y", labeller=reg_labeller) + scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(colour = 'black', angle = 90, size = 13, hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") + theme(axis.text.y = element_text(colour = 'black', size = 12), axis.title.y = element_text(size = 12, hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5, vjust = 0.5, face = 'bold'))
My result:
Newresult with aes(size=2):
My question:
Is there a way to control for line width more precisely to avoid the result in the second plot? I particularly find it document-unfriendly, and more so for publishing purposes to include the plot with the newly defined line width.
best,
ismail
Whilst #Didzis has the correct answer, I will expand on a few points
Aesthetics can be set or mapped within a ggplot call.
An aesthetic defined within aes(...) is mapped from the data, and a legend created.
An aesthetic may also be set to a single value, by defining it outside aes().
As far as I can tell, what you want is to set size to a single value, not map within the call to aes()
When you call aes(size = 2) it creates a variable called `2` and uses that to create the size, mapping it from a constant value as it is within a call to aes (thus it appears in your legend).
Using size = 1 (and without reg_labeller which is perhaps defined somewhere in your script)
Figure29 +
geom_line(aes(group=factor(tradlib)),size=1) +
facet_grid(regionsFull~., scales="free_y") +
scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(
colour = 'black', angle = 90, size = 13,
hjust = 0.5, vjust = 0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") +
theme(axis.text.y = element_text(colour = 'black', size = 12),
axis.title.y = element_text(size = 12,
hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5,
vjust = 0.5, face = 'bold'))
and with size = 2
Figure29 +
geom_line(aes(group=factor(tradlib)),size=2) +
facet_grid(regionsFull~., scales="free_y") +
scale_colour_brewer(type = "div") +
theme(axis.text.x = element_text(colour = 'black', angle = 90,
size = 13, hjust = 0.5, vjust =
0.5),axis.title.x=element_blank()) +
ylab("FSI (%Change)") +
theme(axis.text.y = element_text(colour = 'black', size = 12),
axis.title.y = element_text(size = 12,
hjust = 0.5, vjust = 0.2)) +
theme(strip.text.y = element_text(size = 11, hjust = 0.5,
vjust = 0.5, face = 'bold'))
You can now define the size to work appropriately with the final image size and device type.
Line width in ggplot2 can be changed with argument size= in geom_line().
#sample data
df<-data.frame(x=rnorm(100),y=rnorm(100))
ggplot(df,aes(x=x,y=y))+geom_line(size=2)
Line width in ggplot2 can be changed with argument lwd= in geom_line().
geom_line(aes(x=..., y=..., color=...), lwd=1.5)
It also looks like if you just put the size argument in the geom_line() portion but without the aes() it will scale appropriately. At least it works this way with geom_density and I had the same problem.
If you want to modify the line width flexibly you can use "scale_size_manual," this is the same procedure for picking the color, fill, alpha, etc.
library(ggplot2)
library(tidyr)
x = seq(0,10,0.05)
df <- data.frame(A = 2 * x + 10,
B = x**2 - x*6,
C = 30 - x**1.5,
X = x)
df = gather(df,A,B,C,key="Model",value="Y")
ggplot( df, aes (x=X, y=Y, size=Model, colour=Model ))+
geom_line()+
scale_size_manual( values = c(4,2,1) ) +
scale_color_manual( values = c("orange","red","navy") )
Just add the size command outside the aes() function, with any fractional value desired, e.g. size = 1.5
geom_line(data,aes(x=x,y=y), size=1.5)

Resources