I made a ggplot with my data. than I wanted to label some interactions between my boxplot-data (via anova/Tukey) to show, if there are signifikant differences between special Groups or not. I'm working with one csv-sheet called "test", that I have imported and with one tabel called "final"
[
[
but when I plot my data WITH the label from my TukeyHSD, I get every label triple with empty spaces.
how can i remove the empty spaces. there must be any solution to just show the first tree lines with label in the first plot, the middle tree lines in the middle plot with their specific label and the last tree lines in the third spot with label. How can I remove those empty lines with their alphateic labels?
Here my code:
test$int<-interaction(test$geno,test$n)
generate_label_df <- function(TUKEY, variable){
Tukey.levels <- TUKEY[[variable]][,4]
Tukey.labels <- data.frame(multcompLetters(Tukey.levels)['Letters'])
Tukey.labels$int=rownames(Tukey.labels)
Tukey.labels=Tukey.labels[order(Tukey.labels$int) , ]
return(Tukey.labels)
}
model=lm(test$amino~test$int ) ###
ANOVA=aov(model)
TUKEY <- TukeyHSD(x=ANOVA, 'test$int', conf.level=0.95)
labels<-generate_label_df(TUKEY , "test$int")
names(labels)<-c('Letters','int')
yvalue<-aggregate(amino~int, data=test, max)
final<-merge(labels,yvalue)
ggplot(test, aes(x=int, y=amino)) +
stat_boxplot(geom="errorbar") +
geom_boxplot(aes(fill=CO2)) +
geom_text(data = final, aes(label = Letters, vjust = -0.6)) +
labs(title = "Aminosäuren Erdkultur", x = "Genotyp", y = "Aminosäurekonz. [µmol/gFW]") +
scale_x_discrete(labels = c("col.Ammonium" = "col", "sps.Ammonium" = "sps",
"swe.Ammonium" = "swe", "col.Nitrat" = "col",
"sps.Nitrat" = "sps", "swe.Nitrat" = "swe",
"col.NON" = "col", "sps.NON" = "sps","swe.NON" = "swe"),
limits = c("col.Ammonium", "sps.Ammonium", "swe.Ammonium",
"col.Nitrat", "sps.Nitrat","swe.Nitrat", "col.NON",
"sps.NON", "swe.NON") )+
guides(fill = guide_legend(title = "CO2-Behandlung")) +
theme(plot.title = element_text(face = "bold", size="17", hjust = "0.5")) +
scale_fill_manual(values = c("violetred3", "steelblue1")) +
facet_wrap( ~ n) +
theme(legend.title = element_text(face = "bold"))
my first opinion was, to remove "facet_wrap(~n)". The Problem is solved than, but obviously I'm missing the subdivision on this way
Related
I have a data set as shown here:
ALL<- structure(list(GI = c(38.448275862069, 40.2659574468085, 85.3378378378378,
56.4606741573034, 26.5714285714286, 16.8944099378882), GI_D = c(31.5275862068966,
37.0446808510638, 64.0033783783784, 45.7331460674157, 20.7257142857143,
14.1913043478261), GI_W = c(34.84375, 39.4270833333333, 83.0921052631579,
54.6195652173913, 25.5963302752294, 16.4848484848485), NEE_D_mean = c(9.9644036070938,
-5.49181483024952, -29.5841687938457, -10.950117466455, -9.76133775037159,
-1.17370950853892), NEE_D_se = c(24.4055666454516, 8.31286897717958,
43.0803839446216, 42.0054504158082, 28.7765100449838, 8.86774764999355
), NEE_W_mean = c(-10.6866769282934, 20.9456806199394, -24.0380682586804,
52.3723812566745, -62.2858574112861, 56.3557615426375), NEE_W_se = c(15.2426118086142,
17.8227858145903, 22.7452815581715, 38.4251278858896, 19.1950340008666,
25.59062272811), GPP_D_mean = c(2.76586256588453, -14.0740484535984,
22.0551675189495, 38.2196758481854, -22.2452106112792, 2.92247497333855
), GPP_D_se = c(10.0301104827162, 4.76830515667558, 10.1200654792974,
13.6220945562145, 12.5521089272372, 4.02070599220442), GPP_W_mean = c(-13.3583364224079,
5.5457128851295, 6.96224944388818, 30.9347346550519, -24.0637392356731,
31.1919112040759), GPP_W_se = c(7.79177565854901, 7.68225824264646,
7.53759987843893, 9.21062180693269, 11.5998936888688, 4.91032534186175
), RE_D_mean = c(-6.92656657644594, -20.2249090077204, -1.55891573291113,
15.3619823271736, -59.6169736724781, 0.0398744940922411), RE_D_se = c(8.81296607135718,
3.17951327169943, 7.26103092218914, 9.79375075847273, 33.89046634443,
3.15632251128507), RE_W_mean = c(-11.2826765406364, -5.50930629197934,
-7.35527862198859, -3.3802491396303, -5.7039196948544, 15.5927675710877
), RE_W_se = c(7.82782177993256, 3.28089787167971, 5.27000717925753,
5.7667863399033, 10.1830962186111, 3.17699751136105), site = c("DK_M",
"DK_B", "UK", "NL", "HU", "IT")), row.names = c(NA, -6L), class = "data.frame")
And now I want to make a plot similar to below,
My code is
library(dplyr)
require(ggplot2)
require(ggpmisc)
library(tidyr)
library(tidyverse)
target1<- c("UK", "DK_M", "NL","DK_B") #What about "DK_B"?
dat<- filter(ALL, site %in% target1)
fit<- lm(NEE_D_mean~GI,dat)
summary(fit)
target2<- c("HU", "DK_M","NL","DK_B")
df<- filter(ALL, site %in% target2)
fit<- lm(RE_D_mean~GI,df)
summary(fit)
ggplot(ALL, mapping = aes(x=GI, y=NEE_D_mean))+
geom_point(aes(x=GI, y=NEE_D_mean,shape=site,color= 'green', size=1))+
geom_hline(yintercept = 0)+ #add a horizontal line= 0
geom_errorbar(aes(ymin=NEE_D_mean-NEE_D_se, ymax=NEE_D_mean+NEE_D_se), width=0.5) +
labs(y='Drought change of NEE from control % ', x= 'Gaussen Index of Aridity', color= ' ')+ #here, note: x and y axis title is reversed.
geom_smooth (data = dat,aes(x=GI, y=NEE_D_mean),method='lm', formula = y~x,color= 'black', se=FALSE,inherit.aes = FALSE) +
#stat_poly_eq(formula = y~x, eq.with.lhs = "italic(hat(y))~`=`~", aes(x = 65, y = -20,label = paste(..eq.label.., ..rr.label.., sep = "~~~")), parse = TRUE) +
geom_point(aes(x=GI, y=RE_D_mean,shape= site,color= "blue",size=2))+ #if I add color= "bule" here, it doesn't work at all. why?
geom_hline(yintercept = 0)+ #add a horizontal line= 0
geom_errorbar(aes(ymin=RE_D_mean-RE_D_se, ymax=RE_D_mean+RE_D_se, color= "blue"), width=0.5, size=1) + #if I add color= "blue" here, it doesn't work at all. why?
labs(y='Drought change of Reco from control % ', x= 'Gaussen Index of Aridity', color= ' ')+
scale_color_manual(values = c("NEE"="black", "RE"="green"), drop= F)+ #change the color and match the color with the second legend
geom_smooth (data = df,aes(x=GI, y=RE_D_mean),method='lm', formula = y~x,color= 'green', se=FALSE,inherit.aes = FALSE) +
theme_bw()+
#theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())+
theme(legend.position = c(0.85, 0.3))+ #change the legend position
theme(legend.title = element_blank())+ #Change the legend title to blank
theme_bw()+
theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())
#+annotate(geom="text", x=60, y=10, label="NEE",color="red")
The problem is I can't change the size and color of the dots separately. I had set the size of the dots separately, it showed a weird legend on the left. Meanwhile, even if I set the color of the second (RE) graph's dots and error bars to blue, the output color didn't change.
this question is linked to a previous one answered by #Rui Barradas and #Duck, but i need more help. Previous link here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But i am struggling to add the two other data sources and get them to split by SITENAME. Her is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
SITENAME_plot <- function(AllDates_TPAF){
ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
labs(x = "Month", y = "Total PAF (% affected)") +
scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
scale_y_continuous(limits = c(0, 100), sec.axis = sec_axis(~., name = "Water level (m)")) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
SITENAME_plot_write <- function(name, g, dir = "N:/abc/"){
flname <- file.path(dir, name)
flname <- paste0(flname, ".jpg")
png(filename = flname, width = 1500, height = 1000)
print(g)
dev.off()
flname
}
Apply function to list split by SITENAME:
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- sapply(sp1, SITENAME_plot, simplify = FALSE)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
dev.off()
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try next code. I used the data you shared. Just be careful with names of all datasets. Ideally, the key columns as DATE and Sampling.Year should be present in all dataframes before making the split. Also some variables as Risk was absent so I added an example var with same name. Here the code, I added a function for the plot you want:
library(tidyverse)
library(readxl)
#Data
df1 <- read_excel('Sample data.xlsx',1)
#Create var
df1$Risk <- c(rep(c("Very Low","Low","Moderate","High","Very High"),67),"Very High")
#Other data
df2 <- read_excel('Sample data.xlsx',2)
df3 <- read_excel('Sample data.xlsx',3)
#Split 1
L1 <- split(df1,df1$SITENAME)
L2 <- split(df2,df2$SITENAME)
L3 <- split(df3,df3$`Site Name`)
#Function to create plots
myplot <- function(x,y,z)
{
#Merge x and y
#Check for duplicates and avoid column
y <- y[!duplicated(paste(y$DATE,y$Sampling.Year)),]
y$SITENAME <- NULL
xy <- merge(x,y,by.x = c('Sampling.Year','DATE'),by.y = c('Sampling.Year','DATE'),all.x=T)
#Format to dates
xy$DATE <- as.Date(xy$DATE)
#Scale factor
scaleFactor <- max(xy$Daily.Ave.PAF) / max(xy$Height)
#Rename for consistency in names
names(z)[4] <- 'DATE'
#Format date
z$DATE <- as.Date(z$DATE)
#Plot
#Plot
G <- ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
geom_point(aes(colour = Risk), size = 3) +
scale_colour_manual(values=c("Very Low" = "dark green","Low" = "light green",
"Moderate" = "yellow", "High" = "orange", "Very High" = "red"), drop = FALSE) +
scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
geom_line(aes(x=DATE,y=Height*scaleFactor))+
scale_y_continuous(name="Total PAF (% affected)", sec.axis=sec_axis(~./scaleFactor, name="Water level (m)"))+
labs(x = "Month") +
geom_vline(data = z,aes(xintercept = DATE),linetype="dashed")+
facet_wrap(~Sampling.Year, ncol = 1, scales = "free")+
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme(legend.text=element_text(size=15)) +
theme(axis.text=element_text(size=15),
axis.title=element_text(size=15,face="bold")) +
guides(color = guide_legend(reverse = TRUE))+
theme_bw() +
ggtitle(unique(xy$SITENAME))
return(G)
}
#Create a list of plots
Lplots <- mapply(FUN = myplot,x=L1,y=L2,z=L3,SIMPLIFY = FALSE)
#Now format names
vnames <- paste0(names(Lplots),'.png')
mapply(ggsave, Lplots,filename = vnames,width = 30,units = 'cm')
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.
I am using the function plot_missing to show the number of NAs in my dataset. Since my dataset has 50 variables I need to adjust the text size. I managed to change the text size of the axis but not of the data labels. Any suggestions?
Using a sample dataset:
library(ggplot2)
library(DataExplorer)
df <- data.frame(matrix(data=runif(1000, 0, 100), ncol=50))
df[df>80] <- NA
plot_missing(df, theme_config =list(axis.text=element_text(size=6)))
There are probably more elegant ways to do this, but here I've modified the plot_missing function:
plot_missing_smaller_labels <-
function (data, title = NULL, ggtheme = theme_gray(),
theme_config = list(legend.position = c("bottom")))
{
pct_missing <- NULL
missing_value <- profile_missing(data)
output <- ggplot(missing_value, aes_string(x = "feature",
y = "num_missing", fill = "group")) +
geom_bar(stat = "identity") +
geom_text(aes(label = paste0(round(100 * pct_missing, 2), "%")), size = 2) +
scale_fill_manual("Group", values = c(Good = "#1a9641",
OK = "#a6d96a", Bad = "#fdae61", Remove = "#d7191c"),
breaks = c("Good", "OK", "Bad", "Remove")) +
coord_flip() +
xlab("Features") +
ylab("Missing Rows")
class(output) <- c("single", class(output))
plotDataExplorer(plot_obj = output, title = title, ggtheme = ggtheme,
theme_config = theme_config)
}
I've added size = 2 in the geom_text() function.
The new plot_missing_smaller_labels function is called like so:
plot_missing_smaller_labels(df, theme_config=list(axis.text=element_text(size = 6)))
Which gives labels with smaller text size.
I am using generic Diabetes data,
Processing data(continuous to discrete)
library("ggalluvial")
dat$Glucose_cat<- cut(dat$Glucose,breaks=c(1,100,125,max(dat$Glucose)), labels = c("Low","Normal","High"))
dat$BMI_cat <- cut(dat$BMI, breaks= c(17,25,30,35,40,max(dat$Age)), labels = c("18-25", "25-30", "30-35", "35-40", "40+"))
dat$Outcome_cat<-cut(dat$Outcome, breaks = c(-Inf,0,Inf), labels = c("Negative", "Positive"))
dat$freq <- 1`
dat3d <- dat[, .(freq3d = .N, freq = sum(freq)), by=list(Glucose_cat,
BMI_cat, Outcome_cat)]
dat3d<- dat3d[!(is.na(dat3d$BMI_cat))]
dat3d<- dat3d[!(is.na(dat3d$Glucose_cat))]
setnames(dat3d, old = c('Glucose_cat', 'BMI_cat','Outcome_cat'), new = c('Glucose', 'BMI','Diabetes'))
ggplot(dat3d,aes(axis1= Diabetes, axis2=Glucose, axis3 = BMI, y = freq))+
geom_alluvium(aes(fill=Diabetes), reverse = FALSE)+
scale_fill_manual(labels = c("Negative", "Positive"), values = c("blue", "red"))+
scale_x_discrete(limits = c("Glucose", "BMI"), expand = c(.001, .001))+
geom_stratum(alpha=0.6, reverse = FALSE)+
geom_text(stat="stratum", label.strata= TRUE, reverse = FALSE)+
ylab("Frequency")+xlab("Features")+
theme(legend.title = element_text(size=12))+
theme_minimal()
following plot is displayed with the above code
I want to plot such that when Glucose is "Positive" and BMI is "High", it should one single red line and Not 5 lines as in my case.
I am pretty new to R programming and i am exploring different libraries to create this flow diagram. I tried something with "alluvial" library which has this function "layer", then everything is sorted on some value in my case i did sort it for Daibetes=="Negative" and plot looked like thisplot using alluvial library, sorted like all red lines are above blue line in each case
I want to do something similar using ggalluvial. Look forward to leads. Thanks in advance.
You need to set aes.bind = TRUE in the geom_alluvium() which gets passed to stat_flow() which prioritizes the aesthetics over the axis lodes when plotting.
ggplot(dat3d,aes(axis1= Diabetes, axis2=Glucose, axis3 = BMI, y = freq3d)) +
geom_alluvium(aes(fill=Diabetes),aes.bind=TRUE, reverse = FALSE) +
scale_fill_manual(labels = c("Negative", "Positive"), values = c("blue", "red")) +
scale_x_discrete(limits = c("Diabetes", "Glucose", "BMI"), expand = c(.001, .001)) +
geom_stratum(alpha=0.6, reverse = FALSE) +
geom_text(stat="stratum", label.strata= TRUE, reverse = FALSE) +
ylab("Frequency")+xlab("Features") +
theme(legend.title = element_text(size=12)) +
theme_minimal()
I need to print in Word a graph with a table at the bottom which indicates the legend used in the graph. I have the 3 elements separated and I need to put it together in a single grid.
I use grid.arrange to try but the size of each element differs, like here
There is a way to put them all together so years matches in x-axis graph and column table and each row of the table match each label from the legend.?
As a second question, it is possible to put the Total line in the legend?
Any help would be appreciated.
The code I'm using:
VARIABLE<-c("OFERT_PLP","OFERT_TP","OFERT_LIC")
X2007<-as.vector(matrix1) #matrices A-D contain randomly generated NA and numerical values
X2008<-as.vector(matrix2)
X2009<-as.vector(matrix3)
X2010<-as.vector(matrix4)
X2011<-as.vector(matrix5)
X2012<-as.vector(matrix6)
X2013<-as.vector(matrix7)
X2014<-as.vector(matrix8)
X2015<-as.vector(matrix9)
X2016<-as.vector(matrix10)
orden<-c(1,2,3)
ETIQUETA<-c("Pregrado","Postitulo","Postgrado")
df<-as.data.frame(cbind(VARIABLE,X2007,X2008,X2009,X2010,X2011,X2012,X2013,X2014,X2015,X2016,orden,ETIQUETA))
df<-df[,colnames(df)!="ETIQUETA"]
dfm1<-melt(df,id.vars = c("VARIABLE","orden"), variable.name = "ANO", value.name = "VALOR")
dfm <- dfm1 %>% group_by(ANO,VARIABLE,VALOR) %>%
group_by(ANO) %>%
mutate(pct = (VALOR / sum(VALOR,na.rm=TRUE)),
total=sum(VALOR,na.rm=TRUE),
cumsum=cumsum(VALOR),
npos = total-cumsum+0.5*VALOR)
d<-as.data.frame(rbind(df[,paste0("X",anos)],colSums(df[,paste0("X",anos)],na.rm=TRUE)))
p1<-ggplot(dfm, aes(x=ANO, y=VALOR, fill=orden)) +
geom_bar(stat="identity")+ geom_line(aes(y=total,group=1,linetype="Total"))+ scale_fill_brewer()
none <- element_blank()
p2 <- p1 + theme_bw() +
scale_x_discrete(labels=anos) +
labs(panel.grid.major = none, panel.grid.minor = none) +
labs(panel.background = none) +
labs(panel.border = none) +
xlab(NULL) + ylab(NULL)+
background_grid(major = "y", minor = "none" )+
geom_text(aes(label=ifelse(pct==0,"",paste0(sprintf("%1.0f",pct*100),"%")),y=npos), colour="black",size=3)
p3<-p2 + theme(legend.title=none,legend.position = "left",legend.text=element_text(size=5),
legend.key.height=unit(0.8,"line"), axis.text = element_text(size = 8)) + guides(linetype=FALSE)
legend <- get_legend(p3)
p3<-p3+theme(legend.position = "none")
pg <- ggplotGrob(p3)
mythemegra <- gridExtra::ttheme_default(
core = list(fg_params=list(cex = 0.8)),
colhead = list(fg_params=list(cex = 0.8)),
rowhead = list(fg_params=list(cex = 0.8)),parse = "TRUE")
table<-tableGrob(d,rows = NULL,col=NULL,
theme = mythemegra)
blankPlot <- ggplot()+geom_blank(aes(1,1)) + cowplot::theme_nothing()
grid.arrange(arrangeGrob(blankPlot,pg,top=textGrob("Title",gp=gpar(fontsize=12,font=2,col="black"),just = "centre"),
nrow=1,ncol=2,widths = c(1,5)),
arrangeGrob(legend,table,nrow=1,ncol=2,widths = c(1,4.5)),heights=c(6.5,6.5))