Related
I have this dataframe:
structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
It is a data frame where each row is a category (taxon) and the columns correspond to the points of a time series (from T0 to T5).
I want to do a bar chart for each category (taxon) for their time (T0-T5):
# Reshape to long format: every non-taxon column (lfc_* and se_* alike) is
# stacked into `value`, with the source column name kept in `timepoint`.
melted_df <- reshape2::melt(
  taxonFC1,
  id.vars = "taxon",
  variable.name = "timepoint",
  value.name = "value"
)

# One panel per taxon, one bar per original column.
ggplot(melted_df, aes(x = timepoint, y = value, fill = taxon)) +
  geom_bar(stat = "identity") +
  facet_wrap(~ taxon, ncol = 3) +
  ggtitle("Bar Chart for Different Time Series") +
  xlab("Time Point") +
  ylab("Value") +
  labs(fill = "Category")
The question is whether it is possible to attach the standard errors (the se columns) as error bars to the corresponding logFC values (the lfc columns) at each time point.
Update:
I did this, but only for T0:
# Bar chart of the T0 log fold change per taxon, with error bars that span
# one standard error either side of each bar.
ggplot(
  data = taxonFC1,
  mapping = aes(x = taxon, y = lfc_StatusChronic.ACST0., fill = taxon)
) +
  geom_bar(stat = "identity") +
  # Error bars: lfc minus/plus the matching se column
  geom_errorbar(
    aes(
      ymin = lfc_StatusChronic.ACST0. - se_StatusChronic.ACST0.,
      ymax = lfc_StatusChronic.ACST0. + se_StatusChronic.ACST0.
    ),
    width = 0.4
  ) +
  labs(
    x = "Category",
    y = "lfc_StatusChronic.ACST0.",
    title = "Bar Plot of lfc_StatusChronic.ACST0. by Category"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Output expected (the image is from other data):
Is this what you're looking for?
library(dplyr)
library(tidyr)
library(ggplot2)
dat <- structure(list(taxon = c("Acidaminococcus", "Butyricicoccus",
"Butyrivibrio", "Collinsella", "Coprococcus", "Olsenella", "Parabacteroides",
"Paraprevotella", "Pasteurellaceae_unclassified"), lfc_StatusChronic.ACST0. = c(0.88175957,
0.88803574, 0.790947444, 1.319321361, 0.7176503, 0.758374253,
-0.833877215, -1.106098414, 0.932218695), se_StatusChronic.ACST0. = c(0.439259504,
0.449835605, 0.369268494, 0.391714918, 0.27578621, 0.364036816,
0.377314959, 0.485078744, 0.421283473), lfc_Time.fT1 = c(-0.021243562,
0.66196107, 0.334274258, -0.382520121, -0.005363874, -0.313304181,
-0.439558941, -0.029316428, 0.682658747), se_Time.fT1 = c(0.312681188,
0.330173331, 0.301559494, 0.309355933, 0.293938402, 0.302957725,
0.339292487, 0.361459254, 0.385696553), lfc_Time.fT2 = c(-1.092105655,
-0.083635974, -0.435405323, -1.221704783, -0.557850053, -0.734425087,
-0.19277856, 0.148094198, 0.461233277), se_Time.fT2 = c(0.326565043,
0.344533883, 0.31544836, 0.323423323, 0.307225241, 0.317023725,
0.354270528, 0.377368442, 0.403530764), lfc_Time.fT3 = c(-0.684898535,
0.007779894, -0.661494348, -0.765693993, -0.294827229, -1.082174069,
-0.428338824, 0.072377208, 0.682615791), se_Time.fT3 = c(0.324919486,
0.342422134, 0.314578177, 0.322254678, 0.305999846, 0.316331693,
0.352370636, 0.375283079, 0.402530027), lfc_Time.fT4 = c(-1.038613852,
-0.159777157, -0.172345815, -0.691220321, -0.444048742, -1.062300665,
0.073495083, 0.295212326, 0.337145234), se_Time.fT4 = c(0.319416657,
0.336513636, 0.309526757, 0.316959694, 0.300928605, 0.311343927,
0.346365478, 0.36886735, 0.396117478), lfc_Time.fT5 = c(-0.714954683,
0.081376697, -0.621676699, -0.483698623, -0.339094441, -0.718106519,
-0.055315775, 0.475970869, 0.160939365), se_Time.fT5 = c(0.317230276,
0.334106044, 0.307553106, 0.314893819, 0.298943665, 0.309379791,
0.343965965, 0.366296439, 0.393607858)), row.names = c(NA, -9L
), class = "data.frame")
# Reshape `dat` to one row per taxon/time point and draw one bar-chart panel
# per taxon, with error bars spanning lfc - se to lfc + se.
dat %>%
  # Give the T0 columns the same "<measure>_Time.f<time>" shape as T1-T5 so a
  # single pattern can split every column name.
  rename(
    lfc_Time.fT0 = lfc_StatusChronic.ACST0.,
    se_Time.fT0 = se_StatusChronic.ACST0.
  ) %>%
  # ".value" turns the first capture group (lfc / se) into output columns;
  # the second capture group (T0..T5) goes to the `time` column.
  pivot_longer(
    -taxon,
    names_pattern = "(.*)_Time\\.f(.*)",
    names_to = c(".value", "time")
  ) %>%
  ggplot(aes(x = time, y = lfc, ymin = lfc - se, ymax = lfc + se, fill = taxon)) +
  geom_bar(stat = "identity") +
  geom_errorbar(width = 0.4) +
  scale_fill_brewer(palette = "Set1") +
  facet_wrap(~taxon, ncol = 1) +
  # Labels describe what is actually plotted: time on x, lfc on y
  xlab("Time point") +
  ylab("Log fold change (lfc)") +
  ggtitle("Log fold change (+/- SE) by time point for each taxon") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
If so, the key is to rename the T0 variables to have the same format as the other time-period variables and then use pivot_longer() to put all the lfc measures in a single column and all the se measures in a single column. The rest is accomplished by putting time on the x-axis and faceting on the taxon variable. The pivot_longer() documentation has some good examples of retaining multiple columns; see in particular the last example on the page.
I have this data frame, and I want to create multiple plots at once in a loop, but when I run the code it gives me an error. Can anyone please tell me what I am doing wrong?
Data:
structure(list(Date = structure(c(289094400, 297043200, 304992000,
312854400, 320716800, 328665600), tzone = "UTC", class = c("POSIXct",
"POSIXt")), NORTH = c(4.06976744186047, 5.51675977653633, 7.2799470549305,
4.75015422578655, 4.59363957597172, 3.15315315315317), YORKSANDTHEHUMBER = c(4.0121120363361,
5.45851528384282, 9.52380952380951, 6.04914933837431, 3.03030303030299,
5.42099192618225), NORTHWEST = c(6.57894736842105, 6.95256660168939,
6.50060753341436, 5.5904164289789, 4.59211237169096, 4.70041322314051
), EASTMIDS = c(4.98489425981872, 8.20143884892085, 6.91489361702127,
5.22388059701494, 5.61465721040189, 4.64465584778958), WESTMIDS = c(4.65838509316771,
4.74777448071216, 8.66855524079319, 6.56934306569344, 3.22896281800389,
3.17535545023698), EASTANGLIA = c(6.74525212835624, 8.58895705521476,
8.47457627118643, 10.7291666666667, 4.8447789275635, 4.84522207267835
), OUTERSEAST = c(6.7110371602884, 7.53638253638255, 9.47317544707589,
8.56512141280351, 3.82269215128102, 2.11515863689776), OUTERMET = c(4.54545454545458,
6.58505698607005, 7.36633663366336, 7.08225746956843, 4.3747847054771,
1.68316831683168), LONDON = c(8.11719500480309, 10.3065304309196,
6.32299637535239, 7.65151515151515, 1.30190007037299, 2.1535255296978
), SOUTHWEST = c(6.17577197149644, 7.71812080536912, 7.63239875389407,
9.45489628557649, 2.46804759806079, 2.19354838709679), WALES = c(6.09418282548476,
8.35509138381203, 7.40963855421687, 7.01065619742007, 1.15303983228513,
3.47150259067357), SCOTLAND = c(5.15222482435597, 4.12026726057908,
5.40106951871658, 8.67579908675796, -0.280112044817908, 2.94943820224719
), NIRELAND = c(4.54545454545454, 4.94752623688156, 4.42857142857145,
2.96397628818967, 6.06731620903454, 0.0835073068893502), UK = c(5.76890543055322,
7.20302836425676, 7.39543442582184, 7.22885986848197, 3.23472252213347,
2.95766398929048)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"))
Code:
# Draw one line chart per column of data2, from column 2 to the last column,
# with Date on the x-axis; each plot is passed to print() inside the loop.
# NOTE(review): the data shown above has class tbl_df, so data2[, i] appears
# to yield a one-column tibble rather than a plain vector, which matches the
# "object of type tbl_df/tbl/data.frame" message reported below - confirm.
for (i in 2:ncol(data2)) { # Printing ggplot within for-loop
print(ggplot(data2, aes(x = Date, y = data2[, i])) + # Basic ggplot2 plot of x & y's
geom_line() +
labs(title = "Uk States",
y = "",
x = "") +
theme_bw() +
geom_hline(yintercept = 0))
# Pause one second between consecutive plots
Sys.sleep(1)
}
Error:
Don't know how to automatically pick scale for object of type tbl_df/tbl/data.frame. Defaulting to continuous.
Error in is.finite(x) : default method not implemented for type 'list'
I would suggest looping over the column names instead of the column positions. You can then use the .data pronoun, as in .data[[i]], to select the y column by name.
library(tidyverse)
# Loop over every column name except the first one and plot that column
# against Date, printing one chart per iteration.
for (i in names(data2)[-1]) {
  print(
    ggplot(data2, aes(x = Date, y = .data[[i]])) +  # .data[[i]] picks the column named by i
      geom_line() +
      geom_hline(yintercept = 0) +
      labs(title = "Uk States", x = "", y = "") +
      theme_bw()
  )
  # Pause one second between consecutive plots
  Sys.sleep(1)
}
You may also try facet_wrap to combine multiple plots together.
# Stack all non-Date columns into name/value pairs, then draw one panel per
# original column.
data2 %>%
  pivot_longer(cols = -Date, names_to = "name", values_to = "value") %>%
  ggplot(aes(x = Date, y = value)) +
  geom_line() +
  geom_hline(yintercept = 0) +
  facet_wrap(~name) +
  theme_bw() +
  labs(title = "Uk States", x = "", y = "")
Another way of generating ggplots in a loop is to use lapply, where we loop over the column names and use aes_string for the aesthetic mapping.
Here the results are saved to the list ggplot_list, where you can extract individual plot by indexing (e.g. plot for NORTH is stored in ggplot_list[[1]])
Note that I've changed labs(title = i) so that the plot title would be your column names.
library(ggplot2)
# Build one line chart per non-Date column of data2 and collect them in a
# list. The list is named after the columns, so a plot can be fetched either
# by position (ggplot_list[[1]]) or by name (ggplot_list[["NORTH"]]).
plot_columns <- colnames(data2)[-1]
ggplot_list <- lapply(plot_columns, \(i) {
  # aes_string() takes column names as strings; the y column is the current
  # name `i` (the original passed an undefined `x`).
  # aes_string() is deprecated in recent ggplot2 releases but still works.
  ggplot(data2, aes_string("Date", i)) +
    geom_line() +
    labs(title = i, y = "", x = "") +
    theme_bw() +
    geom_hline(yintercept = 0)
})
names(ggplot_list) <- plot_columns
This question is linked to a previous one answered by @Rui Barradas and @Duck, but I need more help. The previous question is here:
how do i vectorise (automate) plot creation in R
Basically, I need to combine 3 datasets into one plot with a secondary y axis. All datasets need to be split by SITENAME and will facet wrap by Sampling.Year. I am using split and sapply. Being facet wrap the plots look something like this:
However, i'm now trying to add the two other data sources into the plots, to look something like this:
But I am struggling to add the two other data sources and get them to split by SITENAME. Here is my code so far...
Record plot format as a function to be applied to a split list df (ideally 'df' would be added as geom_line with a secondary y axis, and 'FF_start_dates' will be added as a vertical dashed line):
# Scatter plot of Daily.Ave.PAF against DATE for one site, coloured by Risk
# and split into one panel per Sampling.Year.
#
# AllDates_TPAF: data frame for a single site; the code reads its DATE,
#   Daily.Ave.PAF, Risk, Sampling.Year and SITENAME columns.
# Returns the ggplot object (it is not printed here).
SITENAME_plot <- function(AllDates_TPAF) {
  risk_colours <- c(
    "Very Low" = "dark green", "Low" = "light green",
    "Moderate" = "yellow", "High" = "orange", "Very High" = "red"
  )

  ggplot(AllDates_TPAF, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    # drop = FALSE keeps every risk class in the legend
    scale_colour_manual(values = risk_colours, drop = FALSE) +
    labs(x = "Month", y = "Total PAF (% affected)") +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%B")) +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    # Secondary axis uses an identity transform (~.), i.e. the same scale
    scale_y_continuous(
      limits = c(0, 100),
      sec.axis = sec_axis(~., name = "Water level (m)")
    ) +
    # theme_bw() is a complete theme: it must come BEFORE the theme() tweaks,
    # otherwise it replaces them (the original added it last).
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      legend.text = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title = element_text(size = 15, face = "bold")
    ) +
    guides(color = guide_legend(reverse = TRUE)) +
    ggtitle(unique(AllDates_TPAF$SITENAME))
}
plot write function:
# Render plot `g` to "<dir>/<name>.jpg" and return that file path.
#
# name: file name without extension.
# g:    object to print onto the graphics device (e.g. a ggplot).
# dir:  output directory.
SITENAME_plot_write <- function(name, g, dir = "N:/abc/") {
  flname <- paste0(file.path(dir, name), ".jpg")
  # The file name ends in .jpg, so open a JPEG device; the original opened
  # png(), which wrote PNG data under a .jpg name.
  jpeg(filename = flname, width = 1500, height = 1000)
  # Close the device even if print() fails, so no device is left open
  on.exit(dev.off(), add = TRUE)
  print(g)
  flname
}
Apply function to list split by SITENAME:
# Split the data by site, build one plot per site, then write each plot to
# the current working directory using the site name as the file name.
sp1 <- split(AllDates_TPAF, AllDates_TPAF$SITENAME)
gg_list <- lapply(sp1, SITENAME_plot)
mapply(SITENAME_plot_write, names(gg_list), gg_list, MoreArgs = list(dir = getwd()))
# No trailing dev.off(): SITENAME_plot_write() closes every device it opens,
# so an extra call would close an unrelated device or fail on the null device.
I have uploaded samples of all 3 datasets here: Sample Data
Apologies for not using gsub but there was too much data and I couldn't get it to work properly
thanks in advance for any help you can give, even if it is just to point me towards a web tutorial of some kind.
You can try the following code. I used the data you shared. Just be careful with the names in all datasets: ideally, the key columns such as DATE and Sampling.Year should be present in all data frames before making the split. Some variables, such as Risk, were absent, so I added an example variable with the same name. Here is the code; I added a function for the plot you want:
library(tidyverse)
library(readxl)
# Sheet 1 of the workbook
df1 <- read_excel("Sample data.xlsx", sheet = 1)
# Example Risk column: the five classes repeated 67 times plus one extra
# "Very High" at the end
df1[["Risk"]] <- c(
  rep(c("Very Low", "Low", "Moderate", "High", "Very High"), times = 67),
  "Very High"
)
# Sheets 2 and 3 of the same workbook
df2 <- read_excel("Sample data.xlsx", sheet = 2)
df3 <- read_excel("Sample data.xlsx", sheet = 3)
# Split each data set by site (the third sheet names the column `Site Name`)
L1 <- split(df1, df1[["SITENAME"]])
L2 <- split(df2, df2[["SITENAME"]])
L3 <- split(df3, df3[["Site Name"]])
#Function to create plots
# Build the combined plot for one site: Daily.Ave.PAF points coloured by
# Risk, Height as a line on a rescaled secondary axis, and dashed vertical
# lines at the dates in `z`, with one panel per Sampling.Year.
#
# x: data frame; reads Sampling.Year, DATE, Daily.Ave.PAF, Risk, SITENAME.
# y: data frame; reads DATE, Sampling.Year and Height (SITENAME is dropped).
# z: data frame whose 4th column holds the dates for the vertical lines.
# Returns the ggplot object.
myplot <- function(x, y, z) {
  # Keep the first row per DATE/Sampling.Year so the merge cannot duplicate
  # rows of x, and drop SITENAME to avoid a clashing column.
  y <- y[!duplicated(paste(y$DATE, y$Sampling.Year)), ]
  y$SITENAME <- NULL
  xy <- merge(x, y, by = c("Sampling.Year", "DATE"), all.x = TRUE)
  xy$DATE <- as.Date(xy$DATE)

  # Ratio used to draw Height on the PAF axis. The left merge leaves NA in
  # Height for unmatched rows, so NAs must be ignored or the factor is NA.
  scaleFactor <- max(xy$Daily.Ave.PAF, na.rm = TRUE) /
    max(xy$Height, na.rm = TRUE)

  # Rename the 4th column of z so it matches the x aesthetic used below
  names(z)[4] <- "DATE"
  z$DATE <- as.Date(z$DATE)

  risk_colours <- c(
    "Very Low" = "dark green", "Low" = "light green",
    "Moderate" = "yellow", "High" = "orange", "Very High" = "red"
  )

  ggplot(xy, aes(DATE, Daily.Ave.PAF)) +
    geom_point(aes(colour = Risk), size = 3) +
    scale_colour_manual(values = risk_colours, drop = FALSE) +
    scale_x_date(breaks = "1 month", labels = scales::date_format("%b %Y")) +
    geom_line(aes(x = DATE, y = Height * scaleFactor)) +
    # The secondary axis undoes the scaling applied to Height above
    scale_y_continuous(
      name = "Total PAF (% affected)",
      sec.axis = sec_axis(~ . / scaleFactor, name = "Water level (m)")
    ) +
    labs(x = "Month") +
    geom_vline(data = z, aes(xintercept = DATE), linetype = "dashed") +
    facet_wrap(~Sampling.Year, ncol = 1, scales = "free") +
    # theme_bw() is a complete theme: apply it first so the theme() tweaks
    # below are not overwritten (the original added it last).
    theme_bw() +
    theme(
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
      legend.text = element_text(size = 15),
      axis.text = element_text(size = 15),
      axis.title = element_text(size = 15, face = "bold")
    ) +
    guides(color = guide_legend(reverse = TRUE)) +
    ggtitle(unique(xy$SITENAME))
}
# Build one plot per site; the i-th elements of L1, L2 and L3 are passed
# together to myplot().
# NOTE(review): this pairs the lists by position - presumably all three
# contain the same sites in the same order; verify.
Lplots <- Map(myplot, x = L1, y = L2, z = L3)
# Output file names are the list names plus a .png extension
vnames <- paste0(names(Lplots), ".png")
# Save each plot under its file name with a common width
mapply(
  ggsave,
  Lplots,
  filename = vnames,
  MoreArgs = list(width = 30, units = "cm")
)
You will end up with plots like these saved in your dir:
Some dashed lines do not appear in plots because they were not present in the data you provided.
For the following data set, I would like to plot each variable and color each block of 10 consecutive observations differently. I can do it using base R. I want to learn how to do it using ggplot2.
dput(mydata)
structure(list(beta0_C1 = c(5.90722120539152, 5.89025566996191,
5.88591520258904, 5.86911167649919, 5.93772460437405, 5.92985640353594,
5.89150365752453, 5.99046628686212, 5.91548006074821, 5.91571832976612,
5.88437484241154, 5.92092513223357, 5.98978050584774, 5.91152552752889,
5.91235823292462, 5.87961960044268, 5.84048698713552, 5.85484766204026,
5.94002829943904, 5.8844367778216, 5.90201348639369, 5.91220967575205,
5.90010933186624, 5.9187781795242, 5.85506764080697, 5.90103565341373,
5.88527143992961, 5.90218851192948, 5.90118162849608, 5.93147588185271
), beta1_C1 = c(0.389473200070741, 0.386495525456602, 0.401277295631578,
0.400952009358693, 0.376727640651344, 0.380365338054745, 0.393444927288697,
0.351041363714069, 0.393194356572458, 0.393448101768608, 0.398884551136789,
0.399458966787235, 0.357436746423815, 0.393782316102096, 0.387154169967002,
0.400838223362088, 0.404272252119662, 0.407427775176583, 0.379704250022161,
0.388842664781329, 0.382202010301184, 0.401354531881688, 0.391184010553641,
0.390280828053183, 0.402135923802544, 0.384344141458216, 0.405409447440106,
0.391719398951194, 0.398025625260563, 0.361822915989445), beta2_C1 = c(-0.0214886993465096,
-0.020723519439664, -0.0224612526333316, -0.0218187226687474,
-0.0200324040063121, -0.0208421378685671, -0.0218756660346625,
-0.0182499666400075, -0.0222765863213226, -0.022242845613047,
-0.0222033291270054, -0.0231570312767931, -0.0189429585905841,
-0.0221017468740293, -0.0209327798783444, -0.022409049257, -0.021698958175968,
-0.0225601087054418, -0.020928341508875, -0.0214668830626075,
-0.0205872002686706, -0.0233768022702472, -0.021755967293395,
-0.0218442145294776, -0.0222514480818199, -0.0212195394692002,
-0.0232109717283908, -0.0214814999754984, -0.0225124468437127,
-0.0187033387452614), beta0_C2 = c(6.50537199380546, 6.43626630601952,
6.44460360859128, 6.44788878017196, 6.49678676895955, 6.48474789770674,
6.5459727637079, 6.37593806532098, 6.39492158034295, 6.44497331914909,
6.3888816168562, 6.49660574813212, 6.45922901141938, 6.40080765767324,
6.37918638201668, 6.49354321098856, 6.47057962920788, 6.55699741431025,
6.56617313133218, 6.54271932949381, 6.44608000042182, 6.45333777656105,
6.67458442747556, 6.48420983182487, 6.59919337271637, 6.46645685814734,
6.46171236062657, 6.52625058117578, 6.51177045919728, 6.49897849935538
), beta1_C2 = c(-0.370455826326915, -0.338852275811034, -0.340671118342601,
-0.339888681238265, -0.36934391822867, -0.357194169746804, -0.415966150286963,
-0.349051278947586, -0.358209379291251, -0.371785518417424, -0.349725822847608,
-0.368220986471866, -0.327425879655177, -0.336993142255552, -0.328859493371605,
-0.347764105375218, -0.329761787134926, -0.37935820670654, -0.400211161919931,
-0.408699321227288, -0.357590345066542, -0.376548827126353, -0.44672514669147,
-0.353840422053319, -0.421912098450693, -0.371491468175642, -0.354864346664247,
-0.39139246919467, -0.379006372881295, -0.372492936183765), beta2_C2 = c(0.039728365796445,
0.0368393936404604, 0.0375019672690036, 0.0375019364609944, 0.0403444583999664,
0.0378627636833333, 0.0446717245407897, 0.0377538641609231, 0.039662572899695,
0.0408055348533836, 0.0386737104573771, 0.0397794302159846, 0.0352739962796708,
0.0376756204317514, 0.0370614500426065, 0.0374731659969108, 0.035366001926832,
0.0397165124506166, 0.0414814320660011, 0.0431083057931525, 0.0388672853038453,
0.0403590048367136, 0.0461540000449275, 0.0379315295246309, 0.0440664419193363,
0.0404593732981113, 0.0387390924290065, 0.0417832766420881, 0.0409598003097311,
0.0394548129358408)), row.names = c(NA, 30L), class = "data.frame")
R base code
# One panel per column of mydata; within each panel the rows are split into
# three equal consecutive blocks (chains), each drawn in its own colour.
par(mfrow = c(3, 3))
col.set <- c("green", "blue", "purple", "deeppink", "darkorchid", "darkmagenta", "black", "khaki")
loop.vector <- seq_len(ncol(mydata))
n.chains <- 3
# Rows per chain; constant for the whole loop, so it is computed once
chain.len <- nrow(mydata) / n.chains
for (b in loop.vector) {
  x.beta <- mydata[, b]
  # Column names look like "beta0_C1": strip "beta", then take the index
  # digit (character 1) and the group label (characters 3-4)
  suffix <- sub("^beta", "", names(mydata)[b])
  beta <- substr(suffix, 1, 1)
  Cn <- substr(suffix, 3, 4)
  # Empty frame sized to the whole column; lines are added chain by chain
  plot(x.beta, type = "n", ylab = "", xlab = "",
       main = bquote(beta[.(beta)] ~ .(Cn)),
       cex.main = 1)
  for (k in seq_len(n.chains)) {
    # Row positions of chain k: ((k - 1) * chain.len + 1) .. (k * chain.len)
    idx <- ((k - 1) * chain.len + 1):(k * chain.len)
    lines(idx, x.beta[idx], col = col.set[k])
  }
  # Draw the legend once per panel, with exactly one colour per chain
  legend("topleft", bg = "transparent", inset = 0.05,
         legend = paste0("chain_", seq_len(n.chains)),
         col = col.set[seq_len(n.chains)], lty = 1, box.lty = 0, cex = 0.8)
}
# Overall title, written once in the outer margin after all panels are drawn
mtext("plots of betas", line = -1.5, cex = 1, outer = TRUE)
I want the same style of main title for each plot and one main title for all plots.
how can I do it using the ggplot2 package?
ggplot2 works best with a long data frame containing variables for x, y, color, etc. This makes a long data frame:
library(tidyverse)
# Long format: one row per observation index `n`, chain and parameter name.
# The data set in the question is called `mydata` (the original referred to
# an undefined `my_data`).
long_data <- mydata %>%
  mutate(
    n = seq_len(nrow(mydata)),
    # Three equal consecutive blocks of rows, labelled Chain 1..3
    chain = paste0("Chain ", rep(1:3, each = nrow(mydata) / 3))
  ) %>%
  pivot_longer(cols = c(-n, -chain)) %>%
  # "beta0_C1" -> "beta[0]~~C1", a plotmath string for parsed facet labels
  mutate(name = str_replace(name, "(\\d)_", "[\\1]~~"))
This makes the plot.
# One line per chain in each panel; panels are filled column-wise (dir = "v")
# in three columns, each with its own y scale, and the facet labels are
# parsed as plotmath expressions.
ggplot(long_data, aes(x = n, y = value, color = chain)) +
  geom_line() +
  scale_color_manual(
    "",
    values = c("Chain 1" = "green", "Chain 2" = "blue", "Chain 3" = "purple")
  ) +
  facet_wrap(
    ~name,
    scales = "free_y",
    ncol = 3,
    dir = "v",
    labeller = label_parsed
  ) +
  theme_minimal()
Quite similar to @KentJohnson's answer, but adding expression labelling of the facets, a centered title, and using the scale_color_manual function to edit the color labels:
library(ggplot2)
library(dplyr)
library(tidyr)
# Reshape the question's data set (`mydata`; the original used `df`, which is
# not defined here) and plot every parameter in its own panel, coloured by
# chain, under one centred overall title.
mydata %>%
  mutate(
    # Three equal consecutive blocks of rows instead of a hard-coded 10 / 30
    Group = rep(c("A", "B", "C"), each = nrow(mydata) / 3),
    Position = seq_len(nrow(mydata))
  ) %>%
  pivot_longer(-c(Group, Position), names_to = "Var", values_to = "val") %>%
  # Facet labels are plotmath strings that label_parsed turns into
  # expressions; the factor levels fix the panel order.
  mutate(Var = factor(
    Var,
    levels = c("beta0_C1", "beta1_C1", "beta2_C1", "beta0_C2", "beta1_C2", "beta2_C2"),
    labels = c(
      'beta[0] * "C1"', 'beta[1] * "C1"', 'beta[2] * "C1"',
      'beta[0] * "C2"', 'beta[1] * "C2"', 'beta[2] * "C2"'
    )
  )) %>%
  ggplot(aes(x = Position, y = val, color = Group)) +
  geom_line() +
  facet_wrap(. ~ Var, scales = "free", labeller = label_parsed) +
  labs(x = "", y = "", title = "Plots of Betas", color = "") +
  # Colours and legend labels are matched to groups A, B, C in that order
  scale_color_manual(
    values = c("green", "blue", "purple"),
    labels = c("Chain 1", "Chain 2", "Chain 3")
  ) +
  theme_minimal() +
  # Centre the overall title
  theme(plot.title = element_text(hjust = 0.5))
For the following data set, I would like to plot each variable and color each block of 10 consecutive observations differently. I can do it using base R. I want to learn how to do it using ggplot2.
dput(mydata)
structure(list(beta0_C1 = c(5.90722120539152, 5.89025566996191,
5.88591520258904, 5.86911167649919, 5.93772460437405, 5.92985640353594,
5.89150365752453, 5.99046628686212, 5.91548006074821, 5.91571832976612,
5.88437484241154, 5.92092513223357, 5.98978050584774, 5.91152552752889,
5.91235823292462, 5.87961960044268, 5.84048698713552, 5.85484766204026,
5.94002829943904, 5.8844367778216, 5.90201348639369, 5.91220967575205,
5.90010933186624, 5.9187781795242, 5.85506764080697, 5.90103565341373,
5.88527143992961, 5.90218851192948, 5.90118162849608, 5.93147588185271
), beta1_C1 = c(0.389473200070741, 0.386495525456602, 0.401277295631578,
0.400952009358693, 0.376727640651344, 0.380365338054745, 0.393444927288697,
0.351041363714069, 0.393194356572458, 0.393448101768608, 0.398884551136789,
0.399458966787235, 0.357436746423815, 0.393782316102096, 0.387154169967002,
0.400838223362088, 0.404272252119662, 0.407427775176583, 0.379704250022161,
0.388842664781329, 0.382202010301184, 0.401354531881688, 0.391184010553641,
0.390280828053183, 0.402135923802544, 0.384344141458216, 0.405409447440106,
0.391719398951194, 0.398025625260563, 0.361822915989445), beta2_C1 = c(-0.0214886993465096,
-0.020723519439664, -0.0224612526333316, -0.0218187226687474,
-0.0200324040063121, -0.0208421378685671, -0.0218756660346625,
-0.0182499666400075, -0.0222765863213226, -0.022242845613047,
-0.0222033291270054, -0.0231570312767931, -0.0189429585905841,
-0.0221017468740293, -0.0209327798783444, -0.022409049257, -0.021698958175968,
-0.0225601087054418, -0.020928341508875, -0.0214668830626075,
-0.0205872002686706, -0.0233768022702472, -0.021755967293395,
-0.0218442145294776, -0.0222514480818199, -0.0212195394692002,
-0.0232109717283908, -0.0214814999754984, -0.0225124468437127,
-0.0187033387452614), beta0_C2 = c(6.50537199380546, 6.43626630601952,
6.44460360859128, 6.44788878017196, 6.49678676895955, 6.48474789770674,
6.5459727637079, 6.37593806532098, 6.39492158034295, 6.44497331914909,
6.3888816168562, 6.49660574813212, 6.45922901141938, 6.40080765767324,
6.37918638201668, 6.49354321098856, 6.47057962920788, 6.55699741431025,
6.56617313133218, 6.54271932949381, 6.44608000042182, 6.45333777656105,
6.67458442747556, 6.48420983182487, 6.59919337271637, 6.46645685814734,
6.46171236062657, 6.52625058117578, 6.51177045919728, 6.49897849935538
), beta1_C2 = c(-0.370455826326915, -0.338852275811034, -0.340671118342601,
-0.339888681238265, -0.36934391822867, -0.357194169746804, -0.415966150286963,
-0.349051278947586, -0.358209379291251, -0.371785518417424, -0.349725822847608,
-0.368220986471866, -0.327425879655177, -0.336993142255552, -0.328859493371605,
-0.347764105375218, -0.329761787134926, -0.37935820670654, -0.400211161919931,
-0.408699321227288, -0.357590345066542, -0.376548827126353, -0.44672514669147,
-0.353840422053319, -0.421912098450693, -0.371491468175642, -0.354864346664247,
-0.39139246919467, -0.379006372881295, -0.372492936183765), beta2_C2 = c(0.039728365796445,
0.0368393936404604, 0.0375019672690036, 0.0375019364609944, 0.0403444583999664,
0.0378627636833333, 0.0446717245407897, 0.0377538641609231, 0.039662572899695,
0.0408055348533836, 0.0386737104573771, 0.0397794302159846, 0.0352739962796708,
0.0376756204317514, 0.0370614500426065, 0.0374731659969108, 0.035366001926832,
0.0397165124506166, 0.0414814320660011, 0.0431083057931525, 0.0388672853038453,
0.0403590048367136, 0.0461540000449275, 0.0379315295246309, 0.0440664419193363,
0.0404593732981113, 0.0387390924290065, 0.0417832766420881, 0.0409598003097311,
0.0394548129358408)), row.names = c(NA, 30L), class = "data.frame")
R base code
# One panel per column of mydata; within each panel the rows are split into
# three equal consecutive blocks (chains), each drawn in its own colour.
par(mfrow = c(3, 3))
col.set <- c("green", "blue", "purple", "deeppink", "darkorchid", "darkmagenta", "black", "khaki")
loop.vector <- seq_len(ncol(mydata))
n.chains <- 3
# Rows per chain; constant for the whole loop, so it is computed once
chain.len <- nrow(mydata) / n.chains
for (b in loop.vector) {
  x.beta <- mydata[, b]
  # Column names look like "beta0_C1": strip "beta", then take the index
  # digit (character 1) and the group label (characters 3-4)
  suffix <- sub("^beta", "", names(mydata)[b])
  beta <- substr(suffix, 1, 1)
  Cn <- substr(suffix, 3, 4)
  # Empty frame sized to the whole column; lines are added chain by chain
  plot(x.beta, type = "n", ylab = "", xlab = "",
       main = bquote(beta[.(beta)] ~ .(Cn)),
       cex.main = 1)
  for (k in seq_len(n.chains)) {
    # Row positions of chain k: ((k - 1) * chain.len + 1) .. (k * chain.len)
    idx <- ((k - 1) * chain.len + 1):(k * chain.len)
    lines(idx, x.beta[idx], col = col.set[k])
  }
  # Draw the legend once per panel, with exactly one colour per chain
  legend("topleft", bg = "transparent", inset = 0.05,
         legend = paste0("chain_", seq_len(n.chains)),
         col = col.set[seq_len(n.chains)], lty = 1, box.lty = 0, cex = 0.8)
}
# Overall title, written once in the outer margin after all panels are drawn
mtext("plots of betas", line = -1.5, cex = 1, outer = TRUE)
I want the same style of main title for each plot and one main title for all plots.
how can I do it using the ggplot2 package?
ggplot2 works best with a long data frame containing variables for x, y, color, etc. This makes a long data frame:
library(tidyverse)
# Long format: one row per observation index `n`, chain and parameter name.
# The data set in the question is called `mydata` (the original referred to
# an undefined `my_data`).
long_data <- mydata %>%
  mutate(
    n = seq_len(nrow(mydata)),
    # Three equal consecutive blocks of rows, labelled Chain 1..3
    chain = paste0("Chain ", rep(1:3, each = nrow(mydata) / 3))
  ) %>%
  pivot_longer(cols = c(-n, -chain)) %>%
  # "beta0_C1" -> "beta[0]~~C1", a plotmath string for parsed facet labels
  mutate(name = str_replace(name, "(\\d)_", "[\\1]~~"))
This makes the plot.
# One line per chain in each panel; panels are filled column-wise (dir = "v")
# in three columns, each with its own y scale, and the facet labels are
# parsed as plotmath expressions.
ggplot(long_data, aes(x = n, y = value, color = chain)) +
  geom_line() +
  scale_color_manual(
    "",
    values = c("Chain 1" = "green", "Chain 2" = "blue", "Chain 3" = "purple")
  ) +
  facet_wrap(
    ~name,
    scales = "free_y",
    ncol = 3,
    dir = "v",
    labeller = label_parsed
  ) +
  theme_minimal()
Quite similar to @KentJohnson's answer, but adding expression labelling of the facets, a centered title, and using the scale_color_manual function to edit the color labels:
library(ggplot2)
library(dplyr)
library(tidyr)
# Reshape the question's data set (`mydata`; the original used `df`, which is
# not defined here) and plot every parameter in its own panel, coloured by
# chain, under one centred overall title.
mydata %>%
  mutate(
    # Three equal consecutive blocks of rows instead of a hard-coded 10 / 30
    Group = rep(c("A", "B", "C"), each = nrow(mydata) / 3),
    Position = seq_len(nrow(mydata))
  ) %>%
  pivot_longer(-c(Group, Position), names_to = "Var", values_to = "val") %>%
  # Facet labels are plotmath strings that label_parsed turns into
  # expressions; the factor levels fix the panel order.
  mutate(Var = factor(
    Var,
    levels = c("beta0_C1", "beta1_C1", "beta2_C1", "beta0_C2", "beta1_C2", "beta2_C2"),
    labels = c(
      'beta[0] * "C1"', 'beta[1] * "C1"', 'beta[2] * "C1"',
      'beta[0] * "C2"', 'beta[1] * "C2"', 'beta[2] * "C2"'
    )
  )) %>%
  ggplot(aes(x = Position, y = val, color = Group)) +
  geom_line() +
  facet_wrap(. ~ Var, scales = "free", labeller = label_parsed) +
  labs(x = "", y = "", title = "Plots of Betas", color = "") +
  # Colours and legend labels are matched to groups A, B, C in that order
  scale_color_manual(
    values = c("green", "blue", "purple"),
    labels = c("Chain 1", "Chain 2", "Chain 3")
  ) +
  theme_minimal() +
  # Centre the overall title
  theme(plot.title = element_text(hjust = 0.5))