How to plot a row against the column names in R - r

I would like to plot a graph with different lines for each row, and that the column names are assigned to the X axis. For finishing, I would also like to make every line different from the other with a legend for the reader.
Thank you in advance.
My data:
Average 2003-2005 Average 2006-2008 Average 2009-2010 Average 2011-2013 Average 2014-2016
31.48489 32.53664 30.41938 30.53870 31.15550
18.78799 17.78141 17.58791 17.03071 17.25654
107.46615 107.71512 109.55090 110.31438 109.66492
> str(Table_1_2003_2018_All)
'data.frame': 3 obs. of 6 variables:
$ Average 2003-2005: num 31.5 18.8 107.5
$ Average 2006-2008: num 32.5 17.8 107.7
$ Average 2009-2010: num 30.4 17.6 109.6
$ Average 2011-2013: num 30.5 17 110.3
$ Average 2014-2016: num 31.2 17.3 109.7
$ Average 2017-2018: num 31.8 16.8 109.8
Code:
# Plot 1
colnames(Table_1_2003_2018_All) <- c("2003-2005","2006-2008","2009-2010","2011-2013","2014-2016","2017-2018")
plot(seq_along(Table_1_2003_2018_All),
Table_1_2003_2018_All[1,], type="l", xaxt = 'n',xlab = 'Time Periods', ylab = 'Average',
main = "MARKET WORK", ylim = c(30,35)
)
axis(1, at = 1:6, colnames(Table_1_2003_2018_All))
Thanks in advance.

We can specify the 'x' as numeric i.e sequence of columns and then change the x labels with axis
plot(seq_along(Table_1_2003_2018_All),
Table_1_2003_2018_All[1,], type="l", xaxt = 'n',
xlab = 'colnames', ylab = 'first row')
axis(1, at = 1:5, colnames(Table_1_2003_2018_All))
If we need to plot lines for each row, use matplot
matplot(t(Table_1_2003_2018_All), type = 'l', xaxt = 'n')
legend("top", legend = seq_len(nrow(Table_1_2003_2018_All)),
col= seq_len(nrow(Table_1_2003_2018_All)),cex=0.8,
fill=seq_len(nrow(Table_1_2003_2018_All)))
axis(1, at = 1:5, colnames(Table_1_2003_2018_All))
data
Table_1_2003_2018_All <- structure(list(Average2003.2005 = c(31.48489, 18.78799, 107.46615
), Average2006.2008 = c(32.53664, 17.78141, 107.71512), Average2009.2010 = c(30.41938,
17.58791, 109.5509), Average2011.2013 = c(30.5387, 17.03071,
110.31438), Average2014.2016 = c(31.1555, 17.25654, 109.66492
)), class = "data.frame", row.names = c(NA, -3L))

Related

How to rearrange the rows of a matrix in R

I have the following data frame:
> agg_2
# A tibble: 3 × 3
bcs default_flag pred_default
<chr> <dbl> <dbl>
1 high-score 0.00907 0.0121
2 low-score 0.0345 0.0353
3 mid-score 0.0210 0.0204
I plot it as a bar plot using the following code:
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
names.arg = c("High Score", "Low Score", "Mid Score"),
col = gray.colors(2),
beside = TRUE)
legend("topleft",
c("Default", "Pred. Default"),
fill = gray.colors(2))
and it gives me this:
How can I rearrange the data frame/matrix so that the pairs of bars in the bar plot are as follows: Low Score then Mid Score then High Score?
Here is one potential solution:
agg_2 <- read.table(text = "bcs default_flag pred_default
high-score 0.00907 0.0121
low-score 0.0345 0.0353
mid-score 0.0210 0.0204", header = TRUE)
agg_2$bcs <- factor(agg_2$bcs, levels = c("low-score", "mid-score", "high-score"), ordered = TRUE)
agg_2 <- agg_2[order(agg_2$bcs),]
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
names.arg = agg_2$bcs,
col = gray.colors(2),
beside = TRUE)
legend("topright",
c("Default", "Pred. Default"),
fill = gray.colors(2))
Created on 2022-06-21 by the reprex package (v2.0.1)

Adding error bars from different columns in grouped barplot with ggplot2

I´d like to add error bars to my barplot. The data for standard deviations is in two different columns.
This is my barplot:
# load packages
> library(data.table)
> library(ggplot2)
> library(tidyr)
>
> results <- fread("Results.csv", header=TRUE, sep=";")
>
> str(results)
Classes ‘data.table’ and 'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
- attr(*, ".internal.selfref")=<externalptr>
>
> df <- data.frame(results)
>
> str(df)
'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
>
>
>
> # Manually set factor levels of 'Organism' column to plot in a logical order.
> df$Organism = factor(df$Organism,
+ levels=c("without organism", "AC1432", "BF3523", "XR2405", "D3425", "XR2463", "ATF259"))
>
> df.g <- gather(df, Molecule1, Molecule2, -Organism, -Molecule1sd, -Molecule2sd)
> df.sd <- gather(df, Molecule1sd, Molecule2sd, -Molecule1, -Molecule2, -Organism)
> ggplot(df.g, aes(Molecule1, Molecule2)) +
+ geom_bar(aes(fill = Organism), stat = "identity", position = "dodge")
barplot without error bar
used data:
> dput(df)
structure(list(Organism = structure(c(2L, 5L, 3L, 4L, 6L, 7L,
1L), .Label = c("without organism", "AC1432", "BF3523", "XR2405",
"D3425", "XR2463", "ATF259"), class = "factor"), Molecule1 = c(39.45920899,
418.4234805, 189.162295, 49.314698, 4610.921188, 751.7070352,
35), Molecule1sd = c(19.55450482, 70.91013667, 102.7566193, 21.20841393,
275.8934527, 71.62450643, NA), Molecule2 = c(275.9147606, 6510.974605,
235.247381, 499.8928585, 11205.33907, 9507.869294, 250), Molecule2sd = c(21.04668977,
291.1223384, 109.652064, 67.1000078, 94.54544271, 707.1950335,
NA)), row.names = c(NA, -7L), class = "data.frame")
and this is my trial for the error bars
ggplot(df.g, aes(Molecule1, Molecule2)) +
geom_bar(aes(fill = Organism), stat = "identity", position = "dodge") +geom_errorbar(df.sd, aes_Molecule1(ymin=Molecule1-Molecule1sd, ymax=Molecule1+Molecule1sd),aes_Molecule2(ymin=Molecule2-Molecule2sd, ymax=Molecule2+Molecule2sd), width=.2 )
but my idea doesn´t work. How can I add error bars from two different columns?
It might be easier if you reshape your dataset with columns for Organism, Molecule, mean and sd. Here is a tidyverse way to do it:
Package and Dataset
library(tidyverse)
df <- data.frame(Organism = c("AC1432", "D3425", "BF3523", "XR2405",
"XR2463", "ATF259", "without organism"),
Molecule1 = c(39.5, 418.4, 189.2, 49.3,
4610.9, 800, 10),
Molecule1sd = c(19.6, 70.9, 102.8, 21.2,
275.9, 100, 1),
Molecule2 = c(276, 6511, 235, 500,
11205, 9500, 250),
Molecule2sd = c( 21, 291.1, 109.7, 67.1,
94.5, 50, 2))
# I estimated the not shown values in your str(result)
Reshaping
df2 <- df %>%
# add meaningful ending to columnnames containing mean (m)
select(Molecule1m = Molecule1,
Molecule2m = Molecule2,
everything()) %>%
# gather whole dataset into Molecule, mean, sd
pivot_longer(cols = -Organism,
names_to = c("Molecule", ".value"),
names_pattern = "(Molecule[12])(.)") %>%
# factor reorder levels
mutate(Organism = factor(Organism,
levels=c("without organism", "AC1432",
"BF3523", "XR2405",
"D3425", "XR2463", "ATF259")))
Plot
ggplot(df2, aes(x = Molecule,
y = m,
fill = Organism)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = m - s, ymax = m + s),
position = "dodge")

What is wrong with my custom colour palette in this plot?

Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!

Multiple line chart using plotly r

I have a data frame which I am trying to plot using plotly as multiple line chart.Below is how the dataframe looks like:
Month_considered pct.x pct.y pct
<fct> <dbl> <dbl> <dbl>
1 Apr-17 79.0 18.4 2.61
2 May-17 78.9 18.1 2.99
3 Jun-17 77.9 18.7 3.42
4 Jul-17 77.6 18.5 3.84
5 Aug-17 78.0 18.3 3.70
6 Sep-17 78.0 18.9 3.16
7 Oct-17 77.6 18.9 3.49
8 Nov-17 77.6 18.4 4.01
9 Dec-17 78.5 18.0 3.46
10 Jan-18 79.3 18.4 2.31
11 2/1/18 78.9 19.6 1.48
When I iterate through to plot multiple lines below is the code used.
colNames <- colnames(delta)
p <-
plot_ly(
atc_seg_master,
x = ~ Month_considered,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4)
)
for (trace in colNames) {
p <-
p %>% plotly::add_trace(y = as.formula(paste0("~`", trace, "`")), name = trace)
}
p %>%
layout(
title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold")
)
p
This is how the output looks like
My question is how to remove trace 0 and month_considered to remove from the chart even though its not in colnames which I loop through to add the lines.
It looks like you were getting tripped up by two things:
When you initially defined p and included the data and x arguments, a trace was created -- trace 0. You can define a plot without providing any data or x values to start by just using p <- plot_ly() along with any desired layout features.
When you loop through the column names, your x axis column, Month_Considered is part of the set. You can exclude this by using setdiff() (part of base R) to create a vector with all of your column names except for Months_Considered
Putting those two things together, one way (of many possible) to accomplish what you're going for is as follows:
library(plotly)
df <- data.frame(Month_Considered = seq.Date(from = as.Date("2017-01-01"), by = "months", length.out = 12),
pct.x = seq(from = 70, to = 80, length.out = 12),
pct.y = seq(from = 30, to = 40, length.out = 12),
pct = seq(from = 10, to = 20, length.out = 12))
## Define a blank plot with the desired layout (don't add any traces yet)
p <- plot_ly()%>%
layout(title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold") )
## Make sure our list of columns to add doesnt include the Month Considered
ToAdd <- setdiff(colnames(df),"Month_Considered")
## Add the traces one at a time
for(i in ToAdd){
p <- p %>% add_trace(x = df[["Month_Considered"]], y = df[[i]], name = i,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4))
}
p

Plot different colours based on the conditions

This is the first 10 rows of my data frame:
head(test.data,10)
# A tibble: 10 x 5
date o2.permeg co2.ppm apo o2.spike
<time> <dbl> <dbl> <dbl> <chr>
1 2015-01-01 00:00:00 -685.09 413.023 -354.1816 N
2 2015-01-01 00:02:00 -695.10 412.894 -364.8690 N
3 2015-01-01 00:04:00 -687.84 412.979 -357.1627 N
4 2015-01-01 00:06:00 -683.23 412.866 -353.1460 N
5 2015-01-01 00:08:00 -683.28 412.755 -353.7788 N
6 2015-01-01 00:10:00 -685.40 412.647 -356.4659 N
7 2015-01-01 00:12:00 -687.80 412.659 -358.8029 N
8 2015-01-01 00:14:00 -662.79 412.665 NA Y
9 2015-01-01 00:16:00 -684.17 412.762 -354.6321 N
10 2015-01-01 00:18:00 -680.37 412.720 -351.0526 N
As you can see there's a last column named o2.spike, which has characters N and Y in it. N means that the data point is not a spike, and Y means that it is a spike. In this sample, there's only 1 Y, but in the real frame, there are loads, and randomly placed.
My desire is to plot all the data points in a plot, and those marked with Y will be plotted in a different colour.
For your information, this is the current code that I am using to plot everything. The first 3 variables are plotted in red, green, and blue, and I want the "Y" rows to be plotted in as, for example, pink.
library(openair)
test.data$yr_day <- format(as.Date(test.data$date), "%Y-%m-%d")
dir.create(daily) # where "daily" is the path of the folder I want to save the plots into
for (d in unique(test.data$yr_day)) {
mypath <- file.path(daily, paste(name, d, ".png", sep = "" ))
png(filename = mypath, width = 963, height = 690)
timePlot(subset(test.data, yr_day == d),
plot.type = "p",
pollutant = c("co2.ppm", "o2.permeg", "apo"),
y.relation = "free",
date.pad = TRUE,
pch = c(19,19,19),
cex = 0.2,
xlab = paste("Time of day in hours on", d),
ylab = "CO2, O2, and APO concentrations",
name.pol = c("CO2 (ppm)", "O2 (per meg)", "APO (per meg)"),
date.breaks = 24,
date.format = "%H:%M"
)
dev.off()
}
An example plot (containing all the spikes with the same colour as the non-spike ones) is as follows:
So how do I plot the spikes in a different colour from the others? Thank you very much!
Edit:
As asked by Sebastian, I have added this (not sure how you guys will be able to extract the data from that)
dput(head(test.data,20))
structure(list(date = structure(c(1420070400, 1420070520, 1420070640,
1420070760, 1420070880, 1420071000, 1420071120, 1420071240, 1420071360,
1420071480, 1420071600, 1420071720, 1420071840, 1420071960, 1420072080,
1420072200, 1420072320, 1420072440, 1420072560, 1420072680), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), o2.permeg = c(-685.09, -695.1, -687.84,
-683.23, -683.28, -685.4, -687.8, -662.79, -684.17, -680.37,
-684.66, -686.13, -683.27, -680.77, -682.16, -692.54, NA, NA,
NA, NA), co2.ppm = c(413.023, 412.894, 412.979, 412.866, 412.755,
412.647, 412.659, 412.665, 412.762, 412.72, 412.692, 412.71,
412.757, 412.838, 412.922, 413.019, NA, NA, NA, NA), apo = c(-354.181646778043,
-364.868973747017, -357.162673031026, -353.145990453461, -353.778806682578,
-356.465871121718, -358.802863961814, NA, -354.632052505966,
-351.052577565632, -355.489594272076, -356.86508353222, -353.75830548926,
-350.833007159904, -351.781957040573, -361.652649164678, NA,
NA, NA, NA), o2.spike = c("N", "N", "N", "N", "N", "N", "N",
"Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"
)), .Names = c("date", "o2.permeg", "co2.ppm", "apo", "o2.spike"
), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"
))
Unfortunately, without having data, it's not easy to answer the question.
A ggplot2 solution could be:
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g1
Passing a column of the dataframe to parameter "col" in "aes" makes you map with different colors every different value in that column.
It creates even a legend, with names associated to different colors.
I tried this with another dataframe ("iris", contained in base R) and it worked, hope it will be helpful.
Edit:
To have side-by-side plots, you can create 3 plots with ggplot and the use the function plot_grid() provided by "cowplot" package.
library(cowplot)
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g2 <- ggplot(data=test.data, aes(x=date, y=co2.ppm, col=o2.spike)) + geom_point()
g3 <- ggplot(data=test.data, aes(x=date, y=apo, col=o2.spike)) + geom_point()
plot_grid(g1, g2, g3, nrow=3, ncol=1)

Resources