I would like to plot a graph with different lines for each row, and that the column names are assigned to the X axis. For finishing, I would also like to make every line different from the other with a legend for the reader.
Thank you in advance.
My data:
Average 2003-2005 Average 2006-2008 Average 2009-2010 Average 2011-2013 Average 2014-2016
31.48489 32.53664 30.41938 30.53870 31.15550
18.78799 17.78141 17.58791 17.03071 17.25654
107.46615 107.71512 109.55090 110.31438 109.66492
> str(Table_1_2003_2018_All)
'data.frame': 3 obs. of 6 variables:
$ Average 2003-2005: num 31.5 18.8 107.5
$ Average 2006-2008: num 32.5 17.8 107.7
$ Average 2009-2010: num 30.4 17.6 109.6
$ Average 2011-2013: num 30.5 17 110.3
$ Average 2014-2016: num 31.2 17.3 109.7
$ Average 2017-2018: num 31.8 16.8 109.8
Code:
# Plot 1
colnames(Table_1_2003_2018_All) <- c("2003-2005","2006-2008","2009-2010","2011-2013","2014-2016","2017-2018")
plot(seq_along(Table_1_2003_2018_All),
Table_1_2003_2018_All[1,], type="l", xaxt = 'n',xlab = 'Time Periods', ylab = 'Average',
main = "MARKET WORK", ylim = c(30,35)
)
axis(1, at = 1:6, colnames(Table_1_2003_2018_All))
Thanks in advance.
We can specify the 'x' as numeric i.e sequence of columns and then change the x labels with axis
plot(seq_along(Table_1_2003_2018_All),
Table_1_2003_2018_All[1,], type="l", xaxt = 'n',
xlab = 'colnames', ylab = 'first row')
axis(1, at = 1:5, colnames(Table_1_2003_2018_All))
If we need to plot lines for each row, use matplot
matplot(t(Table_1_2003_2018_All), type = 'l', xaxt = 'n')
legend("top", legend = seq_len(nrow(Table_1_2003_2018_All)),
col= seq_len(nrow(Table_1_2003_2018_All)),cex=0.8,
fill=seq_len(nrow(Table_1_2003_2018_All)))
axis(1, at = 1:5, colnames(Table_1_2003_2018_All))
data
Table_1_2003_2018_All <- structure(list(Average2003.2005 = c(31.48489, 18.78799, 107.46615
), Average2006.2008 = c(32.53664, 17.78141, 107.71512), Average2009.2010 = c(30.41938,
17.58791, 109.5509), Average2011.2013 = c(30.5387, 17.03071,
110.31438), Average2014.2016 = c(31.1555, 17.25654, 109.66492
)), class = "data.frame", row.names = c(NA, -3L))
Related
I have the following data frame:
> agg_2
# A tibble: 3 × 3
bcs default_flag pred_default
<chr> <dbl> <dbl>
1 high-score 0.00907 0.0121
2 low-score 0.0345 0.0353
3 mid-score 0.0210 0.0204
I plot it as a bar plot using the following code:
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
names.arg = c("High Score", "Low Score", "Mid Score"),
col = gray.colors(2),
beside = TRUE)
legend("topleft",
c("Default", "Pred. Default"),
fill = gray.colors(2))
and it gives me this:
How can I rearrange the data frame/matrix so that the pairs of bars in the bar plot are as follows: Low Score then Mid Score then High Score?
Here is one potential solution:
agg_2 <- read.table(text = "bcs default_flag pred_default
high-score 0.00907 0.0121
low-score 0.0345 0.0353
mid-score 0.0210 0.0204", header = TRUE)
agg_2$bcs <- factor(agg_2$bcs, levels = c("low-score", "mid-score", "high-score"), ordered = TRUE)
agg_2 <- agg_2[order(agg_2$bcs),]
barplot(t(as.matrix(agg_2[,-1])),
main = "Actual Default vs Predicted Default",
xlab = "Score Category",
ylab = "Default Rate",
names.arg = agg_2$bcs,
col = gray.colors(2),
beside = TRUE)
legend("topright",
c("Default", "Pred. Default"),
fill = gray.colors(2))
Created on 2022-06-21 by the reprex package (v2.0.1)
I´d like to add error bars to my barplot. The data for standard deviations is in two different columns.
This is my barplot:
# load packages
> library(data.table)
> library(ggplot2)
> library(tidyr)
>
> results <- fread("Results.csv", header=TRUE, sep=";")
>
> str(results)
Classes ‘data.table’ and 'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
- attr(*, ".internal.selfref")=<externalptr>
>
> df <- data.frame(results)
>
> str(df)
'data.frame': 7 obs. of 5 variables:
$ Organism : chr "AC1432" "D3425" "BF3523" "XR2405" ...
$ Molecule1 : num 39.5 418.4 189.2 49.3 4610.9 ...
$ Molecule1sd: num 19.6 70.9 102.8 21.2 275.9 ...
$ Molecule2 : num 276 6511 235 500 11205 ...
$ Molecule2sd: num 21 291.1 109.7 67.1 94.5 ...
>
>
>
> # Manually set factor levels of 'Organism' column to plot in a logical order.
> df$Organism = factor(df$Organism,
+ levels=c("without organism", "AC1432", "BF3523", "XR2405", "D3425", "XR2463", "ATF259"))
>
> df.g <- gather(df, Molecule1, Molecule2, -Organism, -Molecule1sd, -Molecule2sd)
> df.sd <- gather(df, Molecule1sd, Molecule2sd, -Molecule1, -Molecule2, -Organism)
> ggplot(df.g, aes(Molecule1, Molecule2)) +
+ geom_bar(aes(fill = Organism), stat = "identity", position = "dodge")
barplot without error bar
used data:
> dput(df)
structure(list(Organism = structure(c(2L, 5L, 3L, 4L, 6L, 7L,
1L), .Label = c("without organism", "AC1432", "BF3523", "XR2405",
"D3425", "XR2463", "ATF259"), class = "factor"), Molecule1 = c(39.45920899,
418.4234805, 189.162295, 49.314698, 4610.921188, 751.7070352,
35), Molecule1sd = c(19.55450482, 70.91013667, 102.7566193, 21.20841393,
275.8934527, 71.62450643, NA), Molecule2 = c(275.9147606, 6510.974605,
235.247381, 499.8928585, 11205.33907, 9507.869294, 250), Molecule2sd = c(21.04668977,
291.1223384, 109.652064, 67.1000078, 94.54544271, 707.1950335,
NA)), row.names = c(NA, -7L), class = "data.frame")
and this is my trial for the error bars
ggplot(df.g, aes(Molecule1, Molecule2)) +
geom_bar(aes(fill = Organism), stat = "identity", position = "dodge") +geom_errorbar(df.sd, aes_Molecule1(ymin=Molecule1-Molecule1sd, ymax=Molecule1+Molecule1sd),aes_Molecule2(ymin=Molecule2-Molecule2sd, ymax=Molecule2+Molecule2sd), width=.2 )
but my idea doesn´t work. How can I add error bars from two different columns?
It might be easier if you reshape your dataset with columns for Organism, Molecule, mean and sd. Here is a tidyverse way to do it:
Package and Dataset
library(tidyverse)
df <- data.frame(Organism = c("AC1432", "D3425", "BF3523", "XR2405",
"XR2463", "ATF259", "without organism"),
Molecule1 = c(39.5, 418.4, 189.2, 49.3,
4610.9, 800, 10),
Molecule1sd = c(19.6, 70.9, 102.8, 21.2,
275.9, 100, 1),
Molecule2 = c(276, 6511, 235, 500,
11205, 9500, 250),
Molecule2sd = c( 21, 291.1, 109.7, 67.1,
94.5, 50, 2))
# I estimated the not shown values in your str(result)
Reshaping
df2 <- df %>%
# add meaningful ending to columnnames containing mean (m)
select(Molecule1m = Molecule1,
Molecule2m = Molecule2,
everything()) %>%
# gather whole dataset into Molecule, mean, sd
pivot_longer(cols = -Organism,
names_to = c("Molecule", ".value"),
names_pattern = "(Molecule[12])(.)") %>%
# factor reorder levels
mutate(Organism = factor(Organism,
levels=c("without organism", "AC1432",
"BF3523", "XR2405",
"D3425", "XR2463", "ATF259")))
Plot
ggplot(df2, aes(x = Molecule,
y = m,
fill = Organism)) +
geom_col(position = "dodge") +
geom_errorbar(aes(ymin = m - s, ymax = m + s),
position = "dodge")
Using ggsurvplot to draw some Kaplan-Meier curves.
5 curves should be plotted and I want control over their colours.
Here is the output of the survfit being plotted:
> elective_30Decadesurv
Call: survfit(formula = elective30Surv ~ electives$Decade)
n events median 0.95LCL 0.95UCL
electives$Decade=50 14 0 NA NA NA
electives$Decade=60 173 2 NA NA NA
electives$Decade=70 442 5 NA NA NA
electives$Decade=80 168 4 NA NA NA
electives$Decade=90 2 0 NA NA NA
Here is a working plot using the default colour palette, "hue":
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = "hue",
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
See plot in section 3.1.4 of this webpage for the output of the above
The Decade group has 5 entries, so I'm trying to provide five colours to palette.
However, both:
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
),
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
And:
> fiveColours <- c("#440154",
"#3B528B",
"#21908C",
"#5DC863",
"#5DC863"
)
> ggsurvplot(elective_30Decadesurv,
data = electives,
palette = fiveColours,
title = "30 day survival after elective EVAR",
legend = "none",
legend.title = "Decade",
legend.labs = c("5th",
"6th",
"7th",
"8th",
"9th"
),
censor.shape = 124,
ggtheme = survPlotTheme,
risk.table = "nrisk_cumevents",
risk.table.y.text.col = TRUE,
risk.table.fontsize = 3,
risk.table.height = 0.3,
break.time.by = 5,
ylim = c(0.95,
1
),
pval = TRUE,
pval.size = 3,
pval.coord = c(1,
0.96
)
)
Give the same error:
Error in names(.cols) <- grp.levels :
'names' attribute [5] must be the same length as the vector [4]
What vector is length [4]?
Is 'names' attribute my colour vector?
If I take one of the colours out of the custom palette, eg fiveColours <- c("#440154","#3B528B","#21908C","#5DC863") I get this error:
Error: Insufficient values in manual scale. 5 needed but only 4 provided.
Which implies the number of colours provided is correct but something else is causing the issue.
I've troubleshot to the limits of my own ability. Help please!
FYI:
> electives %>% select(Decade) %>% group_by(Decade) %>% summarise(n())
# A tibble: 5 x 2
Decade `n()`
<fct> <int>
1 50 14
2 60 173
3 70 442
4 80 168
5 90 2
Should prove the length of the Decade variable and here is how the survival object and survfit were generated:
> elective5Surv <- Surv(electives$surv5Y, electives$dead5Y)
> elective_5Decadesurv <- survfit(elective5Surv ~ electives$Decade)
Ok, I have sorted my own mistake by proof-reading!
Of the five hex colours I’d provided, two were identical (not on purpose.)
I changed the fifth colour to a different hex value (what it was meant to be in the first place) and it works now.
Thanks, Rui, for your response earlier, it helped me down the path!
I have a data frame which I am trying to plot using plotly as multiple line chart.Below is how the dataframe looks like:
Month_considered pct.x pct.y pct
<fct> <dbl> <dbl> <dbl>
1 Apr-17 79.0 18.4 2.61
2 May-17 78.9 18.1 2.99
3 Jun-17 77.9 18.7 3.42
4 Jul-17 77.6 18.5 3.84
5 Aug-17 78.0 18.3 3.70
6 Sep-17 78.0 18.9 3.16
7 Oct-17 77.6 18.9 3.49
8 Nov-17 77.6 18.4 4.01
9 Dec-17 78.5 18.0 3.46
10 Jan-18 79.3 18.4 2.31
11 2/1/18 78.9 19.6 1.48
When I iterate through to plot multiple lines below is the code used.
colNames <- colnames(delta)
p <-
plot_ly(
atc_seg_master,
x = ~ Month_considered,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4)
)
for (trace in colNames) {
p <-
p %>% plotly::add_trace(y = as.formula(paste0("~`", trace, "`")), name = trace)
}
p %>%
layout(
title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold")
)
p
This is how the output looks like
My question is how to remove trace 0 and month_considered to remove from the chart even though its not in colnames which I loop through to add the lines.
It looks like you were getting tripped up by two things:
When you initially defined p and included the data and x arguments, a trace was created -- trace 0. You can define a plot without providing any data or x values to start by just using p <- plot_ly() along with any desired layout features.
When you loop through the column names, your x axis column, Month_Considered is part of the set. You can exclude this by using setdiff() (part of base R) to create a vector with all of your column names except for Months_Considered
Putting those two things together, one way (of many possible) to accomplish what you're going for is as follows:
library(plotly)
df <- data.frame(Month_Considered = seq.Date(from = as.Date("2017-01-01"), by = "months", length.out = 12),
pct.x = seq(from = 70, to = 80, length.out = 12),
pct.y = seq(from = 30, to = 40, length.out = 12),
pct = seq(from = 10, to = 20, length.out = 12))
## Define a blank plot with the desired layout (don't add any traces yet)
p <- plot_ly()%>%
layout(title = "Trend Over Time",
xaxis = list(title = ""),
yaxis = list (title = "Monthly Count of Products Sold") )
## Make sure our list of columns to add doesnt include the Month Considered
ToAdd <- setdiff(colnames(df),"Month_Considered")
## Add the traces one at a time
for(i in ToAdd){
p <- p %>% add_trace(x = df[["Month_Considered"]], y = df[[i]], name = i,
type = 'scatter',
mode = 'line+markers',
line = list(color = 'rgb(205, 12, 24)', width = 4))
}
p
This is the first 10 rows of my data frame:
head(test.data,10)
# A tibble: 10 x 5
date o2.permeg co2.ppm apo o2.spike
<time> <dbl> <dbl> <dbl> <chr>
1 2015-01-01 00:00:00 -685.09 413.023 -354.1816 N
2 2015-01-01 00:02:00 -695.10 412.894 -364.8690 N
3 2015-01-01 00:04:00 -687.84 412.979 -357.1627 N
4 2015-01-01 00:06:00 -683.23 412.866 -353.1460 N
5 2015-01-01 00:08:00 -683.28 412.755 -353.7788 N
6 2015-01-01 00:10:00 -685.40 412.647 -356.4659 N
7 2015-01-01 00:12:00 -687.80 412.659 -358.8029 N
8 2015-01-01 00:14:00 -662.79 412.665 NA Y
9 2015-01-01 00:16:00 -684.17 412.762 -354.6321 N
10 2015-01-01 00:18:00 -680.37 412.720 -351.0526 N
As you can see there's a last column named o2.spike, which has characters N and Y in it. N means that the data point is not a spike, and Y means that it is a spike. In this sample, there's only 1 Y, but in the real frame, there are loads, and randomly placed.
My desire is to plot all the data points in a plot, and those marked with Y will be plotted in a different colour.
For your information, this is the current code that I am using to plot everything. The first 3 variables are plotted in red, green, and blue, and I want the "Y" rows to be plotted in as, for example, pink.
library(openair)
test.data$yr_day <- format(as.Date(test.data$date), "%Y-%m-%d")
dir.create(daily) # where "daily" is the path of the folder I want to save the plots into
for (d in unique(test.data$yr_day)) {
mypath <- file.path(daily, paste(name, d, ".png", sep = "" ))
png(filename = mypath, width = 963, height = 690)
timePlot(subset(test.data, yr_day == d),
plot.type = "p",
pollutant = c("co2.ppm", "o2.permeg", "apo"),
y.relation = "free",
date.pad = TRUE,
pch = c(19,19,19),
cex = 0.2,
xlab = paste("Time of day in hours on", d),
ylab = "CO2, O2, and APO concentrations",
name.pol = c("CO2 (ppm)", "O2 (per meg)", "APO (per meg)"),
date.breaks = 24,
date.format = "%H:%M"
)
dev.off()
}
An example plot (containing all the spikes with the same colour as the non-spike ones) is as follows:
So how do I plot the spikes in a different colour from the others? Thank you very much!
Edit:
As asked by Sebastian, I have added this (not sure how you guys will be able to extract the data from that)
dput(head(test.data,20))
structure(list(date = structure(c(1420070400, 1420070520, 1420070640,
1420070760, 1420070880, 1420071000, 1420071120, 1420071240, 1420071360,
1420071480, 1420071600, 1420071720, 1420071840, 1420071960, 1420072080,
1420072200, 1420072320, 1420072440, 1420072560, 1420072680), class = c("POSIXct",
"POSIXt"), tzone = "GMT"), o2.permeg = c(-685.09, -695.1, -687.84,
-683.23, -683.28, -685.4, -687.8, -662.79, -684.17, -680.37,
-684.66, -686.13, -683.27, -680.77, -682.16, -692.54, NA, NA,
NA, NA), co2.ppm = c(413.023, 412.894, 412.979, 412.866, 412.755,
412.647, 412.659, 412.665, 412.762, 412.72, 412.692, 412.71,
412.757, 412.838, 412.922, 413.019, NA, NA, NA, NA), apo = c(-354.181646778043,
-364.868973747017, -357.162673031026, -353.145990453461, -353.778806682578,
-356.465871121718, -358.802863961814, NA, -354.632052505966,
-351.052577565632, -355.489594272076, -356.86508353222, -353.75830548926,
-350.833007159904, -351.781957040573, -361.652649164678, NA,
NA, NA, NA), o2.spike = c("N", "N", "N", "N", "N", "N", "N",
"Y", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "N"
)), .Names = c("date", "o2.permeg", "co2.ppm", "apo", "o2.spike"
), row.names = c(NA, -20L), class = c("tbl_df", "tbl", "data.frame"
))
Unfortunately, without having data, it's not easy to answer the question.
A ggplot2 solution could be:
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g1
Passing a column of the dataframe to parameter "col" in "aes" makes you map with different colors every different value in that column.
It creates even a legend, with names associated to different colors.
I tried this with another dataframe ("iris", contained in base R) and it worked, hope it will be helpful.
Edit:
To have side-by-side plots, you can create 3 plots with ggplot and the use the function plot_grid() provided by "cowplot" package.
library(cowplot)
g1 <- ggplot(data=test.data, aes(x=date, y=o2.permeg, col=o2.spike)) + geom_point()
g2 <- ggplot(data=test.data, aes(x=date, y=co2.ppm, col=o2.spike)) + geom_point()
g3 <- ggplot(data=test.data, aes(x=date, y=apo, col=o2.spike)) + geom_point()
plot_grid(g1, g2, g3, nrow=3, ncol=1)