Related
I have a data set as follows:
data = structure(list(year = c(2021L, 2021L, 2021L, 2021L, 2021L, 2021L,
2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L,
2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L, 2021L
), month = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), Quarter = c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L, 2L,
2L, 3L, 3L, 3L, 4L, 4L, 4L), project = c("A", "A", "A", "A",
"A", "A", "A", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B",
"B", "B", "B", "B", "B", "B", "B"), value = c(102.349429992044,
106.58161342807, 100.435891304271, 98.444807600341, 82.3101711995535,
59.6035963287678, 69.6231234694286, 90.5898095230998, 80.6258589573775,
115.639565579428, 104.73836165791, 107.508003106277, 90.4082358328098,
112.579438593004, 106.624680336326, 93.9307819392979, 75.4136657889693,
52.3110190297094, 70.3105808070076, 87.3448099614908, 68.2935766548446,
124.204436344695, 111.619576683155, 109.225885313817), Country = c("Denmark",
"Denmark", "Denmark", "Denmark", "Denmark", "Denmark", "Denmark",
"Denmark", "Denmark", "Denmark", "Denmark", "Denmark", "Germany",
"Germany", "Germany", "Germany", "Germany", "Germany", "Germany",
"Germany", "Germany", "Germany", "Germany", "Germany"), LongTermWI = c(121.960664674364,
104.723767102727, 109.956110038786, 94.7909742884892, 89.0611848528951,
83.0143004308842, 78.5554847511495, 82.1932844238529, 94.8317262446894,
109.741770216839, 109.224438221904, 121.94629475342, 124.912696115337,
106.137678558707, 111.196799677912, 90.7373556419141, 88.5814900982324,
78.4127049610748, 74.8773631279842, 81.5579488440033, 93.2896819041917,
114.322908768119, 114.660984633633, 121.312387668891), MinRef = c(89.0152351848971,
47.1805056248264, 72.920410008137, 66.0807724144165, 54.5679150901317,
53.7844552456038, 42.6401185444772, 52.546635367643, 69.2248217126283,
76.4144846076876, 89.4209199082177, 80.3882525480035, 90.4082358328098,
64.6192521242945, 85.1337944481354, 69.4221826905899, 50.3506836843003,
52.3110190297094, 40.4296442260575, 47.5775452531874, 68.2935766548446,
71.9901338300631, 93.2483160688902, 85.5467987151896), MaxRef = c(163.771100449271,
141.388975655703, 137.780711496641, 118.055928781909, 113.961805078013,
114.604519185711, 104.83540276271, 101.855462747317, 119.07394843672,
137.773221892607, 140.864382733085, 156.516066856324, 158.822912815973,
134.265032081886, 134.231205540578, 108.891671902872, 118.091190791042,
100.740245891658, 95.6179422824695, 101.998782325545, 132.191355352224,
137.281168224106, 153.155278763207, 152.772666775097), Delta = c(-19.61123468232,
1.85784632534323, -9.52021873451493, 3.6538333118518, -6.75101365334163,
-23.4107041021164, -8.93236128172087, 8.39652509924694, -14.2058672873119,
5.8977953625887, -4.48607656399371, -14.4382916471429, -34.5044602825272,
6.44176003429672, -4.5721193415857, 3.19342629738384, -13.1678243092631,
-26.1016859313654, -4.56678232097656, 5.78686111748746, -24.9961052493471,
9.88152757657565, -3.04140795047796, -12.0865023550742)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
I can plot the project data as a grouped bar chart in Plotly and add LongTermWI as a line.
I don't know how to plot the lines in a different color than the bars!
# Asker's attempt: grouped bars of `value` per month, colored by `project`.
fig <- plot_ly(data , x = ~month, y = ~value, type = 'bar', color =~project)%>%
# Adds LongTermWI as a line trace over the same months.
# NOTE(review): no color is given for this trace, so it presumably inherits
# the `color = ~project` mapping from plot_ly() — confirm; that would explain
# why the lines share the bars' colors.
add_trace(x = ~month, y = ~LongTermWI, type = 'scatter', mode = 'lines')
fig
The colors argument did not help! Also, for the legend I would like to see project + country as the label.
There are many different ways to accomplish your goal. It really depends on what the ultimate goal is. I'm going to show you two different ways that this could work.
In both of these approaches, I essentially make it so that each trace is independent.
In this method, I get to pick the exact color. If there were more than a few possible colors, I would use vectorization or a loop (i.e., lapply, for, etc.)
# Bars are colored by project. Each project's LongTermWI line is added as its
# own trace from its own subset, so its color can be fixed exactly with I().
project_a <- data[data$project == "A", ]
project_b <- data[data$project == "B", ]

fig <- plot_ly() %>%
  add_bars(data = data, x = ~month, y = ~value, color = ~project) %>%
  add_lines(
    data = project_a,
    x = ~month, y = ~LongTermWI,
    color = I("red")
  ) %>%
  add_lines(
    data = project_b,
    x = ~month, y = ~LongTermWI,
    color = I("black")
  )
In this next option, I let Plotly choose the colors. Instead of designating a color variable (and Plotly potentially catching on that the color groups have the same name), I use split.
# Plotly picks the line colors here: `split` yields one line trace per project
# without reusing the `color` mapping that the bars use.
# The data frame is supplied once and inherited by both traces.
plot_ly(data = data) %>%
  add_bars(x = ~month, y = ~value, color = ~project) %>%
  add_lines(x = ~month, y = ~LongTermWI, split = ~project)
So imagine I have a dataset where the column "date" contains years 2011-2017 and months for each year, however months are written out in letters. For example:
date: 11-Jan
I would like to make the months numeric so I get:
date: 11-01
Any suggestions on how I can tackle this problem?
Kind regards!
Make your input proper dates, parse them, then format them.
# Convert "yy-Mon" strings (two-digit year, abbreviated month name) into
# "yy-mm" strings with a numeric month.
x <- c("11-Jan", "12-Feb")

# Parsing month abbreviations (%b) depends on the LC_TIME locale. Switch to "C"
# (English names) only for the conversion and restore the previous setting
# afterwards, so the rest of the session is not affected.
old_locale <- Sys.getlocale("LC_TIME")
Sys.setlocale("LC_TIME", "C")

# as.Date() needs a complete date, so append a dummy day ("-1") before
# parsing, then drop it again when formatting.
x_numeric <- format(
  as.Date(paste0(x, "-1"), format = "%y-%b-%d"),
  "%y-%m"
)

invisible(Sys.setlocale("LC_TIME", old_locale))

x_numeric
#[1] "11-01" "12-02"
See help("strptime") for details on format strings.
Assuming your data is like:
df1 <- structure(list(day_mon = c("16-Dec", "18-Nov", "12-Oct", "8-Oct",
"15-May", "29-Jun", "22-Feb", "25-May", "23-Jan", "24-Oct", "23-May",
"27-Sep", "9-Apr", "28-Oct", "18-Jan", "8-Apr", "7-Jan", "13-Dec",
"28-Nov", "24-May"), year = c(2012L, 2014L, 2011L, 2015L, 2015L,
2015L, 2011L, 2015L, 2012L, 2015L, 2011L, 2012L, 2014L, 2012L,
2013L, 2011L, 2017L, 2016L, 2014L, 2014L)),
row.names = c(
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L), class = "data.frame")
You can:
# Build a zero-padded "MM-DD" string from values such as "16-Dec".
# local() keeps the intermediate vectors out of the global environment.
df1$mon_day_fmt <- local({
  day_mon <- df1[["day_mon"]]

  # Month: capture the abbreviation and look up its position in month.abb
  month_num <- match(gsub("\\d+\\-(\\w+)", "\\1", day_mon), month.abb)

  # Day: capture the leading digits and convert them to an integer
  day_num <- as.integer(gsub("^(\\d+)\\-\\w+$", "\\1", day_mon))

  paste(sprintf("%02d", month_num), sprintf("%02d", day_num), sep = "-")
})

# Prefix the year to get "YYYY-MM-DD" and convert to a Date vector
df1$date <- as.Date(paste(df1[["year"]], df1[["mon_day_fmt"]], sep = "-"))
I want to combine 6-hour timesteps that are immediately following one another in order to see maximum Total_IVT during a single storm event. For example, 2019-5-15 has several observations at Hours 12 and 18, and the next day has an observation at Hour 0. How can I combine by nearby timesteps?
Original data is here: https://ucla.box.com/ARcatalog. A shortened sample is below.
> dput(tail(df))
> structure(list(Year = c(2019L, 2019L, 2019L, 2019L, 2019L, 2019L
), Month = c(3L, 5L, 5L, 5L, 5L, 5L), Day = c(27L, 15L, 15L,
15L, 16L, 21L), Hour = c(12L, 0L, 12L, 18L, 0L, 6L), Total_IVT = c(111.5, 206, 503.3, 287, 261.2, 294.8), Date = c("2019-03-27", "2019-05-15",
"2019-05-15", "2019-05-15", "2019-05-16", "2019-05-21")), row.names = 1719:1724, class = "data.frame")
I tried this code, and I got the daily maximum, but what I want is to include previous or following days if the storm spans across days.
# Daily maximum: for each calendar date keep the row(s) whose Total_IVT equals
# that date's maximum. Dates are handled independently, so a storm that spans
# midnight contributes one row per day.
df1 <- df %>% # subset of storms by max IVT
mutate(Date = as.Date(Date)) %>% # Date is stored as character in df
group_by(Date) %>%
# NOTE: the result is still grouped by Date (no ungroup() afterwards)
filter(Total_IVT == max(Total_IVT))
Here is an example of what I get from the full dataset when I plot the daily max IVT. What I want will be a plot of fewer points because some of the storms overlap days.
# Scatter plot of the rows kept in df1: Date on x, Total_IVT on y.
ggplot(df1) + geom_point(aes(Date, Total_IVT))
I am new to R, so I apologize if this does not make sense. I appreciate your help in advance.
Here's a subset of data.
structure(list(Transmitter = c(1675L, 1675L, 1675L, 1675L, 1681L,
1681L, 1681L, 1681L, 1685L, 1685L, 1685L, 1685L, 1685L, 9782L,
9782L, 9782L, 24166L, 24166L, 24166L, 24166L, 24184L, 24184L,
24184L, 24184L), Date = structure(c(17392, 17721, 17722, 17393,
17734, 17729, 17391, 17717, 17392, 17390, 17391, 17381, 17382,
18079, 18110, 17762, 17751, 18097, 18090, 18091, 18097, 18068,
18082, 18088), class = "Date"), Year = c(2017L, 2018L, 2018L,
2017L, 2018L, 2018L, 2017L, 2018L, 2017L, 2017L, 2017L, 2017L,
2017L, 2019L, 2019L, 2018L, 2018L, 2019L, 2019L, 2019L, 2019L,
2019L, 2019L, 2019L), DirectionGroups = structure(c(3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L), .Label = c("Both", "Marine", "River"), class = "factor")), row.names = c(355L,
356L, 357L, 358L, 475L, 476L, 477L, 478L, 530L, 531L, 532L, 533L,
534L, 573L, 574L, 575L, 626L, 627L, 628L, 629L, 764L, 765L, 766L,
767L), class = "data.frame")
I'm trying to create a scatterplot of individually tagged animals through time. Points are colored by the group I have put them in. Currently the scatterplot is sorted by the level of the Transmitter. Instead I would like a way to sort these data by DirectionGroups.
Here is my current scatterplot.
# Asker's current plot: Date on x, one y position per transmitter
# (factor(Transmitter) uses the default level order), points colored by
# DirectionGroups.
ggplot(data = AbPlot3, aes(x = Date, y = factor(Transmitter), color = DirectionGroups)) + geom_point()+theme_bw()+ylab("Transmitter")+
# Manual greyscale colors; `labels` relabels the legend entries.
scale_color_manual(values = c('grey40', 'black', 'grey70'), labels = c('Transient', 'External', 'Resident'))+
# Hide the y tick labels (element_blank) and set text sizes.
theme(axis.text.y = element_blank(), axis.title = element_text(size = 16),
axis.text.x = element_text(size = 14), legend.text = element_text(size = 14),
legend.title = element_text(size = 16))
Essentially, I want one plot with all Transient points next to each other, all external points together and all resident points together.
Try this. As already pointed out in the comments, simply sort your data by DirectionGroups, then convert Transmitter to a factor and set the order accordingly, e.g. by using forcats::fct_inorder:
library(ggplot2)
library(dplyr)
# Order the rows by group first and by transmitter within each group, so that
# transmitters belonging to the same group end up next to each other.
AbPlot3 <- arrange(AbPlot3, DirectionGroups, Transmitter)

# Freeze that row order into the factor levels: fct_inorder() orders levels by
# first appearance, which is what determines the order on the y axis.
AbPlot3$Transmitter <- forcats::fct_inorder(factor(AbPlot3$Transmitter))
# Transmitter is used directly on the y axis, so its factor level order
# decides the vertical order of the rows of points.
ggplot(AbPlot3, aes(x = Date, y = Transmitter, color = DirectionGroups)) +
  geom_point() +
  theme_bw() +
  ylab("Transmitter") +
  scale_color_manual(
    values = c("grey40", "black", "grey70"),
    labels = c("Transient", "External", "Resident")
  ) +
  theme(
    axis.text.y = element_blank(),
    axis.title = element_text(size = 16),
    axis.text.x = element_text(size = 14),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 16)
  )
Created on 2020-06-11 by the reprex package (v0.3.0)
I am trying to create a time series plot that shows multiple data series over the years. I would like to plot just the years and have each series run from its start date to its end date. Here I have converted the respective columns to dates and then combined them, but I do not get the result I am looking for.
The data is available from this website: https://www.businessinsider.co.za/coronavirus-deaths-how-pandemic-compares-to-other-deadly-outbreaks-2020-4?r=US&IR=T
Something like this where the data doesn't start in the same year or end in the same year:
https://ichef.bbci.co.uk/news/410/cpsprodpb/6E25/production/_111779182_optimised-mortality-nc.png
(time period vs deaths caused)
library(lubridate)
library(ggplot2)
otherDiseaseData <- structure(list(ï..Disease = structure(c(11L, 2L, 12L, 6L, 3L,
1L, 9L, 7L, 13L, 4L, 5L, 8L, 10L), .Label = c("Asian Flu", "blackdeath",
"Cholera", "Covid 19", "Ebola", "HIV", "Hong Kong Flu", "Mers",
"Russian Flu", "Sars", "smallpox", "spanish flu", "Swine Flu"
), class = "factor"), Start = c(0L, 1347L, 1918L, 1981L, 1899L,
1957L, 1889L, 1968L, 2009L, 2019L, 2014L, 2012L, 2002L), End = c(1979L,
1351L, 1919L, 2020L, 1923L, 1958L, 1890L, 1970L, 2010L, 2020L,
2016L, 2020L, 2003L), Death = c(300000L, 225000000L, 50000L,
2360000L, 1500000L, 1100000L, 1000000L, 1000000L, 151700L, 101526L,
11300L, 866L, 774L)), class = "data.frame", row.names = c(NA,
-13L))
# Asker's attempt. Start years: integer -> character -> Date -> year number.
yrs <- otherDiseaseData$Start
yr <- as.Date(as.character(yrs), format = "%Y")
yStart <- year(yr)
# Same conversion for the End years.
yrs <- otherDiseaseData$End
yr <- as.Date(as.character(yrs), format = "%Y")
# NOTE(review): this assigns to yStart again, overwriting the Start values —
# a separate variable (e.g. yEnd) was probably intended. Neither value is used
# below.
yStart <- year(yr)
# x is a text label of the form "<Start> <End>" (character, not a date).
otherDiseaseData$x <- paste(otherDiseaseData$Start,otherDiseaseData$End)
otherDiseaseData
# NOTE(review): `xlim=0000-2000` is the arithmetic expression 0 - 2000 (-2000)
# passed as an extra argument to ggplot(); it does not look like it sets axis
# limits — confirm. `otherDiseaseData$x` inside aes() could simply be `x`.
ggplot(otherDiseaseData, aes(y = Death, x = otherDiseaseData$x),xlim=0000-2000) + geom_point()
I'm not sure I've fully understood what you're asking for, but my interpretation is this:
# Long format: one row per disease and endpoint. `variable` is "Start" or
# "End" and `value` holds the corresponding year.
df <- reshape::melt(otherDiseaseData, measure.vars = c("Start", "End"))

# The supplied data names its first column "ï..Disease" (a byte-order-mark
# artifact from reading the CSV). Normalise it to "Disease" on the melted copy
# so the code below finds the column; a correct name is left unchanged.
names(df) <- sub("^.*Disease$", "Disease", names(df))

# Smallpox (which starts at year 0) and the 225,000,000-death row stretch the
# axes too much, so both are dropped. Filter once and reuse for both layers.
plot_df <- dplyr::filter(df, Disease != "smallpox", Death != 225000000)

ggplot(plot_df) +
  # One horizontal segment per disease, from its Start year to its End year
  geom_line(aes(value, Death, colour = Disease), size = 2) +
  theme_minimal() +
  # Label each line once, at its End year
  ggrepel::geom_label_repel(
    data = dplyr::filter(plot_df, variable != "Start"),
    aes(label = Disease, x = value, y = Death)
  ) +
  # Death counts span several orders of magnitude
  scale_y_log10() +
  theme(legend.position = "none", aspect.ratio = 1) +
  ylab("Number of Deaths") +
  xlab("Year")
I've used the reshape package to reorganise the given data, and then ggrepel to label the lines. I've had to remove some data as it really throws the scale off, which I've ended up making logarithmic to spread the data out a little. It gives you this plot:
It's not perfect but it might be heading in the right direction? Apologies if I've misunderstood what you were angling for.