2d density plot for categories - r

I'm trying to make a 2d density plot where the density is displayed for each category. For example, in the image below, we have a density plot for each day, and all the daily densities are combined into the coloured plots. These types of plots are common in the scientific literature on atmospheric sciences and aerosol pollution studies.
So far I've got this
# 2d kernel density of dN/dlogDp against time, drawn as filled tiles,
# with the raw observations overplotted as faint points.
ggplot(data = dat, mapping = aes(x = date, y = `dN/dlogDp`)) +
  stat_density2d(
    mapping = aes(fill = ..density..),
    geom = "tile",
    contour = FALSE
  ) +
  geom_point(alpha = 0.1) +
  scale_fill_gradient(low = "blue", high = "red") +
  theme_bw()
But I want to facet it by day, and I'm not sure where to start.
Here are the example data:
structure(list(date = structure(c(1359244800, 1359245400, 1359246000,
1359246600, 1359247200, 1359247800, 1359248400, 1359249000, 1359249600,
1359250200, 1359250800, 1359251400, 1359252000, 1359252600, 1359253200,
1359253800, 1359254400, 1359255000, 1359255600, 1359256200, 1359256800,
1359257400, 1359258000, 1359258600, 1359259200, 1359259800, 1359260400,
1359261000, 1359261600, 1359262200, 1359262800, 1359263400, 1359264000,
1359264600, 1359265200, 1359265800, 1359266400, 1359267000, 1359267600,
1359268200, 1359268800, 1359269400, 1359270000, 1359270600, 1359271200,
1359271800, 1359272400, 1359273000, 1359273600, 1359274200, 1359274800,
1359275400, 1359276000, 1359276600, 1359277200, 1359277800, 1359278400,
1359279000, 1359279600, 1359280200, 1359280800, 1359281400, 1359282000,
1359282600, 1359283200, 1359283800, 1359284400, 1359285000, 1359285600,
1359286200, 1359286800, 1359287400, 1359288000, 1359288600, 1359289200,
1359289800, 1359290400, 1359291000, 1359291600, 1359292200, 1359292800,
1359293400, 1359294000, 1359294600, 1359295200, 1359295800, 1359296400,
1359297000, 1359297600, 1359298200, 1359298800, 1359299400, 1359300000,
1359300600, 1359301200, 1359301800, 1359302400, 1359303000, 1359303600,
1359304200), class = c("POSIXct", "POSIXt"), tzone = "UTC"),
`dN/dlogDp` = c(49.8, 49.275, 47.4, 47.975, 48.625, 51.725,
50.7, 47.55, 45.975, 45.35, 45.4, 47.75, 49.625, 48.225,
47.65, 47.3, 48.75, 50.075, 34.725, 42.025, 48.825, 52.25,
54.05, 49.15, 34.6, 34.375, 42.85, 30.325, 43.15, 36.875,
32.85, 36.85, 35.725, 39.8, 38.65, 40.1, 42.675, 38.5, 37.2,
34.425, 25.2, 14.725, 22.675, 14.875, 37.45, 46.025, 49.275,
35.425, 30, 38.9, 28.6, 41.675, 46.05, 48.6, 62.425, 62.65,
61.7, 49.5, 70.05, 71.875, 59.4, 38.525, 36.85, 25.625, 14.675,
14.7, 14.6, 14.725, 15.6, 15, 14.6, 14.75, 15.05, 14.975,
15.425, 15.1, 15.95, 14.95, 15, 14.6, 14.725, 14.85, 15.175,
28.95, 14.975, 14.725, 16.6, 18.925, 53.225, 60.2, 56.425,
54.55, 41.4, 19.025, 19.825, 31.875, 14.85, 16.375, 16.65,
34.325), Diameter = c(14.6, 15.1, 15.7, 16.3, 16.8, 17.5,
18.1, 18.8, 19.5, 20.2, 20.9, 21.7, 22.5, 23.3, 24.1, 25,
25.9, 26.9, 27.9, 28.9, 30, 31.1, 32.2, 33.4, 34.6, 35.9,
37.2, 38.5, 40, 41.4, 42.9, 44.5, 46.1, 47.8, 49.6, 51.4,
53.3, 55.2, 57.3, 59.4, 61.5, 63.8, 66.1, 68.5, 71, 73.7,
76.4, 79.1, 82, 85.1, 88.2, 91.4, 94.7, 98.2, 101.8, 105.5,
109.4, 113.4, 117.6, 121.9, 126.3, 131, 135.8, 140.7, 145.9,
151.2, 156.8, 162.5, 168.5, 174.7, 181.1, 187.7, 194.6, 201.7,
209.1, 216.7, 224.7, 232.9, 241.4, 250.3, 259.5, 269, 278.8,
289, 299.6, 310.6, 322, 333.8, 346, 358.7, 371.8, 385.4,
399.5, 414.2, 429.4, 445.1, 461.4, 478.3, 495.8, 514)), .Names = c("date",
"dN/dlogDp", "Diameter"), row.names = c(NA, 100L), class = c("tbl_df",
"tbl", "data.frame"))
UPDATE This question is misguided and I now think that using categories isn't relevant to recreating this plot. These other questions are more closely related to the task of recreating this plot:
geom_raster interpolation with log scale
Use R to recreate contour plot made in Igor
And after I asked this question I have been keeping an updated gist of R code that combines details from the answers to these questions, and successfully replicates these plots (example output included in the gist). That gist is here: https://gist.github.com/benmarwick/9a54cbd325149a8ff405

The key steps are to strip away much of the decoration in the panels, and use scale_*_continuous(expand = c(0,0)) to make the density plot fill the entire panel. Here's an example of how to put it together:
library(ggplot2)
library(viridis)

# get the day and hour of each observation; hour is the facet variable below
dat$day <- as.Date(dat$date)
dat$hour <- as.numeric(format(dat$date, "%H"))

# theme to suppress many details so that neighbouring panels sit flush
squeeze_grid_theme <- theme_bw() +
  theme(
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    strip.text = element_blank(),
    strip.background = element_blank(),
    # panel.spacing.* is the current name of the deprecated panel.margin.*
    # elements (renamed in ggplot2 2.2.0); a negative x spacing overlaps
    # adjacent panels slightly to hide the seam between them
    panel.spacing.y = unit(0, "lines"),
    panel.spacing.x = unit(-1, "lines"),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.text.x = element_text(margin = margin(0, 0, 0, 0, "pt")),
    axis.text.y = element_text(margin = margin(0, 0, 0, 0, "pt"))
  )

# One density panel per hour. The z aesthetic of the original call was dropped:
# neither stat_density2d() nor geom_point() reads it.
# after_stat(density) replaces the deprecated `..density..` notation and needs
# ggplot2 >= 3.3.0.
p <- ggplot(dat, aes(y = `dN/dlogDp`, x = date)) +
  stat_density2d(
    geom = "tile",
    aes(fill = after_stat(density)),
    contour = FALSE
  ) +
  scale_fill_viridis() +
  geom_point(alpha = 0.1) +
  facet_grid(~hour) +
  # expand = c(0, 0) removes the padding around the data so the tiles fill
  # the whole panel
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_datetime(expand = c(0, 0)) +
  squeeze_grid_theme
p
Then we get a separate density plot for each hour, tightly squeezed together like the example plot in the question.

Related

How to connect points according to grouping instead of connecting all points in ggplot?

As the title stated, I want to connect points in every group instead of all points.
Here is the original data:
df<-structure(list(TN = c(13.6, 18, 18.5, 17, 16.9, 13.6, 17.6, 14.8,
14, 11, 12.6, 18.6, 18.8, 18.3, 19.4, 18.5, 18.9, 22, 22.3),
TX = c(29.9, 26.9, 30.5, 26.6, 25.4, 29.7, 24.1, 21.1, 23.8,
29.3, 34.4, 31.1, 32, 35.9, 36.7, 37.5, 39.2, 34.8, 33.6),
TM = c(22.5, 21.4, 23.3, 21.4, 20.2, 21.4, 19.9, 17.8, 18.9,
20.9, 24.5, 24.5, 25.1, 27.3, 28.2, 28.5, 29.2, 28.2, 26.8
), Date = c("01/06/2022", "02/06/2022", "03/06/2022", "04/06/2022",
"05/06/2022", "06/06/2022", "07/06/2022", "08/06/2022", "09/06/2022",
"10/06/2022", "11/06/2022", "12/06/2022", "13/06/2022", "14/06/2022",
"15/06/2022", "16/06/2022", "17/06/2022", "18/06/2022", "19/06/2022"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-19L))
Here is my code:
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape the TN and TX columns to long form and plot both series.
# NOTE: group = 1 puts every row into a single group, so geom_line() draws one
# path through all points of both series (the behaviour asked about).
df %>%
  select(Date, TN, TX) %>%
  pivot_longer(
    cols = c(TN, TX),
    names_to = "Tcombine",
    values_to = "Value"
  ) %>%
  ggplot(
    aes(x = Date, y = Value, group = 1, shape = Tcombine, color = Tcombine)
  ) +
  geom_point() +
  geom_line() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_blank()
  )
I want the points of the two groups (two colors) to be connected separately as the date changes, but I don't know why all the points are connected?
Here is final graph I got:
Any suggestions are welcome! Thank you in advance!
Add group=Tcombine.
# Same plot, but the line layer overrides the plot-level group = 1 with
# group = Tcombine, so each series gets its own path.
df %>%
  select(Date, TN, TX) %>%
  pivot_longer(
    cols = c(TN, TX),
    names_to = "Tcombine",
    values_to = "Value"
  ) %>%
  ggplot(
    aes(x = Date, y = Value, group = 1, shape = Tcombine, color = Tcombine)
  ) +
  geom_point() +
  geom_line(mapping = aes(group = Tcombine)) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_blank()
  )
Incidentally, while the ordering of your x-axis works here, the moment you get another month it will break. I suggest you convert your Date column to a proper Date-class and add scale_x_date.
# Parse the day/month/year strings into Date values first so the x axis is
# ordered chronologically instead of alphabetically, then plot as before with
# one axis break per day.
df %>%
  mutate(Date = as.Date(Date, format = "%d/%m/%Y")) %>%
  select(Date, TN, TX) %>%
  pivot_longer(
    cols = c(TN, TX),
    names_to = "Tcombine",
    values_to = "Value"
  ) %>%
  ggplot(
    aes(x = Date, y = Value, group = 1, shape = Tcombine, color = Tcombine)
  ) +
  geom_point() +
  geom_line(mapping = aes(group = Tcombine)) +
  scale_x_date(date_breaks = "1 day") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    axis.title.x = element_blank()
  )
While this looks very similar, you have much better control over breaks (e.g., date_breaks = "3 days") and formatting (e.g., date_labels ="%d/%m/%Y" if you really want that formatting of the dates).

First Derivative of Scatter Plot R

Hello, I am working with sigmoidal data and am attempting to plot two scatter plots on top of each other: the raw data and the first derivative of the raw data. My issue lies not in plotting the data, but in finding a function that will create an accurate representation of the first derivative.
What have I tried: Creating a function that calculates the slope of the current & next point: (y2-y1)/(x2-x1) & assigning the value to the current temperature.
dput() of Data Frame:
structure(list(Temperature = c(4.98, 5.49, 6.01, 6.5, 7.02, 7.52, 8.03, 8.52, 9.03, 9.54, 10.04, 10.54, 11.05, 11.55, 12.05, 12.55, 13.05, 13.56, 14.06, 14.57, 15.07, 15.57, 16.07, 16.59, 17.08, 17.59, 18.08, 18.59, 19.09, 19.6, 20.1, 20.64, 21.12, 21.63, 22.13, 22.62, 23.13, 23.63, 24.13, 24.63, 25.11, 25.62, 26.11, 26.68, 27.19, 27.7, 28.2, 28.71, 29.21, 29.71, 30.21, 30.7, 31.21, 31.69, 32.19, 32.69, 33.19, 33.7, 34.19, 34.68, 35.19, 35.68, 36.19, 36.69, 37.19, 37.7, 38.19, 38.7, 39.2, 39.7, 40.21, 40.7, 41.22, 41.71, 42.21, 42.71, 43.21, 43.72, 44.22, 44.72, 45.22, 45.73, 46.23, 46.73, 47.23, 47.97, 48.71, 49.23, 49.74, 50.23, 50.73, 51.23, 51.73, 52.24, 52.75, 53.24, 53.75, 54.24, 54.75, 55.26, 55.75, 56.25, 56.75, 57.24, 57.75, 58.27, 58.77, 59.26, 59.77, 60.26, 60.78, 61.27, 61.79, 62.27, 62.77, 63.29, 63.79, 64.27, 64.78, 65.3, 65.8, 66.27, 66.8, 67.3, 67.8, 68.31, 68.78, 69.3, 69.8, 70.32, 70.81, 71.32, 71.81, 72.33, 72.82, 73.31, 73.83, 74.33, 74.82, 75.32, 75.83, 76.34, 76.84, 77.35, 77.82, 78.34, 78.85, 79.36, 79.84, 80.35, 80.85, 81.36, 81.86, 82.37, 82.86, 83.37, 83.88, 84.36, 84.88, 85.38, 85.88, 86.38, 86.89, 87.38, 87.89, 88.39, 88.89, 89.4, 89.9, 90.39, 90.9, 91.4, 91.91, 92.37, 92.89, 93.4, 93.91, 94.41, 94.91, 95.42), Absorbance = c(1.401351929, 1.403320313, 1.405181885, 1.406326294, 1.407440186, 1.409118652, 1.410095215, 1.410797119, 1.411560059, 1.412918091, 1.413970947, 1.414245605, 1.416000366, 1.415435791, 1.41809082, 1.4190979, 1.419677734, 1.420150757, 1.421966553, 1.420333862, 1.422637939, 1.422790527, 1.423461914, 1.426513672, 1.426315308, 1.426071167, 1.426467896, 1.428710938, 1.428070068, 1.428817749, 1.429733276, 1.432144165, 1.432434082, 1.433227539, 1.434616089, 1.435806274, 1.434814453, 1.436096191, 1.436096191, 1.436447144, 1.437896729, 1.4375, 1.438934326, 1.440139771, 1.440139771, 1.441741943, 1.442108154, 1.443969727, 1.444778442, 1.443862915, 1.444534302, 1.445648193, 1.444473267, 1.446395874, 1.447219849, 1.446151733, 
1.449569702, 1.449066162, 1.448852539, 1.4503479, 1.451385498, 1.45111084, 1.451217651, 1.453125, 1.452560425, 1.455047607, 1.455093384, 1.456665039, 1.457977295, 1.457336426, 1.458648682, 1.46043396, 1.462158203, 1.464813232, 1.463531494, 1.468048096, 1.468643188, 1.470748901, 1.471878052, 1.476257324, 1.478057861, 1.482040405, 1.484466553, 1.486129761, 1.48815918, 1.496520996, 1.499786377, 1.504302979, 1.507217407, 1.512985229, 1.517471313, 1.524108887, 1.528198242, 1.534637451, 1.539169312, 1.546142578, 1.554611206, 1.55809021, 1.56854248, 1.572875977, 1.580307007, 1.585739136, 1.592514038, 1.600067139, 1.609222412, 1.616607666, 1.622375488, 1.631469727, 1.635635376, 1.642929077, 1.649780273, 1.655014038, 1.661483765, 1.663742065, 1.671859741, 1.677200317, 1.677108765, 1.683380127, 1.684082031, 1.687438965, 1.694595337, 1.694961548, 1.696685791, 1.696685791, 1.699768066, 1.702514648, 1.703613281, 1.705093384, 1.70022583, 1.707595825, 1.707962036, 1.709075928, 1.705276489, 1.71055603, 1.709259033, 1.70916748, 1.709732056, 1.710189819, 1.710281372, 1.711868286, 1.711883545, 1.713104248, 1.713760376, 1.711120605, 1.709716797, 1.711776733, 1.712814331, 1.714324951, 1.711120605, 1.713378906, 1.712432861, 1.716125488, 1.710006714, 1.710845947, 1.711502075, 1.711120605, 1.710006714, 1.70980835, 1.708602905, 1.708236694, 1.710189819, 1.707672119, 1.706939697, 1.710006714, 1.706192017, 1.706573486, 1.706207275, 1.705734253, 1.706207275, 1.705184937, 1.70954895, 1.705841064, 1.702972412, 1.703979492, 1.703063965, 1.709350586, 1.703338623, 1.700408936, 1.705276489, 1.705368042)), row.names = 1621:1800, class = "data.frame")
Code For my Attempt
raw = "<insert dput line>>"
# Forward-difference estimate of the first derivative. For every row i except
# the last:
#   slope[i] = (y[i + 1] - y[i]) / (x[i + 1] - x[i])
# stored against x[i]. Columns of `raw` are addressed by position, exactly as
# in the original loop: column 1 is x (Temperature), column 2 is y
# (Absorbance).
columns <- c("Temperature", "Absorbance")

# number of slopes is one fewer than the number of rows; clamp at zero so a
# 0- or 1-row input yields an empty result instead of a bad index
n_slopes <- max(nrow(raw) - 1L, 0L)

# diff() computes all consecutive differences at once, replacing the
# row-by-row loop that grew `first` with rbind() on every iteration
first <- data.frame(
  raw[[1]][seq_len(n_slopes)],
  diff(raw[[2]]) / diff(raw[[1]])
)

# the original assigned these names to `dFrame`, an object that was never
# created, which aborted the script before the loop ran
colnames(first) <- columns
# Overlay the raw curve (red) and the slope estimate (blue). Both layers share
# one x/y mapping and differ only in the data frame they draw from.
ggplot(mapping = aes(x = Temperature, y = Absorbance)) +
  geom_point(data = raw, color = "red") +
  geom_point(data = first, color = "blue")
What I was expecting
I was expecting an output that had the shape of something like so:
library(dplyr)
library(ggplot2)

# Linear map that places the slope series on the Absorbance axis:
#   plotted = slope * slope_scale + slope_offset
# The secondary axis applies the exact inverse so its labels read in slope
# units. Keeping both constants in one place guarantees the two stay in sync.
slope_scale <- 20
slope_offset <- 1.4

df %>%
  arrange(Temperature) %>%
  mutate(
    # backward difference; the first row has no predecessor, so its slope is NA
    slope = (Absorbance - lag(Absorbance)) /
      (Temperature - lag(Temperature))
  ) %>%
  ggplot(aes(Temperature)) +
  # `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0)
  geom_line(aes(y = Absorbance, color = "Absorbance"), linewidth = 1.2) +
  geom_point(aes(y = slope * slope_scale + slope_offset, color = "slope")) +
  geom_smooth(
    aes(y = slope * slope_scale + slope_offset, color = "slope"),
    se = FALSE,
    linewidth = 0.8
  ) +
  scale_y_continuous(
    # transformation passed positionally: the argument was renamed from
    # `trans` to `transform` in newer ggplot2, and this form works with both
    sec.axis = sec_axis(~ (.x - slope_offset) / slope_scale, name = "slope")
  )
If the data is even a little noisy, calculating the derivative by first differencing can be very noisy.
You can get a better estimate by fitting a smoothing spline function and calculating the derivative of the spline function. By differentiating a smooth function, you get a smooth derivative.
In most cases, smooth.spline with default arguments is fine, but I recommend taking a look at the result and possibly tuning the smooth.spline parameters for more or less smoothing, depending on your judgment.
edit: I learned this approach from the Numerical Recipes textbook.
library(tidyverse)

# Noisy sine sampled every 0.1 on [1, 15] with two derivative estimates:
#   d1_diff   - first differences (NA for the first point)
#   d1_spline - derivative of a smoothing spline fitted to (x, y)
df <- tibble(
  x = seq(1, 15, by = 0.1),
  y = sin(x) + runif(length(x), -0.2, 0.2),
  d1_diff = c(NA, diff(y) / diff(x)),
  d1_spline = pluck(predict(smooth.spline(x, y), x, deriv = 1), "y")
)

# One facet per series; the factor levels keep the facets in column order.
df %>%
  pivot_longer(-x) %>%
  mutate(name = factor(name, unique(name))) %>%
  ggplot(aes(x, value, color = name)) +
  geom_point() +
  geom_line() +
  facet_wrap(~name, ncol = 1)
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 row(s) containing missing values (geom_path).
Created on 2022-10-26 with reprex v2.0.2

Unexpected result while using lowess to smooth a data.table column in R

I have a data.table test_dt in which I want to smooth the y column using lowess function.
test_dt <- structure(list(x = c(28.75, 30, 31.25, 32.5, 33.75, 35, 36.25,
37.5, 38.75, 40, 41.25, 42.5, 43.75, 45, 46.25, 47.5, 48.75,
50, 52.5, 55, 57.5, 60, 62.5, 63.75, 65, 67.5, 70, 72.5, 75,
77.5, 80, 82.5, 85, 87.5, 90, 92.5, 95, 97.5, 100, 102.5, 103.75,
105, 106.25, 107.5, 108.75, 110, 111.25, 112.5, 113.75, 115,
116.25, 117.5, 118.75, 120, 121.25, 122.5, 125, 130, 135, 140,
145), y = c(116.78, 115.53, 114.28, 113.05, 111.78, 110.53, 109.28,
108.05, 106.78, 105.53, 104.28, 103.025, 101.775, 100.525, 99.28,
98.05, 96.8, 95.525, 93.1, 90.65, 88.225, 85.775, 83.35, 82.15,
80.9, 78.5, 76.075, 73.675, 71.25, 68.85, 66.5, 64.075, 61.725,
59.4, 57.075, 54.725, 52.475, 50.225, 48, 45.75, 44.65, 43.55,
42.475, 41.45, 40.35, 39.275, 38.25, 37.225, 36.175, 35.175,
34.175, 33.225, 32.275, 31.3, 30.35, 29.45, 27.625, 24.175, 21,
18.125, 15.55), z = c(116.778248424972, 115.531456655985, 114.284502467544,
113.034850770519, 111.784500981402, 110.533319511795, 109.284500954429,
108.034850457264, 106.784502297216, 105.531265565238, 104.278221015846,
103.026780249377, 101.775992395759, 100.528761292272, 99.2853168637851,
98.043586202838, 96.8021989104315, 95.5702032427799, 93.1041279347743,
90.6575956222915, 88.2179393348852, 85.783500434839, 83.3503011023971,
82.136280706039, 80.922846825298, 78.4965179152157, 76.0823895453039,
73.6686672097464, 71.264486719796, 68.8702598156142, 66.4865368523571,
64.1182523898466, 61.7552221811808, 59.4004347738795, 57.0823289450761,
54.7908645949795, 52.5071096685879, 50.2308279167219, 47.9940967492558,
45.7658417529877, 44.6514226583931, 43.5622751034012, 42.4876666190815,
41.4173110074806, 40.3555584369672, 39.3004471381618, 38.2552969838653,
37.2202353638959, 36.1963659189447, 35.1889616530209, 34.2004259883859,
33.2295174626826, 32.2669278456991, 31.3171387914754, 30.3742375589802,
29.4555719783757, 27.6243725086786, 23.9784367995753, 27.625,
27.625, 27.625)), row.names = c(NA, -61L), class = c("data.table",
"data.frame"))
As can be seen in the image below, I am getting an unexpected result. The expected result is that the line (z column) in the graph below should closely follow the points (y column).
Here is my code -
library(data.table)
library(ggplot2)
# Smooth y against x with lowess (span f = 0.1), store the fitted values as
# column z by reference, then draw the points with the smoothed line on top.
test_dt[, z := lowess(x = x, y = y, f = 0.1)[["y"]]]
ggplot(data = test_dt, mapping = aes(x = x)) +
  geom_point(aes(y = y)) +
  geom_line(aes(y = z))
Q1. Can someone suggest why lowess is not smoothing properly?
Q2. Since lowess is not working as expected, is there any other function in R that would be more efficient in smoothing the y column without producing a spike (as lowess did on the boundary points)?
You could use loess instead:
# Fit a loess curve of y on x and store its fitted values as column z.
loess_fit <- loess(y ~ x, data = test_dt)
test_dt[, z := predict(loess_fit)]
ggplot(data = test_dt, mapping = aes(x = x)) +
  geom_point(aes(y = y)) +
  geom_line(aes(y = z))
Note though, that if all you want to do is plot the line, this is exactly the method that geom_smooth uses, so without even creating a z column, you could do:
# Let ggplot2 compute and draw the smoother itself; no z column is needed.
ggplot(data = test_dt, mapping = aes(x = x, y = y)) +
  geom_point() +
  geom_smooth()
#> `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Created on 2021-11-07 by the reprex package (v2.0.0)
The problem was solved by setting the number of iterations to zero. lowess acts like loess when the iterations are kept at zero.
# iter = 0 turns off lowess's robustifying iterations
test_dt[, z := lowess(x = x, y = y, f = 0.1, iter = 0)[["y"]]]

add_trace: control the linetype without warning

I am writing a function which returns a plotly object. I managed to control the colors already. However I have trouble controlling the linetype. Currently I use something like:
# One colour and one dash pattern per level of `measure` (seven in total,
# which is what triggers plotly's "only supports 6 different linetypes"
# warning).
trace_colors <- c(rep(c("#CD0C18", "#1660A7"), each = 3), "#9467bd")
trace_dashes <- c(rep(c("dot", "dash", "solid"), 2), "dot")

plot_ly(colors = trace_colors, linetypes = trace_dashes) %>%
  add_trace(
    data = long_data,
    x = ~month,
    y = ~temperature,
    color = ~measure,
    linetype = ~measure,
    type = "scatter",
    mode = "lines",
    line = list(width = 4)
  ) %>%
  layout(
    title = "Average High and Low Temperatures in New York",
    xaxis = list(
      title = "Months",
      categoryorder = "array",
      categoryarray = month
    ),
    yaxis = list(title = "Temperature (degrees F)")
  )
which returns me a warning:
Warning message:
plotly.js only supports 6 different linetypes
The warning makes sense, since measure has seven levels. However I would like to control the linetype without getting a warning every time I have more than 6 traces to plot - is there a way?
My sample data:
month <- c('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December')
high_2000 <- c(32.5, 37.6, 49.9, 53.0, 69.1, 75.4, 76.5, 76.6, 70.7, 60.6, 45.1, 29.3)
low_2000 <- c(13.8, 22.3, 32.5, 37.2, 49.9, 56.1, 57.7, 58.3, 51.2, 42.8, 31.6, 15.9)
mid_2000 <-apply(rbind(high_2000,low_2000),2,mean)
high_2007 <- c(36.5, 26.6, 43.6, 52.3, 71.5, 81.4, 80.5, 82.2, 76.0, 67.3, 46.1, 35.0)
low_2007 <- c(23.6, 14.0, 27.0, 36.8, 47.6, 57.7, 58.9, 61.2, 53.3, 48.5, 31.0, 23.6)
high_2014 <- c(28.8, 28.5, 37.0, 56.8, 69.7, 79.7, 78.5, 77.8, 74.1, 62.6, 45.3, 39.9)
low_2014 <- c(12.7, 14.3, 18.6, 35.5, 49.9, 58.0, 60.0, 58.6, 51.7, 45.2, 32.2, 29.1)
data <- data.frame(month, high_2000, low_2000,mid_2000, high_2007, low_2007, high_2014, low_2014)
long_data<-tidyr::gather(data,measure,temperature,-month)
As can be seen here, the warning arises in
validLinetypes <- as.character(Schema$traces$scatter$attributes$line$dash$values)
if (length(pal) > length(validLinetypes)) {
warning("plotly.js only supports 6 different linetypes", call. = FALSE)
}
So, if you want to disable this warning alone, there are only two things you can do: override the whole function or manually extend Schema$traces$scatter$attributes$line$dash$values. The latter is somewhat less intrusive and can be done with
# Copy plotly's internal Schema, pad its list of valid dash values with 100 NA
# entries, and write the copy back into the plotly namespace. The padding makes
# the length check that emits the linetype warning pass for more traces.
patched_schema <- plotly:::Schema
dash_values <- patched_schema$traces$scatter$attributes$line$dash$values
patched_schema$traces$scatter$attributes$line$dash$values <-
  c(dash_values, rep(NA, 100))
assignInNamespace("Schema", patched_schema, ns = "plotly")
Here we add NA 100 times so that up to 106 line types now wouldn't provoke a warning. The last line overrides the Schema variable with tmp in the plotly package environment.
The vector Schema$traces$scatter$attributes$line$dash$values only gets used (through validLinetypes) here four times, and looking at those it seems like this cheating doesn't have any likely side effects.

Opacity by numeric vector

I have a very stylized line chart and I would like to plot all my lines in with one add_trace-command, in the hope that this makes my code neater.
I have two issues:
I want a lower opacity for the high-lines and full opacity for the low lines. If I try it seems to assign the opacity randomly.
I want the upper lines to be solid and the lower ones to be dashed. The opposite happens.
Concerning these issues I have two questions:
Can this 'strange' (not logical) behaviour of plotly be fixed? Maybe by using some layout option?
Does this 'strange' behaviour occur because I am straying from the usual way of using plotly?
The plotly examples usually tell you to write a new add_trace call for every line or other object you add. I am trying to implement all my lines with one add_trace call.
In my real data, I have more than ten lines to draw and it would really help if I could draw some of the lines together. Here are some sample graph and data:
I tried with this code:
month <- c('January', 'February', 'March', 'April', 'May', 'June', 'July',
'August', 'September', 'October', 'November', 'December')
high_2000 <- c(32.5, 37.6, 49.9, 53.0, 69.1, 75.4, 76.5, 76.6, 70.7, 60.6, 45.1, 29.3)
low_2000 <- c(13.8, 22.3, 32.5, 37.2, 49.9, 56.1, 57.7, 58.3, 51.2, 42.8, 31.6, 15.9)
high_2007 <- c(36.5, 26.6, 43.6, 52.3, 71.5, 81.4, 80.5, 82.2, 76.0, 67.3, 46.1, 35.0)
low_2007 <- c(23.6, 14.0, 27.0, 36.8, 47.6, 57.7, 58.9, 61.2, 53.3, 48.5, 31.0, 23.6)
high_2014 <- c(28.8, 28.5, 37.0, 56.8, 69.7, 79.7, 78.5, 77.8, 74.1, 62.6, 45.3, 39.9)
low_2014 <- c(12.7, 14.3, 18.6, 35.5, 49.9, 58.0, 60.0, 58.6, 51.7, 45.2, 32.2, 29.1)
data <- data.frame(month, high_2000, low_2000, high_2007, low_2007, high_2014, low_2014)
library(plotly)
# Long format: one row per (month, key) pair, with the reading in `values`.
df <- tidyr::gather(data, key, values, -month)

# One trace per key, all in the same colour. Opacity and linetype are passed
# as plain per-row vectors computed from the key names.
plot_ly(
  data = df,
  x = ~month,
  y = ~values,
  split = ~key,
  type = "scatter",
  mode = "lines",
  opacity = ifelse(grepl("high", df$key), 0.5, 1),
  line = list(color = "#1f77b4"),
  linetype = ifelse(grepl("2000", df$key), "solid", "dashed")
) %>%
  layout(xaxis = list(categoryarray = month, categoryorder = "array"))
Does this work for you?
# Fix the trace order explicitly: all "high" series first, then all "low".
key_order <- c(
  "high_2014", "high_2007", "high_2000",
  "low_2014", "low_2007", "low_2000"
)
df$key <- factor(df$key, levels = key_order)

# Two-letter prefix of the key ("hi" / "lo") as a factor; it drives the
# linetype mapping and the row order.
df$high_low <- factor(substr(df$key, 1, 2), levels = c("hi", "lo"))
df <- arrange(df, high_low)

plot_ly(
  data = df,
  x = ~month,
  y = ~values,
  split = ~key,
  type = "scatter",
  mode = "lines",
  opacity = ifelse(grepl("high", df$key), 0.5, 1),
  line = list(color = "#1f77b4"),
  linetype = ~high_low,
  linetypes = c("solid", "dashed")
) %>%
  layout(xaxis = list(categoryarray = month, categoryorder = "array"))

Resources