Show unique values on bubbles graph in R - r

I'm working with a db which looks more or less like this:
dput(ex)
structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
And I'm using the packcircles package to create a bubble graph for just one variable since standard bubbles are added as a third dimension on ggplot. I'm using paletteer library too:
library(packcircles)
library(paletteer)
Next code creates the data.frame that will be used on the graph. First I create the coordinates for the bubbles (circles) and then I incorporate my clave and n variables from original data.frame:
# Create circles
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints=50)
# Incorporate variables
ex_$clave <- rep(ex$clave, each=51)
ex_$n <- rep(ex$n, each=51)
# Palette
colors <- paletteer_c("ggthemes::Green-Gold", 33)
Now we're ready to graph:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= unique(ex_$n))-> my_graph
plotly::ggplotly(my_graph)
Code above throws following error:
Error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (1683): label
Run `rlang::last_error()` to see where the error occurred.
If I use instead:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= ex_$n)-> my_graph
plotly::ggplotly(my_graph)
Now every circle is surrounded by text (51 times same text). What I want is that only clave and one value of n were showed when you pass the mouse pointer through each circle.
Any advice or idea to handle with this will be much appreciated.

How about this. The mouse-over doesn't work in the static picture below, but if you run the code, it should.
ex <- structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(packcircles)
library(paletteer)
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints=50)
# Incorporate variables
ex_$clave <- rep(ex$clave, each=51)
ex_$n <- rep(ex$n, each=51)
# Palette
colors <- paletteer_c("ggthemes::Green-Gold", 33)
ex_ <- ex_ %>%
mutate(lab = paste0("clave: ", clave, "\nN: ", n))
ggplot(data = ex_, aes(x, y, fill=clave, text=lab)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors))-> my_graph
plotly::ggplotly(my_graph, tooltip = "text")
Created on 2022-04-06 by the reprex package (v2.0.1)

Related

Dates instead of number of hours on the X-axis?

Ive tried to add dates instead of number of hours on the X-axis on this plot, which represents daily prices at different hours throughout the period of a month.
elspot_prices_2021_hourly_eur <- read_excel("elspot-prices_2021_hourly_eur.xlsx")
elspot_prices_2021_daily_eur <- read_excel("elspot-prices_2021_daily_eur.xlsx")
#Seasonaity of same hour
elspot <- as.double(elspot_prices_2021_hourly_eur$...9[-1]) #DK1 is row 9
plot((24*(1:31) - 21), elspot[(24*(1:31) - 21)], col="deepskyblue",
ylim=c(9, 120), xlab="Hour", ylab="Price in Eur/h",
main="Seasonality of hourly prices in January 2021")
lines((24*(1:31) - 21), elspot[(24*(1:31) - 21)], col="deepskyblue")
points((24*(1:31) - 6), elspot[(24*(1:31) - 6)], col="lightcoral")
lines((24*(1:31) - 6), elspot[(24*(1:31) - 6)], col="lightcoral")
legend("topleft", legend=c("3AM", "6AM"), col=6:3, pch=19, bty="n")
The best I could come up with is
date_daily <- as.Date(elspot_prices_2021_daily_eur$`Elspot Prices in EUR/MWh`,
format="%m/%d/%y %H")[-1]
hours <- c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24")
date_hourly <- paste(hours[(0:8759) %% 24 + 1], date_daily[(0:8759) %/% 24 + 1])
date_hourly <- as_datetime(date_hourly, format="%H %Y-%m-%d")
But without luck
What the plot currently looks like

Tangled lines in plotly line chart with dates in axis

im trying to create a line chart with dates in the x-axis using plotly but the lines I get are quite confusing like they go back and forth.
library(plotly)
date = c("2020-02-06", "2020-11-21", "2019-10-26",
"2020-09-20", "2020-01-11", "2019-09-15", "2020-08-03", "2019-02-05",
"2018-05-18", "2020-01-20", "2020-01-29", "2019-04-15", "2019-06-27",
"2017-11-29", "2017-12-01", "2019-04-04", "2017-11-28", "2018-11-29",
"2020-06-26", "2020-06-26")
traffic.sp = c("28", "28", "20",
"20", "22", "36", "36", "29", "0", "22", "23", "28", "28", "37",
"26", "15", "39", "38", "22", "22")
df<-data.frame(date,traffic.sp)
fig <- plot_ly(df, x = ~date, y = ~traffic.sp, name = 'trace 0', type = 'scatter', mode = 'lines')
fig
This may be a better alternative that you can plug into plotly:
line <- df %>%
select(date,
traffic.sp) %>%
arrange(date) %>%
ggline(
x="date",
y="traffic.sp",
color = "steelblue")+
theme(axis.text.x = element_text(angle = 90))
ggplotly(line)
Which gives you this:

Display all the dates that are in included in the date column of my dataframe in the plotly axis

I want in the x-axis of my line chart to be displayed all the possible dates that are included in my dataset and not for example Jan 2018 and then Jul 2018.
library(plotly)
date = c("2020-02-06", "2020-11-21", "2019-10-26",
"2020-09-20", "2020-01-11", "2019-09-15", "2020-08-03", "2019-02-05",
"2018-05-18", "2020-01-20", "2020-01-29", "2019-04-15", "2019-06-27",
"2017-11-29", "2017-12-01", "2019-04-04", "2017-11-28", "2018-11-29",
"2020-06-26", "2020-06-26")
traffic.sp = c("28", "28", "20",
"20", "22", "36", "36", "29", "0", "22", "23", "28", "28", "37",
"26", "15", "39", "38", "22", "22")
df<-data.frame(date,traffic.sp)
df$date<-as.Date(df$date, format = "%Y-%m-%d")
fig <- plot_ly(df, x = ~sort(date), y = ~traffic.sp, name = 'trace 0', type = 'scatter', mode = 'lines')
fig
Ignoring the poor formatting that comes from including all the possible ticks, you can use tickvals within layout() to manipulate where ticks appear on an axis.
fig %>%
layout(xaxis = list(tickvals = unique(date)))

Column retrieved from database must be cast to numeric to be usable

If I retrieve the data from a database (a MySQL database with either RMariaDB or ODBC) I get errors while using the data as-is with multiple R functions (hist, boxplot, but not sd or summary):
Error in hist.default(lockout_per_hour$alarm_count) :
some 'x' not counted; maybe 'breaks' do not span range of 'x'
In addition: Warning message:
In pretty.default(range(x), n = breaks, min.n = 1) :
Internal(pretty()): very small range.. corrected
If I just export that same data to a CSV file and import it in RStudio everything works, otherwise if I want to use the data from the database I have to cast it to numeric.
As requested, code:
library(DBI);
db <- DBI::dbConnect(odbc::odbc(), 'my-dns');
q_perHour = "SELECT
DATE_FORMAT(MIN(timestamp), '%H') hour, COUNT(*) count
FROM alarm
GROUP BY YEAR(timestamp), MONTH(timestamp), DAY(timestamp), HOUR(timestamp)
LIMIT 100";
rs = dbSendQuery(db, q_perHour);
data <- dbFetch(rs);
hist(data$count); # KO
sd(data$count); # OK
dput output:
structure(list(hour = c("18", "19", "20", "21", "22", "23", "00",
"01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "00", "01", "02", "03", "04", "05", "06", "07", "08", "09",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "00", "01", "02", "03", "04", "05", "06", "07",
"08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21", "22", "23", "00", "01", "02", "03", "04", "05",
"06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21"), count = structure(c(2.47032822920623e-323,
4.94065645841247e-323, 3.95252516672997e-323, 3.95252516672997e-323,
3.45845952088873e-323, 3.95252516672997e-323, 8.39911597930119e-323,
1.48219693752374e-323, 3.95252516672997e-323, 3.45845952088873e-323,
5.92878775009496e-323, 5.92878775009496e-323, 4.94065645841247e-323,
5.43472210425371e-323, 2.47032822920623e-323, 1.97626258336499e-323,
5.43472210425371e-323, 5.43472210425371e-323, 4.44659081257122e-323,
9.38724727098368e-323, 5.92878775009496e-323, 6.91691904177745e-323,
6.42285339593621e-323, 2.47032822920623e-323, 4.94065645841247e-323,
8.89318162514244e-323, 4.44659081257122e-323, 8.39911597930119e-323,
1.08694442085074e-322, 1.33397724377137e-322, 2.02566914794911e-322,
1.13635098543487e-322, 1.24010477106153e-321, 9.40700989681733e-321,
1.43279037293961e-322, 1.67982319586024e-322, 1.08694442085074e-322,
4.44659081257122e-323, 7.90505033345994e-323, 5.92878775009496e-323,
7.4109846876187e-323, 6.91691904177745e-323, 8.89318162514244e-323,
5.92878775009496e-323, 9.88131291682493e-323, 7.90505033345994e-323,
9.38724727098368e-323, 1.18575755001899e-322, 7.4109846876187e-323,
1.23516411460312e-322, 1.23516411460312e-322, 1.13635098543487e-322,
1.72922976044436e-322, 1.28457067918724e-322, 1.67982319586024e-322,
1.72922976044436e-322, 9.38724727098368e-323, 2.12448227711736e-322,
2.99403781379795e-321, 1.13635098543487e-322, 1.13635098543487e-322,
7.90505033345994e-323, 8.39911597930119e-323, 9.38724727098368e-323,
7.4109846876187e-323, 6.91691904177745e-323, 5.92878775009496e-323,
8.89318162514244e-323, 6.42285339593621e-323, 6.91691904177745e-323,
1.13635098543487e-322, 7.90505033345994e-323, 1.67982319586024e-322,
2.27270197086973e-322, 1.87744945419674e-322, 7.90505033345994e-323,
1.43279037293961e-322, 8.89318162514244e-323, 1.13635098543487e-322,
1.23516411460312e-322, 1.03753785626662e-322, 1.28457067918724e-322,
1.03753785626662e-322, 7.4109846876187e-323, 9.88131291682493e-323,
1.08694442085074e-322, 3.45845952088873e-323, 7.4109846876187e-323,
4.44659081257122e-323, 4.94065645841247e-323, 3.45845952088873e-323,
2.96439387504748e-323, 5.43472210425371e-323, 5.43472210425371e-323,
7.90505033345994e-323, 6.91691904177745e-323, 5.43472210425371e-323,
7.90505033345994e-323, 8.39911597930119e-323, 7.11454530011395e-322
), class = "integer64")), class = "data.frame", row.names = c(NA,
-100L))
As suggested, the issue is remediated if I change the connection to:
db <- DBI::dbConnect(odbc::odbc(), 'my-dns', bigint='numeric');
It seems the class "integer64" does not work well with the hist() function. Try modifying both variables to numeric:
library(dplyr)
data = mutate(data, hour = as.numeric(hour), count = as.numeric(count))
This works, although a warning is thrown for hist(data$count):
Warning messages: 1: In pretty.default(range(x), n = breaks, min.n =
: Internal(pretty()): very small range.. corrected 2: In
plot.window(xlim, ylim, "", ...) : Internal(pretty()): very small
range.. corrected
This warning seems to be connected to the data itself, though.
Also, you can try using the bigint argument in dbConnect() set to "numeric". This governs how 64-bit integer data is returned.

ggplot2 for procrustes rotation in vegan

I want to plot procrustes rotations between to RDA-objects obtained by vegan with ggplot2.
library(vegan)
#perform two RDAs, do procrustes:
pro.test <- procrustes(rda.t1,rda.t2)
I extracted the x,y coordinates from list of class "procrustes" and added a factor "dates".
test <- data.frame(rda1=pro.test$Yrot[,1], rda2=pro.test$Yrot[,2])
test$dates <- c(rep("A", 8), rep("B",8), rep("C", 8))
test.2 <- data.frame(rda1=pro.test$X[,1], rda2=pro.test$X[,2])
test.2$dates <- c(rep("A", 8), rep("B",8), rep("C", 8))
Now the basic plot:
ggplot() +
geom_point(data=test, aes(x=rda1, y=rda2, color=dates)) +
geom_point(data=test.2,aes(x=rda1, y=rda2, color=dates))
The part i cannot do is the plotting of small lines between each corresponding point in test and test.2
Vegan does plot these rotations with arrows rather than connected points. However, vegan does not khow to color according to sampling groups/factors, which is important for me.
Having arrows in the ggplot would be extremely great - i know there is a geom_segment with the argument "arrow".
Could you help me?
the dput of pro.test is below.
dput(pro.test)
structure(list(Yrot = structure(c(0.126093537705143, 0.196350569855869,
-0.0513472841582749, -0.304416713452662, 0.210682972922012, -0.0219477831881197,
-0.24519038499101, 0.338357488742126, -0.399739151138497, -0.366424716631558,
0.0321561053701086, 0.565794811541598, 0.606054432756139, -0.0122819831669951,
-0.00403199420346042, -0.0448308879361932, 0.0631101371381566,
-0.150820933315408, -0.018216051372273, -0.68513841544701, -0.117446131920294,
-0.450735018917557, 0.25749869839177, 0.47646869541639, -0.211447138648954,
-0.236584149111598, -0.0316882271224907, -0.281680981927695,
-0.182346139754316, -0.366221121187894, -0.263915986724565, -0.203160918536977,
0.209888424862468, 0.219400450315756, 0.143569801341895, 0.258388604988749,
0.542334722496036, 0.465147580652753, 0.294835945722885, 0.523372408452242,
0.0739580893460179, 0.242768571724456, 0.0409877673276456, -0.0942111509903291,
-0.193072299067071, -0.38889179801965, -0.352882980509932, -0.208549475629433
), .Dim = c(24L, 2L), .Dimnames = list(c("1", "2", "3", "4",
"5", "6", "7", "8", "9", "10", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21", "22", "23", "24", "25"), NULL)),
X = structure(c(0.0860177119127241, 0.217144585357183, -0.0301829830202831,
-0.246142550516987, 0.230574651598493, 0.00485065775494245,
-0.225907453854864, 0.371465194869491, -0.395330365511425,
-0.359255005182027, -0.00775013746753128, 0.47442649486468,
0.519983070801763, -0.0146878517934982, 0.0377018407084686,
-0.0885829362985767, 0.0935962405791314, -0.186192083265912,
0.00247095461296341, -0.655467761687806, -0.0966978065526177,
-0.398672122636169, 0.275589258531376, 0.39104839619648,
-0.273098318897548, -0.237373845171625, -0.0351119316278201,
-0.279271270040404, -0.171188235636994, -0.342350443283954,
-0.297148604541773, -0.21965804713297, 0.269830887309913,
0.268669489120665, 0.143826114581508, 0.229549645414776,
0.531869658831067, 0.479136042616735, 0.380638462867711,
0.548249030471031, 0.161449266776772, 0.282765937749097,
0.0756433464279055, 0.00516171212969907, -0.195519622624857,
-0.568932423412245, -0.381681091857682, -0.375455760069009
), .Dim = c(24L, 2L), const = 1.30375728392289, .Dimnames = list(
c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
"12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "24", "25"), c("RDA1", "RDA2")), "`scaled:center`" = structure(c(1.3588667228485e-17,
4.69820941410385e-18), .Names = c("RDA1", "RDA2"))), ss = 0.146265769408323,
rotation = structure(c(-0.958883328045618, 0.283800569407742,
0.283800569407742, 0.958883328045619), .Dim = c(2L, 2L)),
translation = structure(c(-9.76742606822348e-18, 1.35523649355013e-17
), .Dim = 1:2), scale = 0.918742698883168, xmean = structure(c(1.3586408473959e-17,
4.71176194125992e-18), .Names = c("RDA1", "RDA2")), symmetric = FALSE,
call = procrustes(X = rda.t1, Y = rda.t2), svd = structure(list(
d = c(2.51563498111738, 2.16729713036852), u = structure(c(0.743008138366833,
0.669282381600362, 0.669282381600362, -0.743008138366833
), .Dim = c(2L, 2L)), v = structure(c(-0.522515395489416,
0.852629850214347, -0.852629850214347, -0.522515395489416
), .Dim = c(2L, 2L))), .Names = c("d", "u", "v"))), .Names = c("Yrot",
"X", "ss", "rotation", "translation", "scale", "xmean", "symmetric",
"call", "svd"), class = "procrustes")
Does this do the job?
library(ggplot2)
library(grid)
ctest <- data.frame(rda1=pro.test$Yrot[,1],
rda2=pro.test$Yrot[,2],xrda1=pro.test$X[,1],
xrda2=pro.test$X[,2],dates=rep(c("A","B","C"),each=8))
ggplot(ctest) +
geom_point(aes(x=rda1, y=rda2, colour=dates)) +
geom_point(aes(x=xrda1, y=xrda2, colour=dates)) +
geom_segment(aes(x=rda1,y=rda2,xend=xrda1,yend=xrda2,colour=dates),arrow=arrow(length=unit(0.2,"cm")))

Resources