Column retrieved from database must be cast to numeric to be usable - r

If I retrieve the data from a database (a MySQL database with either RMariaDB or ODBC) I get errors while using the data as-is with multiple R functions (hist, boxplot, but not sd or summary):
Error in hist.default(lockout_per_hour$alarm_count) :
some 'x' not counted; maybe 'breaks' do not span range of 'x'
In addition: Warning message:
In pretty.default(range(x), n = breaks, min.n = 1) :
Internal(pretty()): very small range.. corrected
If I just export that same data to a CSV file and import it in RStudio, everything works; otherwise, if I want to use the data straight from the database, I have to cast it to numeric.
As requested, code:
# Reproduction: fetch per-hour alarm counts over ODBC and plot them.
library(DBI);
# Connect through the ODBC data source named 'my-dns'.
db <- DBI::dbConnect(odbc::odbc(), 'my-dns');
# One row per calendar hour (year/month/day/hour): the hour of day as text
# and the number of alarm rows in that hour, limited to 100 rows.
q_perHour = "SELECT
DATE_FORMAT(MIN(timestamp), '%H') hour, COUNT(*) count
FROM alarm
GROUP BY YEAR(timestamp), MONTH(timestamp), DAY(timestamp), HOUR(timestamp)
LIMIT 100";
# NOTE(review): rs is never released with dbClearResult() and db is never
# disconnected in this snippet.
rs = dbSendQuery(db, q_perHour);
data <- dbFetch(rs);
# The dput output of `data` shows that `count` has class "integer64".
hist(data$count); # KO: raises the hist.default error quoted above
sd(data$count); # OK
dput output:
structure(list(hour = c("18", "19", "20", "21", "22", "23", "00",
"01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11",
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
"23", "00", "01", "02", "03", "04", "05", "06", "07", "08", "09",
"10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
"21", "22", "23", "00", "01", "02", "03", "04", "05", "06", "07",
"08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18",
"19", "20", "21", "22", "23", "00", "01", "02", "03", "04", "05",
"06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16",
"17", "18", "19", "20", "21"), count = structure(c(2.47032822920623e-323,
4.94065645841247e-323, 3.95252516672997e-323, 3.95252516672997e-323,
3.45845952088873e-323, 3.95252516672997e-323, 8.39911597930119e-323,
1.48219693752374e-323, 3.95252516672997e-323, 3.45845952088873e-323,
5.92878775009496e-323, 5.92878775009496e-323, 4.94065645841247e-323,
5.43472210425371e-323, 2.47032822920623e-323, 1.97626258336499e-323,
5.43472210425371e-323, 5.43472210425371e-323, 4.44659081257122e-323,
9.38724727098368e-323, 5.92878775009496e-323, 6.91691904177745e-323,
6.42285339593621e-323, 2.47032822920623e-323, 4.94065645841247e-323,
8.89318162514244e-323, 4.44659081257122e-323, 8.39911597930119e-323,
1.08694442085074e-322, 1.33397724377137e-322, 2.02566914794911e-322,
1.13635098543487e-322, 1.24010477106153e-321, 9.40700989681733e-321,
1.43279037293961e-322, 1.67982319586024e-322, 1.08694442085074e-322,
4.44659081257122e-323, 7.90505033345994e-323, 5.92878775009496e-323,
7.4109846876187e-323, 6.91691904177745e-323, 8.89318162514244e-323,
5.92878775009496e-323, 9.88131291682493e-323, 7.90505033345994e-323,
9.38724727098368e-323, 1.18575755001899e-322, 7.4109846876187e-323,
1.23516411460312e-322, 1.23516411460312e-322, 1.13635098543487e-322,
1.72922976044436e-322, 1.28457067918724e-322, 1.67982319586024e-322,
1.72922976044436e-322, 9.38724727098368e-323, 2.12448227711736e-322,
2.99403781379795e-321, 1.13635098543487e-322, 1.13635098543487e-322,
7.90505033345994e-323, 8.39911597930119e-323, 9.38724727098368e-323,
7.4109846876187e-323, 6.91691904177745e-323, 5.92878775009496e-323,
8.89318162514244e-323, 6.42285339593621e-323, 6.91691904177745e-323,
1.13635098543487e-322, 7.90505033345994e-323, 1.67982319586024e-322,
2.27270197086973e-322, 1.87744945419674e-322, 7.90505033345994e-323,
1.43279037293961e-322, 8.89318162514244e-323, 1.13635098543487e-322,
1.23516411460312e-322, 1.03753785626662e-322, 1.28457067918724e-322,
1.03753785626662e-322, 7.4109846876187e-323, 9.88131291682493e-323,
1.08694442085074e-322, 3.45845952088873e-323, 7.4109846876187e-323,
4.44659081257122e-323, 4.94065645841247e-323, 3.45845952088873e-323,
2.96439387504748e-323, 5.43472210425371e-323, 5.43472210425371e-323,
7.90505033345994e-323, 6.91691904177745e-323, 5.43472210425371e-323,
7.90505033345994e-323, 8.39911597930119e-323, 7.11454530011395e-322
), class = "integer64")), class = "data.frame", row.names = c(NA,
-100L))
As suggested, the issue is remediated if I change the connection to:
db <- DBI::dbConnect(odbc::odbc(), 'my-dns', bigint='numeric');

It seems the class "integer64" does not work well with the hist() function. Try modifying both variables to numeric:
library(dplyr)
# Convert both columns to plain doubles: `hour` arrives as character and
# `count` as integer64.
data <- data %>%
  mutate(
    hour = as.numeric(hour),
    count = as.numeric(count)
  )
This works, although a warning is thrown for hist(data$count):
Warning messages:
1: In pretty.default(range(x), n = breaks, min.n = 1) :
  Internal(pretty()): very small range.. corrected
2: In plot.window(xlim, ylim, "", ...) :
  Internal(pretty()): very small range.. corrected
This warning seems to be connected to the data itself, though.
Also, you can try using the bigint argument in dbConnect() set to "numeric". This governs how 64-bit integer data is returned.

Related

r - How to create a multiple line graph where each line is a specified value of a column

I'm working with a bike share dataset that I've named "all_rides_v02".
Relevant columns are day_of_the_week (self-explanatory) and member_casual (rides are logged as either casual or member).
$ ride_id <chr> "99103BB87CC6C1BB", "EAFCCCFB0A3FC5A1", "9EF4F46C57AD23…
$ rideable_type <chr> "electric_bike", "electric_bike", "electric_bike", "ele…
$ member_casual <chr> "member", "member", "member", "member", "member", "memb…
$ date <date> 2021-08-10, 2021-08-10, 2021-08-21, 2021-08-21, 2021-0…
$ month <chr> "08", "08", "08", "08", "08", "08", "08", "08", "08", "…
$ day <chr> "10", "10", "21", "21", "19", "19", "19", "13", "17", "…
$ year <chr> "21", "21", "21", "21", "21", "21", "21", "21", "21", "…
$ day_of_the_week <chr> "Tuesday", "Tuesday", "Saturday", "Saturday", "Thursday…
I'm trying to create a line graph with two lines, where one line represents "member rides" and the other represents "casual rides". The x-axis would be day_of_the_week and the y-axis would be the number of rides (which is not explicitly logged in the dataset).
Any advice?
ggplot(data=all_rides_v02)+
geom_line(aes(x=day_of_the_week, y=value, color=as.factor(member_casual)))+
geom_line()+
geom_point()
I could probably post a dozen ways I've done it incorrectly. The main issue I keep running into is that I don't know how to work around not having the "y value". I just want it to be the number of rides.
You'd need to aggregate your data first. If you're using the full tidyverse, you can go
# Count rides per weekday and rider type, then draw one line per rider type.
all_rides_v02 %>%
  # count() returns an ungrouped result, so day_of_the_week can be
  # modified in the next step (mutate() refuses to change a grouping column).
  count(day_of_the_week, member_casual, name = "count") %>%
  # Order the weekdays chronologically instead of alphabetically.
  mutate(
    day_of_the_week = factor(
      day_of_the_week,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  ggplot() +
  # With a discrete x-axis, `group` must be set explicitly; otherwise every
  # weekday/rider-type combination is its own one-point group and no line
  # is drawn.
  geom_line(
    aes(
      x = day_of_the_week,
      y = count,
      colour = member_casual,
      group = member_casual
    )
  )

Dates instead of number of hours on the X-axis?

I've tried to put dates instead of the number of hours on the x-axis of this plot, which shows daily prices at different hours over the course of a month.
elspot_prices_2021_hourly_eur <- read_excel("elspot-prices_2021_hourly_eur.xlsx")
elspot_prices_2021_daily_eur <- read_excel("elspot-prices_2021_daily_eur.xlsx")

# Seasonality of the same hour across the 31 days of January 2021.
# DK1 prices are taken from the column auto-named "...9"; its first entry
# is dropped (presumably a sub-header row -- confirm against the sheet).
elspot <- as.double(elspot_prices_2021_hourly_eur[["...9"]][-1])

# One index per day for each of the two hours being compared.
idx_first <- 24 * (1:31) - 21
idx_second <- 24 * (1:31) - 6
series_cols <- c("deepskyblue", "lightcoral")

plot(idx_first, elspot[idx_first], col = series_cols[1],
     ylim = c(9, 120), xlab = "Hour", ylab = "Price in Eur/h",
     main = "Seasonality of hourly prices in January 2021")
lines(idx_first, elspot[idx_first], col = series_cols[1])
points(idx_second, elspot[idx_second], col = series_cols[2])
lines(idx_second, elspot[idx_second], col = series_cols[2])

# The legend reuses the colours, point symbol and line type of the two
# series so that its keys match what is drawn.
# NOTE(review): index 24 * k - 6 is the 18th hour of day k, so the "6AM"
# label may be meant as "6PM" -- confirm against the data layout.
legend("topleft", legend = c("3AM", "6AM"), col = series_cols,
       pch = 1, lty = 1, bty = "n")
The best I could come up with is
# Parse the first column of the daily sheet as dates, dropping its first entry.
date_daily <- as.Date(elspot_prices_2021_daily_eur$`Elspot Prices in EUR/MWh`,
format="%m/%d/%y %H")[-1]
# Two-digit hour labels "01" to "24".
hours <- c("01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12",
"13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24")
# Build 8760 (365 * 24) "HH YYYY-MM-DD" strings: the hour label cycles every
# 24 entries while the day advances once per 24 entries.
date_hourly <- paste(hours[(0:8759) %% 24 + 1], date_daily[(0:8759) %/% 24 + 1])
# NOTE(review): "%H" normally covers 00-23, so the "24" label may fail to
# parse -- confirm.
date_hourly <- as_datetime(date_hourly, format="%H %Y-%m-%d")
But without luck
What the plot currently looks like

Show unique values on bubbles graph in R

I'm working with a db which looks more or less like this:
dput(ex)
structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
And I'm using the packcircles package to create a bubble graph for just one variable since standard bubbles are added as a third dimension on ggplot. I'm using paletteer library too:
library(packcircles)
library(paletteer)
The following code creates the data.frame that will be used for the graph. First I create the coordinates for the bubbles (circles), and then I add the clave and n variables from the original data.frame:
# Create circles
# circleProgressiveLayout() is given one size per row of ex (ex$n);
# circleLayoutVertices() then turns that layout into polygon vertices.
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints=50)
# Incorporate variables
# Each value is repeated 51 times -- presumably the 50 requested vertices
# plus one closing vertex per circle; confirm against the packcircles docs.
ex_$clave <- rep(ex$clave, each=51)
ex_$n <- rep(ex$n, each=51)
# Palette
# 33 colours, matching the 33 rows of ex.
colors <- paletteer_c("ggthemes::Green-Gold", 33)
Now we're ready to graph:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= unique(ex_$n))-> my_graph
plotly::ggplotly(my_graph)
Code above throws following error:
Error in `check_aesthetics()`:
! Aesthetics must be either length 1 or the same as the data (1683): label
Run `rlang::last_error()` to see where the error occurred.
If I use instead:
ggplot(data = ex_, aes(x, y, fill=clave)) +
geom_polygon() +
coord_fixed(ratio = 4/5) +
theme_void() +
scale_fill_manual(values = rev(colors)) +
geom_text(size = 3, label= ex_$n)-> my_graph
plotly::ggplotly(my_graph)
Now every circle is surrounded by text (the same text 51 times). What I want is for only clave and a single value of n to be shown when the mouse pointer hovers over each circle.
Any advice or ideas on how to handle this would be much appreciated.
How about this. The mouse-over doesn't work in the static picture below, but if you run the code, it should.
ex <- structure(list(clave = c("01", "02", "03", "04", "05", "06",
"07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17",
"18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28",
"29", "30", "31", "32", "33"), n = c(2127, 3519, 153, 2070, 3089,
2971, 3005, 152, 53409, 2351, 4599, 3121, 4828, 7588, 25714,
4218, 3032, 295, 3856, 3885, 7044, 3246, 2589, 2559, 2223, 2316,
3560, 2695, 2465, 6742, 4024, 2065, 1627)), row.names = c(NA,
-33L), class = c("tbl_df", "tbl", "data.frame"))
library(tidyverse)
library(packcircles)
library(paletteer)
# Lay out one circle per row of ex, then expand every circle into polygon
# vertices that geom_polygon() can draw.
n_points <- 50
ex_ <- circleProgressiveLayout(ex$n)
ex_ <- circleLayoutVertices(ex_, npoints = n_points)

# Incorporate variables. circleLayoutVertices() labels each vertex with the
# row number of its circle in `id`, so indexing by it avoids hard-coding
# how many vertices each circle has.
ex_$clave <- ex$clave[ex_$id]
ex_$n <- ex$n[ex_$id]

# Palette: one colour per circle.
colors <- paletteer_c("ggthemes::Green-Gold", nrow(ex))

# Tooltip text shown once per circle on mouse-over.
ex_ <- ex_ %>%
  mutate(lab = paste0("clave: ", clave, "\nN: ", n))

# `text` is not drawn by ggplot2 itself; ggplotly() picks it up through
# tooltip = "text".
my_graph <- ggplot(data = ex_, aes(x, y, fill = clave, text = lab)) +
  geom_polygon() +
  coord_fixed(ratio = 4 / 5) +
  theme_void() +
  scale_fill_manual(values = rev(colors))

plotly::ggplotly(my_graph, tooltip = "text")
Created on 2022-04-06 by the reprex package (v2.0.1)

Is there a way to have separate weighted edges for ins and outs in igraph R?

I'm making a directed network in R studio with the igraph package. I would like to display two edges between nodes where applicable, both weighted, one for ins and one for outs. I'm very new to R and managed to get weighted edges that compile both ins and outs but would like them separated.
I've googled my problem to no avail; I might be phrasing it wrong. I apologize in advance if I worded it badly.
EDIT: Minimal reproducible sample:
library(igraph)

# Edge list: one row per observed movement; repeated rows are repeated
# movements along the same directed edge.
OPR.df <- data.frame(
  From = c(
    "8", "8", "8", "8", "7", "25", "24", "1A", "12", "12",
    "12", "12", "12", "17", "17", "17",
    "17", "17", "17", "17",
    "17", "17", "17", "17", "17", "9A", "9", "17", "9", "17", "9",
    "9", "17", "17", "17"
  ),
  To = c(
    "8", "8", "8", "7", "25", "24", "1A", "12", "12", "12",
    "12", "12", "17", "17", "17", "17",
    "17", "17", "17", "17",
    "17", "17", "17", "17", "9A", "9", "17", "9", "17", "9", "17",
    "17", "17", "17", "17"
  )
)

opr.d <- graph_from_data_frame(d = OPR.df, directed = TRUE)

# Give every raw edge a weight of 1 so that collapsing duplicate edges
# sums them into a count. In a directed graph A -> B and B -> A are
# different edges, so each direction keeps its own total.
E(opr.d)$weight <- 1
opr.sd <- simplify(
  opr.d,
  remove.multiple = TRUE,
  remove.loops = FALSE,
  edge.attr.comb = list(weight = "sum", type = "ignore")
)

# Scale the drawn edge width by the summed weight.
E(opr.sd)$width <- E(opr.sd)$weight / 3
There are a number of things that you can do to make the two-way
links more visible. First, plotting using the default layout crowds
the vertices 9, 9A and 17 too close together. There is no room to
see the edges. I will use layout_with_graphopt, which works fine
for this example, although for more complex examples you may need
to tune up the layout even more.
set.seed(4321)
plot(opr.sd, xpd=NA)
set.seed(4321)
plot(opr.sd, layout=layout_with_graphopt)
Of course, we still have the problem from your original question:
the arrows overlap each other. You can fix this using the edge.curved
argument. I wanted all of the arrows to be straight except where they
overlap, so I created a customized curvature vector to adjust only the
overlapping edges. Also, the arrow heads are too big and made it hard
to see the arrows, so I made the heads a bit smaller. All together, we get:
# Straight edges by default; only edges 2, 11 and 13 are bent.
CURV <- numeric(ecount(opr.sd))
CURV[c(2, 11, 13)] <- 0.6

# Fixed seed so the graphopt layout is reproducible.
set.seed(4321)
plot(
  opr.sd,
  layout = layout_with_graphopt,
  edge.arrow.size = 0.7,
  edge.curved = CURV,
  frame = TRUE
)
You might still want to tweak this a bit, but I think this shows the
path to solving your problem.

invalid color name background in qgraph

I have been trying to use qgraph to generate a network graph. The code is as follows:
Gw <- qgraph(edgeList, diag = TRUE, labels = TRUE,legend.cex = 0.3, vsize = 1,edge.color=colorLabels,legend=TRUE,asize=1)
The figure is generated, but the R command line gives the following error message. I do not know what the invalid color name 'background' means.
The dput result is shown as follows,
dput(edgeList)
structure(c("1", "2", "2", "3", "4", "5", "6", "7", "8", "1",
"9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "16",
"4", "5", "7", "1", "9", "10", "19", "20", "2", "16", "21", "3",
"22", "5", "23", "8", "1", "20", "2", "13", "14", "17", "14",
"1", "19", "14", "2", "21", "14", "24", "1", ":499.3", "nk Transfe",
"de of tran", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"ine:4121", "ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.",
"ine:10603", "ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8",
"ine:884.6", "ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7",
"ine:5482.6", "ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6",
"ine:3257.4", "ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1",
"ine:364.8", ":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6",
"l:1637.6", "l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1",
"l:2715.1", "l:12708.2", "l:1042", ":499.3", "nk Transfe", "de of tran",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ",
"up(non-US ", "up(non-US ", "up(non-US ", "up(non-US ", "ine:4121",
"ine:3257.4", "ine:75.2", "ine:75.2", "ine:11615.", "ine:10603",
"ine:334.2", "ine:7256.8", "ine:7256.8", "ine:996.8", "ine:884.6",
"ine:364.9", "ine:6360", "ine:5640.9", "ine:2729.7", "ine:5482.6",
"ine:85", "ine:1474.9", "ine:700.8", "ine:2754.6", "ine:3257.4",
"ine:3257.4", "ine:7307.8", "ine:18560.", "ine:85.1", "ine:364.8",
":700.1", ":5317", "l:4258.9", "l:4258.9", "l:1637.6", "l:1637.6",
"l:46.4", "l:3938.5", "l:3938.5", "l:2800.4", "l:2715.1", "l:2715.1",
"l:12708.2", "l:1042", "25", "1", "1", "26", "27", "28", "29",
"30", "31", "25", "32", "33", "4", "4", "3", "3", "5", "5", "7",
"6", "6", "27", "28", "30", "25", "32", "33", "9", "8", "1",
"1", "10", "12", "12", "16", "16", "16", "16", "8", "1", "3",
"3", "7", "7", "25", "9", "9", "1", "10", "10", "14", "14"), .Dim = c(104L,
2L), .Dimnames = list(NULL, c("newsendId", "newtoId")))
The generated figure is as follows. I used the following command to generate it
Gw <- qgraph(edgeList, layout = "spring", diag = FALSE, labels = TRUE, cut = NULL, edge.color = "red",legend.cex = 0.5, vsize = 8)
Which nodes are problems? With your data and code you can modify label.cex. There are other variations of the arguments for the label and legend sizes. Here is one version, with the color blue.
library(qgraph)
# Spring layout with red edges, node size 4 and small blue labels.
Gw <- qgraph(
  edgeList,
  layout = "spring",
  diag = FALSE,
  labels = TRUE,
  cut = NULL,
  edge.color = "red",
  legend.cex = 0.3,
  vsize = 4,
  label.cex = 0.3,
  label.color = "blue"
)
Gw

Resources