My data look like this:
set.seed(123)
library(tidyverse)
library(reshape2)
Year <- c(2017, 2017, 2017, 2018, 2018, 2018)
Month <- c(10, 11, 12, 1, 2, 3)
alpha_test <- runif(n = 6, min = 0.2, max = 0.25)
alpha_control <- runif(n = 6, min = 0.17, max = 0.22)
beta_test <- runif(n = 6, min = 0.01, max = 0.1)
beta_control <- runif(n = 6, min = 0.03, max = 0.05)
df <- tibble(Year, Month, alpha_test, alpha_control, beta_test, beta_control)
df
What I want is, two geom_path charts (one chart for alpha, one for beta) which compare the test and the control. Here's an example from Excel for a similar test:
I assume I will need to melt the data in some way to get what I want. But, the command
rawMelt <- melt(df, id.vars = c(Year, Month))
gives the error Error: id variables not found in data: 2017, 2018, October, November, December, January, February, March. How would you melt these data so that I can make the graph I want?
This is what I eventually went with, should anyone else have this problem:
rawMelt <- melt(df, id.vars = c("Year", "Month")) %>%
mutate(
theSource = ifelse(grepl("test", variable), "test", "control"),
metric = ifelse(grepl("alpha", variable), "alpha", "beta"),
monthText = paste0(Year, "_", ifelse(Month < 10, "0", ""), Month)
) %>%
select(-variable)
g_maker <- function(theMetric) {
theChart <- rawMelt %>%
filter(metric == theMetric)
g <- ggplot(theChart, aes(x = as.factor(monthText), y = value, group = theSource)) +
geom_path(aes(color = theSource)) +
scale_color_manual(values = c("red", "black")) +
theme_minimal() +
xlab(NULL) +
theme(axis.text.x = element_text(angle = 75, hjust = 1))
return(g)
}
alpha_graph <- g_maker("alpha")
beta_graph <- g_maker("beta")
alpha_graph
beta_graph
Related
DF <- data.frame(Height = rnorm(100, 170, 5),
Weight = rnorm(100, 55, 5))
BMI = function(height,weight){(weight/(height)^2*10000)}
DF$bmi = BMI(DF$Height,DF$Weight)
DF$weight_group <-
cut(
x = DF$Weight,
breaks = c(0,60,70,Inf),
include.lowest = TRUE,
labels = c("0-60", "61-70", "71+")
)
DF$BMI_group <-
cut(
x = DF$bmi,
breaks = c(0, 19.75582, Inf),
include.lowest = TRUE,
labels = c("Below Average", "Above Average")
)
This is my code. I cannot figure out how to just calculate the average of the last half of the data frame. I didn't know how to add in gender, to make 50 males and 50 females, so this is my work around.
Are you looking for such a solution?
DF <- data.frame(Height = rnorm(100, 170, 5),
Weight = rnorm(100, 55, 5),
Gender = c(rep("male", 50), rep("female", 50)))
BMI <- function(height,weight){(weight/(height)^2*10000)}
library(dplyr)
DF %>%
group_by(Gender) %>%
mutate(bmi = BMI(Height, Weight)) %>%
summarise(mean_bmi = mean(bmi))
# A tibble: 2 x 2
Gender mean_bmi
* <chr> <dbl>
1 female 19.4
2 male 19.6
We may use sample to create the column, subset the 'Gender' for 'F', and apply the BMI
DF$Gender <- sample(c("F", "M"), nrow(DF), replace = TRUE, prob = c(0.5, 0.5))
with(subset(DF, Gender == "F"), mean(BMI(Height, Weight)))
If we want to get the mean of 'BMI' by 'BMI_group'
subdf <- subset(DF, Gender == "F")
with(subdf, tapply(BMI(Height, Weight), BMI_group, FUN = mean))
Below Average Above Average
17.57841 21.43003
I have code that breaks down hours with corresponding values into quarters of an hour.
Unfortunately, when broken down into quarters of an hour, the values are identical for the entire hour.
After adding quarters of an hour, I would also like to add values between the original hours so that the graph is smooth and not sharp. How to do it, average it, interpolate it?
df <- data.frame(
h = 0:23,
x = c(22, 11, 5, 8 , 22, 88, 77, 7, 11, 5, 8 , 22, 88, 77, 11, 5, 8 , 22, 88, 77, 11, 5, 8 , 22))
library(dplyr)
library(stringr)
df %>%
data.frame(h = rep(df$h, each = 4), # quadruplicate rows
x = rep(df$x, each = 4)) %>% # quadruplicate rows
mutate(h.1 = str_pad(h.1, width = 2, side = "left", pad = "0"), # add leading '0'
qu = paste0(h.1, c(":00", ":15", ":30", ":45"))) %>% # create quarters
select( - c(h,x)) %>% # deselect obsolete cols
rename(c("h" = "h.1", "x" = "x.1"))
df %>%
ggplot() +
geom_point(aes(qu, x), color = "red", size = 2) +
labs(x= "", y = "",
title = "Example")
Here I make a "decimal hour" variable to simplify the calculations. We can also use hms::hms() to define a timestamp that ggplot2 can understand. I use base:approx here to interpolate between hourly points.
df2 <- df %>%
tidyr::uncount(4) %>% # make 4 copies of each row
mutate(h_dec = h + (0:3)/4,
h_time = hms::hms(hours = h_dec),
x = x * c(1, NA, NA, NA), # this is to make non-hourly into NA,
# so that approx only uses hourly
x_interp = approx(x = h, y = x, xout=h_dec)$y)
df2 %>%
ggplot() +
geom_point(aes(h_time, x_interp), color = "red", size = 2) +
labs(x= "", y = "",
title = "Example")
I am trying to combine bar plot and line plot on same visualization while the data for each plot comes from different data set. So the code I am trying:
library(dplyr)
library(echarts4r)
set.seed(600)
df1 <- data.frame(
#class = c(rep("1st", times = 60), rep("2nd", time = 30), rep("3rd", time = 30)),
week = rep(1:20, times = 3),
cat = c(rep("A", times = 20), rep("B", times = 20), rep("C", times = 20)),
count = round(runif(60, 1, 100))
)
df <- data.frame(
week = rep(1:20, times = 2),
cat = c(rep("D", times = 20), rep("E", times = 20)),
count = round(runif(40, 1, 100))
)
df1 %>%
group_by(cat) %>%
e_charts(week) %>%
e_bar(count, bind = cat) %>%
e_tooltip(
formatter = htmlwidgets::JS("
function(params){
return('<strong>' + params.name +
'</strong><br />week: ' + params.value[0] +
'<br />count: ' + params.value[1])
}
")
)
Trying to add line considering the data df on the viz. Below is what I am trying to achieve :
Here I have used echarts4rProxy() but is same thing possible outside Shiny?
Also is it possible to change the colors of bars and lines?
Thanks!!
Yes,
To go about it the way you do with 2 different datasets you can use e_data pass new data, it's just like e_charts but within the echarts4r pipe.
library(dplyr)
library(echarts4r)
set.seed(600)
df1 <- data.frame(
#class = c(rep("1st", times = 60), rep("2nd", time = 30), rep("3rd", time = 30)),
week = rep(1:20, times = 3),
cat = c(rep("A", times = 20), rep("B", times = 20), rep("C", times = 20)),
count = round(runif(60, 1, 100))
)
df <- data.frame(
week = rep(1:20, times = 2),
cat = c(rep("D", times = 20), rep("E", times = 20)),
count = round(runif(40, 1, 100))
)
df1 %>%
group_by(cat) %>%
e_charts(week) %>%
e_bar(count, bind = cat) %>%
e_data(data = group_by(df, cat), x = week) %>%
e_line(count) %>%
e_tooltip(
formatter = htmlwidgets::JS("
function(params){
return('<strong>' + params.name +
'</strong><br />week: ' + params.value[0] +
'<br />count: ' + params.value[1])
}
")
)
I'm trying to do a semi circle donut with highcharter library but I only know how to do a pie chart. I know that with JS you can do it by adding "startAngle" and "endAngle" but I want to know how to do it with R:
A <- c("a", "b", "c", "d")
B <- c(4, 6, 9, 2)
C <- c(23, 26, 13, 15)
df <- data.frame(A, B, C)
highchart() %>%
hc_chart(type = "pie") %>%
hc_add_series_labels_values(labels = df$A, values = df$B)%>%
hc_tooltip(crosshairs = TRUE, borderWidth = 5, sort = TRUE, shared = TRUE, table = TRUE,
pointFormat = paste('<b>{point.percentage:.1f}%</b>')
) %>%
hc_title(text = "ABC",
margin = 20,
style = list(color = "#144746", useHTML = TRUE))
Thank you!
You can do something like this though not using Highcharts library.
library(tidyverse)
library(ggforce)
library(scales)
library(ggplot2)
# -------------------------------------------------------------------------
A <- c("a", "b", "c", "d")
B <- c(4, 6, 9, 2)
C <- c(23, 26, 13, 15)
df <- data.frame(A, B, C)
# Ensure A is a factor (we'll be using it to fill the pie)
df$A <- factor(df$A)
# compute the individual proportion in this case using var C
df$prop <- df$C/sum(df$C)
# compute the cumulative proportion and use that to plot ymax
df$p_end <- cumsum(df$prop)
# generate a y-min between 0 and 1 less value than p_end (using p_end)
df$p_start <- c(0, head(df$p_end ,-1))
# -------------------------------------------------------------------------
# plot
df %>%
mutate_at(c("p_start", "p_end"), rescale, to=pi*c(-.5,.5), from=0:1) %>%
ggplot +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = .5, r = 1, start = p_start, end = p_end, fill=A)) +
coord_fixed() +xlab("X_label") + ylab("Y_lablel") + guides(fill=guide_legend(title="Legend Title"))
Output
Hope that helps.
Try adding startAngle = -90, endAngle = 90 inside hc_add_series_labels_values.
Note as per the warning hc_add_series_labels_values is deprecated so suggest using hc_add_series.
highchart() %>%
hc_add_series(type = "pie", data = df, hcaes(x = A, y = B), startAngle = -90, endAngle = 90) %>%
hc_tooltip(pointFormat = '<b>{point.percentage:.1f}%</b>') %>%
hc_title(text = "ABC",
margin = 20,
style = list(color = "#144746", useHTML = TRUE))
I'm trying to load data from Quandl with collapse = "monthly".
Some of the values are only available in a yearly or halve-yearly fashion.
Some other values are only available within certain periods of time.
This leaves me with a lot of inhomogeneous data.
How can I fill the yearly and halve-yearly data in a "Last observation carried forward" fashion and the remaining NAs with 0?
Here is my idea of the data I got and the one I want to have at the end:
library(tibble)
set.seed(4711)
# How do I get from:
#
df.start <- data_frame(
Date = seq.Date(as.Date("1990-01-01"), as.Date("1999-12-01"), "1 month"),
B = rep(NA, 120),
C = c(rep(NA, 50), rnorm(120 - 50)),
D = rep(c(rnorm(1), rep(NA, 11)), 10),
E = c(rep(NA, 24), rep(c(rnorm(1), rep(NA, 11)), 8)),
F = c(rep(NA, 45), rnorm(50), rep(NA, 25)),
G = c(rep(NA, 24), rep(c(rnorm(1), rep(NA, 11)), 6), rep(NA, 24)),
H = c(rep(NA, 10), rnorm(20), rep(NA, 16), rnorm(37), rep(NA, 37)),
I = rep(c(rnorm(1), rep(NA, 5)), 20)
)
#
# To:
#
df.end <- data_frame(
Date = seq.Date(as.Date("1990-01-01"), as.Date("1999-12-01"), "1 month"),
B = rep(0, 120),
C = c(rep(0, 50), rnorm(120 - 50)),
D = rep(rnorm(10), each = 12),
E = c(rep(0, 24), rep(rnorm(8), each = 12)),
F = c(rep(0, 45), rnorm(50), rep(0, 25)),
G = c(rep(0, 24), rep(rnorm(6), each = 12), rep(0, 24)),
H = c(rep(0, 10), rnorm(20), rep(0, 16), rnorm(37), rep(0, 37)),
I = rep(rnorm(20), each = 6)
)
#
# Automatically?
#
You can use fill to fill the NAs with the last non-empty value (except for the Date column), and then replace the remaining NAs by 0. We do these operations grouped by year.
library(tidyverse)
library(lubridate)
df.end <- df.start %>%
mutate(year = year(Date)) %>%
group_by(year) %>%
fill(., colnames(df.start[-1])) %>%
replace(., is.na(.), 0) %>%
ungroup() %>%
select(-year)