Related
I am having a problem with skinny bars in a stacked column chart in highcharter in R. I have created a repo of the code below and am wondering if someone could help me avoid the issue of having the bars get super skinny when I add more than 49 series to the graph.
Any ideas or work arounds would be greatly appreciated.
As always, thank you in advance.
Best,
Nate
library(highcharter)
library(magrittr)
library(viridisLite)
dfmtx<- as.data.frame.matrix(matrix(data = abs(rnorm(n=20*50, mean = 0, sd=1)), ncol = 50))
dfmtx<- dfmtx/rowSums(dfmtx)
df<- data.frame(date=seq.Date(from = as.Date("2001-01-01"), to = Sys.Date(), by="years")[1:20],
dfmtx, stringsAsFactors = F)
hc<- highcharter::highchart() %>%
highcharter::hc_chart(type = "column") %>%
#highcharter::hc_plotOptions(column=list(pointWidth=45, pointPadding=0, groupPadding=0.1, padding=0)) %>%
highcharter::hc_plotOptions(column = list(stacking = "normal"), series=list(cropThreshold=200)) %>%
highcharter::hc_xAxis(categories = df$date, title=list(text="Fake Date")) %>%
highcharter::hc_title(text=paste0("Bars Get Skinny When You Add 50"))
for(i in 2:50){ # Smiles...This Works!
#for(i in 2:51){ # Tears..skinny bars :(
the_name<- colnames(df)[i]
hc<- hc %>%
highcharter::hc_add_series(name=the_name,
data = df[,i],
stack = "SameStack")
}
# Pretty colors...why not?
cols<- viridisLite::viridis(n=length(hc$x$hc_opts$series))
cols<- base::substr(cols, 0,7)
hc<- hc %>%
highcharter::hc_yAxis(title=list(text="Proportion"), max=1) %>%
highcharter::hc_colors(cols) %>%
highcharter::hc_legend(align="center")
hc
You can set the width and height of the chart using hc_size().
for(i in 2:51){ # Tears..skinny bars :(
the_name<- colnames(df)[i]
hc<- hc %>%
highcharter::hc_add_series(name=the_name,
data = df[,i],
stack = "SameStack")
}
# Pretty colors...why not?
cols<- viridisLite::viridis(n=length(hc$x$hc_opts$series))
cols<- base::substr(cols, 0,7)
hc<- hc %>%
highcharter::hc_yAxis(title=list(text="Proportion"), max=1) %>%
highcharter::hc_colors(cols) %>%
highcharter::hc_legend(align="center") %>%
highcharter::hc_size(height = 800) #Setting chart height to 800.
hc
I asked a similar question a few days ago and have made some progress by talking with some people, but I need a little more help to get this to the point I wanted. So here's where I am:
I want to have a data table of a certain size with columns of data (different scales) and be able to apply a color range (kind of like a heat map) to the values in the cells of the data table depending on how large or small the values in the columns are.
Here is some code that was provided to me:
library(DT)
testrun <- round(runif(100), 6)
data <- data.frame(testrun = testrun)
brks <- quantile(data$testrun, probs = seq(.05, .95, .01), na.rm = TRUE)
clrs_df <- colorRamp(c("white","blue"))(c(0,brks)) %>%
as_tibble(.name_repair ="minimal") %>%
setNames(nm=c("r","g","b")) %>%
mutate_all(~as.character(round(.,digits=0))) %>%
mutate(mycolor=paste0("rgb(",paste(r,g,b,sep = ","),")"))
clrs <- pull(clrs_df,mycolor)
DT::datatable(data,rownames=TRUE,options = list(lengthChange = FALSE, dom='t')) %>%
formatStyle(colnames(data), backgroundColor = styleInterval(brks, clrs))
The above example works for one column of data, but I intend to have multiple columns of data for which I want these color gradients to apply to each column individually.
I've tried substituting mtcars for testrun in the assignment portion of the code and in the brks assignment line, but it just leaves me with a table with no colors applied.
Can someone help point me in the right direction? Any help would be appreciated!
Thank you!
I think you want:
library(DT)
data <- data.frame(
C1 = rnorm(100),
C2 = rgamma(100, 10, 10)
)
dtable <- datatable(data, rownames=TRUE, options = list(lengthChange = FALSE, dom='t'))
colRamp <- colorRamp(c("white","blue"))
for(column in names(data)){
x <- na.omit(data[[column]])
brks <- quantile(x, probs = seq(.05, .95, .01))
RGB <- colRamp(c(0, (brks-min(x))/(max(x)-min(x))))
clrs <- apply(RGB, 1, function(rgb){
sprintf("rgb(%s)", toString(round(rgb,0)))
})
dtable <- dtable %>%
formatStyle(column, backgroundColor = styleInterval(brks, clrs))
}
dtable
Using the VennDiagram package, we can make a venn diagram like so with the venn.diagram() function like so:
library(tidyverse)
library(hrbrthemes)
library(tm)
library(proustr)
# Load dataset from github
data <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/14_SeveralIndepLists.csv", header=TRUE)
to_remove <- c("_|[0-9]|\\.|function|^id|script|var|div|null|typeof|opts|if|^r$|undefined|false|loaded|true|settimeout|eval|else|artist")
data <- data %>% filter(!grepl(to_remove, word)) %>% filter(!word %in% stopwords('fr')) %>% filter(!word %in% proust_stopwords()$word)
# library
library(VennDiagram)
#Make the plot
venn.diagram(
x = list(
data %>% filter(artist=="booba") %>% select(word) %>% unlist() ,
data %>% filter(artist=="nekfeu") %>% select(word) %>% unlist() ,
data %>% filter(artist=="georges-brassens") %>% select(word) %>% unlist()
),
category.names = c("Booba (1995)" , "Nekfeu (663)" , "Brassens (471)"),
filename = 'venn.png',
output = TRUE ,
imagetype="png" ,
height = 480 ,
width = 480 ,
resolution = 300,
compression = "lzw",
lwd = 1,
col=c("#440154ff", '#21908dff', '#fde725ff'),
fill = c(alpha("#440154ff",0.3), alpha('#21908dff',0.3), alpha('#fde725ff',0.3)),
cex = 0.5,
fontfamily = "sans",
cat.cex = 0.3,
cat.default.pos = "outer",
cat.pos = c(-27, 27, 135),
cat.dist = c(0.055, 0.055, 0.085),
cat.fontfamily = "sans",
cat.col = c("#440154ff", '#21908dff', '#fde725ff'),
rotation = 1
)
This results in a .png written to the working directly.
How can it instead be viewed in the RStudio viewer pane, and also used in RMarkdown docs etc (i.e. just in the same way a regular ggplot or base plots would be viewed)?
Also note, the same question applies to any of the examples found in the ?
venn.diagram documentation (they all seem to write to file instead of display in the RStudio viewer)
This should also do the job. I deleted the arguments for readability:
...
plt <- venn.diagram(
filename = NULL,
cex = 1,
cat.cex = 1,
lwd = 2,
)
grid::grid.draw(plt)
From ?venn.diagram
filename
Filename for image output, or if NULL returns the grid object itself
It seems, you can control almost anything. Again the docs:
... A series of graphical parameters tweaking the plot. See below for
details Details
Argument Venn Sizes Class Description
cex 1,2,3,4,5 numeric Vector giving the size for each area label (length = 1/3/7/15 based on set-number)
Thus we need to be able to display grid objects. plot() and print() don't do this job (it seems there is not print.grid()).
I usually do:
library(VennDiagram)
set.seed(1)
list1 <- list(A=sample(LETTERS, 12), B=sample(LETTERS, 12))
venn1 <- venn.diagram(list1, filename = NULL)
grid.newpage()
grid.draw(venn1)
I think it still writes a log file into the working directory, but not the graph.
You can put two diagrams side by side like this:
library(gridExtra)
set.seed(2)
list2 <- list(A=sample(LETTERS, 16), B=sample(LETTERS, 12))
venn2 <- venn.diagram(list2, filename = NULL)
grid.arrange(gTree(children=venn1),
gTree(children=venn2),
ncol=2)
Created on 2020-04-23 by the reprex package (v0.3.0)
I figured out a way - there may be better way(s). This involves writing to tempfile() instead of a file in the working directory and then reading it in with a few extra lines of code
Note: the only changes to the original code are the addition of
1 extra line at the start temp_file <- tempfile()
the rewriting of filename = 'venn.png' into filename = temp_file
3 extra lines at the bottom
# Libraries
library(tidyverse)
library(hrbrthemes)
library(tm)
library(proustr)
# Load dataset from github
data <- read.table("https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/14_SeveralIndepLists.csv", header=TRUE)
to_remove <- c("_|[0-9]|\\.|function|^id|script|var|div|null|typeof|opts|if|^r$|undefined|false|loaded|true|settimeout|eval|else|artist")
data <- data %>% filter(!grepl(to_remove, word)) %>% filter(!word %in% stopwords('fr')) %>% filter(!word %in% proust_stopwords()$word)
# library
library(VennDiagram)
temp_file <- tempfile()
#Make the plot
venn.diagram(
x = list(
data %>% filter(artist=="booba") %>% select(word) %>% unlist() ,
data %>% filter(artist=="nekfeu") %>% select(word) %>% unlist() ,
data %>% filter(artist=="georges-brassens") %>% select(word) %>% unlist()
),
category.names = c("Booba (1995)" , "Nekfeu (663)" , "Brassens (471)"),
filename = temp_file,
output = TRUE ,
imagetype="png" ,
height = 480 ,
width = 480 ,
resolution = 300,
compression = "lzw",
lwd = 1,
col=c("#440154ff", '#21908dff', '#fde725ff'),
fill = c(alpha("#440154ff",0.3), alpha('#21908dff',0.3), alpha('#fde725ff',0.3)),
cex = 0.5,
fontfamily = "sans",
cat.cex = 0.3,
cat.default.pos = "outer",
cat.pos = c(-27, 27, 135),
cat.dist = c(0.055, 0.055, 0.085),
cat.fontfamily = "sans",
cat.col = c("#440154ff", '#21908dff', '#fde725ff'),
rotation = 1
)
# https://stackoverflow.com/a/20909108/5783745
library(png)
img <- readPNG(temp_file)
grid::grid.raster(img)
This is an assignment that I have to boxplot() but I somehow got the data squeezed. I'm new to R :(
I guess the problem is because the x axis labels are too long and not placed vertically, so I've tried and failed (based on this Inserting labels in box plot in R on a 45 degree angle?)
examples <- read.csv("mov.development.csv", sep="\t")
library(dplyr)
movies_rated_67_times <- examples %>%
group_by(movie) %>%
summarize(count=n(), avg_rating=mean(rating))%>%
filter(count == 67)
boxplot_data <- examples %>%
filter(movie %in% movies_rated_67_times$movie) %>%
select(title, rating)
boxplot(rating~title,
data=boxplot_data,
xlab="Title",
ylab="Rating",
xaxt = "n"
)
text(seq_along(boxplot_data$title), par("usr")[3] - 0.5, labels = names(boxplot_data$title), srt = 90, adj = 1, xpd = TRUE);
I want to have a plot like this
But I got this
But with a different type of labels that are not too long, normal code would work
Normal code:
examples <- read.csv("mov.development.csv", sep="\t")
library(dplyr)
movies_rated_67_times <- examples %>%
group_by(movie) %>%
summarize(count=n(), avg_rating=mean(rating))%>%
filter(count == 67)
boxplot_data <- examples %>%
filter(movie %in% movies_rated_67_times$movie) %>%
select(movie, rating)
boxplot(rating~movie,
data=boxplot_data,
xlab="Title",
ylab="Rating"
)
csv file: https://drive.google.com/file/d/1ODM7qdOVI2Sua7HMHGEfNdYz_R1jhGAD/view?usp=sharing
Transforming your title column from factor to character seems to fix it. Additionally I would insert line breaks into some of the movies names and reduce the text size so it fit's into the plot
boxplot_data <- examples %>%
filter(movie %in% movies_rated_67_times$movie) %>%
mutate(title = as.character(title)) %>%
select(title, rating)
boxplot_data[boxplot_data$title == "Adventures of Robin Hood, The (1938)",]$title <- "Adventures of Robin Hood,\nThe (1938)"
boxplot_data[boxplot_data$title == "Wallace & Gromit: The Best of Aardman Animation (1996)",]$title <- " Wallace & Gromit: The Best of\nAardman Animation (1996)"
boxplot_data[boxplot_data$title == "Bridges of Madison County, The (1995)",]$title <- "Bridges of Madison County,\nThe (1995)"
par(cex.axis = 0.7)
boxplot(rating~title,
data=boxplot_data,
xlab="Title",
ylab="Rating")
I have boxplots on highcharter and I would like to customize both the
Fill color
Border color
Here is my code
df = data.frame(cbind(categ = rep(c('a','b','c','d')),value = rnorm(1000)))
hcboxplot(var = df$categ, x = as.numeric(df$value)) %>%
hc_chart(type = "column") %>%
hc_colors(c("#203d7d","#a0a0ed","#203d7e","#a0a0ad"))
The hc_colors works only if I put var2 instead of var but then the box plot are shrunken...
API for styling fillColor: https://api.highcharts.com/highcharts/series.boxplot.fillColor
And for "Border color": https://api.highcharts.com/highcharts/series.boxplot.color
Pure JavaScript example of how to style and define points: https://jsfiddle.net/BlackLabel/6tud3fgx
And R code:
library(highcharter)
df = data.frame(cbind(categ = rep(c('a','b','c','d', 'e')),value = rnorm(1000)))
hcboxplot(var = df$categ, x = as.numeric(df$value)) %>%
hc_chart(type = "column", events = list(
load = JS("function() {
var chart = this;
chart.series[0].points[2].update({
color: 'red'
})
chart.series[0].points[4].update({
x: 4,
low: 600,
q1: 700,
median: 800,
q3: 900,
high: 1000,
color: 'orange'
})
}")
)) %>%
hc_plotOptions(boxplot = list(
fillColor = '#F0F0E0',
lineWidth = 2,
medianColor = '#0C5DA5',
medianWidth = 3,
stemColor = '#A63400',
stemDashStyle = 'dot',
stemWidth = 1,
whiskerColor = '#3D9200',
whiskerLength = '20%',
whiskerWidth = 3,
color = 'black'
)) %>%
hc_colors(c("#203d7d","#a0a0ed","#203d7e","#a0a0ad"))
I made a couple functions to do some stuff with highcharts and boxplots. It will let you color each boxplot and fill it accordingly, and then inject new graphical parameters according to the Highcharts API, should you desire.
Check it out:
## Boxplots Data and names, note the data index (0,1,2) is the first number in the datum
series<- list(
list(
name="a",
data=list(c(0,1,2,3,4,5))
),
list(
name="b",
data=list(c(1,2,3,4,5,6))
),
list(
name="c",
data=list(c(2,3,4,5,6,7))
)
)
# Graphical attribute to be set: fillColor.
# Make the colors for the box fill and then also the box lines (make them match so it looks pretty)
cols<- viridisLite::viridis(n= length(series2), alpha = 0.5) # Keeping alpha in here! (for box fill)
cols2<- substr(cols, 0,7) # no alpha, pure hex truth, for box lines
gen_key_vector<-function(variable, num_times){
return(rep(variable, num_times))
}
kv<- gen_key_vector(variable = "fillColor", length(series))
# Make a function to put stuff in the 'series' list, requires seq_along to be used since x is the list/vector index tracker
add_variable_to_series_list<- function(x, series_list, key_vector, value_vector){
base::stopifnot(length(key_vector) == length(value_vector))
base::stopifnot(length(series_list) == length(key_vector))
series_list[[x]][length(series_list[[x]])+1]<- value_vector[x]
names(series_list[[x]])[length(series_list[[x]])]<- key_vector[x]
return(series_list[[x]])
}
## Put the extra stuff in the 'series' list
series2<- lapply(seq_along(series), function(x){ add_variable_to_series_list(x = x, series_list = series, key_vector = kv, value_vector = cols) })
hc<- highcharter::highchart() %>%
highcharter::hc_chart(type="boxplot", inverted=FALSE) %>%
highcharter::hc_title(text="This is a title") %>%
highcharter::hc_legend(enabled=FALSE) %>%
highcharter::hc_xAxis(type="category", categories=c("a", "b", "c"), title=list(text="Some x-axis title")) %>%
highcharter::hc_add_series_list(series2) %>%
hc_plotOptions(series = list(
marker = list(
symbol = "circle"
),
grouping=FALSE
)) %>%
highcharter::hc_colors(cols2) %>%
highcharter::hc_exporting(enabled=TRUE)
hc
This probably could be adjusted to work with a simple dataframe, but I think it will get you what you want for right now without having to do too much extra work. Also, maybe look into list_parse or list_parse2' fromhighcharter...it could probably help with building out theseries` object..I still need to look into that.
Edit:
I have expanded the example to make it work with a regular DF. As per some follow up questions, the colors are set using the viridis palette inside the make_highchart_boxplot_with_colored_factors function. If you want to allow your own palette and colors, you could expose those arguments and just include them as parameters inside the function call. The expanded example borrows how to add outliers from the highcharter library (albeit in a hacky way) and then builds everything else up from scratch. Hopefully this helps clarify my previous answer. Please note, I could probably also clean up the if condition to make it a little more brief, but I kept it verbose for illustrative purposes.
Double Edit: You can now specify a vector of colors for each level of the factor variable
library(highcharter)
library(magrittr)
library(viridisLite)
df = data.frame(cbind(categ = rep(c('a','b','c','d')),value = rnorm(1000)))
df$value<- base::as.numeric(df$value)
add_variable_to_series_list<- function(x, series_list, key_vector, value_vector){
base::stopifnot(length(key_vector) == length(value_vector))
base::stopifnot(length(series_list) == length(key_vector))
series_list[[x]][length(series_list[[x]])+1]<- value_vector[x]
names(series_list[[x]])[length(series_list[[x]])]<- key_vector[x]
return(series_list[[x]])
}
# From highcharter github pages:
hc_add_series_bwpout = function(hc, value, by, ...) {
z = lapply(levels(by), function(x) {
bpstats = boxplot.stats(value[by == x])$stats
outliers = c()
for (y in na.exclude(value[by == x])) {
if ((y < bpstats[1]) | (y > bpstats[5]))
outliers = c(outliers, list(which(levels(by)==x)-1, y))
}
outliers
})
hc %>%
hc_add_series(data = z, type="scatter", ...)
}
gen_key_vector<-function(variable, num_times){
return(rep(variable, num_times))
}
gen_boxplot_series_from_df<- function(value, by,...){
value<- base::as.numeric(value)
by<- base::as.factor(by)
box_names<- levels(by)
z=lapply(box_names, function(x) {
boxplot.stats(value[by==x])$stats
})
tmp<- lapply(seq_along(z), function(x){
var_name_list<- list(box_names[x])
#tmp0<- list(names(df)[x])
names(var_name_list)<- "name"
index<- x-1
tmp<- list(c(index, z[[x]]))
tmp<- list(tmp)
names(tmp)<- "data"
tmp_out<- c(var_name_list, tmp)
#tmp<- list(tmp)
return(tmp_out)
})
return(tmp)
}
# Usage:
#series<- gen_boxplot_series_from_df(value = df$total_value, by=df$asset_class)
## Boxplot function:
make_highchart_boxplot_with_colored_factors<- function(value, by, chart_title="Boxplots",
chart_x_axis_label="Values", show_outliers=FALSE,
boxcolors=NULL, box_line_colors=NULL){
by<- as.factor(by)
box_names_to_use<- levels(by)
series<- gen_boxplot_series_from_df(value = value, by=by)
if(is.null(boxcolors)){
cols<- viridisLite::viridis(n= length(series), alpha = 0.5) # Keeping alpha in here! (COLORS FOR BOXES ARE SET HERE)
} else {
cols<- boxcolors
}
if(is.null(box_line_colors)){
if(base::nchar(cols[[1]])==9){
cols2<- substr(cols, 0,7) # no alpha, pure hex truth, for box lines
} else {
cols2<- cols
}
} else {
cols2<- box_line_colors
}
# Injecting value 'fillColor' into series list
kv<- gen_key_vector(variable = "fillColor", length(series))
series2<- lapply(seq_along(series), function(x){ add_variable_to_series_list(x = x, series_list = series, key_vector = kv, value_vector = cols) })
if(show_outliers == TRUE){
hc<- highcharter::highchart() %>%
highcharter::hc_chart(type="boxplot", inverted=FALSE) %>%
highcharter::hc_title(text=chart_title) %>%
highcharter::hc_legend(enabled=FALSE) %>%
highcharter::hc_xAxis(type="category", categories=box_names_to_use, title=list(text=chart_x_axis_label)) %>%
highcharter::hc_add_series_list(series2) %>%
hc_add_series_bwpout(value = value, by=by, name="Outliers") %>%
hc_plotOptions(series = list(
marker = list(
symbol = "circle"
),
grouping=FALSE
)) %>%
highcharter::hc_colors(cols2) %>%
highcharter::hc_exporting(enabled=TRUE)
} else{
hc<- highcharter::highchart() %>%
highcharter::hc_chart(type="boxplot", inverted=FALSE) %>%
highcharter::hc_title(text=chart_title) %>%
highcharter::hc_legend(enabled=FALSE) %>%
highcharter::hc_xAxis(type="category", categories=box_names_to_use, title=list(text=chart_x_axis_label)) %>%
highcharter::hc_add_series_list(series2) %>%
hc_plotOptions(series = list(
marker = list(
symbol = "circle"
),
grouping=FALSE
)) %>%
highcharter::hc_colors(cols2) %>%
highcharter::hc_exporting(enabled=TRUE)
}
hc
}
# Usage:
tst_box<- make_highchart_boxplot_with_colored_factors(value = df$value, by=df$categ, chart_title = "Some Title", chart_x_axis_label = "Some X Axis", show_outliers = TRUE)
tst_box
# Custom Colors:
custom_colors_with_alpha_in_hex<- paste0(gplots::col2hex(sample(x=colors(), size = length(unique(df$categ)), replace = FALSE)), "80")
tst_box2<- make_highchart_boxplot_with_colored_factors(value = df$value, by=df$categ, chart_title = "Some Title",
chart_x_axis_label = "Some X Axis",
show_outliers = TRUE, boxcolors = custom_colors_with_alpha_in_hex)
tst_box2
tst_box3<- make_highchart_boxplot_with_colored_factors(value = df$value, by=df$categ, chart_title = "Some Title",
chart_x_axis_label = "Some X Axis",
show_outliers = TRUE, boxcolors = custom_colors_with_alpha_in_hex, box_line_colors = "black")
tst_box3
I hope this helps, please let me know if you have any more questions. I'm happy to try to help as best I can.
-nate
Since there's no highcharter answer yet, I give you at least a base solution.
First, your definition of the data frame is somewhat flawed, rather do:
dat <- data.frame(categ=c('a','b','c','d'), value=rnorm(1000))
Now, using boxplot is quite straightforward. border option colors your borders. With option col you also could color the fills.
boxplot(value ~ categ, dat, border=c("#203d7d","#a0a0ed","#203d7e","#a0a0ad"), pars=list(outpch=16))
Gives
Note: See this nice solution for further customizations.