So I was reading this post and I fell a little in love with the calendar heat map with Tetris-style month breaks.
However, the ggplot example doesn't implement the Tetris breaks, which are arguably the best part.
So, FTFY, gist here:
The procedure for this is:
create appropriate Tetris breaks for your data
left_join your data to the Tetris breaks created in (1)
pump the above through ggplot with some specially crafted geoms
The methodology for (1) is reasonably straightforward, implemented in the calendar_tetris_data(...) function in the gist, though it would be nice to make it a little more flexible.
My question is mainly around (3): how do I bundle up the 7 geoms necessary to make the breaks into a single procedure or geom?
If I do this:
calendar_tetris_geoms <- function() {
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)) + # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)) + # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)) + # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)) + # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5) + # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5) + # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
}
And then try to add that to my ggplot, it doesn't work:
> ggplot(data) + calendar_tetris_geoms()
Error in calendar_tetris_geoms() :
argument "plot" is missing, with no default
I clearly don't understand how this works. How does this work?
Modifying #baptiste's suggestion, if I do this:
calendar_tetris_geoms <- function() {
list(
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)), # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)), # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)), # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)), # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5), # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5), # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
)
}
Then this works a treat:
calendar_tetris_data(min(stock.data$date), max(stock.data$date)) %>%
left_join(stock.data) %>%
ggplot() +
geom_tile(aes(x=week, y=wday2factor(wday), fill = Adj.Close), colour = "white") +
calendar_tetris_geoms() +
facet_wrap(~ year, ncol = 1)
Update 2019-08-06 - Pulling everything into one post to make a Tetris Calendar Heat Map
Sample date data.
This is a stand in for your date data.
mydatedata<-as.Date(paste(sample(c(2018:2019), 3000, replace = TRUE), # year
sample(c(1:12), 3000, replace = TRUE), # month
sample(c(1:28), 3000, replace = TRUE), # day
sep="-"))
Create a data frame summarizing your data
Replace mydatedata with your df$date field.
newdf<-as.data.frame(table(mydatedata), stringsAsFactors = FALSE);
names(newdf)<-c("date", "n")
newdf$date<-as.Date(newdf$date, format='%Y-%m-%d')
Create Calendar Tetris Data Functions
Note: I created a weekday label, renamed several functions to avoid name collision, and moved the the helper functions inside the main function.
Original source links:
1) https://gist.github.com/dvmlls/5f46ad010bea890aaf17
2) calendar heat map tetris chart
calendar_tetris_data <- function(date_min, date_max) {
year2 <- function(d) as.integer(format(d, '%Y'))
wday2 <- function(d) {
n <- as.integer(format(d, '%u'))
ifelse(n==7, 0, n) + 1 # I want the week to start on Sunday=1, so turn 7 into 0.
}
wday2factor <- function(wd) factor(wd, levels=1:7, labels=c('Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'))
week2 <- function(d, year) {
# If January 1st is a Sunday, my weeks will start from 1 instead of 0 like the rest of them.
nyd <- as.Date(ISOdate(year, 1, 1))
# So if that's the case, subtract 1.
as.integer(format(d, '%U')) - ifelse(wday2(nyd) == 1, 1, 0)
}
start <- as.Date(ISOdate(year2(min(date_min)),1,1))
end <- as.Date(ISOdate(year2(max(date_max)), 12, 31))
all.dates <- start + 0:as.integer(end - start, units='days')
data.frame(date=all.dates) %>% tbl_df %>%
mutate(
wday=wday2(date),
year=year2(date),
month=as.integer(format(date, '%m')),
week=week2(date, year),
day=as.integer(format(date, '%d')),
weekday=wday2factor(wday), #20190806, adding weekday label
# (a) put vertical lines to the left of the first week of each month
x=ifelse(day <= 7, week - 0.5, NA),
ymin=ifelse(day <= 7, wday - 0.5, NA),
ymax=ifelse(day <= 7, wday + 0.5, NA),
# (b) put a horizontal line at the bottom of the first of each month
y=ifelse(day == 1, wday - 0.5, NA),
xmin=ifelse(day == 1, week - 0.5, NA),
xmax=ifelse(day == 1, week + 0.5, NA),
# (c) in december, put vertical lines to the right of the last week
dec.x=ifelse(month==12 & day >= 25, week + 0.5, NA),
dec.ymin=ifelse(month==12 & day >= 25, wday - 0.5, NA),
dec.ymax=ifelse(month==12 & day >= 25, wday + 0.5, NA),
# (d) put a horizontal line at the top of New Years Eve
nye.y=ifelse(month==12 & day == 31, wday + 0.5, NA),
nye.xmin=ifelse(month==12 & day == 31, week - 0.5, NA),
nye.xmax=ifelse(month==12 & day == 31, week + 0.5, NA),
# (e) put the first letter of the month on the first day
month.x=ifelse(day == 1, week, NA),
month.y=ifelse(day == 1, wday, NA),
month.l=ifelse(day == 1, substr(format(date, '%B'), 1, 3), NA)
)
}
Create the ggplot2 geom:
calendar_tetris_geoms <- function() {
list(
geom_segment(aes(x=x, xend=x, y=ymin, yend=ymax)), # (a)
geom_segment(aes(x=xmin, xend=xmax, y=y, yend=y)), # (b)
geom_segment(aes(x=dec.x, xend=dec.x, y=dec.ymin, yend=dec.ymax)), # (c)
geom_segment(aes(x=nye.xmin, xend=nye.xmax, y=nye.y, yend=nye.y)), # (d)
geom_segment(x=-0.5, xend=51.5, y=7.5, yend=7.5), # put a line along the top
geom_segment(x=0.5, xend=52.5, y=0.5, yend=0.5), # put a line along the bottom
geom_text(aes(x=month.x, y=month.y, label=month.l), hjust=0.25) # (e)
)
}
Create the plot:
library(ggplot2)
library(dplyr) # for %>% pipe
calendar_tetris_data(min(newdf$date), max(newdf$date)) %>%
left_join(newdf) %>%
ggplot() +
geom_tile(aes(x=week, y=weekday, fill = n), colour = "white") +
calendar_tetris_geoms() +
facet_wrap(~ year, ncol = 1)
Related
I want to identify 3d cylinders in an rgl plot to obtain one attribute of the nearest / selected cylinder. I tried using labels to simply spell out the attribute, but I work on data with more than 10.000 cylinders. Therefore, it gets so crowded that the labels are unreadable and it takes ages to render.
I tried to understand the documentation of rgl and I guess the solution to my issue is selecting the cylinder in the plot manually. I believe the function selectpoints3d() is probably the way to go. I believe it returns all vertices within the drawn rectangle, but I don't know how to go back to the cylinder data? I could calculate which cylinder is closest to the mean of the selected vertices, but this seems like a "quick & dirty" way to do the job.
Is there a better way to go? I noticed the argument value=FALSE to get the indices only, but I don't know how to go back to the cylinders.
Here is some dummy data and my code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# get attribute
nearby <- selectpoints3d(value=TRUE, button = "right")
nearby <- colMeans(nearby)
cylinder$dist <- sqrt(
(nearby["x"]-cylinder$center_X)**2 +
(nearby["y"]-cylinder$center_Y)**2 +
(nearby["z"]-cylinder$center_Z)**2)
cylinder$attribute[which.min(cylinder$dist)]
If you call selectpoints3d(value = FALSE), you get two columns. The first column is the id of the object that was found. Your cylinders get two ids each. One way to mark the cylinders is to use "tags". For example, this modification of your code:
# dummy data
cylinder <- data.frame(
start_X = rep(1:3, 2)*2,
start_Y = rep(1:2, each = 3)*2,
start_Z = 0,
end_X = rep(1:3, 2)*2 + round(runif(6, -1, 1), 2),
end_Y = rep(1:2, each = 3)*2 + round(runif(6, -1, 1), 2),
end_Z = 0.5,
radius = 0.25,
attribute = sample(letters[1:6], 6)
)
# calculate centers
cylinder$center_X <- rowMeans(cylinder[,c("start_X", "end_X")])
cylinder$center_Y <- rowMeans(cylinder[,c("start_Y", "end_Y")])
cylinder$center_Z <- rowMeans(cylinder[,c("start_Z", "end_Z")])
# create cylinders
cylinder_list <- list()
for (i in 1:nrow(cylinder)) {
cylinder_list[[i]] <- cylinder3d(
center = cbind(
c(cylinder$start_X[i], cylinder$end_X[i]),
c(cylinder$start_Y[i], cylinder$end_Y[i]),
c(cylinder$start_Z[i], cylinder$end_Z[i])),
radius = cylinder$radius[i],
closed = -2)
# Add tag here:
cylinder_list[[i]]$material$tag <- cylinder$attribute[i]
}
# plot cylinders
open3d()
par3d()
shade3d(shapelist3d(cylinder_list, plot = FALSE), col = "blue")
text3d(cylinder$center_X+0.5, cylinder$center_Y+0.5, cylinder$center_Z+0.5, cylinder$attribute, color="red")
# Don't get values, get the ids
nearby <- selectpoints3d(value=FALSE, button = "right", closest = FALSE)
ids <- nearby[, "id"]
# Convert them to tags. If you select one of the labels, you'll get
# a blank in the list of tags, because we didn't tag the text.
unique(tagged3d(id = ids))
When I was trying this, I found that using closest = TRUE in selectpoints3d seemed to get too many ids; there may be a bug there.
Following a former question I opened few weeks ago:
Slope Chart - ggplot2
I face another issue, concerning the numeric values reported in the graph. Even specifying the decimal digits I need (exactly 3) with any of the two commands:
y=round(y, digit = 3) at the endof the code
or
options(digits=3) at the beginning of the whole code
The graphical output, doesn't give me the desired number of digits but only concerning 0. In the graph, I wanted to have 0.800 (not 0.8) and 0.940 (not 0.94). It looks like it removes 0. Below the graphical output from R, I circled in red the number I intended to change.
Below the whole code:
library(dplyr)
library(ggplot2)
#options(digits=3)
theme_set(theme_classic())
#### Data
df <- structure(list(group = c("Ups", "Ups", "Ups", "Ups", "Ups"),
yshift = c(0, 0, 0, 0, 0), x = structure(1:5, .Label = c("1 day",
"2 days", "3 days", "5 days", "7 days"), class = "factor"),
y = c(0.108, 0.8, 0.94, 1.511, 1.905), ypos = c(0.10754145,
0.8, 0.94, 1.5111111, 1.90544651164516)), row.names = c(1L,
3L, 5L, 7L, 9L), class = "data.frame")
# Define functions. Source: https://github.com/jkeirstead/r-slopegraph
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=y), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
## Plot
plot_slopegraph(df) + labs(title="Monomer content after days of heating")
I am making any mistake or missing something? Is there any other way to force 0 digits?
Thank you in advance for every eventual reply or comment.
I like the scales package functions for things like this (though you could certainly use formatC or sprintf instead).
I've modified plot_slopegraph to use label=scales::label_number(accuracy = 0.001)(y)) in the geom_text():
plot_slopegraph <- function(df) {
ylabs <- subset(df, x==head(x,1))$group
yvals <- subset(df, x==head(x,1))$ypos
fontSize <- 3
gg <- ggplot(df,aes(x=x,y=ypos)) +
geom_line(aes(group=group),colour="grey80") +
geom_point(colour="white",size=8) +
geom_text(aes(label=scales::label_number(accuracy = 0.001)(y)), size=fontSize, family="American Typewriter") +
scale_y_continuous(name="", breaks=yvals, labels=ylabs)
return(gg)
}
plot_slopegraph(df)
Currently I'm creating multiple plots with regional data and save them to a PDF file. This works without problems, thanks to an SO post I've found (use grid.arrange over multiple pages or marrangeGrob with a layout_matrix).
This is my code so far:
library(ggplot2)
library(gridExtra)
library(dplyr)
data <- data.frame(
region = c("region 1", "region 2", "region 3", rep("region 4", 2), rep("region 5", 2)),
countries = c("country 1", "country 2", "country 3", "country 4", "country 5", "country 6", "country 7"),
dummydata1 = c(rep(1, 7)),
dummydata2 = c(rep(2, 7))
)
criterias <- list()
criterias[[ 'region_1' ]] <- data %>% filter(region == 'region 1')
criterias[[ 'region_2' ]] <- data %>% filter(region == 'region 2')
criterias[[ 'region_3' ]] <- data %>% filter(region == 'region 3')
criterias[[ 'region_4' ]] <- data %>% filter(region == 'region 4')
criterias[[ 'region_5' ]] <- data %>% filter(region == 'region 5')
# This layout matrix should be used for the regional plots
# Don't wonder about the strange numbering, some plots came later
# and it was easier to modify the matrix then all other functions.
regionLayout <- rbind(
c(1,1,1,1,1,2),
c(NULL,NULL,3,3,NULL,NULL),
c(9,9,4,4,10,10),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7),
c(6,6,6,7,7,7)
)
# This is just a dummy function
# The actual function creates several plots based on the real data
createRegionalPlots <- function (data, region) {
examplePlots <- list(ggplot() + ggtitle('Title (ggtext = plot 1)'),
ggplot() + ggtitle('Month (ggtext = plot 2)'),
ggplot() + ggtitle('Plot 1 (tile = 3)'),
ggplot() + ggtitle('Plot 2 (tile = 4)'),
ggplot() + ggtitle('Plot 3 (geom_bar = 5)'),
ggplot() + ggtitle('Plot 4 (geom_bar = 6)'),
ggplot() + ggtitle('Plot 5 (tile = 7)'),
ggplot() + ggtitle('Plot 6 (tile = 8)'))
}
# Found in https://stackoverflow.com/questions/43491685/
preparePage <- function(plots,layoutMatrix) {
# pdf(file = NULL) #invisible
par(mar=(c(5,5,5,5)))
plotsPerPage <- length(unique(na.omit(c(layoutMatrix))))
ml <- lapply(1:ceiling(length(plots)/plotsPerPage), function(page_IND){
ind <- (1 + ((page_IND - 1) * plotsPerPage )) : (page_IND * plotsPerPage)
grid.arrange(grobs = plots[ind], layout_matrix = layoutMatrix)
})
return(marrangeGrob(grobs=ml,nrow=1,ncol=1,top=NULL))
# dev.off() #invisible
}
# Here I'm running through all regions
regionalPlotList <- list()
for (region in names(criterias)) {
regionData <- criterias[[region]]
regionalPlots <- createRegionalPlots(data = regionData, region = region)
regionalPlotList <- do.call(c, list(regionalPlotList, regionalPlots))
}
# This leaves me with a list of 40 plots (5 regions x 8 plots)
allPlots <- preparePage(regionalPlotList, regionLayout)
ggsave("example.pdf",width = 297, height = 210, units = "mm", plot = allPlots)
As said, this works perfectly and leaves me (using the current data) with a five page report, one per every region and with the required layout.
I have now been asked to add additional per country plots at the end of the regional report and these pages should have a different layout (and different plots).
Overestimating myself (and my knowledge of r resp. ggplot) once again, I thought of this as an easy job (which it probably is for everyone else, but I'm stuck).
So, I've created a list of new criterias and a function, including a new layout:
createCountryPlots <- function(data, country) {
exampleCountryPlots <- list(ggplot() + ggtitle('Title (ggtext = plot 1)'),
ggplot() + ggtitle('Month (ggtext = plot 2)'),
ggplot() + ggtitle('Plot 1 (bar = 3)'),
ggplot() + ggtitle('Plot 2 (pie = 4)'),
ggplot() + ggtitle('Plot 3 (geom_bar = 5)'),
ggplot() + ggtitle('Plot 4 (geom_bar = 6)')
)
}
countryLayout = rbind(
c(1, 1, 1, 1, 1, 2),
c(3, 3, 3, 4, 4, 4),
c(3, 3, 3, 4, 4, 4),
c(3, 3, 3, 4, 4, 4),
c(5, 5, 5, 6, 6, 6),
c(5, 5, 5, 6, 6, 6),
c(5, 5, 5, 6, 6, 6)
)
# prepare the data per country
countryCriterias <- list()
countryCriterias[[ 'country_1' ]] <- data %>% filter(country == 'country 1')
countryCriterias[[ 'country_2' ]] <- data %>% filter(country == 'country 2')
# Running through all selected countries
countryPlotList <- list()
for (country in names(countryCriterias)) {
countryData <- countryCriterias[[country]]
countryPlots <- createCountryPlots(data = countryData, country = country)
countryPlotList <- do.call(c, list(countryPlotList, countryPlots))
}
countryPlots <- preparePage(countryPlotList, countryLayout)
# Just saving the country plots works perfectly again
ggsave("example.pdf",width = 297, height = 210, units = "mm", plot = countryPlots)
Saving this plots in a separate file works without any problems, but I'm currently stuck on how to combine these plots in one single PDF, respecting the different layouts the pages should have.
I've tried several possibilities (i.e. grid.arrange and arrangeGrob etc.), but I haven't been able to combine the plots into a single file.
Could anyone please enlighten me?
Edit:
Sorry, if I didn't make myself clear enough. This would be the result I should have at the end.
Thanks to the hint by #teunbrand to have a look at the patchwork package, I've found a solution to my problem.
It's in general almost the same as before, but instead of trying to arrange the plots first and then saving them, I "print" them directly to a pdf in the for-loop.
# defininig the layouts (simplified)
regionLayout <- "
AAAAAB
##CC##
DDEEFF
GGGHHH
GGGHHH"
countryLayout <- "
AAAAAB
CCCCDD
CCCCDD
EEEEFF
EEEEFF
"
# opening pdf
pdf('example5.pdf', pagecentre = FALSE, width = 29.7/2.54, height = 21/2.54)
par(mar = c(5, 5, 5, 5), oma = c(1, 1, 1, 1))
for (region in names(criterias)) {
regionData <- criterias[[region]]
regionalPlots <- createRegionalPlots(data = regionData, region = region)
# as regionalPlots is a list of plots, I'm using wrap_plots, which can take a dynamic
# number of plots
print(wrap_plots(regionalPlots, design = regionLayout))
}
# then the same for the country plots, with a different layout
countryPlotList <- list()
for (country in names(countryCriterias)) {
countryData <- countryCriterias[[country]]
countryPlots <- createCountryPlots(data = countryData, country = country)
print(wrap_plots(countryPlots, design = countryLayout))
}
dev.off()
And at the end I have my PDF with seperate layouts...
Thank you all for your help!!!
PS: Took me a while to find out why the PDF always was empty, before I realized that wrap_plot just arranges the plots but does not print them. As said, relatively new to R (did I mention that?)
I'm new to rCharts, in fact this is my first attempt. So please forgive a naive question.
I'm trying to create a simple rCharts visual which has a only one horizontal line (X-axis) and no Y-axis. I want to be able to choose the length and each point in the line has mouseover which represents some data. Also I would like to add colors to some of the special points.
This seems very simple, but I'm having great difficulty in this.
library(rCharts)
age <- c(1:2000)
dot <- rep(1,2000)
name <- paste(letters[0], 1:2000, sep="")
df <- data.frame(age=age,dot=dot,name=name)
n1 <- nPlot(dot~age, data=df, type="scatterChart")
n1$chart(tooltipContent = "#! function(key,x,y,e){var d = e.series.values[e.pointIndex];return 'x:'+ x + 'y:' + y + 'name:' + d.name }!#")
n1
Now this will create a line with mouseover but the line in at y=1 and there are x and y axes also. I want just one line, something like a timeline with special events marked.
Thanks a lot.
Well, turning off the y-axis is fairly simple. I added some other ideas to the code.
library(rCharts)
age <- c(1:2000)
dot <- c(
rep(1,1000),
rep(2,1000)
)
name <- c(
rep(letters[1], 1000),
rep(letters[2], 1000)
)
df <- data.frame(age=age,dot=dot,name=name)
n1 <- nPlot(dot~age, data=df, group = "name", type="scatterChart")
n1$chart(
tooltipContent = "#! function(key,x,y,e){
var d = e.series.values[e.pointIndex]
return 'x:'+ x + 'y:' + y + 'name:' + d.name
}!#",
showYAxis = FALSE, #turns off y axis
forceY = c(0,4) #forces y axis to 0 min and 4 max
)
n1
While I think this solves the issue, I am anticipating some things. One is if you define each point, then the data will become large. We could change to lineChart to minimize data sent, but then the tooltip only shows on the points defined. I am sure there is a way to bind an event to the path to show a tooltip then also, but it is beyond my abilities. I would guess you might like the x to be a date format. I'll be happy to demo an example of that also if you would like.
n2 <- nPlot(
dot~age
, data=data.frame(
name = c(rep("A",2),rep("B",2)),
dot = c(1,1,2,2),
age = c(1,1000,1000,2000)
)
, group = "name"
, type="lineChart"
)
n2$chart(
tooltipContent = "#! function(key,x,y,e){
var d = e.series.values[e.pointIndex]
return 'x:'+ x + 'y:' + y + 'name:' + d.name
}!#",
showYAxis = FALSE, #turns off y axis
forceY = c(0,4) #forces y axis to 0 min and 4 max
)
n2
Here is the additional code based on the comments
require(dplyr)
require(magrittr)
require(rCharts)
data <- jsonlite::fromJSON('[
[5,
0, "a1"], [480, 0, "a2"], [250, 0, "a3"], [100, 0, "a4"], [330, 0, "a5"],
[410, 0, "a6"], [475,
0, "a7"], [25, 0, "a8"], [85, 0, "a9"], [220, 0, "a10"],
[600, 0, "a11"]
]') %>% as.data.frame(stringsAsFactors = F) %>%
set_colnames(c("x","y","name")) %>%
mutate(x = as.numeric(x)) %>%
mutate(y = as.numeric(y))
data$grp <- c(rep("A",3),rep("B",5),rep("Z",3))
n1 <- nPlot(
y~x
,group = "grp"
,data = data
,type="scatterChart"
,height=200
)
n1$chart(
tooltipContent = "#! function(key,x,y,e){
var d = e.series.values[e.pointIndex]
var mytip = [];
mytip.push('<h1>name:'+ d.name + '</h1>');
mytip.push('<p>x:' + x + '</p>');
mytip.push('<p>y:' + y + '</p>');
return mytip.join('');
}!#",
showYAxis = FALSE, #turns off y axis
forceY = c(-1,1) #forces y axis to 0 min and 4 max
,showDistX = TRUE #turn on markers on the x axis
,showDistY = FALSE
)
n1$yAxis(
showMaxMin = FALSE
,axisLabel = NULL
)
n1
note: there is a bug in the fisheye that interferes with the tooltip; we can remove to get tooltips to appear immediately
Let me know how this works.
I'm using Paul Bleicher's Calendar Heatmap to visualize some events over time and I'm interested to add black-and-white fill patterns instead of (or on top of) the color coding to increase the readability of the Calendar Heatmap when printed in black and white.
Here is an example of the Calendar Heatmap look in color,
and here is how it look in black and white,
it gets very difficult to distinguish between the individual levels in black and white.
Is there an easy way to get R to add some kind of patten to the 6 levels instead of color?
Code to reproduce the Calendar Heatmap in color.
source("http://blog.revolution-computing.com/downloads/calendarHeat.R")
stock <- "MSFT"
start.date <- "2012-01-12"
end.date <- Sys.Date()
quote <- paste("http://ichart.finance.yahoo.com/table.csv?s=", stock, "&a=", substr(start.date,6,7), "&b=", substr(start.date, 9, 10), "&c=", substr(start.date, 1,4), "&d=", substr(end.date,6,7), "&e=", substr(end.date, 9, 10), "&f=", substr(end.date, 1,4), "&g=d&ignore=.csv", sep="")
stock.data <- read.csv(quote, as.is=TRUE)
# convert the continuous var to a categorical var
stock.data$by <- cut(stock.data$Adj.Close, b = 6, labels = F)
calendarHeat(stock.data$Date, stock.data$by, varname="MSFT Adjusted Close")
update 02-13-2013 03:52:11Z, what do I mean by adding a pattern,
I envision adding a pattern to the individual day-boxes in the Calendar Heatmap as pattern is added to the individual slices in the pie chart to the right (B) in this plot,
found here something like the states in this plot.
I answered this question before he becomes a bounty. It looks like the OP find my previous answer a little bit complicated. I organized the code in a single gist here. you need just to download the file and source it.
I create new function extra.calendarHeat which is an extension of the first one to draw hetmap of double time series.(dat,value1,value2). I addedthis new parameters:
pch.symbol : vector of symbols , defualt 15:20
cex.symbol : cex of the symbols , default = 2
col.symbol : color of symbols , default #00000044
pvalues : value of symbols
Here some examples:
## I am using same data
stock <- "MSFT"
start.date <- "2012-01-12"
end.date <- Sys.Date()
quote <- paste("http://ichart.finance.yahoo.com/table.csv?s=",
stock,
"&a=", substr(start.date,6,7),
"&b=", substr(start.date, 9, 10),
"&c=", substr(start.date, 1,4),
"&d=", substr(end.date,6,7),
"&e=", substr(end.date, 9, 10),
"&f=", substr(end.date, 1,4),
"&g=d&ignore=.csv", sep="")
stock.data <- read.csv(quote, as.is=TRUE)
p1 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
pvalues = stock.data$Volume,
varname="W&B MSFT Adjusted Close
\n Volume as no border symbol ")
## multiply symbols
p2 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
pvalues = stock.data$Volume,
varname="W&B MSFT Adjusted Close \n
black Volume as multiply symbol ",
pch.symbol = c(3,4,8,9),
col.symbol='black')
## circles symbols
p3 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
pvalues = stock.data$Volume,
varname="W&B MSFT Adjusted Close \n blue Volume as circles",
pch.symbol = c(1,10,13,16,18),
col.symbol='blue')
## triangles symbols
p4 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
pvalues = stock.data$Volume,
varname="W&B MSFT Adjusted Close \n red Volume as triangles",
pch.symbol = c(2,6,17,24,25),
col.symbol='red')
p5 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
varname="MSFT Adjusted Close",
pch.symbol = LETTERS,
col.symbol='black')
# symbols are LETTERS
p6 <- extra.calendarHeat(dates= stock.data$Date, values = stock.data$Adj.Close,
pvalues = stock.data$Volume,
varname="MSFT Adjusted Close \n Volume as LETTERS symbols",
pch.symbol = letters,
color='r2b')
You can panel.level.plot from latticeExtra to add pattern. I think the question as it is asked is a little bit specific. So I try to generalize it. The idea is to give the steps to transform a time series to a calendar heatmap: with 2 patterns (fill color and a shape). We can imagine multiple time series (Close/Open). For example, you can get something like this
or like this, using a ggplot2 theme:
The function calendarHeat , giving a single time series (dat,value) , transforms data like this :
date.seq value dotw woty yr month seq
1 2012-01-01 NA 0 2 2012 1 1
2 2012-01-02 NA 1 2 2012 1 2
3 2012-01-03 NA 2 2 2012 1 3
4 2012-01-04 NA 3 2 2012 1 4
5 2012-01-05 NA 4 2 2012 1 5
6 2012-01-06 NA 5 2 2012 1 6
So I assume that I have data formated like this, otherwise, I extracted from calendarHeat the part of data transformation in a function(see this gist)
dat <- transformdata(stock.data$Date, stock.data$by)
Then the calendar is essentially a levelplot with custom sacles , custom theme and custom panel' function.
library(latticeExtra)
levelplot(value~woty*dotw | yr, data=dat, border = "black",
layout = c(1, nyr%%7),
col.regions = (calendar.pal(ncolors)),
aspect='iso',
between = list(x=0, y=c(1,1)),
strip=TRUE,
panel = function(...) {
panel.levelplot(...)
calendar.division(...)
panel.levelplot.points(...,na.rm=T,
col='blue',alpha=0.5,
## you can play with cex and pch here to get the pattern you
## like
cex =dat$value/max(dat$value,na.rm=T)*3
pch=ifelse(is.na(dat$value),NA,20),
type = c("p"))
},
scales= scales,
xlim =extendrange(dat$woty,f=0.01),
ylim=extendrange(dat$dotw,f=0.1),
cuts= ncolors - 1,
colorkey= list(col = calendar.pal(ncolors), width = 0.6, height = 0.5),
subscripts=TRUE,
par.settings = calendar.theme)
Where the scales are:
scales = list(
x = list( at= c(seq(2.9, 52, by=4.42)),
labels = month.abb,
alternating = c(1, rep(0, (nyr-1))),
tck=0,
cex =1),
y=list(
at = c(0, 1, 2, 3, 4, 5, 6),
labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday"),
alternating = 1,
cex =1,
tck=0))
And the theme is setting as :
calendar.theme <- list(
xlab=NULL,ylab=NULL,
strip.background = list(col = "transparent"),
strip.border = list(col = "transparent"),
axis.line = list(col="transparent"),
par.strip.text=list(cex=2))
The panel function uses a function caelendar.division. In fact, the division of the grid(month black countour) is very long and is done using grid package in the hard way (panel focus...). I change it a little bit, and now I call it in the lattice panel function: caelendar.division.
We can use ggplot2's scale_shape_manual to get us shapes that appear close to shading, and we can plot these over the grey heatmap.
Note: This was adapted from #Jay's comments in the original blog posting for the calendar heatmap
# PACKAGES
library(ggplot2)
library(data.table)
# Transofrm data
stock.data <- transform(stock.data,
week = as.POSIXlt(Date)$yday %/% 7 + 1,
month = as.POSIXlt(Date)$mon + 1,
wday = factor(as.POSIXlt(Date)$wday, levels=0:6, labels=levels(weekdays(1, abb=FALSE)), ordered=TRUE),
year = as.POSIXlt(Date)$year + 1900)
# find when the months change
# Not used, but could be
stock.data$mchng <- as.logical(c(0, diff(stock.data$month)))
# we need dummy data for Sunday / Saturday to be included.
# These added rows will not be plotted due to their NA values
dummy <- as.data.frame(stock.data[1:2, ])
dummy[, -which(names(dummy) %in% c("wday", "year"))] <- NA
dummy[, "wday"] <- weekdays(2:3, FALSE)
dummy[, "mchng"] <- TRUE
rbind(dummy, stock.data) -> stock.data
# convert the continuous var to a categorical var
stock.data$Adj.Disc <- cut(stock.data$Adj.Close, b = 6, labels = F)
# vals is the greyscale tones used for the outer monthly borders
vals <- gray(c(.2, .5))
# PLOT
# Expected warning due to dummy variable with NA's:
# Warning message:
# Removed 2 rows containing missing values (geom_point).
ggplot(stock.data) +
aes(week, wday, fill=as.factor(Adj.Disc),
shape=as.factor(Adj.Disc), color=as.factor(month %% 2)) +
geom_tile(linetype=1, size=1.8) +
geom_tile(linetype=6, size=0.4, color="white") +
scale_color_manual(values=vals) +
geom_point(aes(alpha=0.2), color="black") +
scale_fill_grey(start=0, end=0.9) + scale_shape_manual(values=c(2, 3, 4, 12, 14, 8)) +
theme(legend.position="none") + labs(y="Day of the Week") + facet_wrap(~ year, ncol = 1)