Plots.jl color specific bars on plot - julia

n = 20
hgt = rand(n) .+ 1
bot = randn(n)
openpct = rand(n)
closepct = rand(n)
y = OHLC[(openpct[i] * hgt[i] + bot[i], bot[i] + hgt[i], bot[i], closepct[i] * hgt[i] + bot[i]) for i = 1:n]
ohlc(y)
Checked through attributes, i see markers but is there a way to color specific bars at defined index positions? Say i color these 2x bars yellow at x axis index position 11 and 15 for output:

There's probably a better way to do this, but you can get
with
julia> ohlc(y)
julia> ohlc!(OHLC[i ∈ (11, 15) ? y[i] : OHLC(NaN,NaN,NaN,NaN) for i in eachindex(y)], c=:red)

Related

Use gganimate to display calculation of tweened data

I would like to use gganimate to:
Graph two separate curved lines with geom_path
Call a function that performs a calculation with the data from those lines and returns a single coordinate (x, y)
Plot that coordinate as a geom_point
Move the lines around, with the geom_point updating as the lines move
This is simple if the movement is such that the single (x, y) coordinate moves linearly (just calculate it at each stage ahead of time and then animate it, it will move linearly from each stage to the next), but if it's not I'm not sure what to do. If I call a function within aes(), which seems like the natural solution, it seems to calculate it once at the beginning and then not update it as the lines move.
Here is an example.
library(tidyverse)
library(gganimate)
# A function to find the x and y coordinate of the minimum y value of either set
min_of_both <- function(x1, y1, x2, y2) {
cm <- bind_rows(tibble(x = x1, y = y1),
tibble(x = x2, y = y2))
return(list(x = cm[which(cm$y == min(cm$y)),]$x,
y = min(cm$y)))
}
# Create two parabola paths, curve A which moves downwards from t = 1 to t = 2
curveA <- tibble(xA = -50:50/10, yA = 5+(-50:50/10)^2, t = 1) %>%
bind_rows(tibble(xA = -50:50/10, yA = -10 + (-50:50/10)^2, t = 2))
# And curve B which is static in both time 1 and 2
curveB <- tibble(xB = -50:50/10, yB = 1 + (-30:70/10)^2)
data <- curveB %>%
bind_rows(curveB) %>%
bind_cols(curveA)
# Plot Curve A
p <- ggplot(data, aes(x = xA, y = yA)) +
geom_path(color = 'red') +
# And Curve B
geom_path(aes(x=xB,y=yB), color = 'blue')+
# Then plot a single point that uses both curves as input
# Note I also get problems if trying to run the function through data= instead of mapping=
# or if I define two separate functions, one for x and one for y, so as to avoid $
geom_point(aes(
x = min_of_both(xA,yA,xB,yB)$x,
y = min_of_both(xA,yA,xB,yB)$y),
size = 3,
color = 'black') +
theme_minimal()+
transition_states(t)+
ease_aes('sine-in-out')
animate(p)
This results in (not sure if the animation will play on StackOverflow but the parabola does indeed move):
The black dot is intended to mark the lowest y-coordinate on either parabola at each moment, but instead it marks the lowest y-coordinate on either parabola at any point in the animation (at the end).
Any tips appreciated.
After a lot of head-scratching I think I've understood your point and have found one solution. The best way forward might be to manually tween the paths and calculate the min values using your function whilst grouping by .frame before plotting:
# Same curve setup, but labelling points for grouping later
curveA <- tibble(xA = -50:50/10,
yA = 5+(-50:50/10)^2,
point = 1:101,
t = 1) %>%
bind_rows(tibble(xA = -50:50/10,
yA = -10 + (-50:50/10)^2,
point = 1:101,
t = 2))
curveB <- tibble(xB = -50:50/10,
yB = 1 + (-30:70/10)^2,
point = 1:101,
t = 1)
A_frames <- curveA %>%
tween_along(ease = 'sine-in-out', 100, along = t, id = point) %>%
filter(.phase == "transition") %>%
select(xA, yA, point, .frame) %>%
arrange(.frame, point) # arrange by point needed to keep in order
B_frames <- curveB %>%
bind_rows(curveB %>% mutate(t = 2)) %>%
tween_along(ease = 'sine-in-out', 100, along = t, id = point) %>%
filter(.phase == "transition") %>%
select(xB, yB, point, .frame) %>%
arrange(.frame, point)
data <- A_frames %>%
left_join(B_frames, by = c(".frame", "point")) %>%
group_by(.frame) %>%
mutate(xmin = min_of_both(xA,yA,xB,yB)$x,
ymin = min_of_both(xA,yA,xB,yB)$y)
# Plot Curve A
p <- ggplot(data, aes(x = xA, y = yA)) +
geom_path(color = 'red') +
# And Curve B
geom_path(aes(x=xB,y=yB), color = 'blue')+
# Then plot a single point that uses both curves as input
# Note I also get problems if trying to run the function through data= instead of mapping=
# or if I define two separate functions, one for x and one for y, so as to avoid $
geom_point(aes(xmin, ymin),
size = 3,
color = 'black') +
theme_minimal()+
transition_states(.frame)+
ease_aes('sine-in-out')
animate(p, fps = 24)

Rounding frame_time and smooth transitions for gganimate

I have the following data frame:
# Seed RNG
set.seed(33550336)
# Create data frame
df <- data.frame(x = runif(100),
y = runif(100),
t = runif(100, min = 0, max = 10))
I'd like to plot points (i.e., at x and y coordinates) appearing and disappearing as a function of t. gganimate is awesome, so I used that.
# Load libraries
library(gganimate)
library(ggplot2)
# Create animation
g <- ggplot(df, aes(x = x, y = y))
g <- g + geom_point(colour = "#FF3300", shape = 19, size = 5, alpha = 0.25)
g <- g + labs(title = 'Time: {frame_time}')
g <- g + transition_time(t)
g <- g + enter_fade() + exit_fade()
animate(g, fps = 1)
This code produced the following:
There are a couple of things that I don't like about this.
The transitions are very abrupt. My hope using enter_fade and exit_fade was that the points would fade into view, then back out. Clearly this isn't the case, but how would I achieve this result?
I would like to round {frame_time}, so that while the points fade in and out at fractions of t, the actual time t that would be shown would be an integer. If frame_time was a regular variable, this would be simple enough using something like bquote and round, but this doesn't seem to be the case. How can I round frame_time in my title?
Here's a relatively manual approach that relies on doing more of the prep beforehand and feeding that into gganimate. I'd like to see if there's a simpler way to do this inside gganimate more automatically.
First I make a copy of the data frame for each frame I want to show. Then I calculate the difference between the time I'm presently viewing (time) and the t when I want to show each data point. I use cos to handle the easing in and out, so that each dot's appearance at given time is described with display. In the ggplot call, I then map alpha and size to display, and use transition_time(time) to move through the frames.
# Create prep table
fade_time = 1
frame_count = 100
frames_per_time = 10
df2 <- map_df(seq_len(frame_count), ~df, .id = "time") %>%
mutate(time = as.numeric(time)/frames_per_time,
delta_norm = (t - time) / fade_time,
display = if_else(abs(delta_norm) > 1, 0, cos(pi / 2 * delta_norm)))
# Create animation
g <- ggplot(df2, aes(x = x, y = y, alpha = display, size = display))
g <- g + geom_point(colour = "#FF3300", shape = 19)
g <- g + scale_alpha(range = c(0, 1)) + scale_size_area(max_size = 5)
g <- g + labs(title = "{round(frame_time, 1)}")
g <- g + transition_time(time)
animate(g)

How to draw directional spider network in geom_segment/ggplot2 in R?

I am trying to work out to draw so-called spider network or desire line which illustrates movement of things (person, vehicle, etc.) between specific zones by direction.
This is the data frame that I am using:
df <- data.frame(O=c(1,2,4,4,4,6,6,6,7,7,7,9,9,9,9,10,10,10,11,12,12,12,32,86,108,128,128,157,157,157,157,157),
D=c(2,1,6,7,32,4,7,157,4,6,157,10,11,12,157,9,12,157,9,9,10,157,4,128,128,86,108,6,7,9,10,12),
trip=c(971,971,416,621,330,416,620,1134,621,620,625,675,675,378,439,675,724,472,675,378,724,563,330,610,405,610,405,1134,625,439,472,563),
lon.x=c(697746.6,696929.6,696748.8,696748.8,696748.8,694906.4,694906.4,694906.4,696769.4,696769.4,696769.4,698802.2,698802.2,698802.2,698802.2,698900.5,698900.5,698900.5,699686.7,696822.0,696822.0,696822.0,698250.7,702314.7,700907.1,702839.5,702839.5,694518.9,694518.9,694518.9,694518.9,694518.9),
lat.x=c(9312405,9311051,9308338,9308338,9308338,9307087,9307087,9307087,9305947,9305947,9305947,9304338,9304338,9304338,9304338,9302314,9302314,9302314,9306300,9303080,9303080,9303080,9309423,9320738,9321302,9322619,9322619,9301921,9301921,9301921,9301921,9301921),
lon.y=c(696929.6,697746.6,694906.4,696769.4,698250.7,696748.8,696769.4,694518.9,696748.8,694906.4,694518.9,698900.5,699686.7,696822.0,694518.9,698802.2,696822.0,694518.9,698802.2,698802.2,698900.5,694518.9,696748.8,702839.5,702839.5,702314.7,700907.1,694906.4,696769.4,698802.2,698900.5,696822.0),
lat.y=c(9311051,9312405,9307087,9305947,9309423,9308338,9305947,9301921,9308338,9307087,9301921,9302314,9306300,9303080,9301921,9304338,9303080,9301921,9304338,9304338,9302314,9301921,9308338,9322619,9322619,9320738,9321302,9307087,9305947,9304338,9302314,9303080))
df contains following fields:
O: origin of trips
D: destination of trips
trip: number of trips between O and D
lon.x: longitude of origin zone
lat.x: lattitude of origin zone
lon.y: longitude of destination zone
lat.y: lattitude of destination zone
Currently I can draw following figure by the script here using geom_segment in ggplot2 package:
library(ggplot2)
ggplot() +
geom_segment(data = df, aes(x = lon.x, y = lat.x, xend = lon.y, yend = lat.y, size = trip),
color = "blue", alpha = 0.5, show.legend = TRUE,
position = position_dodge2(width = 100)) +
scale_size_continuous(range = c(0, 5), breaks = c(300, 600, 900, 1200),
limits = c(100, 1200), name = "Person trips/day (over 100 trips)") +
theme(legend.key = element_rect(colour = "transparent", fill = alpha("black", 0))) +
guides(size = guide_legend(override.aes = list(alpha = 1.0))) +
geom_point(data = df, aes(x = lon.x, y = lat.x), pch = 16, size = 2.4)
The issue is that each line from O to D and from D to O are overlapped. I would prefer to plot the segments which are dodged based on the center line to properly visualize total number of trips and to see the balance of trips between zone pairs.
An example of desired result is shown below.
Dotted center line is not necessarily displayed (I just put it to show what the balance is). It is also preferable to change color by direction, for instance, red in clockwise and blue in anti-clockwise direction. Arrows are not necessary if direction can be shown in color.
I found some examples to solve the issue, however I cannot reach desirable result at this moment.
Calculation of offset for coordinates
It is not so easy to set offset for each direction in this example as I have around 80 zones which results in 6,400 pairs of zones.
Offset geom_segment in ggplot
position_dodge2 function
It says that I can set margin between segments in width using variable, however if I use trip in it, it returns error. Also, it is not clear how much should I set the value for appropriate offset to make segments follow center lines.
https://ggplot2.tidyverse.org/reference/position_dodge.html
geom_curve and arrow
It is also possible to draw lines with curve so that above issue could be solved. However curved segments are messy to observe the movements in one figure. Arrows are also a bit difficult to see the direction as the shape of arrows are not sharp though I change its style.
color=variable and position=dodge
I also tried to spread/gather the df to get new variable direction and to delete OD-pairs in opposite direction so that I thought I can easily dodge segments using color=direction and position=dodge in ggplot2, however it did not work well (segments are still overlapped). Small example is shown below.
O D trip direction lon.x lat.x lon.y lat.y
1 2 971 clock 697746.6 9312405 696929.6 9311051
2 1 300 anticlock 696929.6 9311051 697746.6 9312405
4 6 416 clock 696748.8 9308338 694906.4 9307087
4 7 621 anticlock 694906.4 9307087 696748.8 9308338
I highly appreciate your idea to obtain well-designed figure.
Please also see the following figure to get actual usage of spider network.
You could use trig functions to calculate an offset value, then plug this into the ggplot() call. Below is an example using your dataset above. I'm not exactly sure what you mean by clockwise, so I put in a simple dummy variable.
# make a dummy "clockwise" variable for now
df$clockwise = df$O > df$D
# angle from coordinates of stations
df$angle = atan((df$lat.y - df$lat.x)/(df$lon.y - df$lon.x))
# offsets from cos/sin of orthogonal angle
# scale the distance of the offsets by the trip size so wider bars offset more
# offset them one way if the trip is clockwise, the other way if not clockwise
df$xoffset = cos(df$angle - pi/2) * df$trip/5 * (2 * df$clockwise - 1)
df$yoffset = sin(df$angle - pi/2) * df$trip/5 * (2 * df$clockwise - 1)
ggplot() +
geom_segment(data = df, aes(x = lon.x + xoffset, y = lat.x + yoffset, xend = lon.y + xoffset, yend = lat.y + yoffset, size = trip, color = clockwise),
alpha = 0.5, show.legend = TRUE) +
scale_size_continuous(range = c(0, 5), breaks = c(300, 600, 900, 1200),
limits = c(100, 1200), name = "Person trips/day (over 100 trips)") +
theme(legend.key = element_rect(colour = "transparent", fill = alpha("black", 0))) +
guides(size = guide_legend(override.aes = list(alpha = 1.0))) +
geom_point(data = df, aes(x = lon.x, y = lat.x), pch = 16, size = 2.4) +
coord_fixed()

How to get total number of x displayed in ggplot?

Consider:
x <- rnorm(100)
qplot(x)
How to I get the total number (N = 100) of x displayed on the top right corner in my ggplot?
See actual output:
See this example (N = 37):
You can also set the location of the label programmatically, based on the data values. ggplot2 defaults to 30 bins, so the code below uses 30 bins to set the y-value for the label location:
set.seed(101)
x <- rnorm(100)
qplot(x) +
annotate("text", label=paste0("N = ", length(x)), x=max(x), y=max(table(cut(x, 30))))
or
qplot(x) +
geom_text(aes(label=paste0("N = ", length(x)), x=max(x), y=max(table(cut(x, 30)))))
UPDATE: To address your comment, let's plot with a discrete x vector. Now if we still want the y position of the text to be at the maximum, we once again find the category with the maximum number of counts. The data are already discrete, so we just need y=max(table(x)). For the x position, if we want the label at the maximum x value, we need the number of unique x categories, since ggplot implicitly numbers these from 1 to the N (where N is the number of categories). The unique function returns a vector containing each unique category. We just need the length of this vector to get the maximum x value in the graph: x=length(unique(x)).
set.seed(101)
x <- cut(rnorm(100), 5)
qplot(x) +
geom_text(aes(label=paste0("N = ", length(x)), x=length(unique(x)), y=max(table(x))))
Lots of ways. geom_text is the most general tool. For a one-off label, maybe annotate:
qplot(x) +
annotate("text",x = Inf,y = Inf,label = "N = 100",hjust = 1.5,vjust = 1.5)
The other answers show how you can add the text to your plot. But annotate() can also be used to add other geoms. If you want to put your annotation inside a rectangle, for instance, you can do the following:
x0 <- max(x)
y0 <- max(table(cut(x, 30)))
qplot(x) +
annotate("rect", xmin = x0*.8, xmax = x0*1.2, ymin = y0*.95, ymax = y0*1.05,
fill = "white", colour = "black") +
annotate("text", label = paste0("N = ", length(x)), x = x0, y = y0)
which gives
Up to the line that starts with annotate("rect", everything is taken from the other answers to this question.
Like this? (code below)
# install.packages("ggplot2", dependencies = TRUE)
library(ggplot2)
set.seed(421)
x <- rnorm(100)
qplot(x) + annotate("text", x = 2, y = 15, label = paste("N =", length(x)))

How to create a shaded error bar "box" for a scatterplot in R or MATLAB

I would like to create a simple scatter plot in R or MATLAB involving two variables $x$ and $y$ which have errors associated with them, $\epsilon_x$ and $\epsilon_y$.
Instead of adding error-bars, however, I was hoping to create a "shaded box" around each $(x,y)$ pair where the height of the box ranges from ($y - \epsilon_y$) to ($y + \epsilon_y$) and the width of the box ranges from ($x - \epsilon_y$) to ($x + \epsilon_y$) .
Is this possible in R or MATLAB? If so, what package or code can I use to generate these plots. Ideally, I would like the package to also support asymmetric error bounds.
You could do it in matlab by creating the following function:
function errorBox(x,y,epsx,epsy)
%# make sure inputs are all column vectors
x = x(:); y = y(:); epsx = epsx(:); epsy = epsy(:);
%# define the corner points of the error boxes
errBoxX = [x-epsx, x-epsx, x+epsx, x+epsx];
errBoxY = [y-epsy, y+epsy, y+epsy, y-epsy];
%# plot the transparant errorboxes
fill(errBoxX',errBoxY','b','FaceAlpha',0.3,'EdgeAlpha',0)
end
x, y, epsx and epsy can all be vectors.
Example:
x = randn(1,5); y = randn(1,5);
epsx = rand(1,5)/5;
epsy = rand(1,5)/5;
plot(x,y,'rx')
hold on
errorBox(x,y,epsx,epsy)
Result:
It's probably easier using the ggplot2. First create some data:
set.seed(1)
dd = data.frame(x = 1:5, eps_x = rnorm(5, 0, 0.1), y = rnorm(5), eps_y = rnorm(5, 0, 0.1))
##Save space later
dd$xmin = dd$x - dd$eps_x
dd$xmax = dd$x + dd$eps_x
dd$ymin = dd$y - dd$eps_y
dd$ymax = dd$y + dd$eps_y
Then use the rectangle geom in ggplot2:
library(ggplot2)
ggplot(dd) +
geom_rect(aes( xmax = xmax, xmin=xmin, ymin=ymin, ymax = ymax))
gives the first plot. Of course, you don't need to use ggplot2, to get something similar in base graphics, try:
plot(0, 0, xlim=c(0.5, 5.5), ylim=c(-1, 1), type="n")
for(i in 1:nrow(dd)){
d = dd[i,]
polygon(c(d$xmin, d$xmax, d$xmax, d$xmin), c(d$ymin, d$ymin, d$ymax,d$ymax), col="grey80")
}
to get the second plot.
Here's how to do it using Matlab (with asymmetric intervals). Converting to symmetric ones should be trivial.
%# define some random data
x = rand(5,1)*10;y = rand(5,1)*10;
%# ex, ey have two columns for lower/upper bounds
ex = abs(randn(5,2))*0.3;ey=abs(randn(5,2));
%# create vertices, faces, for patches
vertx = bsxfun(#minus,y,ey(:,[1 2 2 1]))';
verty = bsxfun(#minus,y,ey(:,[1 1 2 2]))';
vertices = [vertx(:),verty(:)];
faces = bsxfun(#plus,[1 2 3 4],(0:4:(length(x)-1)*4)');
%# create patch
patch(struct('faces',faces,'vertices',vertices),'FaceColor',[0.5 0.5 0.5]);
%# add "centers" - note, the intervals are asymmetric
hold on, plot(x,y,'oy','MarkerFaceColor','r')
It's simple with the ggplot2 package in R.
# An example data frame
dat <- data.frame(x = 1:5, y = 5:1, ex = (1:5)/10, ey = (5:1)/10)
# Plot
library(ggplot2)
ggplot(dat) +
geom_rect(aes(xmin = x - ex, xmax = x + ex, ymin = y - ey, ymax = y + ey),
fill = "grey") +
geom_point(aes(x = x, y = y))
In the aes function inside geom_rect the size of the rectangle is defined by ex and ey around x and y.
Here's a MATLAB answer:
x = randn(1,5); y = 3-2*x + randn(1,5);
ex = (.1+rand(1,5))/5; ey = (.2+rand(1,5))/3;
plot(x,y,'ro')
patch([x-ex;x+ex;x+ex;x-ex],[y-ey;y-ey;y+ey;y+ey],[.9 .9 .9],'facealpha',.2,'linestyle','none')

Resources