Using plotmath with geom_label_repel to have subscripts within the labels - r

I am trying to have subscripts in my geom_label. e.g.
Maine
Apo (km/h) = 9
Qt (m/s) = 90
I am aware of using [x] to get subscripts but I am not sure how to achieve that when I want to get the label values (partly) from a column. I tried using tidyeval (!!) to no avail. Even simply changing parse = T gives me errors. It could be something rudimentary that I am overlooking, but after reading this thread using plotmath in ggrepel labels, I am not sure if it is as simple as I thought.
Here is what I have so far. I provided the packages and the data I have used, along with data cleaning/preparation steps. Finally, I've shown the code that I have used for creating the "preliminary" plot.
library(tidyverse)
library(stringr)
library(usmap)
library(ggrepel)
library(rlang)
# Example data: one row per state (two-letter abbreviation) with its Apo and
# Qt values, read from an inline whitespace-separated table.
ex1 <- read.table(
  text = "State Apo Qt
NJ 1 10
MO 2 20
SD 3 30
NY 4 40
FL 5 50
OK 6 60
NE 7 70
KY 8 80
ME 9 90
CA 10 100
NC 11 110
MA 12 120
CT 13 140",
  header = TRUE,
  # spelled out in full: `stringsAsFactor` only worked via partial matching
  stringsAsFactors = FALSE
)
# get the states full names
# (each two-letter code is looked up in state.abb and mapped to state.name)
region <- state.name[match(ex1$State,state.abb)]
region <- str_to_title(region)
# US map data (50 States)
us1 <- usmap::us_map()
# adding full names to the dataset
ex_df <- cbind(region = region, ex1)
# adding dataset values to the map data (only states with data)
us_val1 <- left_join(ex_df, us1, by = c("region" = "full"))
# full map dataset joined by ex1 dataset to draw the map
# NOTE(review): the NA-filled values are written to a new column `qQt`, while
# the plotting code maps fill to `Qt` -- confirm which column was intended.
us_map1 <- left_join(us1, ex_df, by = c("full" ="region")) %>%
mutate(qQt = replace_na(Qt, 0))
# creating a dataset with centroids of the states (only the ones in ex1)
# (each "centroid" is the midpoint of the x range and of the y range of the
# state's rows, not an area-weighted centroid)
us_centroids1 <-
us_val1 %>%
group_by(region) %>%
summarise(centroid.x = mean(range(x)),
centroid.y = mean(range(y)),
label = unique(State),
`Apo` = unique(Apo),
`Qt` = unique(Qt))
## drawing the plot
# Preliminary map: states filled by Qt, with a repelled plain-text label at
# each state centroid (state name, Apo and Qt on separate lines).
ggplot() +
  geom_polygon(data = us_map1,
               aes(x, y, group = group, fill = Qt),
               color = "black",
               size = .1) +
  geom_label_repel(data = us_centroids1,
                   aes(centroid.x, centroid.y,
                       label = paste(region, "\n Apo (km/h) = ", `Apo`, "\n Qt (m/s) =", `Qt`)),
                   size = 5/14*8,
                   box.padding = 1,
                   # labels are plain strings, so plotmath parsing stays off;
                   # FALSE instead of the reassignable shorthand F
                   parse = FALSE) +
  scale_fill_gradientn(name = expression(Q[t]~(m/s)),
                       breaks = c(0, seq(10, 130, 20)),
                       labels = c("", seq(10, 130, 20)),
                       limits = c(0, 130),
                       colors = c("#DCDCDC", "lightblue", "green"),
                       guide = guide_colorbar(barwidth = 0.8, barheight = 18)) +
  theme_void()

This is kind of a pain, since plotmath doesn't appear to have line breaks. Thus, you have to work around it with atop(). Use bquote() to insert variable values into the expression. This only works on one element at once, thus we have to pmap() over the three variables.
# Same map, but every label is built as a plotmath expression so that "po" and
# "t" are rendered as subscripts.
ggplot() +
  geom_polygon(data = us_map1,
               aes(x, y, group = group, fill = Qt),
               color = "black",
               size = .1) +
  geom_label_repel(data = us_centroids1,
                   aes(centroid.x, centroid.y,
                       # one expression per row; bquote() substitutes the
                       # .(x), .(y), .(z) placeholders with that row's values
                       label = pmap(
                         list(region, Apo, Qt),
                         \(x, y, z) bquote(
                           atop(
                             .(x),                    # first line of the label
                             atop(
                               A[po] (km/h) == .(y),  # second line
                               Q[t] (m/s) == .(z)     # third line
                             )
                           )
                         )
                       )
                   ),
                   size = 5/14*8,
                   box.padding = 1,
                   # TRUE instead of the reassignable shorthand T
                   parse = TRUE) +
  scale_fill_gradientn(name = expression(Q[t]~(m/s)),
                       breaks = c(0, seq(10, 130, 20)),
                       labels = c("", seq(10, 130, 20)),
                       limits = c(0, 130),
                       colors = c("#DCDCDC", "lightblue", "green"),
                       guide = guide_colorbar(barwidth = 0.8, barheight = 18)) +
  theme_void()
Created on 2022-07-31 by the reprex package (v2.0.1)

Related

Custom manhattan plot multi x-axis

I have the following data set gwas_data
Running
head -n 23 gwas_data gives me the following table.
gwas_data <-
data.frame(
stringsAsFactors = FALSE,
udi = c("A","B","C","D","E",
"F","G","H","I","J","K","A","B","C","D","E",
"F","G","H","I","J","K"),
snp = c("rs71628639_A",
"rs71628639_A","rs71628639_A","rs71628639_A","rs71628639_A",
"rs71628639_A","rs71628639_A","rs71628639_A",
"rs71628639_A","rs71628639_A","rs71628639_A","rs12726330_A",
"rs12726330_A","rs12726330_A","rs12726330_A",
"rs12726330_A","rs12726330_A","rs12726330_A","rs12726330_A",
"rs12726330_A","rs12726330_A","rs12726330_A"),
chr = c(1L,1L,1L,1L,1L,1L,1L,
1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,1L,
1L),
bp = c(154988255L,154988255L,
154988255L,154988255L,154988255L,154988255L,154988255L,
154988255L,154988255L,154988255L,154988255L,
155108167L,155108167L,155108167L,155108167L,155108167L,
155108167L,155108167L,155108167L,155108167L,
155108167L,155108167L),
p = c(0.580621191,0.356577427,
0.494774059,0.984005886,0.492034614,0.581479389,
0.24820214,0.202720896,0.295462221,0.845848783,
0.954714162,0.343101621,0.740942238,0.929127071,0.717965027,
0.335111376,0.857154424,0.480087195,0.980307843,
0.521114038,0.583150471,0.925783695),
beta = c(0.000852277,0.003943912,
0.001091986,-3.18e-05,0.000564413,0.000120028,
0.026156467,0.000303135,0.069146449,-2.96e-07,-2.11e-05,
0.001274261,-0.001232397,0.000123948,-0.000498507,
-0.000689988,-3.41e-50,-0.013934416,5.12e-06,
-0.03696031,-7.28e-07,-3.01e-05),
bp_cum = c(1.154988255,1.154988255,
1.154988255,1.154988255,1.154988255,1.154988255,
1.154988255,1.154988255,1.154988255,1.154988255,
1.154988255,1.155108167,1.155108167,1.155108167,
1.155108167,1.155108167,1.155108167,1.155108167,1.155108167,
1.155108167,1.155108167,1.155108167)
)
I would like to make a manhattan plot, the X-axis should have chromosomal numbers from 1:22, I want each entry to be on the x-axis according to the BP position. The id should act as colour and the y-axis would be -log10(p).
I have rewritten the r command as follows, but my graph doesn't look correct.
library(plyr)
library(dplyr)
library(purrr)
library(tidyverse)
library(ggtext)
library(stringr)
gwas_data <- read.table("gwas_data", header=T)
sig <- 5e-8
manhplot <- ggplot(gwas_data, aes(x = bp_cum, y = -log10(p), color = udi)) +
geom_hline(yintercept = -log10(sig), color = "grey40", linetype = "dashed") +
geom_point(aes(color=as.factor(udi)), alpha=0.8, size=2) +
scale_x_continuous(label = axis_set$chr, breaks = axis_set$center) +
scale_y_continuous(expand = c(0,0), limits = c(0, ylim)) +
#scale_color_manual(values = rep(c("#276FBF", "#183059"), (length(axis_set$chr)))) +
scale_size_continuous(range = c(0.5,3)) +
theme_minimal()
print(manhplot)
I would also like to add the name of the ID and SNP if they are above the significant threshold.
My axis_set looks as follows with test data which goes from chromosome 1:4
chr center
1 179641307
2 354697451
3 553030055
4 558565909
My final graph looks as follows:

How Insert an expression in legend in ggplot2?:: correct color + multiple lines and point

I am new to R and have not been able to correct the following graph.
Xb_exp, it should have blue dots.
Xb_dw, solid red line.
Xb_f, dotted line.
Xb_s, longdash line.
The legend expression should be as shown with the subscript.
I have not been able to correct it.
Is there a way to do this?
enter image description here
my data
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0,0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
dat <- c(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
my code
labels = c(expression(X[b_exp]),expression(X[b_dw]),expression(X[b_f]),expression(X[b_s]))
color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF")
Xb_D1 <- ggplot(data = dat) +
theme_bw() +
labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) +
geom_point(aes(x=CA, y=Xb_exp, colour="Xb_exp"), size=3) +
geom_line(aes(x=CA, y=Xb_dw,colour="Xb_dw"), size=1,linetype="solid") +
geom_line(aes(x=CA, y=Xb_f,colour="Xb_f"), size=1,linetype="dotted") +
geom_line(aes(x=CA, y=Xb_s,colour="Xb_s"), size=1,linetype="longdash") +
scale_colour_manual(values=color4, labels=labels) +
theme(legend.title = element_blank(),legend.position = c(0.8, 0.5),
legend.text = element_text(size = 12)) +
scale_x_continuous(limits = c(2,80))
plot(Xb_D1)
ggplot() requires a dataframe not a vector. If you modify your code with:
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
and fix the typo in your Xb_f vector
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Your remaining code will work as is, but the same result could be achieved more simply using the tidyverse approach below. Use pivot_longer to stack the y variables against your x variable.
# Stack the Xb_exp..Xb_s columns into name/value pairs, then let ggplot map
# colour by series name instead of adding one layer per column.
dat %>%
  pivot_longer(Xb_exp:Xb_s) %>%
  ggplot(aes(x = CA, y = value, colour = name)) +
  geom_point() +
  geom_line() +
  scale_colour_manual(values = color4, labels = labels) +
  theme_bw() +
  theme(legend.title = element_blank(), legend.position = c(0.8, 0.5),
        legend.text = element_text(size = 12)) +
  scale_x_continuous(limits = c(2, 80)) +
  labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')'))
Ironically, setting this up with conventional plotting is rather simple:
Given all the data above:
# Line type per series; Xb_exp is drawn as points only, so it has no line type.
linetypes4 <- c(Xb_exp = NA, Xb_dw = "solid", Xb_f = "dotted", Xb_s = "longdash")
# Empty canvas with fixed axis limits and a background grid.
plot(
  NA, type = "n", xlim = c(0, 30), ylim = c(0, 0.8),
  xlab = "Crank position (ºCA)", ylab = bquote('Burn fraction ('~X[b]~')'),
  panel.first = grid()
)
with(dat, {
  # base graphics scales symbols with `cex`; `size` is not a graphical
  # parameter and only produced a warning
  points(x = CA, y = Xb_exp, pch = 19, col = color4["Xb_exp"], cex = 1.5)
  for (n in c("Xb_dw", "Xb_f", "Xb_s")) {
    # look the line type up in linetypes4, the vector defined above
    lines(x = CA, y = get(n), lty = linetypes4[n], col = color4[n], lwd = 2)
  }
})
legend(
  x = "right",
  legend = labels,
  col = color4,
  lty = linetypes4,
  # only the first entry (Xb_exp) gets a point symbol
  pch = c(19, NA, NA, NA),
  box.lwd = 0,
  inset = .02
)
There are some errors in your code suggesting you didn't try what you pasted.
0,0387, in your data should likely be 0.0387, otherwise nothing is right (no data measures several hundreds in there)
c(CA, ... ) should likely be data.frame( CA, ... )
Now, the first problem is you are doing all the heavy lifting yourself, while ggplot sits there with nothing left to do. It was designed to set up colors and line types by group. You however need to transform the data first to take full advantage of that:
library(tidyr)
# dplyr supplies filter(); with only tidyr attached, `filter` resolves to
# stats::filter() and the two subsetting steps at the end fail
library(dplyr)
CA <- c(3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30)
Xb_exp <- c(0.0231,0.0519,0.0839,0.1197,0.1595,0.1996,0.2384,0.2772,0.3153,0.3520,0.3887,0.4254,0.4615,0.4978,0.5339,0.5685,0.6000,0.6279,0.6528,0.6762,0.6974,0.7166,0.7346,0.7516,0.7669,0.7810,0.7940,0.8059)
Xb_dw <- c(0.0160,0.0516,0.0886,0.1259,0.1633,0.2006,0.2377,0.2749,0.3122,0.3496,0.3870,0.4245,0.4617,0.4984,0.5339,0.5678,0.5996,0.6288,0.6551,0.6786,0.6994,0.7179,0.7346,0.7499,0.7641,0.7774,0.7899,0.8018)
Xb_f <- c(0.0021,0.0031,0.0046,0.0067,0.0095,0.0131,0.0177,0.0234,0.0387,0.0483,0.0591,0.0709,0.0832,0.0955,0.1073,0.1181,0.1272,0.1345,0.1398,0.1443,0.1456,0.1468,0.1474,0.1476,0.1477,0.1477,0.1477,0.1477)
Xb_s <- c(0.0139,0.0484,0.0839,0.1192,0.1538,0.1874,0.2200,0.2515,0.2818,0.3108,0.3387,0.3653,0.3908,0.4151,0.4383,0.4604,0.4815,0.5015,0.5206,0.5387,0.5559,0.5722,0.5877,0.6024,0.6164,0.6264,0.6421,0.6040)
dat <- data.frame(CA, Xb_exp, Xb_dw, Xb_f, Xb_s)
# Named lookup vectors: colour for every series, line type for the line series.
color4 <- c("Xb_exp"="#3C5488FF", "Xb_dw"="#DC0000FF", "Xb_f"="#00A087FF", "Xb_s"="#4DBBD5FF")
linetypes <- c( Xb_dw="solid", Xb_f="dotted", Xb_s="longdash" )
# Long format: one row per (CA, series name, value).
dat2 <- pivot_longer( dat, cols=starts_with("Xb_") )
# Xb_exp is kept apart as the point layer; the other series become lines.
dat2.line <- dat2 %>% filter( name != "Xb_exp" )
dat2.point <- dat2 %>% filter( name == "Xb_exp" )
dat2 is now a long data set, with data category as a variable, not with a separate column for each data series. This is how ggplot likes it:
dat2
# A tibble: 112 x 3
CA name value
<dbl> <fct> <dbl>
1 3 Xb_exp 0.0231
2 3 Xb_dw 0.016
3 3 Xb_f 0.0021
4 3 Xb_s 0.0139
5 4 Xb_exp 0.0519
6 4 Xb_dw 0.0516
7 4 Xb_f 0.0031
8 4 Xb_s 0.0484
9 5 Xb_exp 0.0839
10 5 Xb_dw 0.0886
# … with 102 more rows
I then split the data into what later goes to points and what goes to lines, just so the plot code is no uglier than it has to be:
# Lines for the model series (dat2.line) plus points for the measured series
# (dat2.point), sharing one colour legend.
Xb_D1 <- ggplot(data = dat2.line, aes(x = CA, y = value, color = name)) +
  theme_bw() +
  labs(x="Crank position (ºCA)", y= bquote('Burn fraction ('~X[b]~')')) +
  geom_point(data = dat2.point, size = 3) +
  geom_line(aes(col = name, lty = name), size = 1) +
  scale_colour_manual(values = color4) +
  # "none" replaces the deprecated guide = FALSE; the separate linetype legend
  # is hidden because the colour legend is overridden to carry it
  scale_linetype_manual(values = linetypes, guide = "none") +
  guides(
    # override.aes is positional. The shape vector puts the point symbol on
    # the second key (Xb_exp), so the keys are taken to be Xb_dw, Xb_exp,
    # Xb_f, Xb_s; the line types follow `linetypes`, with no line for Xb_exp.
    color = guide_legend(override.aes = list(
      shape = c(NA, 19, NA, NA),
      linetype = c("solid", "blank", "dotted", "longdash")
    ))
  ) +
  theme(legend.title = element_blank(), legend.position = c(0.8, 0.5),
        legend.text.align = 0,
        legend.text = element_text(size = 12)) +
  scale_x_continuous(limits = c(2, 30))
print(Xb_D1)
no need to supply labels
use line type as you would use color with ggplot, its just one more channel that can carry information (or aesthetic as they like to call it over there)
align the legends left, looks nicer that way
more sophisticated is the use of override.aes to take away the points from the legend categories who shouldn't have them.
Now, I was unable to change the order of the data series in the labels; that can be a hassle. Is the order they are in now still OK for you?

How can I add a legend to a ggplot in R? [duplicate]

This question already has answers here:
Plotting two variables as lines using ggplot2 on the same graph
(5 answers)
Closed 2 years ago.
I'm using a data frame in R with 3 variables. I want to plot (ggplot) 2 variables (CMod4X and CMod5X) as two distinct lines, as a function of the 3rd variable (AmtX). In the end I succeeded in creating a graph that suits me, but I fail to include a legend. I have already consulted some other threads here, but the answers don't seem to work for me.
The (artificial) data set looks like this
# Artificial data: an evenly spaced amount grid and two noisy C-index series
# drawn around 0.87 and 0.84.
AmtX <- seq(1, 10001, by = 50)
CMod4X <- rnorm(n = 201, mean = 0.87, sd = 0.01)
CMod5X <- rnorm(n = 201, mean = 0.84, sd = 0.01)
# Three numeric columns named after the vectors.
EvalAmtX <- data.frame(AmtX = AmtX, CMod4X = CMod4X, CMod5X = CMod5X)
I have made the plot like this
pltX <- ggplot(data = EvalAmtX, aes (x = AmtX)) +
geom_line(aes(y = CMod4X), color = "red", show.legend = TRUE) +
geom_line(aes(y = CMod5X), color = "blue", show.legend = TRUE) +
geom_smooth(aes(y = CMod4X), color = "red", se = FALSE, show.legend = TRUE) +
geom_smooth(aes(y = CMod5X), color = "blue", se = FALSE, show.legend = TRUE) +
labs(y = "C-index", x = "Amount (Tau)", title = "model 4 and model 5") +
scale_colour_manual(name = "Models", values = c("CMod4" = "red", "CMod5" = "blue"))
pltX
But this plot won't include a label. I've included my plot below:
What am I doing wrong and what must I do to obtain a plot telling me the red line is CMod4 and the blue line is CMod5?
Thx for your help!!
Leonard
I guess you need to dive a little deeper into how ggplot2 works, since your question is related to the basic set up of your data frame. There are a lot of great resources around on this topic, e.g. this one. Anyway, here are two solutions for putting the legend into your graph.
Solution 1: Rearrange data frame to long format
library(reshape2)
df <- melt(data = EvalAmtX, id.vars = "AmtX")
The data frame now looks like this:
head(df)
# AmtX variable value
# 1 1 CMod4X 0.8772716
# 2 51 CMod4X 0.8524197
# 3 101 CMod4X 0.8686019
# 4 151 CMod4X 0.8638835
# 5 201 CMod4X 0.8674627
# 6 251 CMod4X 0.8729925
Now, plotting is easy. Instead of telling ggplot2 the color of each individual line, you simply give it the information which column in your data frame contains the factor that should determine the color of the lines. So you add another aesthetic (col = variable). This also automatically adds a legend for color.
ggplot(df, aes(x=AmtX, y=value, col = variable)) +
geom_line()
Solution 2: Use a manual color scale
You almost got it right in your code.
pltX <- ggplot(data = EvalAmtX, aes (x = AmtX)) +
geom_line(aes(y = CMod4X, color = "CMod4")) +
geom_line(aes(y = CMod5X, color = "CMod5")) +
geom_smooth(aes(y = CMod4X, color = "CMod4"), se = FALSE) +
geom_smooth(aes(y = CMod5X, color = "CMod5"), se = FALSE) +
labs(y = "C-index", x = "Amount (Tau)", title = "model 4 and model 5") +
scale_colour_manual(name = "Models", values = c(CMod4 = "red", CMod5 = "blue"))
pltX

R - ggplot - How to automatically set point color based on values?

My question is similar to this question.
But I can't transfer it to my own data.
I have a dataframe like this (over 1400 rows):
Code Stationsname Startdatum LAT LON Höhe Area Mean
1 AT0ENK1 Enzenkirchen im Sauwald 03.06.1998 48.39167 13.67111 525 rural 55.76619
2 AT0ILL1 Illmitz 01.05.1978 47.77000 16.76640 117 rural 58.98511
3 AT0PIL1 Pillersdorf bei Retz 01.02.1992 48.72111 15.94223 315 rural 59.47489
4 AT0SON1 Sonnblick 01.09.1986 47.05444 12.95834 3106 rural 97.23856
5 AT0VOR1 Vorhegg bei Kötschach-Mauthen 04.12.1990 46.67972 12.97195 1020 rural 70.65373
6 AT0ZIL1 Ried im Zillertal 08.08.2008 47.30667 11.86389 555 rural 36.76401
Now I want to create a map with ggplot and display the points in different colors based on the value in the Mean column, it reaches from 18 to 98.
Also I would like to change the symbols from a dot to a triangle if the value in the column Höhe is over 700.
Until now I did this:
library(ggmap)
library(ggplot2)
Europe <- get_map(location = "Europe", zoom = 3)
p = ggmap(Europe)
p = p + geom_point(data = Cluster, aes(LON, LAT, color = Mean),
size = 1.5, pch = ifelse(Höhe < 700,'19','17')) +
scale_x_continuous(limits = c(-25.0, 40.00), expand = c(0, 0)) +
scale_y_continuous(limits = c(34.00, 71.0), expand = c(0, 0)) +
scale_colour_gradient ---??
But I don't know how to go on and assign the colors.
I had a discussion with the OP using his data. One of his issues was to make scale_colour_gradient2() work. The solution was to set up a midpoint value. By default, it is set at 0 in the function. In his case, he has a continuous variable that has about 50 as median.
library(ggmap)
library(ggplot2)
library(RColorBrewer)
# Base map tiles for Europe.
Europe2 <- get_map(maptype = "toner-2011", location = "Europe", zoom = 4)
# Stations coloured by Mean on a diverging scale; shape switches on Höhe > 700.
ggmap(Europe2) +
  geom_point(data = Cluster, aes(x = LON, y = LAT, color = Mean, shape = Höhe > 700), size = 1.5, alpha = 0.4) +
  scale_shape_manual(name = "Altitude", values = c(19, 17)) +
  scale_colour_gradient2(low = "#3288bd", mid = "#fee08b", high = "#d53e4f",
                         # centre the scale on the data instead of the default 0;
                         # the argument is `na.rm` -- `rm.na` was silently
                         # ignored, so an NA in Mean made the midpoint NA
                         midpoint = median(Cluster$Mean, na.rm = TRUE))
It seems that the colors do not work that well on the map, given that the values tend to stay close to the median. I think the OP needs to create a new grouping variable with cut() and assign colors to the groups, or use another scale_color type of function. I came up with the following using the RColorBrewer package. I think the OP needs to consider how he wants to use colors to brush up his graphic.
ggmap(Europe2) +
geom_point(data = Cluster, aes(x = LON, y = LAT, color = Mean, shape = Höhe > 700), size = 1.5, alpha = 0.4) +
scale_shape_manual(name = "Altitude", values = c(19, 17)) +
scale_colour_distiller(palette = "Set1")

Plotting baseball pitches as qualitative variable by color

I was thinking of doing this in R but am new to it and would appreciate any help
I have a dataset (pitches) of baseball pitches identified by
'pitchNumber' and 'outcome' e.g S = swinging strike, B = ball, H= hit
etc.
e.g.
1 B ;
2 H ;
3 S ;
4 S ;
5 X ;
6 H; etc.
All I want to do is have a graph that plots them in a line cf BHSSXB
but replacing the letter with a small bar colored to represent the letter, with a legend, and optionally having the pitch number above the color . Somewhat like a sparkline.
Any suggestion on how to implement this much appreciated
And the same graph using ggplot.
Data courtesy of @GavinSimpson.
# One coloured vertical bar (with a point on top) per pitch, coloured by
# outcome; axis titles, background, minor grid and y-axis text are removed.
ggplot(baseball, aes(x = pitchNumber, y = 1, ymin = 0, ymax = 1, colour = outcome)) +
  geom_point() +
  geom_linerange() +
  ylab(NULL) +
  xlab(NULL) +
  scale_y_continuous(breaks = c(0, 1)) +
  # theme()/element_blank() replace the defunct opts()/theme_blank()
  theme(
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank()
  )
Here is a base graphics idea from which to work. First some dummy data:
set.seed(1)
baseball <- data.frame(pitchNumber = seq_len(50),
outcome = factor(sample(c("B","H","S","S","X","H"),
50, replace = TRUE)))
> head(baseball)
pitchNumber outcome
1 1 H
2 2 S
3 3 S
4 4 H
5 5 H
6 6 H
Next we define the colours we want:
## better colours - like ggplot for the cool kids
##cols <- c("red","green","blue","yellow")
cols <- head(hcl(seq(from = 0, to = 360,
length.out = nlevels(with(baseball, outcome)) + 1),
l = 65, c = 100), -1)
then plot the pitchNumber as a height 1 histogram-like bar (type = "h"), suppressing the normal axes, and we add on points to the tops of the bars to help visualisation:
with(baseball, plot(pitchNumber, y = rep(1, length(pitchNumber)), type = "h",
ylim = c(0, 1.2), col = cols[outcome],
ylab = "", xlab = "Pitch", axes = FALSE, lwd = 2))
with(baseball, points(pitchNumber, y = rep(1, length(pitchNumber)), pch = 16,
col = cols[outcome]))
Add on the x-axis and the plot frame, plus a legend:
axis(side = 1)
box()
## note: this assumes that the levels are in alphabetical order B,H,S,X...
legend("topleft", legend = c("Ball","Hit","Swinging Strike","X??"), lty = 1,
pch = 16, col = cols, bty = "n", ncol = 2, lwd = 2)
Gives this:
This is in response to your last comment on @Gavin's answer. I'm going to build off of the data provided by @Gavin and the ggplot2 plot by @Andrie. ggplot() supports the concept of faceting by a variable or variables. Here you want to facet by pitcher and at the pitch limit of 50 per row. We'll create a new variable that corresponds to each row we want to plot separately. The equivalent code in base graphics would entail adjusting mfrow or mfcol in par() and calling separate plots for each group of data.
#150 pitches represents a somewhat typical 9 inning game.
#Thanks to Gavin for sample data.
longGame <- rbind(baseball, baseball, baseball)
#Starter goes 95 pitches, middle relief throws 35, closer comes in for 20 and the glory
longGame$pitcher <- c(rep("S", 95), rep("M", 35), rep("C",20))
#Adjust pitchNumber accordingly
longGame$pitchNumber <- c(1:95, 1:35, 1:20)
#We want to show 50 pitches at a time, so will combine the pitcher name
#with which set of pitches this is
longGame$facet <- with(longGame, paste(pitcher, ceiling(pitchNumber / 50), sep = ""))
#Create the x-axis in increments of 1-50, by pitcher
longGame <- ddply(longGame, "facet", transform, pitchFacet = rep(1:50, 5)[1:length(facet)])
#Convert facet to factor in the right order
longGame$facet <- factor(longGame$facet, levels = c("S1", "S2", "M1", "C1"))
#Thanks to Andrie for ggplot2 function. I change the x-axis and add a facet_wrap
# Same bar-per-pitch plot, with one facet row per value of `facet` and the
# within-facet pitch index on the x-axis.
ggplot(longGame, aes(x = pitchFacet, y = 1, ymin = 0, ymax = 1, colour = outcome)) +
  geom_point() +
  geom_linerange() +
  facet_wrap(~facet, ncol = 1) +
  ylab(NULL) +
  xlab(NULL) +
  scale_y_continuous(breaks = c(0, 1)) +
  # theme()/element_blank() replace the defunct opts()/theme_blank()
  theme(
    panel.background = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_blank()
  )
You can obviously change the labels for the facet variable, but the above code will produce:

Resources