How to draw a table using ggplot2 - r

I have a dataframe of football matches in la liga and I want to make a table where each row and column is a team name and each tile shows what the result was in the game between the two teams of row and column
I've tried geom_tile and ggplot2 in many different ways, but the closest I've come is the code below:
# Asker's attempt: one tile per match of the 2012 season, home team on x and
# visitor on y, each tile labelled with the full-time score FT.
library(dplyr)
library(engsoccerdata)
spain = as.data.frame(spain)
library(ggplot2)
game1 = filter(spain, Season == "2012")
# y is the visitor name with its level order reversed via reorder()/desc();
# fill follows FT, so colour varies with the score rather than with the row.
ggplot(game1, aes(home, reorder(visitor,desc(visitor)), fill = FT)) +
geom_tile(color="white", size=1.5, stat="identity", height=1, width=1) +
# NOTE(review): `palette` receives a 60-element vector of colour names here;
# this is the asker's attempt at alternating blue/white, not a working stripe.
scale_fill_brewer(palette = rep(c("blue","white"),30)) +
geom_text(data=game1, aes(home, visitor, label = FT), size=rel(3)) +
scale_x_discrete(position="top") + scale_y_discrete(expand = c(0, 0)) +
xlab("Home") + ylab("Visitor") +
ggtitle("Laliga 2012")
I need the rows to be colored by parity (odd rows white and even rows blue).
I also want the team names to be inside the tiles.
Overall, I want my table to look like the first photo here, but with striped rows.
Can anyone help me with modifications to my code?

You can change the row-colors by specifying a new factor just for fill. Consider e.g. this
# Number the visitor levels in the same reversed order the y axis uses, then
# flag the even-numbered rows so the fill alternates from one row to the next.
row_index <- as.numeric(reorder(game1$visitor, desc(game1$visitor)))
# Stored as a column of game1 so the fill is looked up in the plot data
# instead of in a free-standing vector.
game1$fillframe <- as.factor(ifelse(row_index %% 2 == 0, 1, 0))
ggplot(game1, aes(home, reorder(visitor, desc(visitor)), fill = fillframe)) +
  geom_tile(color = "white", size = 1.5, stat = "identity", height = 1, width = 1) +
  # level "0" (odd rows) -> white, level "1" (even rows) -> lightblue
  scale_fill_manual(values = c("white", "lightblue")) +
  geom_text(data = game1, aes(home, visitor, label = FT), size = rel(3)) +
  scale_x_discrete(position = "top") + scale_y_discrete(expand = c(0, 0)) +
  xlab("Home") + ylab("Visitor") +
  ggtitle("Laliga 2012") +
  # "none" (lowercase) is the documented value for hiding the legend
  theme(legend.position = "none",
        axis.text.x = element_text(angle = 315))
To include the axis labels in the tiles, you would have to expand the axis (since it is categorical, again by specifying additional factor levels). At that point, though, you would be better off just building the table with R Markdown or HTML.

Related

Set the width and gap in geom_bar in a large dataset with a lot of unique values

I have the dataframe below:
# 2187 rows: `res` is a random permutation of 1..2187 and `freq` is a whole
# number obtained by flooring a uniform draw on [95, 105).
res <- sample.int(n = 2187, size = 2187)
freq <- floor(runif(n = 2187, min = 95, max = 105))
t <- data.frame(res = res, freq = freq)
and I'm trying to create a bar chart based on it. Even though I use the width and color arguments, I still cannot create space between the bars, and the bars come out black instead of the selected fill.
# Asker's reproducer: one bar per row of `t`, with the res == 101 bar given a
# different fill. Kept as posted because the surrounding text discusses its output.
library(ggplot2)
# scales is loaded for `comma`, used as the y-axis label formatter below
require(scales)
# NOTE(review): `width` is passed to ggplot() here rather than to geom_bar()
ggplot(t,width=0.1)+
geom_bar(aes(x=res,y=freq ,fill = (t$res==101)),
color = "black",stat = "identity") +
scale_fill_manual(values=c("darkblue", "lightblue"), guide = F) +
theme_classic(base_size = 16)+ theme(legend.position = "none")+
# NOTE(review): a discrete x scale is requested although res is numeric
scale_x_discrete(breaks = seq(80, 115, 5))+ scale_y_continuous(labels = comma)
Note that this code works nice for a dataset with much fewer unique values like:
# Comparison case from the question: only nine distinct values, each repeated
# 2,000,000 times, tabulated with table() before plotting.
fac<-factor(rep(c(80,85,100,100.5,100.7,101,101.5,110,105),2000000))
res<-data.frame(fac)
new<-data.frame(table(res))
require(scales)
# Same plotting calls as the large example, applied to the tabulated counts.
# NOTE(review): x and fill refer to `res`; confirm that `new` really has a
# column of that name after data.frame(table(res)).
ggplot(new,width=0.1)+
geom_bar(aes(x=res,y=Freq ,fill = (new$res==101)),
color = "black",stat = "identity") +
scale_fill_manual(values=c("darkblue", "lightblue"), guide = F) +
theme_classic(base_size = 16)+ theme(legend.position = "none")+
scale_x_discrete(breaks = seq(80, 115, 5))+ scale_y_continuous(labels = comma)
Maybe I am completely wrong, but if I understand correctly, the OP wants to reproduce the second chart from scratch using a sample of random numbers instead of already tabulated counts.
To create a histogram / bar chart, we only need a vector of random numbers (wrapped in a data.frame for ggplot) and can let geom_bar() do the counting. In addition, a particular bar will be highlighted.
By using floor(), the random numbers are already binned but are still considered as continuous by ggplot(). Therefore, they need to be turned into factor.
# create data
set.seed(123L) # ensure random data are reproducible
t <- data.frame(res = floor(runif(2187, 95, 105)))
library(ggplot2)
# geom_bar() counts the rows per x value, so no precomputed frequency column is
# needed; as.factor() turns the floored values into discrete categories.
ggplot(t) +
  aes(x = as.factor(res), fill = res == 101) +
  geom_bar() +
  theme_classic(base_size = 16) +
  # guide = "none" hides the legend (guide = FALSE is deprecated)
  scale_fill_manual(values = c("darkblue", "lightblue"), guide = "none") +
  xlab("res") +
  ylab("freq")
Edit: geom_histogram()
There is an alternative approach using geom_histogram().
geom_histogram() does all steps in one go: The binning (no need to use floor()) as well as counting and plotting:
set.seed(123L) # ensure random data are reproducible
t2 <- data.frame(res = runif(2187, 95, 105)) # floor() omitted here
# The histogram bins the raw values itself: unit-wide bins from 95 to 105,
# closed on the left. The fill still flags the values whose floor is 101.
ggplot(t2) +
  aes(x = res, fill = floor(res) == 101) +
  geom_histogram(breaks = seq(95, 105, 1), closed = "left") +
  theme_classic(base_size = 16) +
  # guide = "none" hides the legend (guide = FALSE is deprecated)
  scale_fill_manual(values = c("darkblue", "lightblue"), guide = "none") +
  xlab("res") +
  ylab("freq")
Here, the breaks parameter was used to specify the bin boundaries explicitly. Alternatively, the number of bins or the width of the bins can be specified. This gives flexibility to play around with the parameters.
Edit 2
The OP has asked about the case where the random numbers are uniformly distributed between 100 and 1015. With an adjustment to the sequence of breaks,
set.seed(123L) # ensure random data are reproducible
t3 <- data.frame(res = runif(2187, 100, 1015))
# Unit-wide bins across the whole 100..1015 range (over 900 bins in total).
ggplot(t3) +
  aes(x = res, fill = floor(res) == 101) +
  geom_histogram(breaks = seq(100, 1015, 1), closed = "left") +
  theme_classic(base_size = 16) +
  # guide = "none" hides the legend (guide = FALSE is deprecated)
  scale_fill_manual(values = c("darkblue", "lightblue"), guide = "none") +
  xlab("res") +
  ylab("freq")
returns
This chart contains over 900 bars for each bin of width 1 which aren't all visible depending on the screen resolution as already explained by Jon Spring.
Therefore, it might be more suitable to reduce the number of bins, e.g., to 100 bins:
# Same data, but with the bin count fixed at 100 instead of explicit breaks.
ggplot(t3) +
  aes(x = res, fill = floor(res) == 101) +
  geom_histogram(bins = 100L) +
  theme_classic(base_size = 16) +
  # guide = "none" hides the legend (guide = FALSE is deprecated)
  scale_fill_manual(values = c("darkblue", "lightblue"), guide = "none") +
  xlab("res") +
  ylab("freq")
Please note that 101 is still highlighted in the lower left corner.
Edit -- added alternate solutions at bottom.
If you have over 2,000 bars, and each one has a black outline 1 pixel wide on each side, that'll take something on the order of 6,000 horizontal pixels (ignoring anti-aliasing) to see one with a different fill. Most screens have much lower resolution than that.
If you must use bars, and must show every value, one option would be to drop the outline with color = NA and set width = 1 (as a term in the geom_col/geom_bar call) so there's no distracting blank space between bars. Even then, the different color at res == 101 is only visible at certain resolutions. (That might vary on device settings and anti-aliasing.)
ggplot(t) +
  # color = NA removes the outline; width = 1 is passed to the geom itself
  geom_col(aes(x = res, y = freq, fill = (res == 101)),
           color = NA, width = 1) +
  # guide = "none" hides the legend; avoids the reassignable shorthand `F`
  scale_fill_manual(values = c("darkblue", "lightblue"), guide = "none") +
  theme_classic(base_size = 16) +
  # breaks at 0, 500, ..., 2000 plus the highlighted value 101
  scale_x_continuous(breaks = c(500 * 0:4, 101))
If you must show all 2000 points, but want to highlight one, it might make sense to use a different geom that spreads the data out to use more of the available space.
For instance, we might use geom_point or geom_jitter to plot all the coordinates in 2d space. Here, I highlight the element with res == 101. I use arrange to make sure the special dot gets plotted last so that it doesn't get occluded.
library(dplyr)
# Sorting on the logical puts the res == 101 row last, so it is drawn after
# (on top of) all other points; fill and size both flag that same row.
ggplot(
  arrange(t, res == 101),
  aes(x = res, y = freq, fill = res == 101, size = res == 101)
) +
  geom_jitter(shape = 21, stroke = 0.1)
Or we might plot the data as a line, highlighting the special dot on its own:
# Grey line through all rows, a single point for the res == 101 row, and a
# y axis forced to include 0.
ggplot(t, aes(x = res, y = freq)) +
  geom_line(colour = "gray70") +
  geom_point(data = subset(t, res == 101)) +
  expand_limits(y = 0)

Need help on customizing my Odds Ratio (ggplot)!

I'm assigned to create an Odds of Ratio ggplot in R. The plot I'm supposed to create is given below.
Given plot
My job is to figure out codes which creates the exact plots in R. I've done most parts. Here is my work.
My work
Before jumping into my code, it is very important that I am not using the correct values for boxOdds, boxCILow, and boxCIHigh since I have not figured out the correct values. I wanted to figure out codes for ggplot first so I can enter the right values as soon as I find them.
This is the code I used:
# Asker's attempt at the odds-ratio plot: one point per label with a horizontal
# confidence-interval bar. Kept as posted because the question is about its output.
library(ggplot2)
# Labels listed in the order they should appear from top to bottom.
boxLabels = c("Females/Males", "Student-Centered Prac. (+1)", "Instructor Quality (+1)", "Undecided / STM",
"non-STEM / STM", "Pre-med / STM", "Engineering / STM", "Std. test percentile (+10)",
"No previous calc / HS calc", "College calc / HS calc")
# yAxis counts down from length(boxLabels) to 1, so the first label is paired
# with the largest value.
df <- data.frame(yAxis = length(boxLabels):1,
boxOdds =
c(2.23189, 1.315737, 1.22866, 0.8197413, 0.9802449, 0.9786673, 0.6559005, 0.5929812, 0.6923759, 1.3958275),
boxCILow =
c(.7543566,1.016,.9674772,.6463458,.9643047,.864922,.4965308,.3572142, 0.4523759, 1.2023275),
boxCIHigh =
c(6.603418,1.703902,1.560353,1.039654,.9964486,1.107371,.8664225,.9843584, 0.9323759, 1.5893275)
)
# NOTE(review): y is mapped to the free character vector boxLabels, not to the
# yAxis column of df; this is the ordering problem described in the question.
# The outer parentheses print the plot as part of the assignment.
(p <- ggplot(df, aes(x = boxOdds, y = boxLabels)) +
geom_vline(aes(xintercept = 1), size = 0.75, linetype = 'dashed') +
geom_errorbarh(aes(xmax = boxCIHigh, xmin = boxCILow), size = .5, height =
0, color = 'gray50') +
geom_point(size = 3.5, color = 'orange') +
theme_bw() +
theme(panel.grid.minor = element_blank()) +
scale_x_continuous(breaks = seq(0,7,1) ) +
ylab('') +
xlab('Odds Ratio') +
# the label is an empty string, so this annotation adds no visible text
annotate(geom = 'text', y =1.1, x = 3.5, label ='',
size = 3.5, hjust = 0) + ggtitle('Estimated Odds of Switching') +
theme(plot.title = element_text(hjust = 0.5, size = 30),
axis.title.x = (element_text(size = 15))) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
)
# prints the plot a second time
p
Where I'm stuck at:
Removing small vertical lines on the beginning and end of each row's CI). I was not sure what it's called so I was having hard time looking it up. SOLVED
I'm also stuck at coloring specific rows in different colors.
The last part I'm stuck on is assigning the proper order of the variables on the y-axis. As you can see in my code (the "boxLabels" part), I have put all the variables in the order of the given plot, but R did not keep that order. So the variable located at the very top is "Undecided / STM" instead of "Females/Males".
How do I decrease the space from 0 to 1? SOLVED
Any help would be appreciated!
First, you probably want ggstance::geom_pointrangeh. Second, you could define colors by yAxis right at the beginning; to group some factors, create a new variable group. Third, and related to your data, you could assign factor labels. Fourth, remove coord_trans as suggested by @beetroot.
Assign factor labels
# yAxis counts down 10..1 while boxLabels is listed top-to-bottom, so value k
# belongs to rev(boxLabels)[k]. Ascending levels keep each row attached to its
# own label and place level 10 (the first label) at the top of the y axis.
dat$yAxis <- factor(dat$yAxis, levels = 1:10, labels = rev(boxLabels))
Create groups
# Every row starts in group 1; the rows whose label is listed below are moved
# to group 2 or group 3. The group drives the point/range colour in the plot.
group_two_labels <- c("Females/Males", "Undecided / STM", "non-STEM / STM",
                      "Pre-med / STM")
group_three_labels <- c("Student-Centered Prac. (+1)",
                        "No previous calc / HS calc",
                        "College calc / HS calc")
dat$group <- 1
dat$group[dat$yAxis %in% group_two_labels] <- 2
dat$group[dat$yAxis %in% group_three_labels] <- 3
Colors
# One colour per group, handed to scale_color_manual() in the plot below;
# presumably matched to groups 1, 2, 3 in that order.
colors <- c("#860fc2", "#fc691d", "black")
Plot
library(ggplot2)
library(ggstance)
# Horizontal point-ranges (odds ratio with its interval), one per label,
# coloured by group.
ggplot(dat, aes(x = boxOdds, y = yAxis, colour = as.factor(group))) +
  # dashed reference line at an odds ratio of 1
  geom_vline(aes(xintercept = 1), size = 0.75, linetype = "dashed") +
  geom_pointrangeh(aes(xmin = boxCILow, xmax = boxCIHigh), size = 0.5,
                   show.legend = FALSE) +
  geom_point(size = 3.5, show.legend = FALSE) +
  theme_bw() +
  scale_colour_manual(values = colors) +
  theme(panel.grid.minor = element_blank()) +
  # x axis runs from 0 to the largest value found in columns 2 to 4 of dat
  scale_x_continuous(breaks = seq(0, 7, 1), limits = c(0, max(dat[2:4]))) +
  labs(x = "Odds Ratio", y = "", title = "Estimated Odds of Switching") +
  # empty label: adds no visible text
  annotate(geom = "text", y = 1.1, x = 3.5, label = "",
           size = 3.5, hjust = 0) +
  theme(plot.title = element_text(hjust = 0.5, size = 20)) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank())
Gives
Data
# Ten rows: yAxis counts down from 10 to 1; boxOdds is the point estimate and
# boxCILow / boxCIHigh are the lower and upper interval bounds.
dat <- data.frame(
  yAxis = 10:1,
  boxOdds = c(2.23189, 1.315737, 1.22866, 0.8197413, 0.9802449,
              0.9786673, 0.6559005, 0.5929812, 0.6923759, 1.3958275),
  boxCILow = c(0.7543566, 1.016, 0.9674772, 0.6463458, 0.9643047,
               0.864922, 0.4965308, 0.3572142, 0.4523759, 1.2023275),
  boxCIHigh = c(6.603418, 1.703902, 1.560353, 1.039654, 0.9964486,
                1.107371, 0.8664225, 0.9843584, 0.9323759, 1.5893275)
)

ggplot: Grouped, adjacent bars of variable width

I'm trying to produce barplot in which the width and height of the bars both convey information: the height is the number of hours spent on a task, the widths respectively indicate the perceived aptitude and importance associated with the task. I've managed to produce this monstrosity:
It's functional but horrible. I would really like to place the bars alongside one another (rather than overlaying them), so that each activity is represented by two touching bars of the same height (= time spent) but different widths and colors. I've been trying to pass a width argument to this plot:
but setting 'aes(width = widthVariable)' gives me overlapping bars (similar to the first image) and the following warning message:
"position_dodge requires non-overlapping x intervals".
Is there a way of grouping my bars by activity, displaying them adjacently and varying their widths?
Here's a bit of the df I'm using:
# Long-format sample: four activities, each listed once under 'Importance' and
# once under 'Competence'. `value` is the bar height, `width` the bar width.
molten <- data.frame(
  Activity = rep(c("Administration", "Working with Colleagues",
                   "Use of Social Media", "Leadership Role"), times = 2),
  variable = rep(c("Importance", "Competence"), each = 4),
  value = rep(c(3.02, 1.71, 2.39, 3.32), times = 2),
  width = c(3.48, 3.52, 4.01, 2.98,
            3.85, 2.34, 4.87, 3.81)
)
The second plot is this:
# Dodged bars: the two `variable` series side by side for each Activity.
ggplot(molten, aes(x=Activity, y=value, fill=variable)) + geom_bar(stat='identity',position = 'dodge')
and the first in something like this:
# No dodging here; each bar's width is taken from the width column divided by 10.
ggplot(molten, aes(x=Activity, y=value, fill=variable)) + geom_bar(stat='identity',aes(width = width/10))
Although I actually made it using slightly simpler dataframe, which I melt()-ed into the one above.
Not a perfect solution, but you can create a new column that combines Activity and Variable, use that as the x, and fill by variable:
# One x category per Activity/variable pair, so every bar gets its own slot.
# Base R is used so the snippet runs without dplyr being attached.
molten$activity <- paste(molten$Activity, molten$variable)
ggplot(molten, aes(x = activity, y = value, width = width / 10)) +
  geom_bar(stat = "identity", aes(fill = variable)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # label each combined category with its plain Activity name
  scale_x_discrete(breaks = molten$activity, labels = molten$Activity)
I've made some progress, building on iod's idea of mutating the original data frame.
I've made two separate geom-bars and nudged them into one another. I'd really love it if every bar touched its neighbor, but position_nudge() only takes a constant. I'm still getting into ggplot so the most obvious solution in my mind is a recycled 'nudge' vector, akin to barplot()'s color argument.
tldr: Little gaps between bars but reasonably pretty now.
# One x category per Activity/variable pair (base R, no dplyr needed).
molten$activity <- paste(molten$Activity, molten$variable)
# Height columns: each row carries its value in the column for its own
# variable and 0 in the other, so each geom_bar() layer draws one series only.
# Columns are referenced through molten$ and matched row by row, so this does
# not depend on attached variables or on the rows being sorted by variable.
molten$importanceBars <- ifelse(molten$variable == "Importance", molten$value, 0)
molten$competenceBars <- ifelse(molten$variable == "Competence", molten$value, 0)
ggplot(molten, aes(x = activity, width = width / 6, fill = variable)) +
  # the two layers are shifted by constant amounts to sit next to each other
  geom_bar(stat = "identity", aes(y = importanceBars),
           position = position_nudge(x = -0.2 - 0.35)) +
  geom_bar(stat = "identity", aes(y = competenceBars),
           position = position_nudge(x = -0.35)) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # only the 'Competence' categories get an axis label (the Activity name)
  scale_x_discrete(breaks = molten$activity[molten$variable == "Competence"],
                   labels = molten$Activity[molten$variable == "Competence"])
I've done it - had to draw every bar as a rectangle, adjusting xmin and xmax accordingly.
# Final version: every bar is drawn as an explicit rectangle with its own
# xmin/xmax.
# NOTE(review): value, width and Activity are used as free variables, so the
# data columns are presumably attached or copied out beforehand; the hard-coded
# 28 and 14 suggest the real data has 28 rows (14 activities) — confirm.
wadjust = 5.5
gap = 0.0
# Start from unit-wide slots centred on 1, 2, 3, ... (shrunk by `gap`).
minv = 1:length(value) - 0.5 + gap/2
maxv = minv + 1 -gap
# Odd-indexed bars keep their right edge and get a left edge one scaled width
# away; even-indexed bars keep their left edge and get a right edge one scaled
# width away, so the two bars of each pair meet at the shared slot boundary.
minv[1:length(minv)%%2!=0] = maxv[1:length(maxv)%%2!=0] - width[order(Activity)][(1:length(width))%%2!=0]/wadjust
maxv[1:length(maxv)%%2==0] = minv[1:length(minv)%%2==0] + width[order(Activity)][(1:length(width))%%2==0]/wadjust
# Constant shift applied to every rectangle.
minv = minv +0.525
maxv = maxv +0.525
# NOTE(review): minvord / maxvord are not used in the lines that follow.
minvord = minv[order(Activity)]
maxvord = maxv[order(Activity)]
ggplot(molten, aes(x=activity,width = width/6, fill = variable)) +
# rectangles from 0 up to the value, coloured alternately with two fixed colours
geom_rect(xmin = minv,xmax = maxv, ymin = rep(0,28), ymax = value[order(Activity)],
fill = rep(c('#e1de00','#e84619'),len=28)) +
theme(axis.text.x = element_text(angle = 45,hjust=1)) +
theme(plot.margin=unit(c(1,0.5,1,2),"cm")) +
scale_x_discrete(breaks=molten$activity[molten$variable=='Importance'],
labels=molten$Activity[molten$variable=='Importance'][order(Activity[molten$variable=='Importance'])]) +
scale_y_continuous(labels = 0:3, breaks = 0:3, limits = c(0,3)) +
xlab('Activity') + ylab('Hours Spent') +
labs(title = 'Perceived Importance & Competence\nAssociated with Clerical Duties') +
theme(panel.grid.major.x = element_blank()) +
# white vertical lines drawn over the plot at the computed x positions
geom_vline(xintercept = (maxv[1:length(maxv)%%2!=0]+minv[1:length(minv)%%2==0])/2,col='white') +
geom_vline(xintercept = seq(len = 14, by = 2),col = 'white')

R: how to plot a line plot with obvious distinction between different time periods (line with dots)

I have a data consisting of 14 different time periods where I would like to plot it in a way that viewer can see where the 14 periods lie. I used to achieve this through using different colors
# 14 colours: the seven-colour "Set2" palette repeated once.
mycolors <- rep(brewer.pal(n = 7, name = "Set2"), times = 2)
ggplot(derv, aes(x = Date, y = derv, colour = Season)) +
  geom_point() +
  # three flat reference lines: zero and the two cut-off values
  geom_abline(intercept = 0, slope = 0) +
  geom_abline(intercept = neg.cut, slope = 0) +
  geom_abline(intercept = pos.cut, slope = 0) +
  scale_colour_manual(values = mycolors) +
  labs(title = " Derivative", y = "Derivative")
I used the above code to produce such a plot, but in a new report I can only use a black-and-white scheme, so I am wondering how I can draw this plot in R. I have thought of using alternating line types for the 14 different time periods, but I do not know how to achieve this with ggplot. I have tried the following code, but it does not work: the line type stays the same.
# Asker's black-and-white attempt, kept as posted.
# NOTE(review): no aesthetic in this plot maps linetype to a variable, so the
# manual linetype scale at the end has nothing to apply its 14 values to.
ggplot(derv, aes(x=Date, y=derv)) +
geom_line() +
geom_abline(intercept = 0, slope = 0) +
geom_abline(intercept = neg.cut, slope = 0) +
geom_abline(intercept = pos.cut, slope = 0) +
#scale_color_manual(values = mycolors) + ggtitle("S&P 500 (Smoothed) Derivative") + ylab("Derivative")+
scale_linetype_manual(values = c("dashed","solid","dashed","solid","dashed","solid","dashed",
"solid","dashed","solid","dashed","solid","dashed","solid"))
If you need to show where season changes, couldn't you just use an alternating linetype or alternating point marker? See below for two examples. You can play around with different point markers and linetypes to get the look you want. For more on creating linetypes, see this SO answer. For more on additional point markers (beyond the standard one available through pch), see, for example, here and here. I've also included a way to add the three horizontal lines with less code.
# Fake data: 14 consecutive groups of 20 points each along one wavy curve
x <- seq(0, 2 * pi, length.out = 20 * 14)
dat <- data.frame(time = x, y = sin(x) + sin(5 * x) + cos(2 * x) + cos(7 * x),
                  group = 0:(length(x) - 1) %/% 20)
# Option 1: alternate the point marker and its size from one group to the next
ggplot(dat, aes(time, y)) +
  # the three horizontal reference lines in a single call
  geom_hline(yintercept = c(-0.5, 0, 0.5), colour = "grey50") +
  geom_point(aes(shape = factor(group), size = factor(group))) +
  scale_shape_manual(values = rep(c(3, 15), 7)) +
  scale_size_manual(values = rep(c(2, 1.5), 7)) +
  # "none" hides the legends (FALSE is deprecated for this purpose)
  theme_bw() + guides(shape = "none", size = "none")
# Option 2: alternate the line type (1 and 2) from one group to the next
ggplot(dat, aes(time, y, linetype = factor(group))) +
  geom_hline(yintercept = c(-0.5, 0, 0.5), colour = "grey50") +
  geom_line(size = 0.8) +
  scale_linetype_manual(values = rep(1:2, 7)) +
  theme_bw() + guides(linetype = "none")

ggplot2: plotting error bars for groups without overlap

I wish to show the effect of two pollutants on the same outcome and was happy with the plot when there are no groups. Now when I want to plot the same data for all-year and stratified by season, I either get overlaps of error bars or three separate panels which are not optimal for my need.
Sample data could be accessed from here:
https://drive.google.com/file/d/0B_4NdfcEvU7LV2RrMjVyUmpoSDg/edit?usp=sharing
As an example with the following code I create a plot for all-year:
# All-year rows only; one dodged point with its interval per pollutant pair.
ally <- subset(df, seas == "allyear")
ggplot(ally, aes(x = set, y = pinc, ymin = lcinc, ymax = ucinc,
                 color = pair, shape = pair)) +
  # points and ranges share the same dodge width so they stay aligned
  geom_point(position = position_dodge(width = 0.5), size = 2.5) +
  geom_linerange(position = position_dodge(width = 0.5), size = 0.5) + theme_bw() +
  geom_hline(aes(yintercept = 0)) +
  labs(colour = "Pollutant", shape = "Pollutant", y = "Percent Increase", x = "") +
  # plotmath expressions give subscripted axis labels
  scale_x_discrete(labels = c(NO2 = expression(NO[2]),
                              NOx = expression(NO[x]),
                              Coarse = expression(Coarse),
                              PM25 = expression(PM[2.5]),
                              PM10 = expression(PM[10]))) +
  theme(plot.title = element_text(size = 12, face = "bold")) +
  # text size is a number of points, so it is given as numeric 12, not "12"
  theme(axis.title = element_text(size = 12), axis.text = element_text(size = 12))
But when I add facet_grid(. ~ seas) I will have three separate panels. How can I display this data for all year and divided by seasons in one panel?
Either color or shape needs to be used to represent season, not pollutant.
Then this should come close to what you want:
library(ggplot2)
# All seasons in one panel: colour encodes the season, shape the pollutant pair.
ggplot(df, aes(x = set, y = pinc, ymin = lcinc, ymax = ucinc,
               color = seas, shape = pair)) +
  # points and ranges share the same dodge width so they stay aligned
  geom_point(position = position_dodge(width = 0.5), size = 2.5) +
  geom_linerange(position = position_dodge(width = 0.5), size = 0.5) + theme_bw() +
  geom_hline(aes(yintercept = 0)) +
  labs(colour = "Season", shape = "Pollutant", y = "Percent Increase", x = "") +
  # plotmath expressions give subscripted axis labels
  scale_x_discrete(labels = c(NO2 = expression(NO[2]),
                              NOx = expression(NO[x]),
                              Coarse = expression(Coarse),
                              PM25 = expression(PM[2.5]),
                              PM10 = expression(PM[10]))) +
  theme(plot.title = element_text(size = 12, face = "bold")) +
  # text size is a number of points, so it is given as numeric 12, not "12"
  theme(axis.title = element_text(size = 12), axis.text = element_text(size = 12))
I do think that facetting gives you better graphs here --
if you want to focus attention on the comparison between seasons for each pollutant, use this (facet_grid(~pair, labeller=label_both)):
if you want to focus attention on the comparison between pollutants for each season, use this (facet_grid(~seas, labeller=label_both)):

Resources