How can I adjust the legend box? - r

This is my code:
score <- tapply(exams$writing.score
, list(exams$gender,
exams$race.ethnicity
)
, mean)
plot1 <- barplot(score
, beside = TRUE
, main = "Comparison of Writing Score"
, col = c("red", "lightyellow")
, xlab = "Race Ethnicity Group"
, ylab = "Average Writing Score"
, legend.text = c("Female", "Male")
, args.legend = list(x = "topright")
)
As I want to make the box: Female and Male smaller so it does not hide the bar behind. How can I make the legend box smaller? I tried to move it to the top right of the chart, but I do not think it moves.

You could use the argument cex. Here is a reproducible example:
data <- matrix(c(1,2,3,4,5,6,7,8,9,10), ncol = 5)
colnames(data) <- paste0("V", 1:5)
rownames(data) <- c('A','B')
# Normal
barplot(data, col = 1:nrow(data))
legend("topright", legend = rownames(data), pch = 15, col = 1:nrow(data))
# With cex
barplot(data, col = 1:nrow(data))
legend("topright", legend = rownames(data), pch = 15, col = 1:nrow(data), cex = 0.5)
Created on 2022-10-21 with reprex v2.0.2

Another option (in addition to using cex as #Quinten shows) is to also change the inset to move the legend outside of the plot boundary, as well as using par to specify the parameters for margins, etc.
par(mar = c(5, 4, 4, 8),
xpd = TRUE)
# Normal
barplot(df, col = 1:nrow(df))
legend(
"topright",
inset = c(-0.1, 0),
# Create legend outside of plot
legend = rownames(df),
pch = 15,
col = 1:nrow(df),
cex = 0.8
)
Data
df <- structure(c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), dim = c(2L, 5L), dimnames = list(
c("Female", "Male"), c("V1", "V2", "V3", "V4", "V5")))

It doesn't move because you already are at the very top. To move the top upwards and let the legend follow, expand ylim.
Also try if you like setting the legend horizontal and remove the bty (boxtype). Don't choose the cex too small.
barplot(score
, beside=TRUE
, main="Comparison of Writing Score"
, col=c("red", "lightyellow")
, xlab="Race Ethnicity Group"
, ylab="Average Writing Score"
, legend.text=c("Female", "Male")
, args.legend=list(x="topright", cex=.9, horiz=TRUE, bty='n')
, ylim=c(0, max(score)*1.2)
)
Data:
score <- structure(c(96.8, 95.2, 100, 100, 89.7, 89.2, 81.4, 81, 85.1,
82), dim = c(2L, 5L), dimnames = list(c("1", "2"), c("A", "B",
"C", "D", "E")))

Related

change second y axis color in base R

Change secondary line axis color changes send color for ggplot, but I chose to go with base R, and would like to be able to select the second y axis color.
I have the following data:
df = structure(list(A = c("Q4-17", "Q1-18", "Q2-18", "Q3-18", "Q4-18",
"Q1-19", "Q2-19", "Q3-19", "Q4-19", "Q1-20", "Q2-20", "Q3-20",
"Q4-20", "Q1-21", "Q2-21", "Q3-21", "Q4-21", "Q1-22", "Q2-22",
"Q3-22"), B = c(69.45, 71.1, 74.94, 73.87, 93.61, 91.83,
95.38, 109.8, 133.75, 125.26, 118.22, 145.65, 144.9757185, 155.3464032,
184.367033, 179.8121721, 187.235487, 189.1684376, 184.3864519,
161.5300056), C = c(70.73, 71.73, 74.33, 73.27,
95.94, 94.38, 95.38, 109.8, 115.32, 116.92, 115.9, 113.87, 106.108147,
96.84273563, 111.5150869, 110.1228567, 110.7448835, 194.9684376,
187.7241152, 167.7665553), D = c(260.3, 216.02, 203.72,
203.52, 300.96, 320.77, 330.5, 413.52, 436.7, 474.96, 463.6,
501.87, 493.8865461, 497.1760767, 514.9903459, 503.7601267, 510.8362938,
614.9915546, 603.5761107, 593.660831), E = c(NA,
NA, NA, NA, NA, NA, NA, NA, 39.237, 35.621, 32.964, NA, 152.137,
140.743023, 167.809, 170.877, 117.517, 102.691723, 88.8, 76.2445528
)), class = "data.frame", row.names = c(NA, -20L))
df = df %>%
rowwise() %>%
mutate(sums = sum(D,E, na.rm = TRUE))
df = df[8:nrow(df),]
and this to generate my plot
x <- seq(1,nrow(df),1)
y1 <- df$B
y2 <- df$D
par(mar = c(5, 4, 4, 4) + 0.3)
plot(x, y1, col = "#000000",
type = "l",
main = "title",
ylim = c(0, max(df[,2:3])),
ylab = "Y1",
xlab = "",
xaxt = "n")
axis(1,
at = seq(from = 13, by = -4, length.out = 4),
labels = df$A[seq(from = 13, by = -4, length.out = 4)])
lines(x, df$C, lty = "dashed", col = "#adadad", lwd = 2)
par(new = TRUE)
plot(x, df$sums, col = "#ffa500",
axes = FALSE, xlab = "", ylab = "", type = "l")
axis(side = 4, at = pretty(range(y2)),
ylim = c(0,max(df[,3:5], na.rm = TRUE)),
col = "#00aa00") # Add colour selection of 2nd axis
par(new = TRUE)
plot(x, df$D , col = "#0000ff",
axes = FALSE, xlab = "", ylab = "", type = "l", lwd = 1)
mtext("y2", side = 4, line = 3)
but this does not colour my complete second y axis, nor labels, nor title
does any one have any suggestions to be able to set entire y2 axis to be #00AA00 - ticks, labels, and title?

How to automate positioning of inner labels within a stacked barplot?

I frequently have to produce stacked bar plots with labels. The way I've been coding the labels is very time intensive and I wondered if there was a way to code things more efficiently. I would like the labels to be centered on each section of the bars. I'd prefer base R solutions.
stemdata <- structure(list( #had to round some nums below for 100% bar
A = c(7, 17, 76),
B = c(14, 10, 76),
C = c( 14, 17, 69),
D = c( 4, 10, 86),
E = c( 7, 17, 76),
F = c(4, 10, 86)),
.Names = c("Food, travel, accommodations, and procedures",
"Travel itinerary and dates",
"Location of the STEM Tour stops",
"Interactions with presenters/guides",
"Duration of each STEM Tour stop",
"Overall quality of the STEM Tour"
),
class = "data.frame",
row.names = c(NA, -3L)) #4L=number of numbers in each letter vector#
# attach(stemdata)
print(stemdata)
par(mar=c(0, 19, 1, 2.1)) # this sets margins to allow long labels
barplot(as.matrix(stemdata),
beside = F, ylim = range(0, 10), xlim = range(0, 100),
horiz = T, col=colors, main="N=29",
border=F, las=1, xaxt='n', width = 1.03)
text(7, 2, "14%")
text(19, 2, "10%")
text(62, 2, "76%")
text(7, 3.2, "14%")
text(22.5, 3.2, "17%")
text(65.5, 3.2, "69%")
text(8, 4.4, "10%")
text(55, 4.4, "86%")
text(3.5, 5.6, "7%")
text(15, 5.6, "17%")
text(62, 5.6, "76%")
text(9, 6.9, "10%")
text(55, 6.9, "86%")
Staying base R as OP requested, we can easily automate the inner label positioning (i.e. x coordinates) within a small function.
xFun <- function(x) x/2 + c(0, cumsum(x)[-length(x)])
Now, it's good to know that barplot invisibly trows the y coordinates, we can catch them by assignment (here byc <- barplot(.)).
Eventually, just assemble coordinates and labels in data frame labs and "loop" through the text calls in a sapply. (Use col="white" or col=0 for white labels as wished in the other question.)
# barplot
colors <- c("gold", "orange", "red")
par(mar=c(2, 19, 4, 2) + 0.1) # expand margins
byc <- barplot(as.matrix(stemdata), horiz=TRUE, col=colors, main="N=29", # assign `byc`
border=FALSE, las=1, xaxt='n')
# labels
labs <- data.frame(x=as.vector(sapply(stemdata, xFun)), # apply `xFun` here
y=rep(byc, each=nrow(stemdata)), # use `byc` here
labels=as.vector(apply(stemdata, 1:2, paste0, "%")),
stringsAsFactors=FALSE)
invisible(sapply(seq(nrow(labs)), function(x) # `invisible` prevents unneeded console output
text(x=labs[x, 1:2], labels=labs[x, 3], cex=.9, font=2, col=0)))
# legend (set `xpd=TRUE` to plot beyond margins!)
legend(-55, 8.5, legend=c("Medium","High", "Very High"), col=colors, pch=15, xpd=TRUE)
par(mar=c(5, 4, 4, 2) + 0.1) # finally better reset par to default
Result
Data
stemdata <- structure(list(`Food, travel, accommodations, and procedures` = c(7,
17, 76), `Travel itinerary and dates` = c(14, 10, 76), `Location of the STEM Tour stops` = c(14,
17, 69), `Interactions with presenters/guides` = c(4, 10, 86),
`Duration of each STEM Tour stop` = c(7, 17, 76), `Overall quality of the STEM Tour` = c(4,
10, 86)), class = "data.frame", row.names = c(NA, -3L))
Would you consider a tidyverse solution?
library(tidyverse) # for dplyr, tidyr, tibble & ggplot2
stemdata %>%
rownames_to_column(var = "id") %>%
gather(Var, Val, -id) %>%
group_by(Var) %>%
mutate(id = factor(id, levels = 3:1)) %>%
ggplot(aes(Var, Val)) +
geom_col(aes(fill = id)) +
coord_flip() +
geom_text(aes(label = paste0(Val, "%")),
position = position_stack(0.5))
Result:

In `gplots` in `R`, how can we adjust the position of the x-label ticks in the `heatmap.2` function?

I am trying to duplicate a plot found here on pg. 4:
The reproducible code for it is:
require(devtools)
install_git("https://github.com/marchion/git.switchBox", subdir="switchBox")
require(switchBox)
require(gplots)
data(trainingData)
classifier <- SWAP.KTSP.Train(matTraining, trainingGroup)
kappa <- SWAP.KTSP.Statistics(matTraining, classifier)
mat <- t(1*kappa$comparisons)
rownames(mat) <- gsub(">", "\n more express than\n", rownames(mat))
heatmap.2(mat,
scale="none", Rowv=F, Colv=F, dendrogram="none",
trace="none", key=FALSE,
col=c("lightsteelblue2", "pink3"),
labCol=toupper(paste(trainingGroup, "Prognosis")),
sepwidth=c(0.075,0.075), sepcolor="black",
rowsep=1:ncol(kappa$comparisons),
colsep=1:nrow(kappa$comparisons),
lmat=rbind( c(0, 3), c(2, 1), c(0, 4) ), lhei=c(0.1, 5, 0.5), lwid=c(0.15, 5),
mar=c(7.5, 12), cexRow=0.85, cexCol=0.9)
If you notice in the plot above, the x-labels are slightly off-center to the left. Is there a command inside the heatmap.2 function that can shift each label to the right?
You have to specify argument adjCol (c(1, 0.5)) would give you wanted result (c(1, 0) would move it to the left and c(1, 1) would move it more to the right).
Code (using OPs provided packages and data):
heatmap.2(
mat,
adjCol = c(1, 0.5),
scale = "none", Rowv = FALSE, Colv = FALSE, dendrogram = "none",
trace = "none", key = FALSE,
col = c("lightsteelblue2", "pink3"),
labCol = toupper(paste(trainingGroup, "Prognosis")),
sepwidth = c(0.075,0.075), sepcolor = "black",
rowsep = 1:ncol(kappa$comparisons),
colsep = 1:nrow(kappa$comparisons),
lmat = rbind( c(0, 3), c(2, 1), c(0, 4) ),
lhei = c(0.1, 5, 0.5), lwid = c(0.15, 5),
mar = c(7.5, 12), cexRow = 0.85, cexCol = 0.9,
)
Result:

point labels in R scatter plot

I have the following toy data
Xeafield (1999) (PER) ,1,0.5745375408
Lancelot et al. (1989),0.9394939494,0.4733405876
LemLM Xeafield (1997) (TER) ,0.6265126513,0.2959738847
Almore and Flemin (2001) (KER),0.4218921892,0.5745375408
Malek et al. (2006) (HER) ,0.4125412541,1
Charles and Osborne (2003),0.0308030803,0.1414581066
And trying a simple 2D plot in R with points labeled using the 1st column.
pdf('data.pdf', width = 7, height = 8)
d1 <- read.csv("data.csv", header=F, dec=".",sep = ",")
plot(as.matrix(d1[,2]), as.matrix(d1[,3]), col= "blue", pch = 19, cex = 1, lty = "solid", lwd = 2, ylim=c(0,1), xaxt = "n",yaxt = "n")
text(as.matrix(d1[,2]), as.matrix(d1[,3]), labels=as.matrix(d1[,1]), cex= 0.7, pos=3)
x_axis_range <- c(0,1)
x_axis_labels <- c("Small","Large")
axis(1,at = x_axis_range, labels = x_axis_labels)
y_axis_range <- c(0,1)
y_axis_labels <- c("Slow","Fast")
axis(2,at = y_axis_range, labels = y_axis_labels)
title(xlab="Memory", ylab="Speed",cex.lab=1)
dev.off()
But the plot doesn't come out right. A few issues I have: the axis label are messed up (it shows as.matrix ..., instead of the label I specified), and the margin of the plot is to small that node labels are cutoff. I am new to using R and plot, appreciate your help.
A simple solution for your problem is to define axis labels and axis ranges in the plot function.
d1 <- structure(list(V1 = structure(c(6L, 3L, 4L, 1L, 5L, 2L), .Label = c("Almore and Flemin (2001) (KER)",
"Charles and Osborne (2003)", "Lancelot et al. (1989)", "LemLM Xeafield (1997) (TER) ",
"Malek et al. (2006) (HER) ", "Xeafield (1999) (PER) "), class = "factor"),
V2 = c(1, 0.9394939494, 0.6265126513, 0.4218921892, 0.4125412541,
0.0308030803), V3 = c(0.5745375408, 0.4733405876, 0.2959738847,
0.5745375408, 1, 0.1414581066)), .Names = c("V1", "V2", "V3"
), class = "data.frame", row.names = c(NA, -6L))
# Use xlab and ylab for axis labels and
# and xlim and ylim for setting axis ranges
plot(as.matrix(d1[,2]), as.matrix(d1[,3]), col= "blue", pch = 19,
cex = 1, lty = "solid", lwd = 2, ylim=c(-0.1,1.1), xaxt = "n",yaxt = "n",
xlab="Memory", ylab="Speed",cex.lab=1, xlim=c(-0.1,1.1))
text(as.matrix(d1[,2]), as.matrix(d1[,3]),
labels=as.matrix(d1[,1]), cex= 0.7, pos=3)
x_axis_range <- c(0,1)
x_axis_labels <- c("Small","Large")
axis(1,at = x_axis_range, labels = x_axis_labels)
y_axis_range <- c(0,1)
y_axis_labels <- c("Slow","Fast")
axis(2,at = y_axis_range, labels = y_axis_labels)

R and rect.hclust: rectangle on labels in dendrograms

I am building a dendrogram for the first time and the rectangles around clusters are drawn on top of the labels. Do you know how modify the positioning of these labels in order to avoid this overlap?
Here you can find a working example of my code:
mydata <- c(9.45, 10.54, 10.36, 10.46, 10.78, 10.1, 11.13)
mydata.matrix <- matrix(mydata, nrow = 1, ncol = 7)
colnames(mydata.matrix) <- c("a", "b", "c", "d", "e", "f", "g")
rownames(mydata.matrix) <- c("X")
d <- dist(mydata.matrix["X", ], method = "euclidean")
fit <- hclust(d, method="ward.D")
nodePar <- list(lab.cex = 0.6, pch = c(NA, 19), cex = 0.7, col = "blue")
plot(as.dendrogram(fit), xlab = "", sub="", ylab = "Euclidean distance",
main = "Dendrogram", nodePar = nodePar)
rect.hclust(fit, k=2, border="red")
And here is the plot from the code above:
In particular I would like to have the red rectangles contain entirely the labels of the leaves of the dendrogram.
Thank you!
You should use the rect.dendrogram function from the dendextend package.
For example:
mydata <- c(9.45, 10.54, 10.36, 10.46, 10.78, 10.1, 11.13)
mydata.matrix <- matrix(mydata, nrow = 1, ncol = 7)
colnames(mydata.matrix) <- c("a", "b", "c", "d", "e", "f", "g")
rownames(mydata.matrix) <- c("X")
d <- dist(mydata.matrix["X", ], method = "euclidean")
fit <- hclust(d, method="ward.D")
nodePar <- list(lab.cex = 0.6, pch = c(NA, 19), cex = 0.7, col = "blue")
dend <- as.dendrogram(fit)
plot(dend, xlab = "", sub="", ylab = "Euclidean distance",
main = "Dendrogram", nodePar = nodePar)
library(dendextend)
rect.dendrogram(dend , k=2, border="red")
And you will get:
In general, for plotting dendrograms, you might find the following quick introduction to dendextend useful (or look at the more lengthy version).

Resources