Usage of directlabels in a x,y-scatter plot - r

I am having trouble finding the best way to use directlabels in an x,y-scatterplot.
library(ggplot2)
library(directlabels)
p1 <- ggplot()+
geom_point(data=sites, aes(X, Y, col=Treatment), alpha=1,show_guide=FALSE) +
geom_polygon(data = hulls, aes(X, Y, colour=Treatment, fill=Treatment), lty="dashed", alpha = 0.1, show_guide=FALSE) +
theme_bw() +
#geom_text(data=sites, aes(X,Y, label=Sample, color=Treatment), size=2, show_guide=FALSE) +
theme(axis.line = element_line(colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
#panel.border = element_blank(),
panel.background = element_blank()) +
coord_fixed() +
annotate("text", x=-0.8, y=-0.55, label="Stress = 0.102")
p2 <- p1 + geom_dl(data=sites, aes(X,Y,label=Sample, colour=Treatment, list( cex = 0.6)), method="smart.grid", show_guide=FALSE)
p2
While this is much better than using vjust/hjust in the geom_text() line, it still has some problems:
For example, the lower labels in the far-left triangle are unnecessarily plotted onto the area, and some of the labels in the greenish triangles (R53 and R52, for example) should be placed outside the area. I tried many of the options available in the directlabels package, but smart.grid was the best method so far. Is there something I can do to improve the labelling other than using Photoshop?
Here is my data:
sites <- structure(list(Sample = c("R11", "R12", "R13", "R21", "R22",
"R23", "R31", "R32", "R33", "R41", "R42", "R43", "R51", "R52",
"R53", "R61", "R62", "R63", "R71", "R72", "R73", "D21", "D22",
"D23", "D31", "D32", "D33", "D41", "D42", "D43", "D51", "D52",
"D53", "D61", "D62", "D63"), X = c(-0.0960291142274892, 0.0842575226370376,
0.407178028123943, -0.00597471992061621, 0.340822839455987, 0.430580770893079,
0.432294207388092, 0.239276903425903, 0.202428525444242, 0.219457881130952,
0.325079921807492, 0.362628649343193, 0.0434810152644517, 0.459448269977165,
0.0743637831168788, 0.0629705355701924, 0.269757227770524, 0.0428786936573877,
0.642912005685253, 0.715465545056878, 0.870415884623661, -0.951515101512284,
-0.596539639874245, -0.939843921119596, -0.522589716428025, -0.233436702923438,
-0.176869256803805, -0.340990181400083, -0.320797597759894, -0.246047602937319,
-0.23780172425706, -0.265780334876648, -0.140281405966232, -0.890481118743505,
-0.0757195299492111, -0.185000541672864), Y = c(-0.338951234980643,
-0.177800321292734, -0.324895018639169, -0.0739123902386802,
-0.345019713119787, 0.18359750205563, -0.108461977599771, -0.0275340962048548,
-0.129423067267885, 0.0143496668618822, -0.355317429073615, 0.0866462123708121,
-0.00768509589834154, -0.258685480417501, 0.288488538303651,
-0.363105213242044, -0.54704407232382, 0.0570134733389543, 0.224034690932126,
0.43051937630073, 0.780464857796767, 0.266199859599797, 0.759400919418545,
0.103161222551216, -0.178046911975698, -0.116472972897424, -0.0289716671368776,
-0.146023515436316, -0.284526289182701, 0.0764403706902978, 0.150831452033757,
0.226303952103805, -0.226670040280512, -0.15689508307977, 0.268053395023382,
0.279936100906792), Treatment = c("A", "A", "A", "B", "B", "B",
"C", "C", "C", "D", "D", "D", "E", "E", "E", "F", "F", "F", "G",
"G", "G", "H", "H", "H", "I", "I", "I", "J", "J", "J", "K", "K",
"K", "L", "L", "L")), .Names = c("Sample", "X", "Y", "Treatment"
), row.names = c(NA, -36L), class = "data.frame")
hulls <- structure(list(Sample = c("R13", "R11", "R12", "R22", "R21",
"R23", "R31", "R33", "R32", "R42", "R41", "R43", "R52", "R51",
"R53", "R62", "R61", "R63", "R71", "R72", "R73", "D23", "D21",
"D22", "D32", "D31", "D33", "D42", "D41", "D43", "D53", "D52",
"D51", "D61", "D63", "D62"), X = c(0.407178028123943, -0.0960291142274892,
0.0842575226370376, 0.340822839455987, -0.00597471992061621,
0.430580770893079, 0.432294207388092, 0.202428525444242, 0.239276903425903,
0.325079921807492, 0.219457881130952, 0.362628649343193, 0.459448269977165,
0.0434810152644517, 0.0743637831168788, 0.269757227770524, 0.0629705355701924,
0.0428786936573877, 0.642912005685253, 0.715465545056878, 0.870415884623661,
-0.939843921119596, -0.951515101512284, -0.596539639874245, -0.233436702923438,
-0.522589716428025, -0.176869256803805, -0.320797597759894, -0.340990181400083,
-0.246047602937319, -0.140281405966232, -0.265780334876648, -0.23780172425706,
-0.890481118743505, -0.185000541672864, -0.0757195299492111),
Y = c(-0.324895018639169, -0.338951234980643, -0.177800321292734,
-0.345019713119787, -0.0739123902386802, 0.18359750205563,
-0.108461977599771, -0.129423067267885, -0.0275340962048548,
-0.355317429073615, 0.0143496668618822, 0.0866462123708121,
-0.258685480417501, -0.00768509589834154, 0.288488538303651,
-0.54704407232382, -0.363105213242044, 0.0570134733389543,
0.224034690932126, 0.43051937630073, 0.780464857796767, 0.103161222551216,
0.266199859599797, 0.759400919418545, -0.116472972897424,
-0.178046911975698, -0.0289716671368776, -0.284526289182701,
-0.146023515436316, 0.0764403706902978, -0.226670040280512,
0.226303952103805, 0.150831452033757, -0.15689508307977,
0.279936100906792, 0.268053395023382), Treatment = c("A",
"A", "A", "B", "B", "B", "C", "C", "C", "D", "D", "D", "E",
"E", "E", "F", "F", "F", "G", "G", "G", "H", "H", "H", "I",
"I", "I", "J", "J", "J", "K", "K", "K", "L", "L", "L")), .Names = c("Sample",
"X", "Y", "Treatment"), row.names = c(NA, -36L), class = "data.frame")

directlabels is not really designed for labeling individual points on scatterplots (optimal label placement is NP-hard; see https://en.wikipedia.org/wiki/Automatic_label_placement).
That being said, you may want to try writing your own Positioning Method:
# Skeleton for a custom directlabels Positioning Method.
# point.df: the data handed to the method; it is printed so its contents can
#   be inspected.
# ...: further arguments; not used in this skeleton.
# Returns whatever your_label_computation_function() produces (presumably a
# data frame of label positions). That function is a placeholder you are
# expected to write yourself.
your.method <- function(point.df, ...){
print(point.df)
# Pause here to explore point.df interactively before computing labels.
browser()
label.df <- your_label_computation_function(point.df)
label.df
}
p2 <- p1 + geom_dl(data=sites, aes(X,Y,label=Sample, colour=Treatment, list( cex = 0.6)), method="your.method", show_guide=FALSE)

Related

How to deal with uneven spaces between bars in stacked barplot generated with ggplot2?

I have created a stacked barplot with ggplot2. I noticed that the gaps between the bars are not the same. This is strange: the problem keeps recurring on my particular data set, but I can't seem to reproduce it with an example data frame.
Here is what I have done:
a <- c("A", "B", "C", "D", "E", "F", "G", "H", "A", "B", "C","C", "D", "E", "F", "I", "J", "K", "L", "M", "N", "C", "C", "C", "C", "C", "O", "P", "R", "S", "T", "U", "W")
b <- c("kk", "ll", "ss", "ff", "kk", "ll", "ss", "ff", "kk", "ll", "ss", "ff", "kk", "ll", "ss", "ll", "ss", "ff", "kk", "ll", "ss", "ss", "ss", "ss", "ss", "ss", "kk", "ll", "ss", "ff", "kk", "ss", "ss")
df <- data.frame(a, b)
ggplot(df)+
geom_bar(aes(y = fct_infreq(a), fill = b, ), color = "black")+
theme_classic()+
labs(x = "\n", y = "\n")+
guides(fill=guide_legend(title = "Subunit"))+
ggtitle("Variation in the number of a in b \n\n")+
theme(plot.title = element_text(color="black", size=14, face="bold.italic", hjust = 0.5))+
scale_x_continuous(breaks = seq(0, 20, by = 1))+
expand_limits(x = c(0, 20))+
scale_fill_brewer(palette="Dark2")
This exemplary code returns this plot:
As you can see, everything looks fine.
This is what I get, when I run my set through the same code:
The red arrows indicate the gaps, that are noticeably thinner than other gaps.
Please advise how to correct this.

Efficient way to use geom_boxplot with specified quantiles and long data

I have a dataset with calculated quantiles for each department and country. It looks like this:
df <- structure(list(quantile = c("p5", "p25", "p50", "p75", "p95",
"p5", "p25", "p50", "p75", "p95", "p5", "p25", "p50", "p75",
"p95", "p5", "p25", "p50", "p75", "p95"), value = c(6, 12, 20,
33, 61, 6, 14, 23, 38, 63, 7, 12, 17, 26, 50, 7, 12, 18, 26,
51), country = c("A", "A", "A", "A", "A", "B", "B", "B", "B",
"B", "A", "A", "A", "A", "A", "B", "B", "B", "B", "B"), dep = c("D",
"D", "D", "D", "D", "D", "D", "D", "D", "D", "I", "I", "I", "I",
"I", "I", "I", "I", "I", "I"), kpi = c("F", "F", "F", "F", "F",
"F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F",
"F", "F")), row.names = c(NA, -20L), class = c("tbl_df", "tbl",
"data.frame"))
Now, I would like to build a boxplot for each department comparing countries and using p5/p95 instead of min/max similar to this plot but without outliers (hence, Train_number would be countries):
The corresponding code to this plot is (from question ggplot2, geom_boxplot with custom quantiles and outliers):
ggplot(MyData, aes(factor(Stations), Arrival_Lateness,
fill = factor(Train_number))) +
stat_summary(fun.data = f, geom="boxplot",
position=position_dodge(1))+
stat_summary(aes(color=factor(Train_number)),fun.y = q, geom="point",
position=position_dodge(1))
I tried to derive a solution from the code above and the provided answers. Unfortunately, I do not know how to pass the necessary values from the variables quantile and value to ggplot(). Is there an argument in the stat_summary() function that I missed and could use? Or is there another simple solution?
From the data you have provided, you can generate the following plot:
library(ggplot2)
# Summarise a numeric vector as five named statistics: the 5th, 25th, 50th,
# 75th and 95th percentiles, labelled ymin, lower, middle, upper and ymax.
# x: numeric vector.
# Returns a named numeric vector of length five.
f <- function(x) {
  box_stats <- c("ymin", "lower", "middle", "upper", "ymax")
  cut_points <- c(0.05, 0.25, 0.5, 0.75, 0.95)
  setNames(quantile(x, probs = cut_points), box_stats)
}
# Draw one box per department from the five statistics returned by f(),
# with a separate panel for each country.
# NOTE(review): df already stores precomputed quantiles (p5 ... p95) in
# `value`, so f() takes quantiles of those five numbers instead of using them
# as-is. Confirm this is intended; geom_boxplot(stat = "identity") on data
# reshaped to one column per quantile may be the more direct route.
ggplot(df, aes(factor(dep), value)) +
stat_summary(fun.data = f, geom="boxplot",
position=position_dodge(1))+
facet_grid(.~country, scales="free")
I don't know whether it is correct or not.

Error for graph saving loop - Must be length 1 (a summary value)

I'm trying to create and save graphs for individual organizations. I keep getting an error that says "Error in summarise_impl(.data, dots) :
Column Improved must be length 1 (a summary value), not 0"
The graphs work when I combine all the organizations together, so I'm not sure what is going on here!
Starting with this data:
library(ggpubr)
structure(list(Organization = c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
), imp_imp20_Improved = c(55.6, 100, 50, 0, 57.1, 0, 0, 45, 50,
60, 100, 50, 66.7, 66.7, 33.3, 0, 50, 0, 50)), row.names = c(NA,
-19L), class = c("tbl_df", "tbl", "data.frame"))
org<- c("A", "B", "C", "D", "E", "F",
"G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S"
)
This is my code for the graph loop:
# Render one pie chart per organization in org and save each as a TIFF file.
# NOTE(review): the reported error ("must be length 1 (a summary value),
# not 0") is consistent with filter() returning zero rows for some value of i,
# which leaves summarise() with a length-0 column. Confirm that every entry
# of org occurs in indicators_ong$Organization.
for(i in org) {
# The device is opened before the data is prepared, so an error further down
# skips dev.off() and leaves this device open.
tiff(paste0("//graphs/",i,"_graph11.tiff"), units="in", width=3.5, height=3, res=300)
# Keep the current organization's row(s) and derive the two pie shares.
indicator_graph1<- indicators_ong %>%
filter(Organization==i) %>%
summarise(Improved = imp_imp20_Improved,
"Not Improved" = 100-imp_imp20_Improved)%>%
# Reshape to long form: the share names go into the "group" column.
gather(key="group") %>%
arrange(desc(group))
# Slice labels: group name followed by its percentage.
labs <- paste0(indicator_graph1$group, "\n (", indicator_graph1$value,"%)")
z <- ggpie(indicator_graph1,"value",label=labs, fill= "group", color = "black", palette = c("darkgoldenrod1","azure3"), lab.pos = "in", lab.font = c(3,"black"),title="Improve 20")+
theme(legend.position ="none")+
font("title", size=10, hjust=0.5)
# Explicit print() is used to draw the plot on the open device inside the loop.
print(z)
dev.off()
}

Cross table graph similar to excel

I need to put a 3D kind of graph similar to attached image created in excel. I am not sure whether we can do this in ggplot?
structure(list(Name = c("A", "B", "C", "D"), P = c(15089, NA,
NA, 43083), Q = c(1589, NA, NA, 18120), R = c(93751, NA, 4709,
211649), S = c(34167, 1323, 1520, 82378), T = c(8831, NA, 4544,
15157)), .Names = c("Name", "P", "Q", "R", "S", "T"), row.names = c(NA,
4L), class = "data.frame")
I have worked with this following code.
ggplot(a, aes(x = a$A, y = a$Amount, fill = a$B)) +
geom_col(position = 'stack') +
geom_text(aes(label = a$Amount), position = position_stack(vjust = .5),
color='grey25', size=2) + coord_flip()
The problem is that the labels shown on top of the graph are overlapping.
Updated:
Actually, I thought I needed to reshape the data to achieve this kind of graph, though I am not sure. So I reshaped the data as shown below:
structure(list(AA = c("A", "A", "A", "A", "A", "B", "B", "B",
"B", "B", "C", "C", "C", "C", "C", "D", "D", "D", "D", "D"),
BB = c("P", "Q", "R", "S", "T", "P", "Q", "R", "S", "T",
"P", "Q", "R", "S", "T", "P", "Q", "R", "S", "T"), Amount = c(15089,
1589, 93751, 34167, 8831, NA, NA, NA, 1323, NA, NA, NA, 4709,
1520, 4544, 43083, 18120, 211649, 82378, 15157)), .Names = c("AA",
"BB", "Amount"), row.names = c(NA, 20L), class = "data.frame")
I tried the following code to achieve this, but the labels still overlap:
ggplot(a, aes(x = AA, y = Amount, fill = BB)) +
geom_col(position = 'stack')+
geom_text(aes(label = Amount),
position = position_stack(vjust = 0.2),
color='grey25',
size=2) +
coord_flip()
Also, when I pass this plot to ggplotly for Shiny, the graph does not appear in the dashboard.

Distance matrix from proxy package into a dataframe

I have a distance matrix from this code
I would like to convert the distanceMatrix into a dataframe. I use this:
library(reshape2)
melt(distanceMatrix)
or
as.data.frame(distanceMatrix)
and I receive this error:
Error in as.data.frame.default(x[[i]], optional = TRUE) :
cannot coerce class "crossdist" to a data.frame
Data
distanceMatrix <-
structure(c(1.1025096478618, 2.48701192612548, 1.81748937453859,
0.68928345814907, 3.4194165172611, 1.39021901561926, 0.696405607391678,
1.09511501308162, 0.733071057157832, 0.894074317336616, 0.274302486490285,
2.00790247099612, 2.03702210657379, 0.790303515570192, 0.76573433957666,
1.0571870370502, 2.08607605440225, 1.18691928628668, 0.950127106192438,
1.90183580897689, 1.06791623757733, 1.95426617861089, 1.28359907050968,
0.639828869115434, 1.2125883228325, 1.17334881171837, 2.86424081724093,
4.29579721901031, 2.48106485650871, 2.47992202769688, 4.78094585963798,
3.08269692108197, 2.51054397059837, 2.78351950724781, 1.9552995309483,
1.02672164296738, 2.04833064878561, 2.40777909325915, 1.37714830319657,
2.54290296394426, 1.99486295133513, 1.42661425293529, 2.75973709232752,
0.632464187558431, 2.64349038129557, 3.04900615202494, 1.34349249286485,
0.66548291586285, 1.14201671902258, 2.20314775706901, 3.027560891124,
2.58016468923376, 0.701837450761437, 1.82650318310107, 1.17318969224049,
0.898229996978744, 2.04804918964036, 0.510384590416117, 1.20067408397491,
0.479351971313752, 0.900264653292786, 2.17660319096498, 1.11774249289539,
1.50312712068438, 2.35380779446751, 0.74568873241509, 0.860144296532242,
1.49609968893816, 1.27903173482324, 2.30242237929782, 0.546178045451667,
0.696804454166844, 1.57330737370915, 3.18912158434627, 2.63481498585198,
0.743304574607114, 1.2813138290548, 0.278296684614969), .Dim = c(26L,
3L), .Dimnames = list(c("a", "b", "c", "d", "e", "f", "g", "h",
"i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u",
"v", "w", "x", "y", "z"), c("A", "B", "C")), class = "crossdist", method = "Euclidean", call = proxy::dist(x = voterIdealPoints,
y = candidateIdealPoints))
Use
# as.dataframe() does not exist; the base function is as.data.frame().
# unclass() drops the "crossdist" class that as.data.frame() cannot coerce,
# leaving a plain matrix whose row and column names are carried over.
as.data.frame(unclass(distanceMatrix))

Resources