I'm plotting a graph of odds-ratios and confidence intervals, and would like to have space in between the different categories of variables on the y-axis, but I'm having trouble doing that.
Here is some code that I have tried
# Read the tab-separated results table and give its five columns fixed names.
final_matrix <- read.table("matrix", sep = "\t", header = TRUE)
thecolnames <- c("Coefficients", "CI2.5", "CI97.5", "Demographics", "Categories")
colnames(final_matrix) <- thecolnames

# Freeze the row order of the categories as the factor level order so the
# y-axis follows the table instead of being sorted alphabetically. unique()
# guards against repeated labels, which factor() rejects as levels.
final_matrix$Categories <- factor(
  final_matrix$Categories,
  levels = unique(final_matrix$Categories)
)

# Odds ratio (point) with its interval (horizontal bar) for every category,
# coloured by demographic group, with a dashed reference line at 1.
# Columns are referenced by bare name inside aes(): `final_matrix$...` bypasses
# ggplot2's data handling and breaks as soon as the plot is facetted.
ggplot(
  final_matrix,
  aes(x = Coefficients, y = Categories, color = Demographics)
) +
  geom_vline(xintercept = 1, size = .25, linetype = "dashed") +
  # CI2.5 is the lower bound and CI97.5 the upper bound of the interval.
  geom_errorbarh(
    aes(xmin = CI2.5, xmax = CI97.5),
    size = .5, height = .2, color = "gray50"
  ) +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  ylab("") +
  xlab("Odds ratio") +
  ggtitle("Association between Coefficients and Categories")
Final matrix looks like:
structure(list(Coefficients = c(1, 1.030507438, 1.044036099,
1, 0.9733293067, 1, 2.33127416, 2.402926091, 2.422669367, 2.395563322,
1, 1.005581977, 1.006818225, 1, 1.021092986, 1.019769848, 0.9455416249,
1.010520081, 1, 0.9912335776, 0.988674891, 1.006406292, 1, 1.002857188,
0.9963459983, 1.005356995, 1, 0.9990484426, 1, 1.025390984, 1,
1.017962091, 1, 0.9805242864), CI2.5 = c(1, 0.9913713797, 0.9998913413,
1, 0.9298311216, 1, 2.189771461, 2.268230465, 2.284251227, 2.257308777,
1, 0.9630167448, 0.9481349102, 1, 0.9800985942, 0.9841991871,
0.9056186125, 0.9576109398, 1, 0.9562607091, 0.9532485042, 0.9664533176,
1, 0.9477868449, 0.9509813538, 0.9519718969, 1, 0.9724519214,
1, 0.9631787377, 1, 0.9547918105, 1, 0.9186602875), CI97.5 = c(1,
1.071188458, 1.090129827, 1, 1.018862369, 1, 2.481920741, 2.545620424,
2.569475192, 2.542285613, 1, 1.050028588, 1.069133651, 1, 1.063802043,
1.056626094, 0.9872245912, 1.066352515, 1, 1.027485492, 1.025417859,
1.048010913, 1, 1.061127346, 1.043874672, 1.061735847, 1, 1.026372378,
1, 1.091621553, 1, 1.0853118, 1, 1.046554302), Demographics = structure(c(5L,
5L, 5L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L), .Label = c("Age", "Disease", "Education", "Employment", "Financial",
"Health", "Insurance", "Race"), class = "factor"), Categories = structure(1:34, .Label = c("Extremely Difficult",
"Somewhat Difficult", "Not Difficult", "No Disease", "Disease",
"Overall Health (<=3)", "Overall Health (4)", "Overall Health (5)",
"Overall Health (6)", "Overall Health (7)", "18-29", "30-64",
"65+", "Full-Time", "Part-Time", "Unemployed/Homemaker ", "Disability",
"Retired", "Less than HS", "HS Grad", "Some College / Vocational",
"College Degree / PhD", "Private", "Other Public", "Medicaid",
"Medicare", "Non-Hispanic", "Hispanic", "Non-White", "White ",
"Non-Black", "Black", "Non-Other", "Other"), class = "factor")), row.names = c(NA,
-34L), class = "data.frame")
What I get is something like
a --o---
b -o-
c --o--
d -o-
whereas, a and b might be part of one "demographic" category, and c and d might be a part of another "demographic category". Although I can get them to be different colors, I'm struggling to space them into something like:
Demographic 1
a --o---
b -o-
Demographic 2
c --o--
d -o-
I am not sure whether this is exactly what you are after. Try running it and let me know in a comment whether it is what you want.
final_matrix<-structure(list(Coefficients = c(1, 1.030507438, 1.044036099, 1, 0.9733293067, 1, 2.33127416, 2.402926091, 2.422669367, 2.395563322, 1, 1.005581977, 1.006818225, 1, 1.021092986, 1.019769848, 0.9455416249, 1.010520081, 1, 0.9912335776, 0.988674891, 1.006406292, 1, 1.002857188, 0.9963459983, 1.005356995, 1, 0.9990484426, 1, 1.025390984, 1, 1.017962091, 1, 0.9805242864), CI2.5 = c(1, 0.9913713797, 0.9998913413, 1, 0.9298311216, 1, 2.189771461, 2.268230465, 2.284251227, 2.257308777, 1, 0.9630167448, 0.9481349102, 1, 0.9800985942, 0.9841991871, 0.9056186125, 0.9576109398, 1, 0.9562607091, 0.9532485042, 0.9664533176, 1, 0.9477868449, 0.9509813538, 0.9519718969, 1, 0.9724519214, 1, 0.9631787377, 1, 0.9547918105, 1, 0.9186602875), CI97.5 = c(1, 1.071188458, 1.090129827, 1, 1.018862369, 1, 2.481920741, 2.545620424, 2.569475192, 2.542285613, 1, 1.050028588, 1.069133651, 1, 1.063802043, 1.056626094, 0.9872245912, 1.066352515, 1, 1.027485492, 1.025417859, 1.048010913, 1, 1.061127346, 1.043874672, 1.061735847, 1, 1.026372378, 1, 1.091621553, 1, 1.0853118, 1, 1.046554302),
Demographics = structure(c(5L, 5L, 5L, 2L, 2L, 6L, 6L, 6L, 6L, 6L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L),
.Label = c("Age", "Disease", "Education", "Employment", "Financial", "Health", "Insurance", "Race"), class = "factor"),
Categories = structure(1:34,
.Label = c("Extremely Difficult", "Somewhat Difficult", "Not Difficult", "No Disease", "Disease", "Overall Health (<=3)", "Overall Health (4)", "Overall Health (5)", "Overall Health (6)", "Overall Health (7)", "18-29", "30-64", "65+", "Full-Time", "Part-Time", "Unemployed/Homemaker ", "Disability", "Retired", "Less than HS", "HS Grad", "Some College / Vocational", "College Degree / PhD", "Private", "Other Public", "Medicaid", "Medicare", "Non-Hispanic", "Hispanic", "Non-White", "White ", "Non-Black", "Black", "Non-Other", "Other"), class = "factor")),
row.names = c(NA, -34L), class = "data.frame")
# Make sure we are working with a plain data frame before plotting.
final_matrix <- data.frame(final_matrix)

# One facet per demographic group, stacked in a single column, so the groups
# are visually separated. scales = "free" gives every panel its own x and y
# axis, so each panel lists only its own categories.
ggplot(
  final_matrix,
  aes(x = Coefficients, y = Categories, color = Demographics, group = Categories)
) +
  geom_vline(xintercept = 1, size = .25, linetype = "dashed") +
  # Bare column names are required here: `final_matrix$CI2.5` would hand the
  # full-length vector to every panel instead of the panel's own rows.
  # CI2.5 is the lower bound and CI97.5 the upper bound.
  geom_errorbarh(
    aes(xmin = CI2.5, xmax = CI97.5),
    size = .5, height = .2, color = "gray50"
  ) +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  ylab("") +
  xlab("Odds ratio") +
  ggtitle("Association between Coefficients and Categories") +
  facet_wrap(Demographics ~ ., scales = "free", ncol = 1)
# Same facetted plot, but only the y-axis is freed: every panel lists its own
# categories while all panels share one x-axis, so odds ratios stay directly
# comparable across demographic groups.
ggplot(
  final_matrix,
  aes(x = Coefficients, y = Categories, color = Demographics)
) +
  geom_vline(xintercept = 1, size = .25, linetype = "dashed") +
  # CI2.5 is the lower bound and CI97.5 the upper bound of the interval.
  geom_errorbarh(
    aes(xmin = CI2.5, xmax = CI97.5),
    size = .5, height = .2, color = "gray50"
  ) +
  geom_point(size = 1.5) +
  theme(panel.grid.minor = element_blank()) +
  labs(
    y = "",
    x = "Odds ratio",
    title = "Association between Coefficients and Categories"
  ) +
  facet_wrap(Demographics ~ ., scales = "free_y", ncol = 1)
Related
How would I go about showing in these histograms the average, median, and standard deviation of the data.
Here is my histogram code:
# Draw one productivity histogram per condition, titled with the condition
# name. The four data frames share the same productivity column.
productivity_col <- "Productivity...Productivité"
conditions <- list(PRE = PRE, DBN = DBN, DBG = DBG, POST = POST)
for (condition in names(conditions)) {
  hist(
    conditions[[condition]][[productivity_col]],
    main = condition,
    xlab = "Productivity"
  )
}
And here is its output
dput(head(DBN))
structure(list(Participant.Code = c("AE1_02", "AE1_02", "AE1_02",
"AE1_02", "AE2_08", "AE2_08"), Condition = structure(c(5L, 5L,
5L, 5L, 5L, 5L), levels = c("", "DBG", "DBG DBN", "DBG POST",
"DBN", "DBN DBG", "DBN POST", "POST", "PRE", "PRE DBG", "PRE DBN"
), class = "factor"), Start.time = c("3-9-22 8:39:27", "3-9-22 16:27:44",
"3-10-22 8:48:34", "3-10-22 16:09:33", "3-18-22 8:36:15", "3-18-22 17:26:13"
), Stiffness...Raideur = c(7L, 7L, 7L, 7L, 4L, 4L), Fatigue...Fatigue = c(7L,
8L, 8L, 8L, 4L, 6L), Discomfort...Inconfort = c(7L, 7L, 7L, 7L,
3L, 6L), Happiness...Joie = c(8L, 8L, 8L, 8L, 6L, 5L), Productivity...Productivité = c(6L,
8L, 7L, 7L, 5L, 4L), Ability.to.concentrate...Capacité.de.se.concentrer = c(7L,
8L, 7L, 6L, 5L, 4L), Alertness...Vigilance = c(7L, 8L, 7L, 6L,
5L, 5L), Stress...Stress = c(6L, 8L, 7L, 6L, 5L, 5L), Back.Pain...Mal.de.dos = c(8L,
7L, 8L, 8L, 3L, 4L), Neck.Pain...Douleur.au.cou = c(5L, 4L, 7L,
7L, 3L, 4L), Head.Pain...Mal.de.tête = c(1L, 1L, 1L, 1L, 2L,
4L), Eye.Pain...Douleur.oculaire = c(1L, 1L, 1L, 1L, 3L, 4L)), row.names = c(17L,
18L, 21L, 22L, 57L, 58L), class = "data.frame")
You can use the function abline immediately after the histogram call to add a vertical line intersecting the x axis. In this case, I am creating a line to show the location of the mean in each dataset. Then, to add the value, you can add it directly as a label or put it into a legend. I am adding some padding to the ylim so the legend or label doesn't overlap with the title. Finally, to arrange them in a similar way as you want it, you can prepare the panel using the function par():
# Simulated stand-ins for the productivity scores of three conditions.
n <- 10000
example_a <- rgamma(n, 5, 2)
example_b <- rnorm(n, 5, 2)
example_c <- rbeta(n, 5, 2)

# Height of the tallest bin of each histogram (computed without drawing);
# used below to add head room to the y-axis for the legend / label.
max_a <- max(hist(example_a, plot = FALSE)$counts)
max_b <- max(hist(example_b, plot = FALSE)$counts)
max_c <- max(hist(example_c, plot = FALSE)$counts)

mean_a <- mean(example_a)
mean_b <- mean(example_b)
mean_c <- mean(example_c)

# 2 x 2 panel layout (three of the four cells are used). par() returns the
# previous settings so they can be restored once the panels are drawn.
old_par <- par(mfrow = c(2, 2))

# Panel 1: mean shown as a dashed line, value reported in a legend. The second
# legend entry carries only the number, hence the NA line attributes.
hist(example_a, main = "PRE", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_a + 200))
abline(v = mean_a, col = "darkred", lwd = 3, lty = 2)
legend("topright", legend = c("Mean", round(mean_a, 3)),
       lwd = c(3, NA), lty = c(2, NA), col = c("darkred", NA))

# Panel 2: value written as a text label instead of a legend. The label sits
# inside the head room above the tallest bin rather than at a fixed height, so
# it stays visible whatever the sample looks like.
hist(example_b, main = "DBN", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_b + 200))
abline(v = mean_b, col = "forestgreen", lwd = 3, lty = 2)
text(x = mean_b - 2, y = max_b + 100, paste("Mean = ", round(mean_b, 3)))

# Panel 3: legend on the left; the legend swatch uses the same colour as the
# line it describes.
hist(example_c, main = "DBG", xlab = "Productivity",
     col = "slategray1", border = "gray",
     ylim = c(0, max_c + 200))
abline(v = mean_c, col = "purple4", lwd = 3, lty = 2)
legend("topleft", legend = c("Mean", round(mean_c, 3)),
       lwd = c(3, NA), lty = c(2, NA), col = c("purple4", NA))

# Put the graphics parameters back so later plots are not forced into the grid.
par(old_par)
And that gives us the following plots:
I've made a bar chart using ggplot with grouped data, and facetted with facet_grid. The column widths are inconsistent, so I want to make them all the same. I've read this can be done with preserve="single", but it seems to mess up the position dodging. Any idea how to prevent this from happening?
Here is a small sample of the data:
data <- structure(list(grp2 = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
7L, 7L, 7L, 7L, 7L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 7L, 7L, 7L, 7L,
7L), .Label = c("CSF1", "CSF2", "PC", "NC", "GPC", "GNC", "standard"
), class = "factor"), label2 = structure(c(7L, 8L, 9L, 7L, 8L,
9L, 7L, 15L, 15L, 15L, 15L, 15L, 7L, 8L, 9L, 7L, 8L, 9L, 7L,
15L, 15L, 15L, 15L, 15L), .Label = c("CSF1_raw", "CSF1_supernatant",
"CSF1_pellet", "CSF2_raw", "CSF2_supernatant", "CSF2_pellet",
"PC_raw", "PC_supernatant", "PC_pellet", "NC_raw", "NC_supernatant",
"NC_pellet", "GPC", "GNC", "standard", "NC"), class = "factor"),
mda_label = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 1L, 1L, 1L, 1L
), .Label = c("none", "mda_20", "mda_200"), class = "factor"),
conc = c(`7` = 0, `8` = 0, `9` = 0.324886127298521, `55` = 4.14765656994934,
`56` = 1.16840050032707, `57` = 8.33529714053568, `76` = 10.6220645144775,
`77` = 48.9241552191721, `78` = 4.51513315624087, `79` = 1.03887911533275,
`80` = 0.0445944796011582, `81` = 0.00484116548901831, `89` = 0,
`90` = 0, `91` = 0.322922569348207, `137` = 6.38488684568018,
`138` = 1.68909814271646, `139` = 7.61828609738757, `158` = 15.3082130743032,
`159` = 41.3127531345335, `160` = 4.64193087683391, `161` = 0.411672491030815,
`162` = 0.0568193835425769, `163` = 0.00439419098560105)), row.names = c(NA,
-24L), class = c("tbl_df", "tbl", "data.frame"))
Here's the initial plot:
# Grouped bar chart of concentration per sample label, one facet column per
# grp2 value. Bars are the per-group mean, error bars come from mean_se, and
# the raw observations are overlaid as points.
ggplot(
  data,
  aes(x = label2, y = conc, colour = mda_label, fill = mda_label)
) +
  facet_grid(cols = vars(grp2), scales = "free_x", space = "free") +
  stat_summary(
    geom = "bar",
    fun = mean,
    position = position_dodge()
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = position_dodge(width = 0.9),
    width = 0.5,
    colour = "black"
  ) +
  geom_point(
    position = position_dodge(width = 0.9),
    shape = 21,
    colour = "black"
  ) +
  scale_y_continuous(
    trans = "pseudo_log",
    labels = scales::number_format(accuracy = 0.01)
  ) +
  # Slanted tick labels so the long sample names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
But when I try to standardise the column widths with preserve="single", it gets messed up:
# Same chart, with preserve = "single" passed to every position_dodge() call
# in an attempt to give all bars the same width.
ggplot(
  data,
  aes(x = label2, y = conc, colour = mda_label, fill = mda_label)
) +
  facet_grid(cols = vars(grp2), scales = "free_x", space = "free") +
  stat_summary(
    geom = "bar",
    fun = mean,
    position = position_dodge(preserve = "single")
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    position = position_dodge(width = 0.9, preserve = "single"),
    width = 0.5,
    colour = "black"
  ) +
  geom_point(
    position = position_dodge(width = 0.9, preserve = "single"),
    shape = 21,
    colour = "black"
  ) +
  scale_y_continuous(
    trans = "pseudo_log",
    labels = scales::number_format(accuracy = 0.01)
  ) +
  # Slanted tick labels so the long sample names do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Since your data already has 0 values, you could add rows with a value of 0 for the other 'mda_label' levels in the "standard" grp2/label2 category, so that every label has the same number of bars.
# Pad the "standard" label with zero-concentration rows for the two mda_label
# levels it lacks, then append them to the data.
zero_rows <- data.frame(
  grp2 = rep("standard", 2),
  label2 = rep("standard", 2),
  mda_label = c("mda_20", "mda_200"),
  conc = rep(0, 2)
)
data <- rbind(data, zero_rows)
Also you never actually make the bar plot
# Dodged columns of conc per label, filled by mda_label, one facet column per
# grp2 value with free scales and panel widths.
ggplot(data, aes(x = label2, y = conc, fill = mda_label)) +
  geom_col(position = position_dodge(width = 1)) +
  facet_grid(cols = vars(grp2), scales = "free", space = "free")
I have the following data frame:
structure(list(Substance = c("Cefotaxime", "Cefepim", "Chloramphenicol",
"Sulfamethoxazole", "Ampicillin", "Ampicillin", "Tetracycline",
"Cefotaxime", "Trimethoprim", "Cefepim", "Cefepim", "Sulfamethoxazole",
"Ceftazidime", "Nalidixic acid", "Cefepim", "Ceftazidime", "Ampicillin",
"Ceftazidime", "Cefotaxime", "Ceftazidime"), Species = c("Cattle",
"Chicken", "Cattle", "Cattle", "Cattle", "Cattle", "Cattle",
"Pig", "Cattle", "Cattle", "Horse", "Horse", "Pig", "Cattle",
"Pig", "Pig", "Cattle", "Cattle", "Pig", "Horse"), gene = c("AmpC",
"blaCMY-2", "blaSHV-12", "blaCMY-2", "AmpC", "blaCMY-2", "blaCMY-2",
"AmpC", "blaSHV-12", "blaSHV-12", "blaCTX-M Group 1", "blaCTX-M Group 1",
"AmpC", "blaSHV-12", "blaCTX-M-15", "blaCTX-M-15", "AmpC", "AmpC",
"blaCMY-2", "AmpC"), n = c(3, 6, 1, 1, 3, 1, 1, 3, 1, 1, 1, 1,
3, 1, 1, 1, 1, 1, 1, 1), group = c(8L, 3L, 5L, 9L, 8L, 9L, 9L,
13L, 5L, 5L, 2L, 2L, 13L, 5L, 16L, 16L, 7L, 7L, 15L, 1L), value = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-20L), .Names = c("Substance", "Species", "gene", "n", "group",
"value"))
And I have this plot:
library(ggplot2)

# Fill colour for each gene; the names must match the values of `gene`.
gene_palette <- c("AmpC" = "#b2182b",
                  "blaCMY-2" = "#ef8a62",
                  "blaCTX-M-15" = "#fddbc7",
                  "blaCTX-M Group 1" = "#d1e5f0",
                  "blaSHV-12" = "#67a9cf",
                  "ESBL" = "#2166ac")

# One filled dot per (group, substance) pair, coloured by gene.
ggplot(test, aes(factor(group), Substance, fill = gene)) +
  geom_point(pch = 21, size = 5) +
  scale_fill_manual(values = gene_palette) +
  # The complete theme has to come first: adding theme_classic() after
  # theme() replaces the whole theme and silently drops the rotated labels.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
This produces the following plot:
Is there a way to sort the order of "groups" on the x-axis so that the groups for each gene are next to each other on the x-axis, in the order listed in the legend? This way, the colors for each gene type will be adjacent.
I tried the solutions presented here, but since "gene" has fewer distinct values than "group" and they don't match one-to-one, that didn't work (it introduced NAs).
# Order the group ids by gene first (then by group id) so that groups sharing
# a gene end up next to each other on the x-axis; unique() keeps the first
# occurrence of each group id in that ordering.
group_order <- unique(test$group[order(test$gene, test$group)])

ggplot(test, aes(factor(group, levels = group_order), Substance, fill = gene)) +
  geom_point(pch = 21, size = 5) +
  scale_fill_manual(values = gene_palette) +
  # The complete theme has to come first: adding theme_classic() after
  # theme() replaces the whole theme and silently drops the rotated labels.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
this is my dataset:
> dput(dfw)
structure(list(SITE = c("ASPEN", "ASPEN", "BioCON", "DUKE", "Lancaster",
"Merrit Island", "Nevada FACE", "NZ", "ORNL", "PHACE", "BioCON"
), SPECIES = c("A", "AB", "Legume", "PITA", "mixed", "Oak", "desert",
"grassland", "SG", "grassland", "C3forb"), FRr = c(0.197028535345918,
0.296799297050907, 0.195436310641759, 0.152972526753089, 0.0313948973476966,
0.139533057346518, 0.188221278921143, NA, 0.70542764380006, 0.119320766735777,
0.135665667633474), Nupr = c(0.122177669046786, 0.305573297532757,
0.131181914007488, 0.217519050530067, -0.0436788294371676, 0.153632658941404,
-0.00803217169726427, 0.168440046857285, 0.145172439177718, -0.108563178158001,
0.00546006390438276), myc = c("ECM", "ECM", "N-fixing", "ECM",
"ECM", "ECM", "AM", "AM", "AM", "AM", "AM"), SITE_Sps = structure(c(1L,
2L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 3L), .Label = c("Aspen FACE-A",
"Aspen FACE-AB", "BioCON", "BioCON-legumes", "Duke FACE", "Lascaster",
"Florida OTC", "Nevada FACE", "NZ FACE", "ORNL FACE", "PHACE"
), class = "factor")), row.names = c(NA, -11L), vars = list(SITE,
SPECIES, myc), indices = list(0L, 1L, 10L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L), group_sizes = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L), biggest_group_size = 1L, labels = structure(list(
SITE = c("ASPEN", "ASPEN", "BioCON", "BioCON", "DUKE", "Lancaster",
"Merrit Island", "Nevada FACE", "NZ", "ORNL", "PHACE"), SPECIES = c("A",
"AB", "C3forb", "Legume", "PITA", "mixed", "Oak", "desert",
"grassland", "SG", "grassland"), myc = structure(c(2L, 2L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("am", "ecm",
"ecm+am"), class = "factor")), row.names = c(NA, -11L), class = "data.frame", vars = list(
SITE, SPECIES, myc), .Names = c("SITE", "SPECIES", "myc")), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), .Names = c("SITE", "SPECIES",
"FRr", "Nupr", "myc", "SITE_Sps"))
I want to draw the same background as in the attached figure, added to my current ggplot code:
# Scatter of Nupr against FRr with one labelled point per site/species, plus
# the two axes through the origin and the dashed 1:1 line.
ggplot(dfw, aes(x = FRr, y = Nupr, group = myc)) +
  geom_point(aes(fill = myc), shape = 21, size = 4) +
  geom_text(aes(label = SITE_Sps)) +
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_abline(slope = 1, intercept = 0, linetype = "longdash")
I guess I should use the function geom_polygon, but I don't really know how to create a dataset to draw all the required segments, including the colour gradient from dark grey to light grey and white.
Perhaps this could be a start?
# End points of `nlines` equally spaced rays from the origin, placed on a
# circle large enough to lie far outside any plotting region.
# NOTE(review): the original left `nlines` without a value, which made the
# snippet fail; 25 is a placeholder, adjust to the number of rays wanted.
nlines <- 25
# length.out pins the number of angles (first and last both point along +x)
# instead of relying on floating-point steps landing exactly on 2 * pi.
phis <- seq(0, 2 * pi, length.out = nlines + 1)
rad <- 999
xs <- rad * cos(phis)
ys <- rad * sin(phis)
Here is a way using geom_polygon:
nlines <- 25
inc <- pi / nlines
phis <- seq(from = -pi / 2, by = inc, length.out = nlines)
rad <- 1

# For every start angle build a thin triangle from the origin to the arc
# between `a` and `a + inc`, followed by its mirror image across the y-axis.
# All six vertices share the grouping value `a`.
points <- lapply(phis, function(a) {
  edge_x <- rad * cos(c(a, a + inc))
  edge_y <- rad * sin(c(a, a + inc))
  data.frame(
    x = c(0, edge_x, 0, -edge_x),
    y = rep(c(0, edge_y), times = 2),
    g = rep(a, 6)
  )
})

# Stack the per-angle pieces into the single data frame handed to ggplot.
bckg <- do.call(rbind, points)
# Two datasets are involved (background wedges and the measurements), so each
# layer names its own data; x, y and group for the dfw layers are inherited
# from the plot-level mapping.
ggplot(mapping = aes(x = FRr, y = Nupr, group = myc)) +
  # Background: one grey polygon per wedge, its transparency driven by the
  # wedge angle g.
  geom_polygon(
    data = bckg,
    aes(x = x, y = y, group = g, alpha = g),
    fill = "gray50"
  ) +
  geom_point(data = dfw, aes(fill = myc), shape = 21, size = 4) +
  geom_text(data = dfw, aes(label = SITE_Sps), nudge_y = -0.02) +
  # Reference lines are given by fixed intercepts/slope and need no data.
  geom_hline(yintercept = 0) +
  geom_vline(xintercept = 0) +
  geom_abline(slope = 1, intercept = 0, linetype = "longdash") +
  # Fix the visible range but keep out-of-bounds values unchanged, so the
  # background polygons that extend past the limits are not dropped.
  scale_x_continuous(limits = c(-0.2, 0.4), oob = function(x, rg) x) +
  scale_y_continuous(limits = c(-0.2, 0.4), oob = function(x, rg) x) +
  # Alpha runs from opaque to fully transparent across g; no legend for it.
  scale_alpha_continuous(range = c(1.0, 0), guide = "none") +
  theme(panel.background = element_blank())
Here is the plot:
I am making a dotplot using ggplot with the code and data that is below which produces the following the graph.
# Dot plot of coefficient values, one row per coefficient, coloured by model.
ggplot(
  data = holder,
  mapping = aes(x = Coef, y = CoefShort, colour = factor(Name))
) +
  geom_point() +
  labs(x = "Value", y = "Coefficient") +
  scale_colour_discrete(name = "Model")
There is a significant amount of overplotting and I would like to create some vertical separation between dots. But it has to be systematic, i.e. the order is always red-green-blue.
geom_stack only seems to work when the x value is the same, not when the y value is the same. I tried using the opposite axes (swapping the x and y), doing a position_dodge() then doing a coord_flip(). This only works with bars, as seen below.
# Dodged horizontal bars: axes are swapped, bars are dodged by model, and the
# plot is flipped back with coord_flip(). geom_col() draws bars at the supplied
# y values; geom_bar() with its default count stat rejects a y aesthetic.
ggplot(data = holder, aes(x = CoefShort, y = Coef, fill = factor(Name))) +
  geom_col(position = "dodge") +
  labs(x = "Coefficient", y = "Value") +
  scale_fill_discrete("Model") +
  coord_flip()
Any ideas how to achieve that stacking with geom_point()? Thanks.
The data:
structure(list(Coef = c(-3875.46969970703, 7871.08213392282,
1120.33185255098, 1510.13540851347, 1439.07714113149, 1800.92398445336,
-3760.05411752962, 8183.74295221482, 1126.98290537184, 1517.99524139857,
1442.73063836897, 1808.03721179571, -90.6507661872817, -71.7225864185226,
-103.615416254984, -732.167583256825, -1075.67574987664, -1908.56266462926,
-7362.80215630299, 8886.12888250011, 655.767448263926, 848.716877683527,
869.395903077767, 998.254438325812, -211.682481369473, -303.310032581644,
-506.1995360406, -978.697664841985, -1440.30190190734, -2325.22236024601,
2625.94998656519, 3573.68798735398, 4217.82910198788, 4534.8789695778,
4967.19941000705, 5072.02764498623, 5419.64684461491), Name = c(1,
1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), CoefShort = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L), .Label = c("(Intercept)",
"carat", "Good", "Very Good", "Premium", "Ideal", "E", "F", "G",
"H", "I", "J", "SI2", "SI1", "VS2", "VS1", "VVS2", "VVS1", "IF"
), class = "factor")), .Names = c("Coef", "Name", "CoefShort"
), class = "data.frame", row.names = c(NA, -37L))
Points drawn by geom_point have zero width, so you need to set the dodging width explicitly:
# Points dodged by model along the coefficient axis, then flipped so the
# coefficients run down the y-axis. An explicit dodge width is supplied.
ggplot(
  data = holder,
  mapping = aes(x = CoefShort, y = Coef, colour = factor(Name))
) +
  geom_point(position = position_dodge(width = 0.8)) +
  labs(x = "Coefficient", y = "Value", colour = "Model") +
  coord_flip()