Related
This question already has answers here:
Reorder bars in geom_bar ggplot2 by value
(3 answers)
Change bar plot colour in geom_bar with ggplot2 in r
(2 answers)
Closed last year.
How can I easily give each bar its own color and sort the bars in descending order?
# Keep only the "Yes" responses and draw one horizontal bar per travel mode.
QG4 %>%
  filter(value == "Yes") %>%
  ggplot(mapping = aes(x = variable, y = Freq)) +
  geom_col(position = "dodge") +
  coord_flip() +
  theme_bw() +
  labs(
    title = "What is your usual (or most frequently used) mode of travel to work/place of study?",
    x = "Mode",
    y = NULL
  )
I used dput(QG4) to avoid using a picture of the dataset:
# dput() output for the QG4 data frame: 20 rows, i.e. the 10 levels of
# `variable` crossed with the two levels of `value` ("No"/"Yes"), plus an
# integer count column `Freq`.
# NOTE: the expression is not assigned here; prefix it with `QG4 <-` to
# recreate the object under that name.
structure(list(variable = structure(c(1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L), .Label = c("Bicycle",
"Bicycle (Yélo)", "Bus", "Car", "Car (Yélo)", "Carpool", "Motorcycle/scooter",
"On foot", "Scooter (trottinette)", "Train"), class = "factor"),
value = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("No",
"Yes"), class = "factor"), Freq = c(1634L, 2143L, 1781L,
1532L, 2281L, 2202L, 2267L, 1331L, 2265L, 2172L, 655L, 146L,
508L, 757L, 8L, 87L, 22L, 958L, 24L, 117L)), class = "data.frame", row.names = c(NA,
-20L))
enter image description here
I am having trouble getting my plots to work. I have multiple categorical variables: I want to color by one of them and facet by another. However, R keeps adding together the "values" (I used melt) for the same variable instead of showing them separately. It works when I only have one variable.
Here is my plot with one variable
Here is my plot with two variables, you can see the adding that is happening
simple dataframe
Here is my code:
library(reshape2)
library(ggplot2)
# Example data in long format: 12 rows with columns SampleID, patient, type
# (factor: "L"/"T"), timepoint (factor: "1"/"2"), Group (single level "D"),
# variable (factor with levels "A".."I"; only "A".."D" occur in these rows)
# and the integer measurement `value`.
test2 <- structure(list(SampleID = c(12.19, 12.22, 13.1, 12.19, 12.22,
13.1, 12.19, 12.22, 13.1, 12.19, 12.22, 13.1), patient = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), type = structure(c(1L,
1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("L",
"T"), class = "factor"), timepoint = structure(c(1L, 2L, 2L,
1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 2L), .Label = c("1", "2"), class = "factor"),
Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), .Label = "D", class = "factor"), variable = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L), .Label = c("A",
"B", "C", "D", "E", "F", "G", "H", "I"), class = "factor"),
value = c(2L, 5L, 6L, 25L, 18L, 12L, 6L, 10L, 15L, 21L, 23L,
33L)), .Names = c("SampleID", "patient", "type", "timepoint",
"Group", "variable", "value"), row.names = c(NA, 12L), class = "data.frame")
# Dodged bar chart of `value` for each `variable`, filled by timepoint and
# split into one panel per sample type.
#
# Columns are referenced by bare name inside aes() and facet_wrap() so that
# ggplot2 looks them up in the data belonging to each panel. Writing
# `test2$column` there hands ggplot2 the full-length vector regardless of the
# facet, which is discouraged by ggplot2 and can attach values to the wrong
# panel. Outside aes()/facet_wrap() (the title below) `test2$...` is fine.
ggplot(test2, aes(x = variable, y = value, fill = timepoint)) +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c("rosybrown1", "steelblue2", "gray")) +
  labs(
    x = "Category",
    y = "Count",
    title = paste0(
      "Sample ", as.character(unique(test2$patient)),
      " - ", as.character(unique(test2$Group))
    )
  ) +
  facet_wrap(~type) +
  theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = .5, size = 7)
  )
If I am understanding right, it looks like you just need to give the scales option to facet_wrap like so:
# Facet by `type`, letting every panel keep its own x-axis scale.
facet_wrap(vars(type), scales = "free_x")
I can't work out why the data points in my ternary diagram appear distorted. This is particularly visible on the FeO2 scale, where none of the values approaching 50% seem to be plotted correctly. Does ggtern require some data transformation, or am I missing something?
The dataset:
# Example data: 16 rows with columns Site, Sample, ID, Iron, Aluminium and
# Silicon. `Site` has six factor levels but only "Kilwa" and "Dakawa" occur in
# these rows; `ID` is "Smelting" for every row.
# NOTE(review): the three numeric columns do not add up to 100 within a row
# (the first row sums to roughly 85) - keep that in mind when reading the
# ternary plot built from them.
KiDaSm<-structure(list(Site = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Dakawa", "Fukuchani",
"Kilwa", "Mkokotoni", "Tumbe Chwaka", "Unguja Ukuu"), class = "factor"),
Sample = structure(c(7L, 8L, 9L, 10L, 11L, 14L, 15L, 16L,
17L, 19L, 20L, 21L, 23L, 24L, 25L, 26L), .Label = c("EB005",
"EB008", "EB009", "EB017", "EB018", "EB023", "EB028", "EB030",
"EB033", "EB034", "EB035", "EB036", "EB037", "EB038", "EB040",
"EBDAK002", "EBDAK006", "EBDAK007", "EBDAK009", "EBDAK012",
"EBDAK014", "EBDAK015", "EBDAK017", "EBDAK020", "EBDAK021",
"EBDAK022", "FKCH002", "FKCH003", "FKCH005", "FKCH006", "FKCH008",
"FKCH009", "FKCH010", "FKCH012", "FKCH014", "FKCH015", "FKCH016",
"FKCH017", "FKCH018", "FKCH019", "FKCH023", "MKK002", "MKK003",
"MKK007", "MKK009", "MKK011", "MKK013", "MKK014", "MKK017",
"MKK018", "MKK020", "MKK06", "TBCH001", "TBCH002", "TBCH003",
"TBCH005", "TBCH007", "TBCH008", "TBCH009", "TBCH010", "TBCH011",
"TBCH014", "TBCH017", "TBCH018", "TBCH021", "TBCH022", "UU001",
"UU003", "UU004", "UU005", "UU007", "UU008", "UU010", "UU011",
"UU012", "UU014", "UU018", "UU020", "UU022", "UU023", "UU026",
"UU031", "UU033"), class = "factor"), ID = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("ND", "Smelting", "Smithing"), class = "factor"),
Iron = c(52.2866002788889, 57.437955161, 55.880450631, 50.213473286,
53.068958017, 55.776340727, 56.764639409, 61.37738424, 75.741474131,
75.459980082, 69.785922113, 76.298245515, 75.860464737, 77.221978734,
76.602317775, 67.582636787), Aluminium = c(8.07348620588889,
6.9369729006, 6.4314347298, 7.7061493869, 7.3254949831, 7.2108549156,
7.2113019865, 8.2022565362, 4.570137602, 4.3668232665, 5.8538177888,
4.5660791632, 4.2671637947, 4.727287541, 4.7084385736, 6.0287010895
), Silicon = c(24.6786504477778, 22.516695383, 24.261662172,
26.81463386, 25.558654883, 23.062108874, 23.144722305, 26.480492462,
17.138349267, 16.917779397, 19.620246624, 16.265818105, 17.628059944,
15.696017597, 15.786928218, 22.04500569)), .Names = c("Site",
"Sample", "ID", "Iron", "Aluminium", "Silicon"), row.names = c(NA,
-16L), class = "data.frame")
My code:
library(ggtern)
# Ternary scatter plot of Iron / Silicon / Aluminium, with point colour and
# shape both keyed to Site.
ggtern(
  data = KiDaSm,
  mapping = aes(
    x = Iron, y = Silicon, z = Aluminium,
    color = Site, shape = Site
  )
) +
  geom_point() +
  labs(
    x = expression(FeO[2]),
    y = expression(SiO[2]),
    z = expression(Al[2]*O[3])
  ) +
  scale_color_manual(values = c("#FFC300", "#FF5733")) +
  theme_bw()
Ternary diagram:
I would like to summarize my "karyotype" molecular data by location and substrate (see sample data below) as percentages in order to create a stack-bar plot in ggplot2.
I have figured out how to use 'dcast' to get a total for each karyotype, but cannot figure out how to get a percent for each of the three karyotypes (i.e. 'BB', 'BD', 'DD').
The data should be in a format to make a stacked bar plot in 'ggplot2'.
Sample Data:
library(reshape2)
# Sample data: 25 rows with factor columns Location, Substrate and Karyotype.
# Karyotype has levels "", "BB", "BD" and "DD"; the empty string marks a
# missing karyotype, and "BB" does not occur in these sample rows.
# Only the "Kampinge" and "Kaseberga" locations appear in this excerpt.
Karotype.Data <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle", "Steninge"
), class = "factor"), Substrate = structure(c(1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
2L, 2L, 2L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle",
"Steninge"), class = "factor"), Karyotype = structure(c(1L, 3L,
4L, 4L, 3L, 3L, 4L, 4L, 4L, 3L, 1L, 4L, 3L, 4L, 4L, 3L, 1L, 4L,
3L, 3L, 4L, 3L, 4L, 3L, 3L), .Label = c("", "BB", "BD", "DD"), class = "factor")), .Names = c("Location",
"Substrate", "Karyotype"), row.names = c(135L, 136L, 137L, 138L,
139L, 165L, 166L, 167L, 168L, 169L, 236L, 237L, 238L, 239L, 240L,
326L, 327L, 328L, 329L, 330L, 426L, 427L, 428L, 429L, 430L), class = "data.frame")
## Number of individuals per karyotype for each Location/Substrate pair ##
# One output row per Location + Substrate, one column per Karyotype level;
# each cell holds the number of matching rows (aggregated with length()).
Karyotype.Summary <- dcast(
  data = Karotype.Data,
  formula = Location + Substrate ~ Karyotype,
  fun.aggregate = length,
  value.var = "Karyotype"
)
You can use the dplyr package:
library(dplyr)
# Count individuals for every Location x Substrate x Karyotype combination.
# The dplyr verbs are namespace-qualified on purpose: if plyr is attached
# after dplyr, plyr's summarise()/summarize()/mutate() mask the dplyr versions
# and ignore the grouping, collapsing the result to a single row.
z.counts <- Karotype.Data %>%
  dplyr::group_by(Location, Substrate, Karyotype) %>%
  dplyr::summarise(freq = dplyr::n()) %>%
  dplyr::ungroup()

# Convert the counts to percentages within each Location x Substrate pair.
# The result is ungrouped so later steps do not inherit the grouping.
z.freq <- z.counts %>%
  dplyr::group_by(Location, Substrate) %>%
  dplyr::mutate(freq = freq / sum(freq) * 100) %>%
  dplyr::ungroup()
Here, the data remain in the long format, so it is straightforward to build the barplot with ggplot:
library(ggplot2)
# Bar chart of karyotype frequency, one panel per Location (rows) and
# Substrate (columns).
ggplot(data = z.freq, mapping = aes(x = Karyotype, y = freq)) +
  geom_col() +
  facet_grid(Location ~ Substrate)
With some help from 'Marat Talipov' and many other answers to questions on Stackoverflow I found out that it is important to load 'plyr' before 'dplyr' and to use 'summarise' rather than 'summarize'. Then removing the missing data was the last step using 'filter'.
library(dplyr)
# Tally individuals for each Location x Substrate x Karyotype combination.
z.counts <- summarise(
  group_by(Karotype.Data, Location, Substrate, Karyotype),
  freq = n()
)

# Drop the rows whose karyotype is blank (missing), then express each count
# as a proportion of its Location x Substrate total.
z.freq <- z.counts %>%
  filter(Karyotype != "") %>%
  group_by(Location, Substrate) %>%
  mutate(freq = freq / sum(freq))

# Print the result for inspection.
z.freq
library (ggplot2)
# Stacked bars: karyotype proportions per Substrate, one panel per Location.
ggplot(
  data = z.freq,
  mapping = aes(x = Substrate, y = freq, fill = Karyotype)
) +
  facet_wrap(vars(Location)) +
  geom_col()
Now I have created the plot I was looking for:
I have created a PCA for measurements collected on individuals from four locations placed on four substrates with three replicates. I have the sex (male or female) and "karyotype" (a factor with three possible categories), and I have calculated the first two PC scores for each individual.
I would like to make a plot where males and females have different symbols and the colour of the symbols depends on the karyotype. With the code below I have created a plot that uses a single symbol, colour-coded for the three karyotypes, and puts 95% confidence ellipses around the males and females.
How can I change the symbol for each sex and keep the colouring dependent on the karyotype? I would also like to have this reflected in the legend.
One last question. Is it possible to add an arrow for each PC (not each individual) from the origin similar to those found in ordination plots?
Sample Data:
# Sample data: 25 individuals with Location, Substrate, Replicate, Sex and
# Karyotype factors, three measurements (Wing_Length, Leg_Length,
# Development_Time) and the first two principal-component scores.
# PC1 and PC2 are the 9th and 10th columns (see .Names below).
# Every row in this excerpt has Location "Kampinge".
test <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L), .Label = c("Kampinge", "Kaseberga", "Molle", "Steninge"
), class = "factor"), Substrate = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L), .Label = c("Kampinge", "Kaseberga", "Molle",
"Steninge"), class = "factor"), Replicate = structure(c(1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 1L, 1L), .Label = c("1", "2", "3"), class = "factor"),
Sex = structure(c(2L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L
), .Label = c("Female", "Male"), class = "factor"), Karyotype = structure(c(3L,
4L, 3L, 3L, 4L, 3L, 4L, 3L, 4L, 3L, 3L, 3L, 3L, 3L, 2L, 4L,
3L, 3L, 4L, 4L, 3L, 4L, 3L, 4L, 3L), .Label = c("", "BB",
"BD", "DD"), class = "factor"), Wing_Length = c(1439L, 1224L,
1558L, 1508L, 1286L, 1560L, 1377L, 1486L, 1638L, 1475L, 1703L,
1726L, 1668L, 1405L, 1737L, 1419L, 1530L, 1508L, 1525L, 1326L,
1609L, 1357L, 1830L, 1476L, 1661L), Leg_Length = c(465L,
357L, 610L, 415L, 343L, 560L, 435L, 390L, 425L, 514L, 693L,
695L, 657L, 454L, 661L, 382L, 431L, 531L, 435L, 387L, 407L,
414L, 752L, 524L, 650L), Development_Time = c(15, 15, 12,
12, 12, 12, 12, 12, 12, 15, 15, 15, 15, 15, 15, 15, 11, 12,
14, 12, 14, 14, 14, 11, 11), PC1 = c(-281.031806232855, -515.247908786317,
-96.7283446465637, -260.171340782501, -476.664849753781,
-127.267190895631, -347.839240839062, -293.08530374415, -154.026702195308,
-221.98257463847, 67.7504074590983, 86.6778734586525, 17.8073498265326,
-314.171132928964, 73.3068216627556, -349.616320093329, -233.030545551831,
-185.761623361004, -234.30046275676, -417.754317941649, -187.820500930148,
-376.653043663908, 203.025275308178, -214.80078992031, 7.94703091626344
), PC2 = c(-78.3082792875783, -133.370219905995, -113.211488986839,
4.31036861466361, -82.8593541869054, -73.5708675263244, -95.0643731443612,
9.37702847686542, 80.0290301136235, -92.8061497557789, -83.8731164047719,
-70.6537733486393, -78.706783632851, -91.6793310834752, -37.5144466525303,
-27.4637667171696, 6.14809390611532, -84.6794844768708, -0.127837123829732,
-90.9556028004192, 75.2353710655562, -91.7834027435658, -47.669385541585,
-99.8362257341741, -77.8269478596591)), .Names = c("Location",
"Substrate", "Replicate", "Sex", "Karyotype", "Wing_Length",
"Leg_Length", "Development_Time", "PC1", "PC2"), row.names = c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 30L, 31L), class = "data.frame")
## Plot
# Single square plotting panel.
par(mfrow = c(1, 1), mar = c(4, 4, 2, 1), pty = "s")

# PC1 against PC2; the point colour is the integer code of the Karyotype
# factor.
with(
  test,
  plot(
    PC1, PC2,
    xlab = "PC1", ylab = "PC2",
    pch = 16, col = as.numeric(Karyotype),
    xlim = c(-1000, 1000), ylim = c(-250, 250),
    las = 1, cex.lab = 1.5, cex.axis = 1.25, main = NULL
  )
)

# Labelled ellipse for each sex (conf = 0.95), drawn from the PC1/PC2 columns.
ordiellipse(
  test[, c("PC1", "PC2")],
  groups = test$Sex,
  conf = 0.95, col = "black", cex = 1.75, label = TRUE
)

# Legend entries follow the order in which the karyotypes first appear, so
# the colours line up with the labels.
legend(
  "bottomright",
  legend = unique(test$Karyotype),
  col = unique(as.numeric(test$Karyotype)),
  pch = 16, cex = 1.75
)
Replace your pch plot argument by something like :
# Plotting symbol chosen per individual: 15 for males, 19 for everyone else.
pch = ifelse(test$Sex == "Male", 15, 19)
Try with ggplot:
library(ggplot2)
# PC1 vs PC2 scatter: colour encodes Karyotype, shape encodes Sex, and
# grouping by Sex gives one ellipse per sex.
ggplot(
  data = test,
  mapping = aes(x = PC1, y = PC2, color = Karyotype, shape = Sex, group = Sex)
) +
  geom_point(size = 5) +
  stat_ellipse()