Geom Bar Plot, sum of count not visible

Geom Bar Plot, sum of count not visible - r

I have a data frame `tag` with 51 rows and 5 columns:
structure(list(Tagging = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("CIRCLE CAMPIAGN",
"NATIONAL CAMPIAGN"), class = "factor"), Status = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Negative", "Positive"), class = "factor"),
Month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("JUL",
"JUN", "MAY"), class = "factor"), Category = structure(c(1L,
4L, 6L, 1L, 2L, 4L, 6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 5L,
6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 6L, 1L, 4L, 6L, 2L, 3L,
4L, 6L, 1L, 2L, 3L, 4L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L,
3L, 4L, 5L, 6L, 6L), .Label = c("Data", "Other", "Roaming",
"Unlimited", "VAS", "Voice"), class = "factor"), count = c(3L,
2L, 1L, 4L, 5L, 2L, 1L, 2L, 6L, 7L, 2L, 3L, 4L, 9L, 6L, 2L,
3L, 3L, 3L, 10L, 2L, 5L, 5L, 5L, 4L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 1L, 1L, 3L, 3L, 2L,
1L, 1L, 1L, 3L, 4L, 2L)), class = "data.frame", row.names = c(NA,
-51L))
I want to create a bar plot (ggplot) in which each bar is labelled with the sum of count over all categories, month by month. I am using the code below:
# Stacked bars of count per Tagging, filled by Status, with one facet per Month.
ggplot(data = tag, aes(x = Tagging, y = count, fill = Status)) +
geom_col() +
labs(x = "Tagging", y = "Count", title = "FlyTxt ROI", subtitle = "Statistics") +
# One text label per input row (label = count), stacked with vjust = 0.5.
geom_text(aes(label = count), color = "white", size = 3, position = position_stack(vjust = 0.5)) +
theme_minimal()+facet_wrap(~Month)
But the labels show the individual count values of every row instead of their total:
Please help: I want only the sum of count for each status.

The problem is that geom_col stacks the counts of all categories, so each bar segment shows their total, whereas geom_text draws one label per row and does not sum anything.
One option is to pre-summarise the data (to get rid of the category split) and then plot the graph.
library(tidyverse)

# Collapse the Category split: one row per Tagging/Status/Month holding the
# total count, so every stacked segment gets exactly one label.
tag_sum <- tag %>%
  group_by(Tagging, Status, Month) %>%
  # .groups = "drop" returns an ungrouped result and avoids the regrouping
  # message that summarise() otherwise prints.
  summarise(count_sm = sum(count), .groups = "drop")

# Stacked bars of the summed counts, labelled with the same summed value.
ggplot(data = tag_sum, aes(x = Tagging, y = count_sm, fill = Status)) +
  geom_col() +
  geom_text(
    aes(label = count_sm),
    color = "white", size = 3,
    position = position_stack(vjust = 0.5)
  ) +
  facet_wrap(~Month) +
  labs(
    x = "Tagging", y = "Count",
    title = "FlyTxt ROI", subtitle = "Statistics"
  ) +
  theme_minimal()

Related

R - boxplot log scale problem with 0 values

I have a dataset that contains 0 values. I want to use a log scale, but because of the 0 values it returns an error. I tried replacing the 0s with 1s, but the result did not look right.
As you can see in the figure, I have very small values for the 16k case but to show it clearly, I want to use log scale. Also, I want the order to be 8k_B, 8k_S, 16k_B, 16k_S. I tried factor and levels but still it didn't change the order.
Can someone please help? I can post the data if necessary. Thank you.
Here is the code I used.
# Build the plotting data: 24 rows per group label, a B/S indicator, and the
# Frequency columns of r1B, r1S, r2B and r2S (objects defined outside this snippet).
# NOTE(review): the third label is "16_B" (no "K"), unlike the other three —
# possibly a typo; confirm.
data_freq <- data.frame(name=c( rep("8K_B",24), rep("8K_S",24), rep("16_B",24), rep("16K_S",24)),sines=c(rep("B",24),rep("S",24),rep("B",24),rep("S",24)),
value_freq=c( r1B$Frequency, r1S$Frequency, r2B$Frequency, r2S$Frequency)
)
# One box per group (name), filled by group.
p <- ggplot(data_freq, aes(x=name, y=value_freq, fill=name)) +
geom_boxplot()
Here is the data:
data_freq <- structure(list(name = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("16K_S", "16_B",
"8K_B", "8K_S"), class = "factor"), sines = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B",
"S"), class = "factor"), value_freq = c(6.269822e-05, 5.494403e-05,
5.84888e-05, 5.727028e-05, 7.300023e-05, 6.502448e-05, 6.568913e-05,
5.771338e-05, 5.638409e-05, 5.693796e-05, 5.527635e-05, 6.103661e-05,
5.660564e-05, 6.269822e-05, 5.594099e-05, 6.978778e-05, 5.571945e-05,
6.258745e-05, 6.779384e-05, 6.668609e-05, 6.048274e-05, 5.826725e-05,
5.671641e-05, 6.070429e-05, 9.433902e-05, 8.037108e-05, 8.203393e-05,
8.591391e-05, 9.633444e-05, 9.123503e-05, 8.946133e-05, 8.447278e-05,
7.638024e-05, 8.103622e-05, 8.15905e-05, 8.480535e-05, 7.527167e-05,
8.779847e-05, 8.192307e-05, 9.7443e-05, 7.649109e-05, 8.425106e-05,
9.134589e-05, 9.555844e-05, 8.724419e-05, 7.881908e-05, 7.771052e-05,
8.358592e-05, 1.1077e-07, 1.1077e-07, 0, 0, 1.1077e-07, 0, 0,
1.1077e-07, 1.1077e-07, 0, 0, 0, 0, 0, 3.3232e-07, 0, 2.2155e-07,
4.431e-07, 1.1077e-07, 1.1077e-07, 1.1077e-07, 0, 2.2155e-07,
0, 5.5428e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07, 7.64911e-06,
6.6514e-07, 6.6514e-07, 1.1086e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07,
6.6514e-07, 3.3257e-07, 6.6514e-07, 0, 6.6514e-07, 3.87998e-06,
6.6514e-06, 1.1086e-07, 1.1086e-07, 1.1086e-07, 3.3257e-07, 3.3257e-07,
1.10857e-06)), class = "data.frame", row.names = c(NA, -96L))

You could try to do log(x+n) transformation instead.
# log(x + n) transform: shift every value by a small constant before taking
# the natural log, so that the zeros stay finite.
log_offset <- 0.000001  # the values are really small, so the offset is small too
shifted_freq <- mutate(data_freq, value_freq = log(value_freq + log_offset))
p <- ggplot(shifted_freq, aes(x = name, y = value_freq, fill = name)) +
  geom_boxplot()
Alternatively, you can try square root transformation.
# Square-root transform: defined at zero, so no offset is needed.
sqrt_freq <- mutate(data_freq, value_freq = value_freq^0.5)
p <- ggplot(sqrt_freq, aes(x = name, y = value_freq, fill = name)) +
  geom_boxplot()
Or do the transformation using ggplot:
# Transform the axis rather than the data. A plain scale_y_log10() maps the
# zeros in value_freq to -Inf and the boxplot stat then drops them, so use a
# pseudo-log scale instead: linear near zero, logarithmic further out.
# sigma sets the width of the linear region; 1e-7 is the order of magnitude of
# the smallest non-zero value in data_freq.
# (`trans` is kept for compatibility; ggplot2 >= 3.5.0 also accepts `transform`.)
p <- data_freq %>%
  ggplot(aes(x = name, y = value_freq, fill = name)) +
  geom_boxplot() +
  scale_y_continuous(
    trans = scales::pseudo_log_trans(sigma = 1e-7, base = 10)
  )

Running ggplot inside foreach is slower than running it with just a for loop

I am making a map of a dataset with 20 million data points in ggplot. A single map (with facetting) takes 10-15 mins, so I checked if using multiple cores in parallel mode would work better. Using foreach, maps took more time to make. Some foreach runs were running for 30min-1 hour without producing any results. I know that the answer to this question (Why is the parallel package slower than just using apply?) shows that in parallel computation, sometimes it takes more time to combine the result from the separate parallel processes than running the task itself. But I saw some run examples when I was searching that minute-runs can be improved. Do you think it is possible?
Due to the sheer amount of data I have, I sampled my dataset:
structure(list(lat = c(46.791667, 52.958333, 57.375, 62.625,
74.041667, 60.208333, 30.208333, 56.791667, 57.375, 40.958333,
56.541667, 38.958333, 35.291667, 43.625, 71.375, 66.875, 74.375,
66.458333, 47.791667, 48.041667, 41.541667, 40.875, 57.208333,
64.375, 42.625, 43.958333, 69.958333, 72.375, 36.875, 66.958333,
39.791667, 36.625, 52.625, 65.708333, 42.208333, 53.708333, 35.458333,
58.625, 34.875, 57.291667, 59.708333, 61.708333, 72.041667, 59.958333,
32.208333, 43.625, 39.541667, 62.625, 41.208333, 32.291667, 48.958333,
47.291667, 60.375, 49.458333, 37.208333, 65.708333, 57.958333,
31.041667, 63.875, 43.625, 54.541667, 55.541667, 45.458333, 72.375,
54.708333, 37.958333, 32.375, 60.125, 59.041667, 37.875, 42.958333,
44.375, 59.791667, 49.208333, 34.375, 53.208333, 59.458333, 53.375,
45.458333, 72.125, 66.208333, 60.958333, 47.625, 60.291667, 41.125,
67.541667, 54.625, 55.541667, 37.541667, 44.291667, 44.458333,
40.041667, 49.458333, 39.625, 73.375, 41.458333, 71.375, 31.041667,
66.791667, 42.541667), lon = c(-6.125, -19.541667, -29.291667,
-9.2083333, -11.541667, -6.625, -25.708333, -14.458333, -48.291667,
-63.541667, -41.291667, -12.541667, -48.291667, -58.708333, 6.625,
-2.7083333, -69.375, -19.291667, -27.208333, -36.625, -17.791667,
-50.541667, -38.708333, 9.375, -56.208333, -44.958333, -59.041667,
8.875, -21.125, -24.791667, -40.375, -26.208333, -31.875, -11.875,
-60.958333, -39.125, -32.458333, -54.791667, -44.541667, -37.958333,
-48.625, -10.541667, 3.5416667, -17.791667, -16.041667, -9.9583333,
-32.708333, 0.875, -18.625, -54.208333, -63.125, -56.458333,
-55.125, -22.708333, -40.958333, -56.208333, -33.625, -69.125,
-58.125, -17.541667, -23.541667, -17.041667, -18.458333, -64.791667,
-25.208333, -35.875, -44.791667, -11.291667, -58.291667, -46.208333,
-41.208333, -5.0416667, -38.208333, -13.875, -55.291667, -17.291667,
-48.625, -38.791667, -59.375, -19.291667, 5.5416667, -19.625,
-41.375, -66.291667, -17.625, -14.208333, -39.291667, -48.875,
-16.541667, -21.375, -46.375, 2.625, -60.291667, -40.375, 2.4583333,
-16.458333, 4.875, -66.291667, -4.2916667, -36.458333), entity = structure(c(2L,
2L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 2L,
2L, 2L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 1L,
2L, 1L, 1L), .Label = c("Cc", "Li"), class = "factor"), watcon = structure(c(1L,
2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L,
2L, 2L, 2L, 2L, 1L, 1L, 2L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 1L,
1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
1L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L,
2L, 2L, 2L), .Label = c("calm", "stormy"), class = "factor"),
step = structure(c(1L, 3L, 2L, 4L, 4L, 2L, 3L, 3L, 3L, 4L,
2L, 2L, 3L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 1L, 1L, 1L, 2L,
4L, 1L, 2L, 2L, 4L, 2L, 4L, 4L, 3L, 4L, 2L, 2L, 1L, 2L, 2L,
1L, 4L, 3L, 2L, 3L, 3L, 2L, 4L, 1L, 3L, 2L, 2L, 3L, 4L, 4L,
2L, 2L, 3L, 4L, 1L, 3L, 2L, 1L, 4L, 2L, 3L, 1L, 3L, 1L, 2L,
1L, 4L, 4L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 2L, 4L, 3L, 4L, 4L,
4L, 1L, 2L, 1L, 4L, 3L, 4L, 2L, 4L, 1L, 4L, 1L, 2L, 2L, 4L
), .Label = c("abundance", "enccomb", "adscomb", "infcomb"
), class = "factor")), row.names = c(NA, -100L), class = "data.frame")
The code I am using is:
library(oceanmap)
library(sf)
library(ggmap)
library(rnaturalearth)
library(rnaturalearthdata)
library(rgeos)
library(tidyverse)
world <- ne_countries(scale = "medium", returnclass = "sf")
library(doParallel)
library(foreach)
# Start a 5-worker cluster and register it as the foreach backend.
cl <- makeCluster(5)
doParallel::registerDoParallel(cl)
entities <- unique(sample$entity)
# One iteration per entity: plot that entity's points on the world map,
# faceted by watcon and step2, titled with the entity, and saved as "<i>.png".
# NOTE(review): the data sample shown with the question has a `step` column but
# no `step2` or `value` column — presumably they exist in the full data; confirm.
foreach(i=1:length(entities), .packages = c("tidyverse", "dplyr")) %dopar% {
ggplot (data=world) + geom_sf(color="white", fill="white") + coord_sf(xlim = c(-34,-30), ylim = c(53,62) , expand = FALSE) + geom_point(subset(sample, sample$entity==entities[i]), mapping=aes(x = lon, y = lat, color = log10(value))) + facet_grid(watcon~step2) +
ggtitle(entities[i])
# No plot object is passed to ggsave(); presumably it saves the plot built
# just above — confirm.
ggsave(filename = paste0(i,".png"))
}
# Shut the workers down once all maps are written.
stopCluster(cl)
P.S. I am using ggplot because of the opportunities for editing the map itself. Lattice is faster but lacks the customization that I am looking for.
Any help on how to improve my foreach code or if it's even possible is greatly appreciated!

Plot linear regression analysis with error bar for variability

I wanted to make plots that look like figure 1 (source: link)
In figure 1, they have plotted the regression analysis with one-year yield variability. In my case, I would like to plot variability between two locations and 4 blocks for each treatment group. So the plot I wanted would have three facets for factors B.glucosidase, Protein, POX.C of variable and four colors for treatments factors. Also, in my current plot I have legend for block and treatment. I should only have treatment because the block should be used for making error bar for variability.
I tried with this code, which obviously doesn't work for what I want. (Data for df.melted included below.)
# Scatter of yield against value, coloured by treatment with point shape by
# block, plus a linear fit and an R^2 label, one facet per variable.
ggplot(df.melted, aes(x = value, y = yield, color = as.factor(treatment))) +
geom_point(aes(shape= as.factor(block))) +
stat_smooth(method = "lm", formula = y ~ x, col = "darkslategrey", se=F) +
stat_poly_eq(formula = y~x,
# aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
aes(label = ..rr.label..),
parse = TRUE) +
theme_classic() +
# Error bars span each row's yield +/- the standard deviation of ALL yield
# values in df.melted (one global sd, not a per-group sd).
geom_errorbar(aes(ymax = df.melted$yield+sd(df.melted$yield), ymin = df.melted$yield-sd(df.melted$yield)), width = 0.05)+
facet_wrap(~variable)
Data:
df.melted <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("M", "U"), class = "factor"),
treatment = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("CC",
"CCS", "CS", "SCS"), class = "factor"), block = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L), yield = c(5156L, 5157L, 5551L, 5156L, 4804L,
4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L, 4588L,
4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L, 5006L,
5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L, 4608L,
5156L, 5157L, 5551L, 5156L, 4804L, 4720L, 4757L, 5021L, 4826L,
4807L, 4475L, 4596L, 4669L, 4588L, 4542L, 4592L, 5583L, 5442L,
5693L, 5739L, 5045L, 4902L, 5006L, 5086L, 4639L, 4781L, 4934L,
4857L, 4537L, 4890L, 4842L, 4608L, 5156L, 5157L, 5551L, 5156L,
4804L, 4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L,
4588L, 4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L,
5006L, 5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L,
4608L), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B.glucosidase",
"Protein", "POX.C"), class = "factor"), value = c(1.600946,
1.474084, 1.433078, 1.532492, 1.198667, 1.193193, 1.214941,
1.360981, 1.853056, 1.690117, 1.544357, 1.825132, 1.695409,
1.764123, 1.903743, 1.538684, 0.845077, 1.011463, 0.857032,
0.989803, 0.859022, 0.919467, 1.01717, 0.861689, 0.972332,
0.952922, 0.804431, 0.742634, 1.195837, 1.267285, 1.08571,
1.20097, 6212.631579, 5641.403509, 4392.280702, 7120.701754,
5305.964912, 4936.842105, 5383.157895, 6077.894737, 5769.122807,
5016.842105, 5060.350877, 5967.017544, 5576.842105, 5174.035088,
5655.438596, 5468.77193, 7933.333333, 7000, 6352.982456,
8153.684211, 6077.894737, 4939.649123, 5002.807018, 6489.122807,
4694.035088, 5901.052632, 4303.859649, 6768.421053, 6159.298246,
6090.526316, 4939.649123, 5262.45614, 810.3024, 835.5242,
856.206, 759.8589, 726.2298, 792.6472, 724.7165, 699.3266,
500.9153, 634.8698, 637.9536, 648.8814, 641.0357, 623.3822,
555.2834, 520.8119, 683.3528, 595.9173, 635.4315, 672.4234,
847.2944, 745.5665, 778.3548, 735.8141, 395.2647, 570.4148,
458.0383, 535.3851, 678.0293, 670.7419, 335.2923, 562.5674
)), row.names = c(NA, -96L), class = "data.frame")

library(dplyr)
library(ggplot2)
library(ggpmisc)
Summarize the data frame first (this could also be done with stat_summary(), but it's often clearer and more transparent to do it explicitly up front). I think that, because your data set is balanced, you could collapse/average over the block structure first and then do your whole plot with the reduced data set — it shouldn't change the outcome of the linear regressions at all, at least not the mean values — and any statistical comparisons should probably be done on block-level summaries anyway.
# Collapse the blocks: one row per Location/treatment/variable holding the
# mean value, the mean yield and the between-block sd of yield.
df.sum <- df.melted %>%
  group_by(Location, treatment, variable) %>%
  summarise(
    value = mean(value),
    ## collapse yield to mean *after* computing sd!
    yield_sd = sd(yield),
    yield = mean(yield),
    ## return an ungrouped data frame and avoid the regrouping message
    .groups = "drop"
  )
Plot:
# The raw (block-level) rows of df.melted drive the regression line and the
# R^2 label; the per-group means in df.sum are drawn as points with
# +/- 1 sd error bars.
ggplot(df.melted, aes(x = value, y = yield, color = treatment)) +
  # NOTE(review): color = treatment is inherited here, so one line is fitted
  # per treatment; add aes(group = 1) if a single pooled line matching the
  # R^2 label is wanted.
  stat_smooth(method = "lm", col = "darkslategrey", se = FALSE) +
  stat_poly_eq(
    formula = y ~ x,
    ## to print the equation as well:
    ## aes(group = 1,
    ##     label = paste(after_stat(eq.label), after_stat(rr.label),
    ##                   sep = "~~~")),
    ## group = 1 gives a single R^2 per facet; after_stat() replaces the
    ## deprecated ..rr.label.. notation
    aes(group = 1, label = after_stat(rr.label)),
    parse = TRUE
  ) +
  theme_classic() +
  ## guide = "none" replaces the deprecated guide = FALSE
  scale_shape(guide = "none") +
  geom_point(data = df.sum) +
  geom_errorbar(
    data = df.sum,
    aes(ymax = yield + yield_sd, ymin = yield - yield_sd),
    width = 0.05
  ) +
  ## `scales` spelled out in full instead of relying on partial matching
  facet_wrap(~variable, scales = "free_x")
(adding group=1 to the stat_poly_eq() aesthetics means we only plot a single R^2 value per facet)
Since you're no longer using the shape aesthetic for anything, you could consider using it to show the Location variable ...

How make labels of plot clear

I made a plot showing the number of occurrences of each modality for many variables.
It is part of a clustering problem: the aim is to show which variables explain each cluster.
Here is a sample of the data:
> dput(DATA1[1:20,])
structure(list(TYPE_PEAU = structure(c(1L, 2L, 1L, 3L, 1L, 2L,
1L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("Sèche",
"Mixte", "Normale", "Grasse"), class = "factor"), SENSIBILITE = structure(c(2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 2L, 3L, 3L, 3L,
1L, 3L, 1L), .Label = c("Aucune", "Fréquente", "Occasionnelle"
), class = "factor"), IMPERFECTIONS = structure(c(2L, 2L, 3L,
2L, 3L, 1L, 2L, 2L, 1L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 3L, 2L,
3L), .Label = c("Fréquente", "Occasionnelle", "Rares"), class = "factor"),
BRILLANCE = structure(c(3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 3L,
3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L), .Label = c("Aucune",
"Partout", "Zone T"), class = "factor"), GRAIN_PEAU = structure(c(1L,
2L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 1L), .Label = c("Fin", "Moyen", "Dilaté"), class = "factor"),
RIDES_VISAGE = structure(c(3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L), .Label = c("Aucune",
"Très visibles", "Visibles"), class = "factor"), ALLERGIES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Non", "Oui"), class = "factor"),
MAINS = structure(c(1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 3L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("Sèches",
"Normales", "Moites"), class = "factor"), PEAU_CORPS = structure(c(3L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 3L, 2L, 3L,
2L, 3L, 3L, 1L), .Label = c("Normale", "Sèche", "Très sèche"
), class = "factor"), INTERET_ALIM_NATURELLE = structure(c(3L,
1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_ORIGINE_GEO = structure(c(3L,
1L, 1L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L,
3L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_VACANCES = structure(c(1L,
1L, 2L, 2L, 3L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 2L), .Label = c("À la mer", "À la montagne",
"En ville"), class = "factor"), INTERET_ENVIRONNEMENT = structure(c(3L,
1L, 1L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_COMPOSITION = structure(c(3L,
1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), PRIORITE_1 = structure(c(1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L,
3L, 1L), .Label = c("éclatante", "hydratée", "lisse", "matifiée",
"nourrie", "purifiée", "reposée"), class = "factor"), MILIEU_RESIDENCE = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L), .Label = c("nature", "urbain"), class = "factor")), .Names = c("TYPE_PEAU",
"SENSIBILITE", "IMPERFECTIONS", "BRILLANCE", "GRAIN_PEAU", "RIDES_VISAGE",
"ALLERGIES", "MAINS", "PEAU_CORPS", "INTERET_ALIM_NATURELLE",
"INTERET_ORIGINE_GEO", "INTERET_VACANCES", "INTERET_ENVIRONNEMENT",
"INTERET_COMPOSITION", "PRIORITE_1", "MILIEU_RESIDENCE"), row.names = c(1L,
2L, 11L, 13L, 15L, 16L, 17L, 20L, 23L, 32L, 33L, 34L, 37L, 38L,
39L, 40L, 42L, 43L, 45L, 48L), class = "data.frame")
Then I use this code:
library(tidyverse)
# Reshape all columns into key/value pairs (k = variable name, v = modality),
# then draw a flipped bar chart of modality counts, one facet per variable.
DATA1 %>%
gather(k, v) %>%
ggplot(aes(v)) +
geom_bar(fill = "orange", width = 0.7) +
coord_flip() +
facet_wrap(~k)
Then I get as result this plot:
But as you can see, the labels on the vertical axis are not legible.
How can I resolve this issue?

You can try to resize the text:
# Same chart as in the question, with the modality labels drawn in a small
# bold font.
long_data <- gather(DATA1, k, v)
ggplot(long_data, aes(v)) +
  geom_bar(fill = "orange", width = 0.7) +
  coord_flip() +
  facet_wrap(~k) +
  theme(axis.text.y = element_text(face = "bold", color = "black", size = 4))
And/or abbreviate the labels with:
+ scale_x_discrete(labels = abbreviate)

Bar chart for factorial designs in R

I'm currently trying to create a clustered bar chart using ggplot2. It's basically just mean response times for a 2x2x2 factorial design. The three factors are load, compatibility and salience. I'm having a hard time jamming the third factor (salience) in there though. It shouldn't be a stacked graph though
This is what I currently have
# Adds mean bars, mean_cl_normal error bars and axis/legend labels to `bar`,
# a ggplot object created outside this snippet.
# NOTE(review): the leading `+` on the two continuation lines look like copied
# console continuation prompts rather than operators — confirm.
bar+stat_summary(fun.y = mean, geom = "bar", position = "dodge") +
+ stat_summary(fun.data = mean_cl_normal, geom = "errorbar", position = position_dodge(width = 0.90), width = 0.2)+
+ labs(x = "Compatibility", y = "Mean RT", fill = "Load")
Here's a small sample of the data I'm trying to graph:
ID load comp sal rt
1 1 High Incompatible Non_Salient 787
2 1 Low Compatible Salient 754
3 2 High Incompatible Salient 654
I've seen graphs like these numerous times before but I have no idea how to get ggplot2 to display three independent variables at the same time.
I've tried splitting the graphs by adding
+ facet_wrap( ~ sal)
but that doesn't work either. It just says "Invalid argument to unary operator"
Any help would be appreciated.

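Is this the kind of plot you are looking for?
I used the Wii data from the book "Discovering Statistics Using R", which is in a similar format to yours; the plotting code below assumes the following structure has been assigned to `Wii`. (The "Invalid argument to unary operator" error most likely arises because `+ facet_wrap(~ sal)` was run as a separate statement, so R read the leading `+` as a unary operator; it has to be chained onto the end of the plot expression.)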
structure(list(athlete = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Athlete", "Non-Athlete"), class = "factor"),
stretch = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No Stretching", "Stretching"
), class = "factor"), wii = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Playing Wii",
"Watching Wii"), class = "factor"), injury = c(2L, 2L, 1L,
2L, 0L, 1L, 2L, 0L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 0L, 0L, 3L,
3L, 3L, 2L, 1L, 0L, 2L, 2L, 3L, 2L, 2L, 3L, 1L, 2L, 4L, 1L,
2L, 2L, 2L, 1L, 4L, 4L, 1L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L,
2L, 2L, 1L, 0L, 3L, 3L, 2L, 1L, 2L, 4L, 1L, 2L, 5L, 5L, 3L,
6L, 4L, 3L, 4L, 5L, 5L, 2L, 6L, 4L, 4L, 4L, 3L, 4L, 3L, 2L,
1L, 4L, 3L, 2L, 2L, 1L, 3L, 1L, 1L, 3L, 4L, 2L, 7L, 8L, 6L,
9L, 4L, 7L, 5L, 9L, 6L, 4L, 8L, 5L, 4L, 7L, 10L, 1L, 3L,
2L, 1L, 3L, 3L, 2L, 3L, 4L, 2L, 0L, 1L, 3L, 2L, 0L)),
.Names = c("athlete", "stretch", "wii", "injury"),
class = "data.frame", row.names = c(NA, -120L))
Here is how to produce the plot.
library(ggplot2)
library(Hmisc)
# Mean injury per stretch group, dodged by wii, with one facet per athlete
# level — i.e. three factors shown at once.
ggplot(data = Wii, aes(x = stretch, y = injury, fill = wii)) +
  facet_wrap(~athlete) +
  # `fun` replaces the `fun.y` argument deprecated in ggplot2 3.3.0
  stat_summary(fun = mean, geom = "bar", position = "dodge") +
  # mean_cl_normal (requires Hmisc) supplies ymin/ymax for the error bars
  stat_summary(
    fun.data = mean_cl_normal, geom = "errorbar",
    position = position_dodge(width = 0.90), width = 0.2
  )

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Geom Bar Plot, sum of count not visible - r

Related

R - boxplot log scale problem with 0 values

Running ggplot inside foreach is slower than running it with just a for loop

Plot linear regression analysis with error bar for variability

How make labels of plot clear

Bar chart for factorial designs in R

Categories

Resources