Related
I have a dataset that contains some 0 values. I want to use a log scale, but because of the 0 values it returns an error. I tried replacing the 0s with 1s, but the result did not look right.
As you can see in the figure, I have very small values for the 16k case but to show it clearly, I want to use log scale. Also, I want the order to be 8k_B, 8k_S, 16k_B, 16k_S. I tried factor and levels but still it didn't change the order.
Can someone please help? I can post the data if necessary. Thank you.
Here is the code I used.
# Assemble a long-format data frame: a group label (`name`), a B/S flag
# (`sines`), and the Frequency columns of r1B, r1S, r2B and r2S stacked in
# that order.
# NOTE(review): "16_B" has no "K", unlike the other three labels - possibly a typo.
data_freq <- data.frame(name=c( rep("8K_B",24), rep("8K_S",24), rep("16_B",24), rep("16K_S",24)),sines=c(rep("B",24),rep("S",24),rep("B",24),rep("S",24)),
value_freq=c( r1B$Frequency, r1S$Frequency, r2B$Frequency, r2S$Frequency)
)
# One box per `name`, filled by `name`; the plot is stored in `p` and is not
# printed by this snippet.
p <- ggplot(data_freq, aes(x=name, y=value_freq, fill=name)) +
geom_boxplot()
Here is the data:
data_freq <- structure(list(name = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("16K_S", "16_B",
"8K_B", "8K_S"), class = "factor"), sines = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("B",
"S"), class = "factor"), value_freq = c(6.269822e-05, 5.494403e-05,
5.84888e-05, 5.727028e-05, 7.300023e-05, 6.502448e-05, 6.568913e-05,
5.771338e-05, 5.638409e-05, 5.693796e-05, 5.527635e-05, 6.103661e-05,
5.660564e-05, 6.269822e-05, 5.594099e-05, 6.978778e-05, 5.571945e-05,
6.258745e-05, 6.779384e-05, 6.668609e-05, 6.048274e-05, 5.826725e-05,
5.671641e-05, 6.070429e-05, 9.433902e-05, 8.037108e-05, 8.203393e-05,
8.591391e-05, 9.633444e-05, 9.123503e-05, 8.946133e-05, 8.447278e-05,
7.638024e-05, 8.103622e-05, 8.15905e-05, 8.480535e-05, 7.527167e-05,
8.779847e-05, 8.192307e-05, 9.7443e-05, 7.649109e-05, 8.425106e-05,
9.134589e-05, 9.555844e-05, 8.724419e-05, 7.881908e-05, 7.771052e-05,
8.358592e-05, 1.1077e-07, 1.1077e-07, 0, 0, 1.1077e-07, 0, 0,
1.1077e-07, 1.1077e-07, 0, 0, 0, 0, 0, 3.3232e-07, 0, 2.2155e-07,
4.431e-07, 1.1077e-07, 1.1077e-07, 1.1077e-07, 0, 2.2155e-07,
0, 5.5428e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07, 7.64911e-06,
6.6514e-07, 6.6514e-07, 1.1086e-07, 5.5428e-07, 6.6514e-07, 6.6514e-07,
6.6514e-07, 3.3257e-07, 6.6514e-07, 0, 6.6514e-07, 3.87998e-06,
6.6514e-06, 1.1086e-07, 1.1086e-07, 1.1086e-07, 3.3257e-07, 3.3257e-07,
1.10857e-06)), class = "data.frame", row.names = c(NA, -96L))
You could try to do log(x+n) transformation instead.
# The values are tiny, so a small constant is added before taking the log;
# this keeps the zeros finite.
p <- data_freq %>%
  mutate(value_freq = log(value_freq + 1e-6)) %>%
  ggplot(mapping = aes(x = name, y = value_freq, fill = name)) +
  geom_boxplot()
Alternatively, you can try square root transformation.
# Square-root alternative: it is defined at zero, so no offset is needed.
p <- data_freq %>%
  mutate(value_freq = value_freq^0.5) %>%
  ggplot(mapping = aes(x = name, y = value_freq, fill = name)) +
  geom_boxplot()
Or do the transformation using ggplot:
# Let ggplot2 apply the log10 transformation so the axis keeps the original
# units. log10(0) is -Inf and ggplot2 drops non-finite values (with a
# warning), so a small offset is added to keep the zeros in the boxplots.
p <- data_freq %>%
  ggplot(aes(x = name, y = value_freq + 1e-6, fill = name)) +
  geom_boxplot() +
  scale_y_log10() +
  # keep the original axis title instead of "value_freq + 1e-06"
  labs(y = "value_freq")
I wanted to make plots that look like figure 1 (source: link)
In figure 1, they have plotted the regression analysis with one-year yield variability. In my case, I would like to plot variability between two locations and 4 blocks for each treatment group. So the plot I wanted would have three facets for factors B.glucosidase, Protein, POX.C of variable and four colors for treatments factors. Also, in my current plot I have legend for block and treatment. I should only have treatment because the block should be used for making error bar for variability.
I tried with this code, which obviously doesn't work for what I want. (Data for df.melted included below.)
# Scatter of yield against value, coloured by treatment, one facet per variable.
ggplot(df.melted, aes(x = value, y = yield, color = as.factor(treatment))) +
# mapping shape to block is what adds the second (block) legend
geom_point(aes(shape= as.factor(block))) +
stat_smooth(method = "lm", formula = y ~ x, col = "darkslategrey", se=F) +
stat_poly_eq(formula = y~x,
# aes(label = paste(..eq.label.., ..rr.label.., sep = "~~~")),
aes(label = ..rr.label..),
parse = TRUE) +
theme_classic() +
# sd() is taken over the whole yield column, so every point gets the same
# error-bar half-width rather than a per-group one
geom_errorbar(aes(ymax = df.melted$yield+sd(df.melted$yield), ymin = df.melted$yield-sd(df.melted$yield)), width = 0.05)+
facet_wrap(~variable)
Data:
df.melted <- structure(list(Location = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("M", "U"), class = "factor"),
treatment = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L), .Label = c("CC",
"CCS", "CS", "SCS"), class = "factor"), block = c(1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L,
4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L), yield = c(5156L, 5157L, 5551L, 5156L, 4804L,
4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L, 4588L,
4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L, 5006L,
5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L, 4608L,
5156L, 5157L, 5551L, 5156L, 4804L, 4720L, 4757L, 5021L, 4826L,
4807L, 4475L, 4596L, 4669L, 4588L, 4542L, 4592L, 5583L, 5442L,
5693L, 5739L, 5045L, 4902L, 5006L, 5086L, 4639L, 4781L, 4934L,
4857L, 4537L, 4890L, 4842L, 4608L, 5156L, 5157L, 5551L, 5156L,
4804L, 4720L, 4757L, 5021L, 4826L, 4807L, 4475L, 4596L, 4669L,
4588L, 4542L, 4592L, 5583L, 5442L, 5693L, 5739L, 5045L, 4902L,
5006L, 5086L, 4639L, 4781L, 4934L, 4857L, 4537L, 4890L, 4842L,
4608L), variable = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L), .Label = c("B.glucosidase",
"Protein", "POX.C"), class = "factor"), value = c(1.600946,
1.474084, 1.433078, 1.532492, 1.198667, 1.193193, 1.214941,
1.360981, 1.853056, 1.690117, 1.544357, 1.825132, 1.695409,
1.764123, 1.903743, 1.538684, 0.845077, 1.011463, 0.857032,
0.989803, 0.859022, 0.919467, 1.01717, 0.861689, 0.972332,
0.952922, 0.804431, 0.742634, 1.195837, 1.267285, 1.08571,
1.20097, 6212.631579, 5641.403509, 4392.280702, 7120.701754,
5305.964912, 4936.842105, 5383.157895, 6077.894737, 5769.122807,
5016.842105, 5060.350877, 5967.017544, 5576.842105, 5174.035088,
5655.438596, 5468.77193, 7933.333333, 7000, 6352.982456,
8153.684211, 6077.894737, 4939.649123, 5002.807018, 6489.122807,
4694.035088, 5901.052632, 4303.859649, 6768.421053, 6159.298246,
6090.526316, 4939.649123, 5262.45614, 810.3024, 835.5242,
856.206, 759.8589, 726.2298, 792.6472, 724.7165, 699.3266,
500.9153, 634.8698, 637.9536, 648.8814, 641.0357, 623.3822,
555.2834, 520.8119, 683.3528, 595.9173, 635.4315, 672.4234,
847.2944, 745.5665, 778.3548, 735.8141, 395.2647, 570.4148,
458.0383, 535.3851, 678.0293, 670.7419, 335.2923, 562.5674
)), row.names = c(NA, -96L), class = "data.frame")
library(dplyr)
library(ggplot2)
library(ggpmisc)
Summarize the data frame (this could also be done with stat_summary(), but it's often clearer/more transparent to do it explicitly up front). I think that because your data set is balanced you could collapse/average over the block structure first, and then do your whole plot with the reduced data set - it shouldn't change the outcome of the linear regressions at all, at least not the mean values ... and any statistical comparisons should probably be done on block-level summaries anyway ...
## One row per Location x treatment x variable cell: the mean of `value`,
## plus the sd and mean of `yield` over the rows in that cell (the four
## blocks in the posted data).
df.sum <- (df.melted
    %>% group_by(Location, treatment, variable)
    %>% summarise(value = mean(value),
                  yield_sd = sd(yield),
                  ## collapse yield to mean *after* computing sd!
                  yield = mean(yield))
    ## drop the leftover grouping so later steps see a plain data frame
    %>% ungroup()
)
Plot:
## The regression line and R^2 are fitted to the block-level data
## (df.melted); the points and error bars come from the summary (df.sum).
(ggplot(df.melted,
        aes(x = value, y = yield, color = treatment))
    ## explicit formula avoids the "using formula" message from ggplot2
    + stat_smooth(method = "lm", formula = y ~ x,
                  col = "darkslategrey", se = FALSE)
    + stat_poly_eq(
          formula = y ~ x,
          ## aes(label = paste(after_stat(eq.label), after_stat(rr.label), sep = "~~~")),
          ## group = 1: a single R^2 per facet rather than one per treatment
          aes(group = 1, label = after_stat(rr.label)),
          parse = TRUE)
    + theme_classic()
    ## guide = FALSE is deprecated; "none" is the supported spelling
    + scale_shape(guide = "none")
    + geom_point(data = df.sum)
    + geom_errorbar(data = df.sum,
                    aes(ymax = yield + yield_sd, ymin = yield - yield_sd),
                    width = 0.05)
    ## full argument name (`scales`) instead of relying on partial matching
    + facet_wrap(~variable, scales = "free_x")
)
(adding group=1 to the stat_poly_eq() aesthetics means we only plot a single R^2 value per facet)
Since you're no longer using the shape aesthetic for anything, you could consider using it to show the Location variable ...
I have a dataframe tag, with 51X5 structure
structure(list(Tagging = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("CIRCLE CAMPIAGN",
"NATIONAL CAMPIAGN"), class = "factor"), Status = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("Negative", "Positive"), class = "factor"),
Month = structure(c(1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("JUL",
"JUN", "MAY"), class = "factor"), Category = structure(c(1L,
4L, 6L, 1L, 2L, 4L, 6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 5L,
6L, 1L, 2L, 4L, 5L, 6L, 1L, 2L, 4L, 6L, 1L, 4L, 6L, 2L, 3L,
4L, 6L, 1L, 2L, 3L, 4L, 6L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L,
3L, 4L, 5L, 6L, 6L), .Label = c("Data", "Other", "Roaming",
"Unlimited", "VAS", "Voice"), class = "factor"), count = c(3L,
2L, 1L, 4L, 5L, 2L, 1L, 2L, 6L, 7L, 2L, 3L, 4L, 9L, 6L, 2L,
3L, 3L, 3L, 10L, 2L, 5L, 5L, 5L, 4L, 3L, 1L, 1L, 1L, 2L,
1L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 2L, 4L, 1L, 1L, 3L, 3L, 2L,
1L, 1L, 1L, 3L, 4L, 2L)), class = "data.frame", row.names = c(NA,
-51L))
I want to create a bar plot (ggplot) in which each bar segment is labelled with the sum of count over all categories, month-wise. I am using the code below:
# Stacked bars of count per Tagging, filled by Status, one facet per Month.
ggplot(data = tag, aes(x = Tagging, y = count, fill = Status)) +
geom_col() +
labs(x = "Tagging", y = "Count", title = "FlyTxt ROI", subtitle = "Statistics") +
# label = count writes one label per row of `tag` (i.e. per Category),
# stacked at the middle of each row's segment
geom_text(aes(label = count), color = "white", size = 3, position = position_stack(vjust = 0.5)) +
theme_minimal()+facet_wrap(~Month)
But I am getting split count values:
Help as I want only sum of count for each status
The problem is that the bars drawn by geom_col are accumulated over all categories, but geom_text doesn't do that: it writes one label per row.
One option is to pre-summarize the data (to get rid of the category split) and then plot the graph.
library(tidyverse)
# Collapse the Category split: one total per Tagging x Status x Month.
tag_sum <- tag %>%
  group_by(Tagging, Status, Month) %>%
  summarise(count_sm = sum(count)) %>%
  # summarise() only peels off the last grouping level; drop the rest
  ungroup()

# Bars and labels now use the same pre-summed values.
ggplot(data = tag_sum, aes(x = Tagging, y = count_sm, fill = Status)) +
  geom_col() +
  geom_text(aes(label = count_sm), color = "white", size = 3,
            position = position_stack(vjust = 0.5)) +
  facet_wrap(~Month) +
  labs(x = "Tagging", y = "Count", title = "FlyTxt ROI", subtitle = "Statistics") +
  theme_minimal()
I did a plot explaining occurrences of each modality for many variables.
It is about clustering problem to show which variables are explaining each cluster.
So
> dput(DATA1[1:20,])
structure(list(TYPE_PEAU = structure(c(1L, 2L, 1L, 3L, 1L, 2L,
1L, 2L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("Sèche",
"Mixte", "Normale", "Grasse"), class = "factor"), SENSIBILITE = structure(c(2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 2L, 2L, 3L, 3L, 3L,
1L, 3L, 1L), .Label = c("Aucune", "Fréquente", "Occasionnelle"
), class = "factor"), IMPERFECTIONS = structure(c(2L, 2L, 3L,
2L, 3L, 1L, 2L, 2L, 1L, 2L, 2L, 3L, 2L, 1L, 2L, 2L, 2L, 3L, 2L,
3L), .Label = c("Fréquente", "Occasionnelle", "Rares"), class = "factor"),
BRILLANCE = structure(c(3L, 3L, 1L, 1L, 1L, 2L, 1L, 3L, 3L,
3L, 3L, 1L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L), .Label = c("Aucune",
"Partout", "Zone T"), class = "factor"), GRAIN_PEAU = structure(c(1L,
2L, 1L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 1L, 2L, 3L, 2L, 2L, 2L,
2L, 2L, 2L, 1L), .Label = c("Fin", "Moyen", "Dilaté"), class = "factor"),
RIDES_VISAGE = structure(c(3L, 3L, 3L, 3L, 3L, 1L, 1L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 3L, 3L, 3L), .Label = c("Aucune",
"Très visibles", "Visibles"), class = "factor"), ALLERGIES = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Non", "Oui"), class = "factor"),
MAINS = structure(c(1L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 3L, 2L,
2L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 2L), .Label = c("Sèches",
"Normales", "Moites"), class = "factor"), PEAU_CORPS = structure(c(3L,
2L, 2L, 2L, 2L, 1L, 2L, 2L, 3L, 1L, 2L, 2L, 2L, 3L, 2L, 3L,
2L, 3L, 3L, 1L), .Label = c("Normale", "Sèche", "Très sèche"
), class = "factor"), INTERET_ALIM_NATURELLE = structure(c(3L,
1L, 1L, 1L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_ORIGINE_GEO = structure(c(3L,
1L, 1L, 1L, 3L, 2L, 3L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 1L, 1L,
3L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_VACANCES = structure(c(1L,
1L, 2L, 2L, 3L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 2L, 1L, 1L,
1L, 1L, 2L, 2L), .Label = c("À la mer", "À la montagne",
"En ville"), class = "factor"), INTERET_ENVIRONNEMENT = structure(c(3L,
1L, 1L, 1L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), INTERET_COMPOSITION = structure(c(3L,
1L, 1L, 1L, 1L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L), .Label = c("Beaucoup", "Pas du tout", "Un peu"
), class = "factor"), PRIORITE_1 = structure(c(1L, 1L, 1L,
1L, 3L, 3L, 3L, 3L, 2L, 3L, 3L, 1L, 1L, 1L, 1L, 3L, 2L, 2L,
3L, 1L), .Label = c("éclatante", "hydratée", "lisse", "matifiée",
"nourrie", "purifiée", "reposée"), class = "factor"), MILIEU_RESIDENCE = structure(c(1L,
1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L,
2L, 2L, 1L, 2L), .Label = c("nature", "urbain"), class = "factor")), .Names = c("TYPE_PEAU",
"SENSIBILITE", "IMPERFECTIONS", "BRILLANCE", "GRAIN_PEAU", "RIDES_VISAGE",
"ALLERGIES", "MAINS", "PEAU_CORPS", "INTERET_ALIM_NATURELLE",
"INTERET_ORIGINE_GEO", "INTERET_VACANCES", "INTERET_ENVIRONNEMENT",
"INTERET_COMPOSITION", "PRIORITE_1", "MILIEU_RESIDENCE"), row.names = c(1L,
2L, 11L, 13L, 15L, 16L, 17L, 20L, 23L, 32L, 33L, 34L, 37L, 38L,
39L, 40L, 42L, 43L, 45L, 48L), class = "data.frame")
Then I use this code:
library(tidyverse)
# Stack every column of DATA1 into key/value pairs (k = variable name,
# v = modality), then count the modalities per variable.
DATA1 %>%
gather(k, v) %>%
ggplot(aes(v)) +
geom_bar(fill = "orange", width = 0.7) +
# flipped coordinates: the modalities (v) end up on the vertical axis
coord_flip() +
# one panel per original variable
facet_wrap(~k)
Then I get as result this plot:
But as you can see, the labels on the vertical axis are not clear!
How can I resolve this issue?
You can try to resize the text:
# Same bar chart as before, with small bold black tick labels on the
# vertical axis so the modality names fit.
DATA1 %>%
  gather(key = k, value = v) %>%
  ggplot(mapping = aes(x = v)) +
  geom_bar(width = 0.7, fill = "orange") +
  coord_flip() +
  facet_wrap(~k) +
  theme(axis.text.y = element_text(size = 4, face = "bold", color = "black"))
And/or abbreviate the labels with:
# Append to the plot: shortens the tick labels of the x scale with abbreviate().
+ scale_x_discrete(labels = abbreviate)
I've a ggplot that shows the counts of tweets for some brands as well as a label for the overall percentage. This was done with much help from this link: Show % instead of counts in charts of categorical variables
# plot ggplot of brands
# The `+` must end each line: a line that starts with `+` is parsed as a new
# expression, so the layers would never be added to the plot.
ggplot(data = test, aes(x = brand, fill = brand)) +
  geom_bar() +
  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)),
           geom = 'text', vjust = -0.3)
Next, I would like to plot it based on brand and sentiment, with the labels for the bars of each brand totalling up to 100%. However, I have difficulty amending my code to do this. Would you be able to help please? Also, would it be possible to change the colours for neu to blue and pos to green?
# plot ggplot of brands and sentiment
# The `+` must end each line: a line that starts with `+` is parsed as a new
# expression, so the layers would never be added to the plot.
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
  geom_bar(position = 'dodge') +
  stat_bin(aes(label = sprintf("%.02f %%", ..count../sum(..count..)*100)),
           geom = 'text', position = position_dodge(width = 0.9), vjust=-0.3)
Here's a dput of 100 rows of my data's brand and sentiment column
structure(list(brand = structure(c(3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 1L, 1L, 2L, 3L, 4L, 4L, 1L, 2L, 1L, 2L, 1L, 3L, 3L, 3L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 5L, 2L, 1L, 2L, 1L, 1L, 2L,
2L, 1L, 4L, 5L, 5L, 1L, 1L, 2L, 3L, 1L, 1L, 4L, 1L, 2L, 1L, 2L,
1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L,
1L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 4L, 1L, 1L), .Label = c("apple",
"samsung", "sony", "bb", "htc", "nokia", "huawei"), class = "factor"),
sentiment = structure(c(2L, 1L, 3L, 1L, 2L, 3L, 1L, 1L, 3L,
1L, 1L, 2L, 3L, 1L, 1L, 3L, 2L, 1L, 3L, 1L, 3L, 3L, 3L, 2L,
1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 3L, 2L, 1L, 1L, 2L,
2L, 1L, 1L, 1L, 1L, 2L, 3L, 1L, 3L, 3L, 3L, 3L, 3L, 3L, 1L,
3L, 1L, 1L, 1L, 3L, 3L, 2L, 1L, 1L, 2L, 3L, 3L, 1L, 3L, 2L,
1L, 3L, 1L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
3L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 2L, 1L, 1L, 1L, 1L,
3L), .Label = c("neg", "pos", "neu"), class = "factor")), .Names = c("brand",
"sentiment"), class = c("data.table", "data.frame"), row.names = c(NA,
-100L), .internal.selfref = <pointer: 0x0000000003070788>)
Posting a hack far far far from the ggplot2 idiomatic way to do this, so if someone posts a more ggplot2 way to do this, you should accept the idiomatic method.
So basically I'm creating a dummy data set which will include all the information you've calculated using ..count../sum(..count..)*100 and plotting it on top of your bar plot using geom_text
# Counts per brand x sentiment, joined with the per-brand totals.
temp <- as.data.frame(table(test$brand, test$sentiment))
temp <- merge(temp, as.data.frame(table(test$brand)), by = "Var1", all.x = TRUE)
names(temp) <- c("brand", "sentiment", "Freq", "Count")
# table() also returns zero-count combinations (and unused brand levels, for
# which Freq / Count is NaN). Drop them so that every label corresponds to a
# bar that is actually drawn and the dodged positions line up.
temp <- temp[temp$Freq > 0, ]

library(ggplot2)
# Bars are counted from the raw data; labels come from the lookup table and
# show each sentiment's share within its brand.
ggplot(data = test, aes(x = brand, fill = factor(sentiment))) +
  geom_bar(position = 'dodge') +
  geom_text(data = temp,
            aes(x = brand, y = Freq,
                label = sprintf("%.02f %%", Freq/Count*100)),
            position = position_dodge(width = 0.9), vjust=-0.3)
This is not exactly same as your plot because you only provided a subset of your data
To choose the colors you would like for sentiment, make use of
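scale_fill_manual(values = c(neg = "grey", neu = "blue", pos = "green"))
You can give the colors by name, RGB hex code, etc. Unnamed colors are matched to the factor levels in level order, which is alphabetical only when the factor was created with default levels; in the posted data the levels are neg, pos, neu. Naming each color by its level, as above, avoids depending on that order, so neg, neu, pos get "grey", "blue", "green". You may still have to experiment with the exact shades.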