Related
My goal is to create a bar graph of categorical variables and then add a pie chart inside it as shown in the attached image.
My data is the same as in the image and is shown below:
# Data for the bar graph: one frequency per chromosome (1A, 1B, 1D, ..., 7D)
chromosomes <- paste0(rep(1:7, each = 3), c("A", "B", "D"))
Frequency <- c(
  668, 752, 213, 826, 948, 334, 625,
  834, 264, 488, 391, 136, 745, 917,
  234, 543, 848, 182, 901, 740, 241
)
data_bar <- data.frame(chromosomes, Frequency)

# Data for the pie chart: one count per genome
Genome <- c("A", "B", "D")
Count <- c(4796, 5430, 1604)
data_pie <- data.frame(Genome, Count)
I would highly appreciate it if anyone could guide me or point me to where I can find the answers.
Here's a ggplot2 solution:
library(ggplot2)

# One colour per genome (A, B, D), shared by the pie and the bars
genome_cols <- c("red2", "forestgreen", "dodgerblue3")

# Pie chart of the genome counts: a single stacked column drawn in polar
# coordinates, with axes and legend removed so it can be used as an inset
pie <- ggplot(data_pie, aes(x = 1, y = Count, fill = Genome)) +
  geom_col(color = "black") +
  scale_fill_manual(values = genome_cols) +
  coord_polar(theta = "y") +
  theme_void() +
  theme(legend.position = "none")

# Bar chart of the chromosome frequencies; the second character of each
# chromosome name ("A", "B" or "D") selects the fill colour
ggplot(data_bar) +
  geom_col(aes(chromosomes, Frequency, fill = substr(chromosomes, 2, 2)),
           color = "black", width = 0.5) +
  scale_fill_manual(values = genome_cols) +
  theme_classic() +
  theme(legend.position = "none") +
  # Embed the pie chart as a grob inside the bar chart's panel
  annotation_custom(ggplotGrob(pie), xmin = "2B", xmax = "6A", ymin = 500)
Using only base R functions, please see the code below:
## Your data ---------------------------------
# For bar graph
chromosomes <- c("1A", "1B", "1D", "2A", "2B", "2D", "3A", "3B", "3D", "4A",
                 "4B", "4D", "5A", "5B", "5D", "6A", "6B", "6D", "7A", "7B",
                 "7D")
Frequency <- c(668, 752, 213, 826, 948, 334, 625, 834, 264, 488, 391, 136,
               745, 917, 234, 543, 848, 182, 901, 740, 241)
# For pie chart
Genome <- c("A", "B", "D")
Count <- c(4796, 5430, 1604)

## One idea to start with --------------------
# Stack the two plots vertically. par() returns the previous settings, which
# are restored at the end so that later plots are not affected.
# You might also consider layout() or split.screen(), as suggested in ?par.
# (No plot.new() beforehand: it would only add an empty frame before the
# layout is set.)
old_par <- par(
  mfrow = c(2, 1),      # two rows, one column
  mar = c(4, 4, 1, 1),  # enough space to see axis labels and titles
  oma = c(0, 0, 0, 0)   # remove outer margins
)
pie(Count, labels = Genome)
barplot(Frequency ~ chromosomes)
par(old_par)
Output:
I think it is possible to make it look cleaner by adjusting the par() arguments, but I am not very familiar with them.
There are also the packages cowplot and gridExtra, which work nicely with ggplot2.
I'm trying to get the 8 hour rolling mean for o3 using the rollingMean function using this code:
# 8-hour rolling mean of o3 (plotted below as the "rolling8o3" column)
mydata <- rollingMean(mydata, pollutant = "o3", hours = 8)

# Band edges (11 values) and the 10 band names that go between them
breaks <- c(0, 34, 66, 100, 121, 141, 160, 188, 214, 240, 500)
labels <- c("Low.1", "Low.2", "Low.3", "Moderate.4", "Moderate.5", "Moderate.6",
            "High.7", "High.8", "High.9", "Very High.10")

# Calendar plot of the rolling mean using the "max" statistic, coloured by band
calendarPlot(mydata, pollutant = "rolling8o3", breaks = breaks, labels = labels,
             cols = "jet", statistic = "max")
But I keep getting this error message:
Error in strsplit(interval, split = " ") : non-character argument
In addition: Warning message:
In diff(as.numeric(unique(dates[order(dates)]))) :
NAs introduced by coercion
Any help with this would be greatly appreciated; I'm very new to R. Thanks.
If you attach mydata from the package using data(mydata), it should work.
library(openair)

# Load the example data set `mydata` from the package
data(mydata)

# Compute the 8-hour rolling mean of o3
mydata <- rollingMean(mydata, pollutant = "o3", hours = 8)

# Band edges and the matching band names (one name per interval)
breaks <- c(0, 34, 66, 100, 121, 141, 160, 188, 214, 240, 500)
labels <- c(
  "Low.1", "Low.2", "Low.3",
  "Moderate.4", "Moderate.5", "Moderate.6",
  "High.7", "High.8", "High.9",
  "Very High.10"
)

# Calendar plot of the rolling mean, coloured by band
calendarPlot(
  mydata,
  pollutant = "rolling8o3",
  breaks = breaks,
  labels = labels,
  cols = "jet",
  statistic = "max"
)
For some of you this could be an easy question.
I have 2 data frames:
dput(head(Activitieslessthan35))
structure(list(`Main job: Working time in main job` = c(470,
440, 430, 430, 410, 150), Sleep = c(420, 450, 450, 420, 450,
460), `Unspecified TV video or DVD watching` = c(60, 40, 210,
190, 60, 0), Eating = c(80, 60, 40, 70, 60, 130), `Other personal care:Wash and dress` = c(60,
60, 50, 50, 70, 50), `Travel to work from home and back only` = c(60,
60, 50, 90, 90, 30), `Unspecified radio listening` = c(140, 180,
50, 90, 140, 160), `Other specified social life` = c(350, 270,
310, 330, 710, 440), `Socialising with family` = c(350, 270,
360, 330, 730, 540), `Food preparation and baking` = c(410, 310,
420, 380, 1000, 950)), row.names = c(NA, 6L), class = "data.frame")
and
dput(head(ActivitiesMoreOrEqual35))
structure(list(`Main job: Working time in main job` = c(360,
420, 390, 490, 540, 390), Sleep = c(590, 480, 310, 560, 280,
370), `Unspecified TV video or DVD watching` = c(100, 60, 130,
120, 60, 30), Eating = c(70, 100, 70, 40, 190, 80), `Other personal care:Wash and dress` = c(10,
30, 100, 60, 270, 90), `Travel to work from home and back only` = c(0,
50, 260, 50, 0, 0), `Unspecified radio listening` = c(50, 80,
260, 80, 210, 200), `Other specified social life` = c(190, 320,
790, 250, 580, 420), `Travel in the course of work` = c(50, 80,
260, 70, 120, 200), `Food preparation and baking` = c(440, 570,
820, 570, 820, 590)), row.names = c(NA, 6L), class = "data.frame")
I would like to convert the data.frames into factors - for example, to have a factor variable called Activitieslessthan35 with the columns of the data frame used as levels, such as 'Main job: Working time in main job', 'Sleep', etc. Later I would also like to plot the sum of each level of the factors on a side-by-side bar plot.
I don't know whether you can transform a data.frame into a factor variable, nor how to change the format of the data.frames to create the plot.
Any suggestion is welcome
If I understand correctly, you want to put both of your data frames into a long format with two columns (one column containing the column names of the original data frame, the other containing all the values), then sum the values for each "factor" of the first column, merge both data frames, and plot them in a single plot. Am I right?
Here is a way to do it. I called the data frame Activitieslessthan35 df and the
data frame ActivitiesMoreOrEqual35 df2.
First, we are going to reshape each of your data frames to a long format using pivot_longer:
library(tidyr)
library(dplyr)

# Reshape each data frame to long format: the former column names go into
# "Activities" and the cell values into a single value column
df <- pivot_longer(
  df,
  cols = everything(),
  names_to = "Activities",
  values_to = "Values_less_than35"
)
df2 <- pivot_longer(
  df2,
  cols = everything(),
  names_to = "Activities",
  values_to = "Values_More_than_35"
)
Then, we will calculate the sum value for each factor of each of your dataframe:
# Sum the values per activity, separately for each data frame
df_sum <- df %>%
  group_by(Activities) %>%
  summarise(Values_less_than35 = sum(Values_less_than35))
df2_sum <- df2 %>%
  group_by(Activities) %>%
  summarise(Values_More_than_35 = sum(Values_More_than_35))
Then, we merge both data frames into a single one, using "Activities" as the merging column:
# Full outer join on "Activities": activities present in only one data frame
# are kept (all = TRUE)
final_df <- merge(df_sum, df2_sum, by = "Activities", all = TRUE)
Finally, we are transposing one last time final_df in order to have values in the correct shape for plotting them with ggplot2
# Back to long format: every column except Activities becomes a
# (Variable, Value) pair, which is the shape ggplot2 needs
final_df <- pivot_longer(
  final_df,
  cols = -Activities,
  names_to = "Variable",
  values_to = "Value"
)
And now we can plot your final dataframe using ggplot2
library(ggplot2)

# Side-by-side (dodged) horizontal bars, one bar per Variable for each
# activity. Activity names are wrapped at 15 characters for the axis labels.
ggplot(final_df, aes(x = stringr::str_wrap(Activities, 15), y = Value, fill = Variable)) +
  # geom_col() already plots the values as given; it has no `stat` parameter,
  # so passing stat = "identity" only triggers an "unknown parameters" warning
  geom_col(position = position_dodge()) +
  coord_flip() +
  xlab("")
And you get the following plot:
Does it look like what you are expecting?
I'm trying to plot an Euler Diagram using the eulerr package by Johan Larsson in R. I'm following this example where the developer explains how to customize colors/border transparency. However, when I try to implement it with the following code:
# Fit an Euler diagram for the sets A, B, C and their intersections
fit2 <- euler(c(A = 16971, B = 218, C = 215,
"A&B" = 112, "A&C" = 112, "B&C"= 51,"A&B&C" = 23))
# Attempt to set the fill colours, hide the borders and show the counts.
# Code as posted in the question: when run, the colours/borders stay unchanged.
plot(fit2,
polygon_args = list(col = c("dodgerblue4", "darkgoldenrod1", "cornsilk4"),
border = "transparent"),
text_args = list(font = 8), counts=TRUE)
Colors/border remain unchanged.
I am using RStudio 1.0.136, R 3.3.1 on Windows.
Sorry about this. That post is old and the arguments have changed.
Try this instead
# Sizes of the three sets and of their intersections
combos <- c(
  A = 16971, B = 218, C = 215,
  "A&B" = 112, "A&C" = 112, "B&C" = 51,
  "A&B&C" = 23
)
fit2 <- euler(combos)

# Plot with custom fill colours, no edges, and quantities shown
plot(
  fit2,
  fills = c("dodgerblue4", "darkgoldenrod1", "cornsilk4"),
  edges = FALSE,
  fontsize = 8,
  quantities = list(fontsize = 8)
)
I have a problem that I still haven't figured out how to solve. I want to plot all the values MW1, MW2 and MW3 as a function of "DHT + Procymidone". How can I plot all these values in one graph so that I get 3 different curves (in different colors and with different labels such as curve 1, 2, ...)? I also want the labels of the x-values ("DHT + Procymidone") to be -10, -9, ..., -4 instead of 1,00E-10, ...
DHT + Procymidone MW 1 MW 2 MW 3
1,00E-10 114,259526780335 111,022461066274 213,212408408682
1,00E-09 115,024187788314 111,083316791613 114,529425136628
1,00E-08 110,517449986348 107,867941606743 125,10230718665
1,00E-07 100,961311263444 98,4219995773135 116,045168653416
1,00E-06 71,2383604211297 73,539659636842 50,3213799775309
1,00E-05 20,3553333652104 36,1345771905088 15,42260866106
1,00E-04 4,06189509055904 18,1246447874679 10,1988107887318
I have shortened your data frame for convenience reasons, so here's an example:
library(tidyr)
library(ggplot2)

# Shortened example data: x-values given directly as exponents (-10 ... -4)
mydat <- data.frame(
  DHT_Procymidone = c(-10, -9, -8, -7, -6, -5, -4),
  MW1 = c(114, 115, 110, 100, 72, 20, 4),
  MW2 = c(111, 111, 107, 98, 73, 36, 18),
  MW3 = c(213, 114, 123, 116, 50, 15, 10)
)

# Long format: column names MW1..MW3 go into "grp", their values into "MW".
# (gather() still works; tidyr now recommends pivot_longer() for new code.)
mydf <- gather(mydat, key = "grp", value = "MW", MW1:MW3)

# One coloured line per group
ggplot(mydf, aes(DHT_Procymidone, MW, colour = grp)) +
  geom_line()
which gives following plot:
To use ggplot, your data needs to be in long-format. gather does this for you, appending columns MW1-MW3 into one column, while the column names are added as new column values in the grp-column. This group-column allows to identify different groups, i.e. different colored lines in the plot.
Depending on the type of the DHT + Procymidone column, you can, e.g., use format(..., scientific = FALSE) if it is numeric; however, this will result in 0.0000000001 (and not -10).
However, if this data column is a character vector (you can coerce with as.character), this may work:
# Remove the literal "1,00E" prefix so that only the exponent is left
a <- "1,00E-10"
sub(pattern = "1,00E", replacement = "", x = a, fixed = TRUE)
> [1] "-10"
As an alternative to @Daniel's answer, here is one that doesn't rely on ggplot (thanks Daniel for providing the reproducible data).
mydat <- data.frame(DHT_Procymidone = c(-10, -9, -8, -7, -6, -5, -4),
                    MW1 = c(114, 115, 110, 100, 72, 20, 4),
                    MW2 = c(111, 111, 107, 98, 73, 36, 18),
                    MW3 = c(213, 114, 123, 116, 50, 15, 10))

# The first series sets up the axes. xlim runs past the data (to -2),
# presumably to leave room for the legend placed at x = -4.
# `type` is spelled out in full rather than relying on partial matching.
plot(MW1 ~ DHT_Procymidone, data = mydat, type = "l",
     ylim = c(0, 220), xlim = c(-10, -2),
     xlab = "DHT Procymidone", ylab = "MW")
# Remaining series are added to the existing plot
lines(MW2 ~ DHT_Procymidone, data = mydat, col = "blue")
lines(MW3 ~ DHT_Procymidone, data = mydat, col = "red")
# Legend colours follow the order in which the series were drawn
legend(x = -4, y = 200, legend = c("MW1", "MW2", "MW3"), lty = 1, bty = "n",
       col = c("black", "blue", "red"))
To change axis labels see the text in xlab and ylab. To change axis limits see xlim and ylim.