Two columns on x-axis and different grids in R - r

I need some help with a graph in R.
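This is what my dataframe looks like:

| Footprint | Local Number | Remote Number | Location |
|-----------|--------------|---------------|----------|
| 10.4      | 45           | 4             | L1       |
| 12.5      | 452          | 78            | L9       |
| 15.6      | 86           | 52            | L5       |
| 85.3      | 12           | 12            | L4       |
| 12.5      | 35           | 36            | L2       |
| 85.9      | 78           | 78            | L3       |
| 78.5      | 44           | 44            | L6       |
| 4.6       | 10           | 11            | L7       |
| 13.9      | 157          | 2             | L8       |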
What I want to achieve is a graph with the 'Footprint' column in the y-axis, the 'Local Number' column(in the x-axis) in the positive grid of the graph and the 'Remote Number' column(in the x-axis) in the negative grid of the graph. The data should be presented in dots and the lab name should be the label. So basically, I want to show for each location the remote and local number of employees.
I am struggling on presenting the two columns in the x-axis. I appreciate the help!

Maybe you want something like this, where you use geom_point for both columns (one negated, one positive) and add the labels with geom_text:
# Sample data from the question: one row per location.
df <- read.table(text = "Footprint Local_Number Remote_Number Location
10.4 45 4 L1
12.5 452 78 L9
15.6 86 52 L5
85.3 12 12 L4
12.5 35 36 L2
85.9 78 78 L3
78.5 44 44 L6
4.6 10 11 L7
13.9 157 2 L8
", header = TRUE)
library(ggplot2)

# Footprint goes on the y-axis, as the question asks. Local head-counts sit on
# the positive side of the x-axis; remote head-counts are negated so that they
# are mirrored onto the negative side.
ggplot(df, aes(y = Footprint, label = Location)) +
  geom_point(aes(x = Local_Number, color = "1")) +
  geom_point(aes(x = -Remote_Number, color = "2")) +
  geom_text(aes(x = Local_Number), hjust = 0, vjust = 0) +
  geom_text(aes(x = -Remote_Number), hjust = 0, vjust = 0) +
  # Print magnitudes on the axis so the mirrored remote counts read as positive.
  scale_x_continuous(labels = abs) +
  scale_color_manual(
    "Legend",
    labels = c("Local number", "Remote number"),
    values = c("blue", "red")
  ) +
  labs(x = "Remote number  |  Local number", y = "Footprint")
Created on 2022-10-14 with reprex v2.0.2
If you want to show both on a positive axis only, you could remove the negative sign like this:
library(ggplot2)

# Same plot without mirroring: `df` is the data frame read in the previous
# snippet. Footprint stays on the y-axis; local and remote head-counts share
# the positive x-axis and are told apart by colour.
ggplot(df, aes(y = Footprint, label = Location)) +
  geom_point(aes(x = Local_Number, color = "1")) +
  geom_point(aes(x = Remote_Number, color = "2")) +
  geom_text(aes(x = Local_Number), hjust = 0, vjust = 0) +
  geom_text(aes(x = Remote_Number), hjust = 0, vjust = 0) +
  scale_color_manual(
    "Legend",
    labels = c("Local number", "Remote number"),
    values = c("blue", "red")
  ) +
  labs(x = "Number of employees", y = "Footprint")
Created on 2022-10-14 with reprex v2.0.2

Here are two more suggestions for visualisation. This seems to be paired data - remote vs local number. That can be represented either as a scatter plot or as a change.
Footprint can then be encoded as color. Thanks +1 to Quieten for the data.
library(tidyverse)

df <- read.table(header = TRUE, text = "Footprint Local_Number Remote_Number Location
10.4 45 4 L1
12.5 452 78 L9
15.6 86 52 L5
85.3 12 12 L4
12.5 35 36 L2
85.9 78 78 L3
78.5 44 44 L6
4.6 10 11 L7
13.9 157 2 L8
")

## Variant 1: local vs remote number as a scatter plot, footprint as colour
ggplot(df, aes(x = Local_Number, y = Remote_Number)) +
  geom_point(aes(color = Footprint), size = 3) +
  ## repelled text labels identify each location
  ggrepel::geom_text_repel(aes(label = Location)) +
  ## optional: dashed line of equality makes "more local" vs "more remote"
  ## easy to read off
  geom_abline(intercept = 0, slope = 1, linetype = 2, size = 0.3) +
  ## identical limits on both axes keep the comparison intuitive
  coord_equal(
    xlim = range(c(df$Local_Number, df$Remote_Number)),
    ylim = range(c(df$Local_Number, df$Remote_Number))
  ) +
  ## optional: different colour scale
  scale_color_viridis_c(option = "magma")

## Variant 2: avoid the half-empty panel of the scatter plot by showing the
## difference (local minus remote) per location instead
df %>%
  mutate(change = Local_Number - Remote_Number) %>%
  ## Location is the x variable here, so no text labels are needed
  ggplot(aes(x = Location, y = change, color = Footprint)) +
  geom_point(size = 3) +
  ## optional: different colour scale
  scale_color_viridis_c(option = "magma")
Created on 2022-10-14 by the reprex package (v2.0.1)

Related

Change legend labels and position dodge

I created with ggplot an interaction plot and added with a different dataframe outliers into the same plot. I want to change the legend's labels (yes and no), but a new legend is added instead of changing them. Here is the Code:
the theme I'm using:
# Theme settings used for the plots in this question: legend on the right
# without a title, 12 pt fonts for legend, axes and facet titles, and both
# sets of gridlines removed.
# NOTE(review): the argument names match jtools::theme_apa() — presumably that
# is the package in use; confirm.
theme_apa(
legend.pos = "right",
legend.use.title = FALSE,
legend.font.size = 12,
x.font.size = 12,
y.font.size = 12,
facet.title.size = 12,
remove.y.gridlines = TRUE,
remove.x.gridlines = TRUE
)
the plot:
# Interaction plot of the group means (data2) with the individual outlier
# observations (outliersDF) overlaid in the same panel.
InteractionWithOutliers <- ggplot() +
# One line per TB2 level through the means; TB2 is mapped to linetype here.
geom_line(data=data2, aes(x=Messzeitpunkt,
y = Sum_PCLMean,group = TB2,linetype=TB2),) +
# Colour scale: both TB2 levels are drawn in black and both relabelled "test".
scale_color_manual(labels = c("test", "test"),values=c('#000000','#000000'))+
# Outlier observations; TB2 is mapped to shape, colour and size.
geom_point(data = outliersDF, aes(Messzeitpunkt,Sum_PCL,
shape=TB2, color=TB2, size=TB2),) +
# Mean markers, using the same three TB2 mappings as the outliers.
geom_point(data = data2, aes(Messzeitpunkt,Sum_PCLMean,
shape=TB2, color=TB2, size=TB2), ) +
scale_shape_manual(values=c(15, 17))+
# Both TB2 levels get the same point size (2).
scale_size_manual(values=c(2,2)) +
ylim(0, 60) +
scale_x_continuous(breaks = seq(0,2)) +
# Error bars span mean - SD to mean + SD.
geom_errorbar(data=data2,aes(x = Messzeitpunkt,ymin=Sum_PCLMean-Sum_PCLSD, ymax=Sum_PCLMean+Sum_PCLSD), width=.2,)
# Print the plot with the APA theme and axis titles applied.
InteractionWithOutliers + theme_apa() +
labs(x ="Measurement Period", y = "PTSS mean scores")
Image of the Graph:
Furthermore, when I try to use position_dodge to separate the interaction plot from the outliers, not everything moves the same way.
Code:
# Same plot as before, but every layer is given position_dodge(width = 0.4).
InteractionWithOutliers <- ggplot() +
# Mean lines: grouped by TB2 (group and linetype), dodged.
geom_line(data=data2, aes(x=Messzeitpunkt,
y = Sum_PCLMean,group = TB2,linetype=TB2),position = position_dodge(width = 0.4)) +
scale_color_manual(labels = c("test", "test"),values=c('#000000','#000000'))+
# Outlier points: no explicit group aesthetic; TB2 is mapped to shape, colour
# and size.
geom_point(data = outliersDF, aes(Messzeitpunkt,Sum_PCL,
shape=TB2, color=TB2, size=TB2),position = position_dodge(width = 0.4)) +
# Mean markers, dodged with the same width.
geom_point(data = data2, aes(Messzeitpunkt,Sum_PCLMean,
shape=TB2, color=TB2, size=TB2),position = position_dodge(width = 0.4) ) +
scale_shape_manual(values=c(15, 17))+
scale_size_manual(values=c(2,2)) +
ylim(0, 60) +
scale_x_continuous(breaks = seq(0,2)) +
# Error bars (mean - SD to mean + SD): TB2 is not mapped to any aesthetic in
# this layer.
geom_errorbar(data=data2,aes(x = Messzeitpunkt,ymin=Sum_PCLMean-Sum_PCLSD, ymax=Sum_PCLMean+Sum_PCLSD),
width=.2,position = position_dodge(width = 0.4))
InteractionWithOutliers + theme_apa() +
labs(x ="Measurement Period", y = "PTSS mean scores")
Thank you for your help!
Edit: Data for the Outliers:
Messzeitpunkt Sum_PCL TB2
0 38 no
0 37 yes
0 40 yes
0 41 yes
0 38 yes
1 56 no
1 33 no
2 39 no
2 33 no
Data for the interaction plots:
Messzeitpunkt Sum_PCLMean TB2 Sum_PCLSD
0 9 no 11
0 12 yes 11
1 9 no 15
1 18 yes 16
2 8 no 12
2 14 yes 12
Merging legends can sometimes be painful. If your variables are already labelled (like in your example), then you also don't need to stipulate breaks or labels. (see first example).
However, a good rule is - don't add an aesthetic if you don't really need it. Size and color are constant aesthetics in your case, thus you could (and should) add it as a constant aesthetic outside of aes.
P.S. I have slightly changed the plot in order to make the essential more visible. I personally prefer to keep my plots in an order geoms->scales->coordinates->labels->theme, this helps me keeping an overview over the layers.
library(ggplot2)

# Group means and SDs per measurement occasion (the question's "data for the
# interaction plots"): these drive the lines and the mean markers.
data2 <- read.table(text = "Messzeitpunkt Sum_PCLMean TB2 Sum_PCLSD
0 9 no 11
0 12 yes 11
1 9 no 15
1 18 yes 16
2 8 no 12
2 14 yes 12", header = TRUE)

# Individual outlying observations (the question's "data for the outliers"):
# drawn as points only.
outliersDF <- read.table(text = "Messzeitpunkt Sum_PCL TB2
0 38 no
0 37 yes
0 40 yes
0 41 yes
0 38 yes
1 56 no
1 33 no
2 39 no
2 33 no", header = TRUE)

## Version 1: keep TB2 mapped to shape, colour and size. All three scales share
## the same variable and the same (default) labels, so ggplot2 merges them
## into a single legend.
ggplot() +
  geom_line(
    data = data2,
    aes(x = Messzeitpunkt, y = Sum_PCLMean, group = TB2, linetype = TB2)
  ) +
  geom_point(
    data = outliersDF,
    aes(Messzeitpunkt, Sum_PCL, shape = TB2, color = TB2, size = TB2)
  ) +
  geom_point(
    data = data2,
    aes(Messzeitpunkt, Sum_PCLMean, shape = TB2, color = TB2, size = TB2)
  ) +
  ## if your variable is labelled, no need to specify breaks or labels
  scale_color_manual(values = c("#000000", "#000000")) +
  scale_shape_manual(values = c(15, 17)) +
  scale_size_manual(values = c(2, 2))

## Version 2 (better): colour and size do not vary with TB2, so set them as
## constants outside aes() instead of mapping them. Only shape needs a scale.
ggplot() +
  geom_line(
    data = data2,
    aes(x = Messzeitpunkt, y = Sum_PCLMean, group = TB2, linetype = TB2)
  ) +
  geom_point(
    data = outliersDF,
    aes(Messzeitpunkt, Sum_PCL, shape = TB2),
    size = 2
  ) +
  geom_point(
    data = data2,
    aes(Messzeitpunkt, Sum_PCLMean, shape = TB2),
    ## black is the default colour; it is spelled out only for demonstration
    color = "black", size = 2
  ) +
  scale_shape_manual(values = c(15, 17))
Created on 2022-07-15 by the reprex package (v2.0.1)

Adjusting the secondary y axis using ggplot

I am trying to graph two different datasets, reconstructed temperatures (10-16) and charcoal data (0-140), with two different time series values, using ggplot. Is this possible?
I used this code (see below) but unfortunately it produced a plot (see below) that limits the variability of the temperature reconstruction. Is there a way to adjust the y axis so we can see more variability in the temperature record?
Thank you very much for your support.
R code
# Collect the four columns of interest from Charfiretempdata into df.
df <- data.frame(Charfiretempdata$AGETEMPS, Charfiretempdata$FIREAGE, Charfiretempdata$Comp2TEMPS,Charfiretempdata$Char.Acc.Rate..Char...cm.2.yr.1.)
# Note: the aes() calls below read the columns from Charfiretempdata directly
# rather than from df.
ggplot(df) +
# Charcoal bars: the values are multiplied by 16/150 so that they can be drawn
# against the primary (temperature) axis.
geom_col(mapping = aes(x = Charfiretempdata$FIREAGE,
y = Charfiretempdata$Char.Acc.Rate..Char...cm.2.yr.1. * 16/150), size = 2, color = "darkblue",
fill = "white") +
# Temperature series as a line with white-filled point markers.
geom_line(mapping = aes(x = Charfiretempdata$AGETEMPS, y = Charfiretempdata$Comp2TEMPS)) +
geom_point(mapping = aes(x = Charfiretempdata$AGETEMPS, y = Charfiretempdata$Comp2TEMPS), size
= 3, shape = 21, fill = "white")+
# The secondary axis applies the inverse factor (150/16) to label the bars in
# their original units.
scale_y_continuous(
name = expression("Temperature ("~degree~"C)"),
sec.axis = sec_axis(~ . * 150/16 , name = "Charcoal (mm)"))
R plot
I created random sample data that shares similar characteristics with your data.
library(dplyr)
library(ggplot2)

# Fixed seed so the random columns are reproducible.
set.seed(282930)

# 20 time points: three early ones, then yearly values from 2000 to 2017 with
# 2014 absent. y_axis_1 lies in the 10-16 range and y_axis_2 in the 0-150 range.
df <- tibble(
  x_axis = c(1400, 1500, 1600, 2000:2013, 2015:2017),
  y_axis_1 = runif(20, min = 10, max = 16),
  y_axis_2 = runif(20, min = 0, max = 150)
)
Here is the df
> df
# A tibble: 20 x 3
x_axis y_axis_1 y_axis_2
<dbl> <dbl> <dbl>
1 1400 15.7 5.28
2 1500 11.8 141.
3 1600 14.5 149.
4 2000 11.6 121.
5 2001 15.6 37.3
6 2002 15.0 72.5
7 2003 10.7 130.
8 2004 15.4 84.7
9 2005 11.5 118.
10 2006 10.4 17.4
11 2007 11.3 124.
12 2008 13.6 22.6
13 2009 13.0 14.5
14 2010 15.9 142.
15 2011 12.3 103.
16 2012 10.3 131.
17 2013 12.6 93.6
18 2015 14.6 12.4
19 2016 11.4 27.9
20 2017 15.3 116.
Here is a ggplot similar to yours, but with a different axis adjustment:
# All layers share the same x variable, so it is declared once in ggplot().
ggplot(df, aes(x = x_axis)) +
  geom_col(
    # Rescale the second series from 0-150 onto a span of 10 and shift it up by
    # 10, because the primary axis will start at 10 instead of 0.
    aes(y = (y_axis_2 * 10 / 150) + 10),
    # `size` is the width of the bar border. The bars are very thin (one tick
    # on the x-axis each), so the dark blue border covers the white fill and
    # the fill is effectively invisible here.
    size = 2, color = "darkblue", fill = "white"
  ) +
  geom_line(aes(y = y_axis_1)) +
  geom_point(aes(y = y_axis_1), size = 3, shape = 21, fill = "white") +
  # Zoom the primary axis to 10-20 so the temperature variation is visible.
  # `expand` is a logical switch: FALSE turns off the padding around the limits.
  coord_cartesian(ylim = c(10, 20), expand = FALSE) +
  scale_y_continuous(
    name = expression("Temperature (" ~ degree ~ "C)"),
    # The secondary axis labels are computed from the primary axis values, so
    # this is the exact inverse of the transform above: subtract the offset of
    # 10, then multiply by 150 / 10.
    sec.axis = sec_axis(~ (. - 10) * 150 / 10, name = "Charcoal (mm)")
  )
Here is the sample output plot
Even with the adjusted y-axis we can hardly see the temperature at the end of the data, because there are many more data points there. If you don't need every data point at the end, you could keep only every 10th year: the data span about 600 years, so that much detail at the end is not needed. And if you do need the detail, graph that time frame separately.
Filter data at the end to only take every 10 year instead
# Keep everything up to 2000, and after that only the years divisible by 10.
ggplot(
  df %>% filter(x_axis <= 2000 | x_axis %% 10 == 0),
  aes(x = x_axis)
) +
  # Same layers as the previous plot, with geom_bar(stat = "identity") in place
  # of geom_col().
  geom_bar(
    aes(y = (y_axis_2 * 10 / 150) + 10),
    stat = "identity", size = 2, color = "darkblue", fill = "white"
  ) +
  geom_line(aes(y = y_axis_1)) +
  geom_point(aes(y = y_axis_1), size = 3, shape = 21, fill = "white") +
  scale_y_continuous(
    name = expression("Temperature (" ~ degree ~ "C)"),
    # Inverse of the bar transform: remove the offset of 10, rescale to 0-150.
    sec.axis = sec_axis(~ (. - 10) * 150 / 10, name = "Charcoal (mm)")
  ) +
  # `expand` is a logical switch: FALSE turns off the padding around the limits.
  coord_cartesian(ylim = c(10, 20), expand = FALSE)
(As you can see that with less data point, we started to see the fill as plot have more space)
Zoom in at the end of the data
# Restrict the data to the densely sampled period from 2000 onwards.
ggplot(
  df %>% filter(x_axis >= 2000),
  aes(x = x_axis)
) +
  # Same layers as the first plot, with geom_bar(stat = "identity") in place of
  # geom_col().
  geom_bar(
    aes(y = (y_axis_2 * 10 / 150) + 10),
    stat = "identity", size = 2, color = "darkblue", fill = "white"
  ) +
  geom_line(aes(y = y_axis_1)) +
  geom_point(aes(y = y_axis_1), size = 3, shape = 21, fill = "white") +
  scale_y_continuous(
    name = expression("Temperature (" ~ degree ~ "C)"),
    # Inverse of the bar transform: remove the offset of 10, rescale to 0-150.
    sec.axis = sec_axis(~ (. - 10) * 150 / 10, name = "Charcoal (mm)")
  ) +
  # `expand` is a logical switch: FALSE turns off the padding around the limits.
  coord_cartesian(ylim = c(10, 20), expand = FALSE)
(Now we can see both the darkblue border and the white fill inside)

Scatterpie: How to add annotation over line connecting pies to mark percent change in y-values between pies

I have a scatterpie plot with pies plotted over x and y axes and a "trend line" connecting them. In the spirit of this answer, I would like to add an annotation over each line to mark the percent increase/decrease between the y-values underlying each adjacent pies.
My data
library(tidyverse)
library(scatterpie)
# Example data: one row per day (day_in_july 13 to 20). yes_and_yes and
# yes_but_no are the two shares drawn in each pie; the y column repeats the
# yes_but_no values and is used for the vertical position.
my_df <- structure(list(day_in_july = 13:20, yes_and_yes = c(0.611814345991561,
0.574750830564784, 0.593323216995448, 0.610539845758355, 0.650602409638554,
0.57429718875502, 0.575971731448763, 0.545454545454545), yes_but_no = c(0.388185654008439,
0.425249169435216, 0.406676783004552, 0.389460154241645, 0.349397590361446,
0.42570281124498, 0.424028268551237, 0.454545454545455), y = c(0.388185654008439,
0.425249169435216, 0.406676783004552, 0.389460154241645, 0.349397590361446,
0.42570281124498, 0.424028268551237, 0.454545454545455)), row.names = c(NA,
-8L), class = c("tbl_df", "tbl", "data.frame"))
My current visualization
# Scatterpie plot: one pie per day, joined by a trend line. y is multiplied by
# 50 for plotting, and the y-axis labels divide by 50 to show original values.
p <- ggplot(data = my_df) +
  geom_path(aes(x = day_in_july, y = y * 50)) +
  geom_scatterpie(
    aes(x = day_in_july, y = y * 50, r = 0.3),
    data = my_df,
    cols = colnames(my_df)[2:3],
    color = "red"
  ) +
  # Share of "yes but no", nudged up and to the left of each pie.
  geom_text(
    aes(
      y = y * 50, x = day_in_july,
      label = paste0(formatC(y * 100, digits = 3), "%")
    ),
    nudge_y = 0.07, nudge_x = -0.25, size = 3
  ) +
  # Complementary share, nudged down and to the right of each pie.
  geom_text(
    aes(
      y = y * 50, x = day_in_july,
      label = paste0(formatC((1 - y) * 100, digits = 3), "%")
    ),
    nudge_y = -0.07, nudge_x = 0.25, size = 3
  ) +
  scale_fill_manual(values = c("pink", "seagreen3")) +
  # The original referred to an `xvals` object that is never defined in the
  # post; one break per day present in the data is used instead.
  scale_x_continuous(breaks = my_df$day_in_july) +
  scale_y_continuous(
    name = "yes but no",
    labels = function(x) x / 50
  ) +
  coord_fixed()
> p
I want to add percent increase/decrease between y-values of adjacent pies
The y-value of the first pie (at day_in_july = 13) is 0.388. From this y-value to the next pie's y-value (0.425) there's a percent increase of 9.53%. Therefore, I want to mark the line that connects the two pies with a label of +9.53% .
Ultimately, I want the plot to look like this one:
On the way to the solution
This answer already has the relevant mechanism to get what I'm looking for.
The idea is to use ggplot_build() to access the data underlying the plot, then calculate the percent change between two consecutive values, then rebuild the plot with the lines annotated accordingly. However, this solution isn't working for me with the scatterpie plot since the underlying data outputted from ggplot_build is of its own kind.
# Extract the computed data of one layer from the built plot as a tibble.
# NOTE(review): the printout that follows has fill/amount columns, which looks
# like the scatterpie layer rather than the first layer of `p` (geom_path) —
# confirm which layer index was actually used.
plot_data <- ggplot_build(p)$data[[1]] %>% as_tibble()
> plot_data
## # A tibble: 2,904 x 13
## fill group index amount PANEL stringsAsFactors nControl x y colour size linetype alpha
## <chr> <chr> <dbl> <dbl> <fct> <lgl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <lgl>
## 1 pink 1 0 0.612 1 FALSE 221 13 19.7 red 0.5 1 NA
## 2 pink 1 0.00452 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 3 pink 1 0.00905 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 4 pink 1 0.0136 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 5 pink 1 0.0181 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 6 pink 1 0.0226 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 7 pink 1 0.0271 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 8 pink 1 0.0317 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 9 pink 1 0.0362 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## 10 pink 1 0.0407 0.612 1 FALSE 221 13.0 19.7 red 0.5 1 NA
## # ... with 2,894 more rows
Where are the actual y-values that I need for calculating the percent change between pies' y-values? Obviously, I can get the y-values from the data. But in order to reconstruct the plot, this data from ggplot_build() doesn't make sense to me, and I don't know how to utilize the technique to add the percentage change between pies to the plot line.
Here is my attempt with the ggrepel package. I basically created a new data frame containing necessary information for geom_label_repel(). I omit the details of what I did to create foo. But I think you can read it. I invested a bit of time to find the optimal positions for the label, and this is what I could do for you for now. If you are not happy with the position, you gotta play around by yourself.
# Label data: one row per line segment between two adjacent pies.
#   day_in_july - horizontal midpoint of the segment
#   y           - vertical midpoint of the segment (in plotted units, y * 50)
#   change      - percent change of yes_but_no from one day to the next
#   hue         - "green" for an increase, "red" otherwise
#   gap         - the signed label text, e.g. "+9.55%"
foo <- tibble(
  day_in_july = my_df$day_in_july + 0.5,
  y = my_df$y * 50 + (lead(my_df$y * 50) - my_df$y * 50) / 2,
  change = (lead(my_df$yes_but_no) / my_df$yes_but_no - 1) * 100
) %>%
  # The last day has no successor, so its change is NA: drop it rather than
  # carrying an "NA%" label into the plot.
  filter(!is.na(change)) %>%
  mutate(
    # Decide the colour from the numeric change, before it is formatted as text.
    hue = ifelse(change > 0, "green", "red"),
    gap = sprintf("%+.2f%%", change)
  )

p <- ggplot(data = my_df) +
  geom_path(aes(x = day_in_july, y = y * 50)) +
  geom_scatterpie(
    aes(x = day_in_july, y = y * 50, r = 0.3),
    data = my_df,
    cols = colnames(my_df)[2:3],
    color = "red"
  ) +
  geom_text(
    aes(
      y = y * 50, x = day_in_july,
      label = paste0(formatC(y * 100, digits = 3), "%")
    ),
    nudge_y = 0.07, nudge_x = -0.25, size = 3
  ) +
  geom_text(
    aes(
      y = y * 50, x = day_in_july,
      label = paste0(formatC((1 - y) * 100, digits = 3), "%")
    ),
    nudge_y = -0.07, nudge_x = 0.25, size = 3
  ) +
  scale_fill_manual(values = c("pink", "seagreen3")) +
  # Namespaced call, so the snippet does not depend on library(ggrepel).
  ggrepel::geom_label_repel(
    data = foo,
    aes(x = day_in_july, y = y, color = hue, label = gap),
    show.legend = FALSE,
    nudge_x = 0.3,
    direction = "y",
    vjust = -1.0
  ) +
  # Named values keep the mapping correct even if only one hue is present.
  scale_color_manual(values = c(green = "green", red = "red"))

geom_text() to label two separate points from different plots in ggplot

I am trying to create individual plots facetted by 'iid' using 'facet_multiple', in the following dataset (first 3 rows of data)
iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98
and wrote the following code:
# Install gg_plus from GitHub
remotes::install_github("guiastrennec/ggplus")
# Load libraries
library(ggplot2)
library(ggplus)
# Generate ggplot object
# Plot-level aesthetics are x = Age, y = al; the second point layer overrides
# them with the baseline values (baseAGE, baseAL).
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
geom_point(alpha = 0.5) +
geom_point(aes(x= baseAGE, y=baseAL)) +
labs(x = 'Age (days)',
y = 'Axial length (mm)',
title = 'Individual plots of Axial length v time')
# Dotted red vertical line at agesurg
p1 <- p+geom_vline(aes(xintercept = agesurg),
linetype = "dotted",
colour = "red",
size =1.0)
# Text labels: neither layer sets x or y in its aes(), only label, hjust and
# vjust.
p2<- p1 + geom_text(aes(label=iop ,hjust=-1, vjust=-1))
p3 <- p2 + geom_text(aes(label = baseIOP, hjust=-1, vjust=-1))
# Plot on multiple pages (output plot to R/Rstudio)
# One facet (one iid) per page: ncol = 1, nrow = 1
facet_multiple(plot = p3,
facets = 'iid',
ncol = 1,
nrow = 1,
scales = 'free')
The main issue I am having is labelling the points. The points corresponding to (x=Age, y=al) get labelled fine, but the labels for the second group of points (x=baseAGE, y=baseAL) get put in the wrong place (see the individual plot sample).
I have had a look at similar issues in Stack Overflow e.g. ggplot combining two plots from different data.frames
But not been able to correct my code.
Thanks for your help
You need to define the x and y coordinates for the labels, or they will be inherited from the aesthetics set in the ggplot() call (here x = Age, y = al).
Thus the geom_text() definitions should look something like:
# First three rows of the question's data (the leading unnamed column becomes
# the row names).
data_longF1 <- read.table(
  header = TRUE,
  text = "iid Age iop al baseIOP baseAGE baseAL agesurg
1 1 1189 20 27.9 21 336 24.9 336
2 2 877 11 21.5 16 98 20.3 98
3 2 1198 15 21.7 16 98 20.3 98"
)

# Base plot: follow-up measurements (Age, al) plus the baseline point
# (baseAGE, baseAL) for each row.
p <- ggplot(data_longF1, aes(x = Age, y = al)) +
  geom_point(alpha = 0.5) +
  geom_point(aes(x = baseAGE, y = baseAL)) +
  labs(
    x = "Age (days)",
    y = "Axial length (mm)",
    title = "Individual plots of Axial length v time"
  )

# Dotted red vertical line at agesurg.
p1 <- p +
  geom_vline(
    aes(xintercept = agesurg),
    linetype = "dotted", colour = "red", size = 1
  )

# Each text layer states its own x and y. A layer that omits them inherits
# x = Age, y = al from ggplot(), which would misplace the baseline labels.
p2 <- p1 +
  geom_text(aes(x = Age, y = al, label = iop), hjust = -1, vjust = -1)
p3 <- p2 +
  geom_text(
    aes(x = baseAGE, y = baseAL, label = baseIOP),
    hjust = -1, vjust = -1
  )
print(p3)

Creating a geom histogram that counts both positive/negative ggplot

I am having trouble plotting the positive log2_ratio count on the positive y-axis and the count of the negative log2_ratio on the negative-y axis.
In essence, I want the positive count to be above the x-axis while the negative count to be below the x-axis.
Here is the data frame and the code:
chrom chr_start chr_stop num_positions normal_depth tumor_depth log2_ratio gc_content sample
324202 1 156249804 156249858 55 12.3 4.7 -1.399 34.5 10
324203 1 156250463 156250473 11 10.0 4.6 -1.109 27.3 10
324204 1 156250664 156250705 42 12.0 7.4 -0.704 19.0 10
324205 1 156250816 156250847 32 11.7 4.6 -1.343 40.6 10
324206 1 156251108 156251132 25 10.6 3.6 -1.569 60.0 10
324207 1 156251411 156251464 54 12.3 6.8 -0.863 46.3 10
# Histogram of rows per sample, one bin per sample: samples up to 25 in blue,
# samples above 25 in grey.
newHist <- ggplot(
  resultsPileup1COMBINED[resultsPileup1COMBINED$sample <= 25, ],
  aes(x = sample)
) +
  # `binwidth` was misspelled as `bindwidth`, so the first layer silently fell
  # back to the default binning.
  geom_histogram(fill = "blue", binwidth = 1) +
  geom_histogram(
    data = resultsPileup1COMBINED[resultsPileup1COMBINED$sample > 25, ],
    fill = "gray50", binwidth = 1
  ) +
  scale_x_continuous(breaks = seq(from = 1, to = 50, by = 3))
Here is a current graph:
If you are asking for a completely new graph, try:
# Mirrored histogram of log2_ratio: positive ratios are counted upwards;
# negative ratios are negated on x (to line up with the positive ones) and
# their counts are negated so the bars point downwards.
ggplot() +
  geom_histogram(
    mapping = aes(x = log2_ratio, y = ..count..),
    data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio > 0, ]
  ) +
  geom_histogram(
    mapping = aes(x = -log2_ratio, y = -..count..),
    data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio < 0, ]
  )
In this plot we have the < 0 log2_ratios on the bottom, with the x values negative to line up with the above, using the - ..count.. method
Edit: Asking for a slightly different graph, leaving the above in for posterity.
To graph the number of +/- values for each bin, we plot it out, again using the -..count.. trick:
# Per-sample counts split by sign of log2_ratio: rows with a negative ratio are
# counted downwards (negated count), rows with a positive ratio upwards.
ggplot() +
  geom_histogram(
    mapping = aes(x = sample, y = -..count..),
    data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio < 0, ]
  ) +
  geom_histogram(
    mapping = aes(x = sample, y = ..count..),
    data = resultsPileup1COMBINED[resultsPileup1COMBINED$log2_ratio > 0, ]
  )
Again, breaks and colours are up to you.
To make it similar to the original plot, make sure you include:
# `sample` is numeric, so the breaks belong on a continuous x scale (the
# original plot in the question used scale_x_continuous as well).
scale_x_continuous(breaks = seq(from = 1, to = 50, by = 3))

Resources