I have following data. Each observation is a genomic coordinate with copy number changes (copy.number.type) which is found in some percentage of samples (per.found).
chr<-c('1','12','2','12','12','4','2','X','12','12','16','16','16','5'
,'4','16','X','16','16','4','1','5','2','4','5','X','X','X','4',
'1','16','16','1','4','4','12','2','X','1','16','16','2','1','12',
'2','2','4','4','2','1','5','X','4','2','12','16','2','X','4','5',
'4','X','5','5')
start <- c(247123880,91884413,88886155,9403011,40503634,10667741,88914884,
100632615,25804205,25803542,18925987,21501823,21501855,115902990,
26120955,22008406,432498,22008406,22008406,69306802,4144380,73083197,
47743372,34836043,16525257,315832,1558229,51048657,49635818,239952709,
69727769,27941625,80328938,49136485,49136654,96076105,133702693,315823,
16725215,69728318,88520557,89832606,202205081,124379013,16045662,89836880,
49657307,97117994,76547133,35051701,344973,1770075,49139874,77426085,
9406416,69727781,108238962,151006944,49121333,6669602,89419843,74214551,
91203955,115395615)
type <- c('Inversions','Deletions','Deletions','Deletions','Deletions','Duplications','Deletions','Deletions',
'Duplications','Deletions','Duplications','Inversions','Inversions','Deletions','Duplications',
'Deletions','Deletions','Deletions','Deletions','Inversions','Duplications','Inversions','Inversions',
'Inversions','Deletions','Deletions','Deletions','Insertions','Deletions','Inversions','Inversions',
'Inversions','Inversions','Deletions','Deletions','Inversions','Deletions','Deletions','Inversions',
'Inversions','Deletions','Deletions','Deletions','Insertions','Inversions','Deletions','Deletions',
'Deletions','Inversions','Deletions','Duplications','Inversions','Deletions','Deletions','Deletions',
'Inversions','Deletions','Inversions','Deletions','Inversions','Inversions','Inversions','Deletions','Deletions')
per.found <- c(-0.040,0.080,0.080,0.040,0.080,0.040,0.080,0.040,0.040,0.120,0.040,-0.080,-0.080,0.040,0.040,0.120,
0.040,0.120,0.120,-0.040,0.011,-0.011,-0.023,-0.023,0.011,0.023,0.011,0.011,0.011,-0.011,-0.034,
-0.011,-0.023,0.011,0.011,-0.011,0.023,0.023,-0.023,-0.034,0.011,0.023,0.011,0.011,-0.023,0.023,
0.011,0.011,-0.011,0.011,0.011,-0.023,0.011,0.057,0.011,-0.034,0.023,-0.011,0.011,-0.011,-0.023,
-0.023,0.011,0.011)
df <- data.frame(chromosome = chr, start.coordinate = start, copy.number.type = type, per.found = per.found )
I would like to create a line plot. I created a plot using ggplot (facets), but the problem is I can not connect the points between two facets. Is there any way to do that. I do not necessarily need to use facets if there is a way to annotate x axis scales by chromosome. In the following image the dotted line shows what I would like to have for all copy.number.type lines.
EDIT: Looking for simplified approach.
library(ggplot2)
ggplot(df, aes(x=start.coordinate,y=per.found, group=copy.number.type, color=copy.number.type))+
geom_line()+
geom_point()+
facet_grid(.~chromosome,scales = "free_x", space = "free_x")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
Desired output: As shown by the red dashed lines. I want to connect all the border points with a dashed line across facets.
Note: it may not make sense to connect the lines between the chromosomes.
But here is one way, by avoiding facets:
library(dplyr)
df2 <- df %>%
mutate(chromosome = factor(chromosome, c(1, 2, 4, 5, 12, 16, 'X'))) %>%
arrange(chromosome, start.coordinate)
chromosome_positions <- df2 %>%
group_by(chromosome) %>%
summarise(start = first(start.coordinate), end = last(start.coordinate)) %>%
mutate(
size = end - start,
new_start = cumsum(lag(size, default = 0)),
new_end = new_start + size
)
df3 <- df2 %>%
left_join(chromosome_positions, 'chromosome') %>%
mutate(new_x = start.coordinate + (new_start - start))
ggplot(df3, aes(x=new_x,y=per.found, group=copy.number.type, color=copy.number.type))+
geom_rect(
aes(xmin = new_start, xmax = new_end, ymin = -Inf, ymax = Inf, fill = chromosome),
chromosome_positions, inherit.aes = FALSE, alpha = 0.3
) +
geom_line() +
geom_point() +
geom_text(
aes(x = new_start + 0.5 * size, y = Inf, label = chromosome),
chromosome_positions, inherit.aes = FALSE, vjust = 1
) +
scale_fill_manual(values = rep(c('grey60', 'grey90'), 10), guide = 'none') +
scale_x_continuous(expand = c(0, 0))
Related
I am attempting to make a ggplot2 scatter plot that is grouped by bins in R. I successfully made the first model, which I did not try to alter the fill for. But when I tried to have the fill of the scatter plot be based upon my variable (Miss.) ,which is a numeric value ranging from 0.00 to 0.46, it essentially ignores the heat map scale and turns everything gray.
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk))+
geom_bin_2d(bins = 15)+
scale_fill_continuous(type = "viridis")+
ylim(5, 20)+
xlim(0,15)+
coord_fixed(1.3)
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk, fill
=Miss.))+
geom_bin_2d(bins = 15)+
scale_fill_continuous(type = "viridis")+
ylim(5, 20)+
xlim(0,15)+
coord_fixed(1.3)
I appreciate any help! Thanks!
I think I understand your problem, so let's replicate it with a reproducible example. Obviously we don't have your data, but the following data frame has the same names, types and ranges as your own data, so this walk-through should work for you.
set.seed(1)
RightFB <- data.frame(TMHrzBrk = runif(1000, 0, 15),
TMIndVertBrk = runif(1000, 5, 20),
Miss. = runif(1000, 0, 0.46))
Your first plot will look something like this:
library(tidyverse)
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk)) +
geom_bin_2d(bins = 15) +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: Removed 56 rows containing missing values (`geom_tile()`).
Here, the fill colors represent the counts of observations within each bin. But if you try to map the fill to Miss., you get all gray squares:
ggplot(data = RightFB, mapping = aes(x = TMHrzBrk, y = TMIndVertBrk,
fill = Miss.)) +
geom_bin_2d(bins = 15) +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: The following aesthetics were dropped during statistical transformation: fill
#> i This can happen when ggplot fails to infer the correct grouping structure in
#> the data.
#> i Did you forget to specify a `group` aesthetic or to convert a numerical
#> variable into a factor?
#> Removed 56 rows containing missing values (`geom_tile()`).
The reason this happens is that by default geom_bin_2d calculates the bins and the counts within each bin to get the fill variable. There are multiple observations within each bin, and they all have a different value of Miss. . Furthermore, geom_bin_2d doesn't know what you want to do with this variable. My guess is that you are looking for the average of Miss. within each bin, but this is difficult to achieve within the framework of geom_bin_2d.
The alternative is to calculate the bins yourself, get the average of Miss. in each bin, and plot as a geom_tile
RightFB %>%
mutate(TMHrzBrk = cut(TMHrzBrk, breaks = seq(0, 15, 1), seq(0.5, 14.5, 1)),
TMIndVertBrk = cut(TMIndVertBrk, seq(5, 20, 1), seq(5.5, 19.5, 1))) %>%
group_by(TMHrzBrk, TMIndVertBrk) %>%
summarize(Miss. = mean(Miss., na.rm = TRUE), .groups = "drop") %>%
mutate(across(TMHrzBrk:TMIndVertBrk, ~as.numeric(as.character(.x)))) %>%
ggplot(aes(x = TMHrzBrk, y = TMIndVertBrk, fill = Miss.)) +
geom_tile() +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
EDIT
With the link to the data in the comments, here is a full reprex:
library(tidyverse)
RightFB <- read.csv(paste0("https://raw.githubusercontent.com/rileyfeltner/",
"FB-Analysis/main/Right%20FB.csv"))
RightFB <- RightFB[c(2:6, 9, 11, 13, 18, 19)]
RightFB$Miss. <- as.numeric(as.character(RightFB$Miss.))
#> Warning: NAs introduced by coercion
RightFB$TMIndVertBrk <- as.numeric(as.character(RightFB$TMIndVertBrk))
#> Warning: NAs introduced by coercion
RightFB <- na.omit(RightFB)
RightFB1 <- filter(RightFB, P > 24)
RightFB %>%
mutate(TMHrzBrk = cut(TMHrzBrk, breaks = seq(0, 15, 1), seq(0.5, 14.5, 1)),
TMIndVertBrk = cut(TMIndVertBrk, seq(5, 20, 1), seq(5.5, 19.5, 1))) %>%
group_by(TMHrzBrk, TMIndVertBrk) %>%
summarize(Miss. = mean(Miss., na.rm = TRUE), .groups = "drop") %>%
mutate(across(TMHrzBrk:TMIndVertBrk, ~as.numeric(as.character(.x)))) %>%
ggplot(aes(x = TMHrzBrk, y = TMIndVertBrk, fill = Miss.)) +
geom_tile() +
scale_fill_continuous(type = "viridis") +
ylim(5, 20) +
xlim(0, 15) +
coord_fixed(1.3)
#> Warning: Removed 18 rows containing missing values (`geom_tile()`).
Created on 2022-11-23 with reprex v2.0.2
Here is my raw data, first column is length and second is label:
length label
6.2 sc1__1__62000
0.5 sc1__63001__68000
2.6 sc1__75001__101000
0.7 sc1__103001__110000
....
There are 200 entries as such in the file.
I want to make an image as follow, width of each rectangle is same as corresponding length in the table:
How should I do this in R?
You need to use dplyr to create extra columns for plotting
library(dplyr)
library(ggplot2)
library(readr)
set.seed(20191234)
testdata <- tibble(
length = sample(1:10,10,replace = TRUE),
label = replicate(10,paste0(sample(letters,sample(5:15,10,replace = TRUE)),collapse = ""))
) %>%
# plot data
mutate(
xmax = cumsum(length),
xmin = dplyr::lag(xmax,default = 0),
ymin = 0,
ymax = 2,
text_x = (xmin+xmax)/2,
text_y = nchar(label)
)
Create plot using geom_rect() and geom_text()
text_y_adjust <- -0.032
testdata %>%
ggplot() +
geom_rect(aes(xmin = xmin,xmax = xmax,ymin =ymin,ymax = ymax),
alpha = 0,color = "black",size = 1) +
geom_text(aes(x = text_x,y = text_y_adjust * text_y, label = label),angle = 90) +
ylim(c(-2,2)) +
theme_void()
Note: If you modify any of height of rectangles, text_y_adjust ratio, or ylim, you need to also change other values correspondingly.
I suppose it can be done more elegant, but here is a suggestion:
#create sample dataframe
length<-c(6.2,0.5,2.6,0.7)
label<-letters[1:4]
d<-data.frame(length,label)
#calculate distance for labels
labelLength<-length[1]/2
for (i in 2:length(length)) {
labelLength[i]<-sum(length[1:(i-1)])+length[i]/2
}
#create plot
library(ggplot2)
p<-ggplot()+geom_bar(mapping=aes(x=1,y=length),
stat="identity",fill="white",color="red")+coord_flip()+
scale_y_continuous(breaks=labelLength,labels=label)+
theme(axis.text.x=element_text(angle=90,vjust=0.5,size=20),
axis.text.y=element_blank(),
axis.title.y = element_blank(),
panel.background = element_rect(fill="white"))
plot(p)
I want to highlight text based on the position in a string, for example if we have this text:
this is a really nice informative piece of text
Then I want to say let's draw a rectangle around positions 2 till 4:
t[his] is a really nice informative piece of text
I tried to do so in ggplot2 using the following code:
library(ggplot2)
library(dplyr)
box.data <- data.frame(
start = c(4,6,5,7,10,7),
type = c('BOX1.start', 'BOX1.start', 'BOX1.start','BOX1.end', 'BOX1.end', 'BOX1.end'),
text.id = c(1,2,3,1,2,3)
)
text.data <- data.frame(
x = rep(1,3),
text.id = c(1,2,3),
text = c('Thisissomerandomrandomrandomrandomtext1',
'Thisissomerandomrandomrandomrandomtext2',
'Thisissomerandomrandomrandomrandomtext3')
)
ggplot(data = text.data, aes(x = x, y = text.id)) +
scale_x_continuous(limits = c(1, nchar(as.character(text.data$text[1])))) +
geom_text(label = text.data$text, hjust = 0, size = 3) +
geom_line(data = box.data, aes(x = start, y = text.id, group = text.id, size = 3, alpha = 0.5, colour = 'red'))
This produces the following graph:
My method fails as a letter does not cover exactly one unit of the x-axis, is there any way to achieve this?
I just figured out that I can split the string in characters and plot these, perhaps it is useful for someone else.
library(ggplot2)
library(dplyr)
library(splitstackshape)
# First remember the plotting window, which equals the text length
text.size = nchar(as.character(text.data$text[1]))
# Split the string into single characters, and adjust the X-position to the string position
text.data <- cSplit(text.data, 'text', sep = '', direction = 'long', stripWhite = FALSE) %>%
group_by(text.id) %>%
mutate(x1 = seq(1,n()))
# Plot each character and add highlights
ggplot(data = text.data, aes(x = x1, y = text.id)) +
scale_x_continuous(limits = c(1, text.size)) +
geom_text(aes(x = text.data$x1, y = text.data$text.id, group = text.id, label = text)) +
geom_line(data = box.data, aes(x = start, y = text.id, group = text.id, size = 3, alpha = 0.5, colour = 'red'))
Which produces this plot:
Perhaps the marking should extend a little but upwards and downwards, but that's an easy fix.
I have parsed some data on grenade throws from the videogame Counter Strike. The sample data beloew reveals that I have positions on where the grenade is thrown from and where the grenade detonates and when the grenade is thrown.
df <- data.frame(pos_x = c(443.6699994744587,459.4566921116250, 443.5131582404877, 565.8823313012402, 725.3048665125078, 437.3428992800084, 475.7286794460795, 591.4138769182258),
pos_y = c(595.8564633895517, 469.8560006170301, 558.8543552036199, 390.5840189222542, 674.7983854380914, 688.0909476552858, 468.4987145207733, 264.6016042780749),
plot_group = c(1, 1, 2, 2, 3, 3, 4, 4),
round_throw_time = c(31.734375, 31.734375, 24.843750, 24.843750, 35.281250, 35.281250, 30.437500, 30.437500),
pos_type = c("Player position", "HE detonate", "Player position", "HE detonate", "Player position", "HE detonate", "Player position", "HE detonate"))
And using ggplot2 I can plot the static trajectories of the grenades like shown here
But I would like to animate the grenade trajectories and iniate the animation of each trajectory in the order that round_throw_time prescribes it and moving from player position to detonate position.
So far I have attempted this:
ggplot(df, aes(pos_x, pos_y, group = plot_group)) +
annotation_custom(grid::rasterGrob(img, width = unit(1,"npc"), height =
unit(1,"npc")), 0, w, 0, -h) +
scale_x_continuous(expand = c(0,0),limits = c(0,w)) +
scale_y_reverse(expand = c(0,0),limits = c(h,0)) +
geom_point(color = "red") +
transition_states(states=pos_type, transition_length = 1, state_length = 1)
But I'm kinda lost when it comes to adding the trajectory lines and how to reset the animation instead of the point just moving back to their origin.
Any tips would be greatly appreciated!
The image I plot onto can be downloaded here
http://simpleradar.com/downloads/infernoV2.zip
First of all, this is awesome.
First approach using shadow_wake and no other data prep
I commented out the pieces that weren't defined in the question. I added wrap = F to make the animation reset (rather than rewind) at the end, and shadow_wake to capture the trajectory.
# The factors of pos_type are backwards (b/c alphabetical), so R thinks the detonation
# comes before the player position. Here we reverse that.
df$pos_type <- forcats::fct_rev(df$pos_type)
ggplot(df, aes(pos_x, pos_y, group = plot_group)) +
# annotation_custom(grid::rasterGrob(img, width = unit(1,"npc"), height =
# unit(1,"npc")), 0, w, 0, -h) +
scale_x_continuous(expand = c(0,0)) + # ,limits = c(0,w)) +
scale_y_reverse(expand = c(0,0)) + # ,limits = c(h,0)) +
geom_point(color = "red") +
transition_states(states=pos_type, transition_length = 1, state_length = 1, wrap = F) +
shadow_wake(wake_length = 1)
Second approach, adding initial position and geom_segment
We could also add the trajectory as a segment, if we give each frame a reference to the player position:
df %>%
# Add reference to first coordinates for each plot_group
left_join(by = "plot_group",
df %>%
group_by(plot_group) %>%
filter(pos_type == "Player position") %>%
mutate(pos_x1 = pos_x, pos_y1 = pos_y) %>%
select(plot_group, pos_x1, pos_y1)
) %>%
ggplot(aes(pos_x, pos_y, group = plot_group)) +
# annotation_custom(grid::rasterGrob(img, width = unit(1,"npc"), height =
# unit(1,"npc")), 0, w, 0, -h) +
scale_x_continuous(expand = c(0,0)) + # ,limits = c(0,w)) +
scale_y_reverse(expand = c(0,0)) + # ,limits = c(h,0)) +
geom_point(color = "red") +
geom_segment(color = "gray70", aes(xend = pos_x1, yend = pos_y1)) +
transition_states(states=pos_type, transition_length = 1, state_length = 1, wrap = F)
Third variation, showing trajectories by start time
Similar to 2nd, but I add each trajectory's distance, travel time, and start time. I assume here that the detonation is the end time, and work back to when the trajectory started.
(I first tried transition_time, but couldn't get it to work without buggy behavior after the first trajectory.)
# trajectory speed
dist_per_time = 50
df2 <- df %>%
# Add reference to first coordinates for each plot_group
left_join(by = "plot_group",
df %>%
group_by(plot_group) %>%
filter(pos_type == "Player position") %>%
mutate(pos_x1 = pos_x, pos_y1 = pos_y) %>%
select(plot_group, pos_x1, pos_y1)
) %>%
left_join(by = c("plot_group", "pos_type"),
df %>%
group_by(plot_group) %>%
mutate(x_d = (range(pos_x)[1] - range(pos_x)[2]),
y_d = (range(pos_y)[1] - range(pos_y)[2]),
dist = sqrt(x_d^2 + y_d^2),
event_time = round_throw_time - if_else(pos_type == "Player position",
dist / dist_per_time,
0),
event_time = round(event_time, 1)) %>%
select(plot_group, pos_type, dist, event_time)
) %>%
### EDIT - added below to make timing explicit and fix code which
# was broken in current version of gganimate # 2019-11-15
# Thanks #Deep North for tip.
group_by(plot_group) %>%
mutate(event_time_per_grp = event_time - first(event_time)) %>%
ungroup() %>%
mutate(event_time_cuml = cumsum(event_time))
ggplot(df2, aes(pos_x, pos_y, group = plot_group)) +
# annotation_custom(grid::rasterGrob(img, width = unit(1,"npc"), height =
# unit(1,"npc")), 0, w, 0, -h) +
scale_x_continuous(expand = c(0,0)) + # ,limits = c(0,w)) +
scale_y_reverse(expand = c(0,0)) + # ,limits = c(h,0)) +
geom_point(color = "red") +
geom_segment(color = "gray70", aes(xend = pos_x1, yend = pos_y1)) +
transition_reveal(event_time_cuml) ### EDIT, see above
I am trying to create a chart like this one produced in the NYTimes using ggplot:
I think I'm getting close, but I'm not quite sure how to separate out some of my data so I get the right view. My data is political office holders that appear something like this:
name,year_elected,year_left,years_in_office,type,party
Person 1,1969,1969,1,Candidate,Unknown
Person 2,1969,1971,2,Candidate,Unknown
Person 3,1969,1973,4,Candidate,Unknown
Person 4,1969,1973,4,Candidate,Unknown
Person 5,1971,1974,3,Candidate,Unknown
Person 1,1971,1976,5,Candidate,Unknown
Person 2,1971,1980,9,Candidate,Unknown
Person 6,1973,1978,5,Candidate,Unknown
Person 7,1973,1980,7,Candidate,Unknown
Person 8,1975,1980,5,Candidate,Unknown
Person 9,1977,1978,1,Candidate,Unknown
And I've used the below code to get very close to this view, but I think an issue I'm running into is either drawing segments incorrectly (e.g., I don't seem to have a single segment for each candidate), or segments are overlapping/stacking. The key issue I'm running into is my list of office holders is around 60, but my chart is only drawing around 28 lines.
library(googlesheets)
library(tidyverse)
# I'm reading from a Google Spreadsheet
data <- gs_title("Council Members")
data_sj <- gs_read(ss = data, ws = "Sheet1")
ggplot(data, aes(year_elected, years_in_office)) +
geom_segment(aes(x = year_elected, y = 0,
xend = year_left, yend = years_in_office)) +
theme_minimal()
The above code gives me:
Thanks ahead of time for any pointers!
If your data frame is called d, then:
Transform it to data.table
Add jitter to year_electer
Add equivalent jitter to year_left
Add group (as an example) to color your samples
Use ggrepel to add text if there are many points.
Code:
library(data.table)
library(ggplot2)
library(ggrepel)
d[, year_elected2 := jitter(year_elected)]
d[, year_left2 := year_left + year_elected2 - year_elected + 0.01]
d[, group := TRUE]
d[factor(years_in_office %/% 9) == 1, group := FALSE]
ggplot(d, aes(year_elected2, years_in_office)) +
geom_segment(aes(x = year_elected2, xend = year_left2,
y = 0, yend = years_in_office, linetype = group),
alpha = 0.8, size = 1, color = "grey") +
geom_point(aes(year_left2), color = "black", size = 3.3) +
geom_point(aes(year_left2, color = group), size = 2.3) +
geom_text_repel(aes(year_left2, label = name), ) +
scale_colour_brewer(guide = FALSE, palette = "Dark2") +
scale_linetype_manual(guide = FALSE, values = c(2, 1)) +
labs(x = "Year elected",
y = "Years on office") +
theme_minimal(base_size = 10)
Result:
For the record and to address my comment on #PoGibas answer above, here's my tidyverse version:
data_transform <- data_sj %>%
mutate(year_elected_jitter = jitter(year_elected)) %>%
mutate(year_left_jitter = year_left + year_elected_jitter - year_elected + 0.01)
ggplot(data_transform, aes(year_elected, years_in_office, label = name)) +
geom_segment(aes(x = year_elected_jitter, y = 0, xend = year_left_jitter, yend = years_in_office, color = gender), size = 0.3) +
geom_text_repel(aes(year_left_jitter, label = name)) +
theme_minimal()