Hide some legend entries in ggplot - r

I have the following lines of code:
# Simulated heads are drawn as lines and measured heads as points, one
# line/point pair per well (BH2672 in red, BH3025 in green).
# NOTE: the colours are passed as constants outside aes() rather than mapped
# from a variable, which is why no legend is produced for them.
ggplot() +
geom_line(data=TS_SimHeads_HOBS_final, aes(x=as.Date(Date), y=BH2672), color='red') +
geom_point(data=Hydro_dates_wellData_2014_2018, aes(x=as.Date(Date), y=BH2672), color='red') +
geom_line(data=TS_SimHeads_HOBS_final, aes(x=as.Date(Date), y=BH3025), color='green') +
geom_point(data=Hydro_dates_wellData_2014_2018, aes(x=as.Date(Date), y=BH3025), color='green') +
xlab("Date") + ylab("Head")
#theme_bw()
which generates the following plot:
What I am trying to do, unsuccessfully, is to include legends only for the lines (points are the experimental data and lines the simulated ones). Some data for reproduction purposes:
Date BH2672 BH278 BH2978 BH2987 BH3025 BH312 BH3963 BH3962 BH3957
2014-02-19 31.28400 78.86755 5.671027 39.48419 53.60201 44.29516 69.23685 61.70843 56.13871
2014-02-20 30.76656 78.87344 5.656940 39.49012 53.56489 44.50679 69.50910 61.70638 56.09621
2014-02-21 30.43226 78.88097 5.642136 39.49902 53.56041 44.65761 69.65709 61.70126 56.04346
2014-02-22 30.16532 78.88979 5.643818 39.51101 53.56065 44.78333 69.75621 61.69643 55.99459
2014-02-23 29.93577 78.89954 5.650873 39.52544 53.55970 44.89429 69.82983 61.69332 55.95241
2014-02-24 29.73162 78.90991 5.658991 39.54147 53.55682 44.99520 69.88845 61.69236 55.91639

As is quite often the case, you first have to convert both of your datasets to long (tidy) format using e.g. tidyr::pivot_longer, which results in a new column holding the variable names as categories that can then be mapped to the color aes. Doing so automatically creates a legend and also allows you to simplify your code. If you want only the lines to appear in the legend, you can add show.legend=FALSE to geom_point. Finally, you can set your desired colors via scale_color_manual.
As you provided only one dataset, I used it for both datasets, which shouldn't matter here. Also, to make my life a bit easier, I have put the datasets in a named list:
# Attach dplyr without the masking messages. The argument name is spelled out
# in full instead of relying on partial matching of `warn`.
library(dplyr, warn.conflicts = FALSE)
library(tidyr)
library(ggplot2)

# Reshape both data sets the same way: keep the two wells of interest, parse
# the dates, and pivot to long format so that the well name ends up in `name`
# and the head in `value`.
data_list <- list(
  data = Hydro_dates_wellData_2014_2018,
  sim = TS_SimHeads_HOBS_final
) %>%
  lapply(function(x) {
    x %>%
      select(Date, BH2672, BH3025) %>%
      mutate(Date = as.Date(Date)) %>%
      tidyr::pivot_longer(-Date)
  })

# Mapping `name` to colour is what creates the legend; show.legend = FALSE on
# the point layer keeps only the line keys in it.
ggplot() +
  geom_line(data = data_list$sim, aes(x = Date, y = value, color = name)) +
  geom_point(
    data = data_list$data, aes(x = Date, y = value, color = name),
    show.legend = FALSE
  ) +
  scale_color_manual(values = c(BH2672 = "red", BH3025 = "green")) +
  labs(x = "Date", y = "Head")
DATA
TS_SimHeads_HOBS_final <- structure(list(Date = c(
"2014-02-19", "2014-02-20", "2014-02-21",
"2014-02-22", "2014-02-23", "2014-02-24"
), BH2672 = c(
31.284,
30.76656, 30.43226, 30.16532, 29.93577, 29.73162
), BH278 = c(
78.86755,
78.87344, 78.88097, 78.88979, 78.89954, 78.90991
), BH2978 = c(
5.671027,
5.65694, 5.642136, 5.643818, 5.650873, 5.658991
), BH2987 = c(
39.48419,
39.49012, 39.49902, 39.51101, 39.52544, 39.54147
), BH3025 = c(
53.60201,
53.56489, 53.56041, 53.56065, 53.5597, 53.55682
), BH312 = c(
44.29516,
44.50679, 44.65761, 44.78333, 44.89429, 44.9952
), BH3963 = c(
69.23685,
69.5091, 69.65709, 69.75621, 69.82983, 69.88845
), BH3962 = c(
61.70843,
61.70638, 61.70126, 61.69643, 61.69332, 61.69236
), BH3957 = c(
56.13871,
56.09621, 56.04346, 55.99459, 55.95241, 55.91639
)), class = "data.frame", row.names = c(
NA,
-6L
))
Hydro_dates_wellData_2014_2018 <- TS_SimHeads_HOBS_final

Related

Ggplot Aesthetics must be either length 1 or the same as the data

My ggplot code below is working on my Mac, but isn't working on my Linux instance.
As per the title, I get the error Error: Aesthetics must be either length 1 or the same as the data (1): x, y and colour with this code.
library(tidyverse)
library(plotly)
library(ggplot2)
# Scatter plot of log2(baseMean) against log2FoldChange, coloured by whether
# padj < 0.01. The `text` aesthetic holds the multi-line tooltip string that
# ggplotly() is told to display below.
plot_total <-
demo %>%
as.data.frame() %>%
ggplot(aes(log2(baseMean), log2FoldChange, color=padj < 0.01,
text = paste(" log2(baseMean):", formatC(log2(baseMean), format = "e", digits = 2), "\n",
"log2FoldChange:", formatC(log2FoldChange, format = "e", digits = 2), "\n",
"Gene Name:", Gene, "\n",
"EnsGeneID:", ENSG))) +
# Three point layers, each drawn from a filtered subset of the plot data:
# padj >= 0.01, padj missing, and padj < 0.01 (the last with larger points).
geom_point(data = . %>% filter(!padj<0.01), cex = 0.1, size=0.1) +
geom_point(data = . %>% filter(is.na(padj)), cex = 0.1, size=0.1) +
geom_point(data = . %>% filter(padj<0.01), cex = 1, size=1) +
scale_color_manual(values=c("FALSE" = "#30B7BC", "TRUE" = "#DE653A", "NA" = "#B3B3B3")) +
labs(title = "Treated vs. Control", x = "log2Expression") +
theme_bw() +
NULL
# Convert to an interactive plotly widget showing only the `text` tooltip,
# trim the mode bar, and switch the traces to WebGL rendering.
ggplotly(plot_total,
tooltip = "text",
type = 'scattergl') %>%
config(displaylogo = FALSE,
modeBarButtonsToRemove = c('sendDataToCloud', 'select2d', 'lasso2d')) %>%
toWebGL()
The culprit seems to be the text variable. I have tried placing this variable inside each geom_point(aes()) where I'm filtering the data, but I get the same error. This is a small subset of the data:
ENSG
Gene
baseMean
log2FoldChange
lfcSE
stat
pvalue
padj
ENSG00000000003.15
TSPAN6
2106.78150011798
-0.214594792659649
0.0830671887250501
-2.58338816990607
0.00978351787155086
0.0632360231867511
ENSG00000000005.6
TNMD
3.51684529099911
1.33352064084301
0.680105184521041
1.96075647001887
0.0499074381755472
NA
ENSG00000000419.13
DPM1
1156.63019852077
-0.184963193359761
0.0697284461507299
-2.65262175726578
0.00798693161495278
0.0556311026232396
ENSG00000000457.14
SCYL3
270.00909649605
-0.0558608208897551
0.0945944838404759
-0.590529369386473
0.554835801729326
0.752831745155342
ENSG00000000460.17
C1orf112
423.916241362646
0.00337838445840835
0.0904413626324259
0.0373544179352855
0.970202416501722
0.985859261611512
ENSG00000000938.13
FGR
1.4952572446323
0.189881722278028
1.0225523780978
0.18569388360453
0.852684825077251
NA
ENSG00000000971.16
CFH
8.35615935084791
-0.407549583396925
0.407283654269094
-1.00065293346552
0.316994629452948
NA
ENSG00000001036.14
FUCA2
3286.83167836124
-0.113975393359072
0.0532994402577163
-2.13839756680318
0.032484489796959
0.140642991190518
ENSG00000001084.13
GCLC
1525.77400190981
-0.0964283327267667
0.0697918041485629
-1.38165697108938
0.167077052186927
0.387307483908263
ENSG00000001167.14
NFYA
775.15489019341
-0.0282781481284204
0.108165475282215
-0.2614341411124
0.793757731444887
0.89507246376111
ENSG00000001460.18
STPG1
1105.63501614957
0.154649228441584
0.0886193148590866
1.74509618684697
0.0809681272261289
0.252149556355932
ENSG00000001461.17
NIPAL3
1755.50307288809
-0.0313646069063427
0.0699845764115142
-0.448164560172753
0.654034437625039
0.815455908057006
ENSG00000001497.17
LAS1L
2188.7653564921
-0.0523385791864584
0.0712541067124691
-0.734534212851194
0.462623216454675
0.684620504636789
ENSG00000001561.7
ENPP4
830.269207280792
-0.298210398294484
0.10493466780185
-2.84186727362211
0.00448501591940391
0.0369683950248886
ENSG00000001617.12
SEMA3F
507.841167325578
0.220524182082938
0.107840828258815
2.04490438031211
0.0408642859982967
0.163022035306157
ENSG00000001626.16
CFTR
130.392965133479
2.01581704225126
0.726570495644851
2.7744273327011
0.00552989865962655
0.0428809290359717
ENSG00000001629.10
ANKIB1
1378.77776720815
-0.00295677498548296
0.0912941535974317
-0.0323873421130675
0.974163156724246
0.987546941506975
ENSG00000001631.16
KRIT1
1.9871339581813
-0.292437549054293
0.868146512011618
-0.336852760459377
0.736227897832157
NA
ENSG00000002016.18
RAD52
680.085519620988
0.373097140440191
0.156407957981969
2.3854102134829
0.0170600862524028
0.0916401980775939
ENSG00000002079.14
MYH16
1.86080100353
-0.182051454327792
1.01764691310774
-0.178894518307764
0.858020527111129
NA
ENSG00000002330.14
BAD
551.20996778861
-0.0039768348985057
0.12107851487019
-0.0328450914909992
0.973798119784072
0.98747939145517
ENSG00000002549.12
LAP3
852.041745825179
-0.254029783752867
0.0719977839970102
-3.52830003439294
0.000418237809317195
0.00642614461337485
ENSG00000002587.10
HS3ST1
2.57897400765919
0.0144688913294909
0.777073236085079
0.0186197267613869
0.985144465883661
NA
ENSG00000002745.13
WNT16
16.1270763411369
-0.00537408371714073
0.30400368641928
-0.0176776925978747
0.985895976595832
0.993837819473658
ENSG00000002746.15
HECW1
46.6208055525063
-0.273107000095639
0.217402004605246
-1.25623036729372
0.209032459705226
0.438247467420401
ENSG00000002834.18
LASP1
2081.18262394114
0.25353664096991
0.0740348392744757
3.42455853831129
0.000615799164096869
0.00849490484558848
ENSG00000002919.15
SNX11
630.57793127673
0.0459782434754682
0.105868331615849
0.434296477272386
0.664073161622284
0.820640102421514
ENSG00000002933.9
TMEM176A
1.2826428629398
-0.208251902266481
1.04854999265659
-0.198609416551381
0.842568287014726
NA
ENSG00000003056.8
M6PR
2075.74758354412
-0.298258970884233
0.0818583611419073
-3.64359812148181
0.000268853092542719
0.00454947228687621
ENSG00000003096.14
KLHL13
536.62234593866
0.158623735269079
0.107348560798392
1.47765125204599
0.139501148591557
0.348980620824275
ENSG00000003137.8
CYP26B1
287.854716803642
-0.0740116292474195
0.109594213807301
-0.675324240908867
0.499469786095141
0.712657283500843
ENSG00000003147.19
ICA1
535.667848254903
-0.143475821995906
0.0973591074296888
-1.47367643134487
0.140568731550252
0.350583904431043
I don't really understand your question, as your code works fine for me. The only thing that did not work was the message "WebGL is not supported by your browser". I solved it by forcing RStudio to use 'Desktop OpenGL': in RStudio, go to [Tools/Global Options.../Advanced] and choose 'Desktop OpenGL' as the rendering engine.
Sample code:
library(tidyverse)
library(plotly)
library(ggplot2)

# Scatter plot of log2(baseMean) against log2FoldChange, coloured by whether
# padj < 0.01. The `text` aesthetic is not drawn; it carries the tooltip that
# ggplotly() displays.
plot_total <- demo %>%
  as.data.frame() %>%
  ggplot(aes(log2(baseMean), log2FoldChange,
    color = padj < 0.01,
    text = paste(
      " log2(baseMean):", formatC(log2(baseMean), format = "e", digits = 2), "\n",
      "log2FoldChange:", formatC(log2FoldChange, format = "e", digits = 2), "\n",
      "Gene Name:", Gene, "\n",
      "EnsGeneID:", ENSG
    )
  )) +
  # One layer per class so that the padj < 0.01 points are drawn last (on top)
  # and larger. Only `size` is given: `cex` is an alias that ggplot2 renames
  # to `size`, so passing both merely duplicated the same aesthetic.
  geom_point(data = . %>% filter(!padj < 0.01), size = 0.1) +
  geom_point(data = . %>% filter(is.na(padj)), size = 0.1) +
  geom_point(data = . %>% filter(padj < 0.01), size = 1) +
  # A missing padj makes the mapped colour value NA. NA is never matched by a
  # palette entry named "NA", so its colour is supplied through `na.value`.
  scale_color_manual(
    values = c("FALSE" = "#30B7BC", "TRUE" = "#DE653A"),
    na.value = "#B3B3B3"
  ) +
  labs(title = "Treated vs. Control", x = "log2Expression") +
  theme_bw() +
  NULL

# Convert to an interactive widget that shows only the `text` tooltip, trim
# the mode bar, and switch the traces to WebGL rendering.
ggplotly(plot_total,
  tooltip = "text",
  type = 'scattergl'
) %>%
  config(
    displaylogo = FALSE,
    modeBarButtonsToRemove = c('sendDataToCloud', 'select2d', 'lasso2d')
  ) %>%
  toWebGL()
Plot:
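The reason I was getting this error was that, for my particular dataset, the data frame didn't contain any NA values in the padj column, so the layer using filter(is.na(padj)) was trying to plot a subset with no rows. Adding an if/else statement that only adds that layer when NA values are present fixed it.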

One dodged barchart per year

I have a dodged bar chart that shows the data "Created Date" and "Last Accessed" per year. The date data is formatted as year-month-date hour:minute:second.
Now I want to split the data up into one graph per year that shows Created Date and Last Accessed per date of that year. I tried to plot everything in one graph, but it was really messy.
Is it possible to plot per year? In this example that would give 6 graphs, because the data covers 6 years. I was thinking of something like looping through each year:
for(x in 1:number_of_years) {
... plot
}
This is my code:
```{r echo=FALSE, warning=FALSE}
# Libraries
library(ggplot2)
library(data.table)
library(tidyr)
# Read data
df = read.csv2(text = "File.Name|Created.Date|Last.Accessed|Visual.Group
60be1ba43bf7cjpg|1989-11-17 06:25:22|2017-07-15 01:25:22|0
60be1ba43bf89jpg|1989-02-04 04:03:16|2021-12-17 04:03:16|1
60be1ba43bf8djpg|2017-04-22 14:57:13|2017-11-17 23:57:13|2
60be1ba43bf90jpg|2021-04-12 23:03:44|2018-11-17 05:03:44|3
60be1ba43bf93jpg|2019-08-28 18:23:16|1989-09-07 12:23:16|4
60be1ba43bf95jpg|1989-09-11 08:16:20|2020-03-17 10:16:20|5
60be1ba43bf98jpg|2018-08-01 16:56:05|2017-04-24 03:56:05|5
60be1ba43bf9bjpg|2017-06-23 19:01:37|1989-07-14 22:01:37|6
60be1ba43bf9ejpg|2018-02-20 15:21:26|2020-02-01 18:21:26|7
60be1ba43bfa1jpg|2021-12-10 08:34:09|2021-04-17 02:34:09|8
60be1ba43bfa4jpg|2017-01-02 19:03:10|1989-11-16 14:03:10|9
60be1ba43bfa6jpg|2017-04-28 15:50:33|2019-02-22 06:50:33|9
60be1ba43bfd6jpg|2018-04-14 22:21:37|2021-08-28 14:21:37|9
60be1ba43bfdajpg|2019-07-15 04:31:04|2017-07-11 04:31:04|10
60be1ba43bfddjpg|2020-11-06 01:06:25|1989-08-16 09:06:25|10
60be1ba43bfe0jpg|2021-08-05 06:38:07|2021-12-25 15:38:07|11
60be1ba43bfe3jpg|2017-01-14 03:47:54|2020-08-03 20:47:54|12
60be1ba43bfe6jpg|1989-11-26 17:33:01|2018-11-07 22:33:01|13
60be1ba43bfe9jpg|2018-09-21 07:17:29|1989-10-05 03:17:29|14
60be1ba43bfebjpg|2017-03-13 09:42:04|2020-08-23 11:42:04|14
60be1ba43bfeejpg|2020-07-18 08:36:52|2018-10-04 12:36:52|15
60be1ba43bff1jpg|2021-01-21 05:20:29|2019-04-28 03:20:29|16
60be1ba43bff4jpg|2018-10-19 08:13:24|1989-10-16 18:13:24|17
60be1ba43bff7jpg|2017-06-15 07:49:44|2021-05-11 01:49:44|18
60be1ba43bff9jpg|2019-05-23 23:41:20|2019-08-26 13:41:20|18
60be1ba43bffcjpg|1989-02-17 03:13:27|1989-11-20 16:13:27|19
60be1ba43bfffjpg|2020-08-26 15:22:01|2018-08-21 17:22:01|20
60be1ba43c001jpg|2020-12-21 05:22:03|2017-04-04 15:22:03|21
60be1ba43c004jpg|2018-02-14 08:23:01|1989-03-26 06:23:01|21
60be1ba43c007jpg|2018-10-25 08:19:18|2021-07-06 08:19:18|21
60be1ba43c00ajpg|2017-11-28 07:22:03|2021-09-23 08:22:03|22
60be1ba43c00cjpg|2019-11-19 17:12:02|2019-05-23 03:12:02|23
60be1ba43c00fjpg|2017-02-26 00:43:21|2019-11-19 20:43:21|24
60be1ba43c012jpg|2017-10-27 01:05:53|1989-03-14 02:05:53|25
60be1ba43c015jpg|2019-06-18 08:06:07|1989-10-28 16:06:07|25
60be1ba43c017jpg|2021-08-19 02:01:32|2020-05-28 03:01:32|25
60be1ba43c01ajpg|2021-07-13 23:02:21|2017-07-05 03:02:21|25
60be1ba43c01djpg|1989-05-14 02:51:23|2020-03-24 01:51:23|25
60be1ba43c020jpg|2021-12-13 04:05:19|2021-04-16 03:05:19|25
60be1ba43c023jpg|2019-03-26 23:42:25|2021-08-03 20:42:25|26
60be1ba43c025jpg|1989-05-09 04:08:58|2019-10-23 00:08:58|26
60be1ba43c028jpg|2018-08-11 00:48:32|2019-05-08 08:48:32|26
60be1ba43c02bjpg|2018-07-14 08:38:02|2019-05-06 22:38:02|27
60be1ba43c02ejpg|2020-03-06 19:13:14|2019-09-18 18:13:14|28
60be1ba43c030jpg|1989-07-10 11:40:46|2019-08-03 01:40:46|28
60be1ba43c033jpg|2021-12-11 02:23:44|2019-08-09 01:23:44|28
60be1ba43c036jpg|2017-11-03 19:53:43|2021-02-13 17:53:43|29
60be1ba43c038jpg|2017-02-07 02:45:47|2021-09-20 09:45:47|29
60be1ba43c03bjpg|2020-07-28 20:56:59|2018-06-06 11:56:59|30
60be1ba43c040jpg|2019-06-20 16:42:30|2020-01-02 00:42:30|31
60be1ba43c042jpg|2020-05-27 03:39:41|2021-08-11 08:39:41|31
60be1ba43c045jpg|2017-08-07 12:11:03|2017-12-15 20:11:03|32
60be1ba43c048jpg|1989-04-28 01:19:49|2019-08-17 23:19:49|32
60be1ba43c04bjpg|2017-08-26 22:07:51|2019-02-25 16:07:51|33
60be1ba43c04djpg|1989-10-12 02:27:44|2020-11-10 22:27:44|34
60be1ba43c050jpg|2021-08-18 09:01:48|2020-01-08 07:01:48|35
60be1ba43c053jpg|2017-07-21 20:56:50|2018-02-24 20:56:50|36
60be1ba43c056jpg|1989-05-13 11:23:09|2020-01-20 11:23:09|37
60be1ba43c058jpg|2020-08-21 18:46:14|2017-04-14 10:46:14|37
60be1ba43c05bjpg|2020-04-08 11:14:54|2020-12-02 00:14:54|38
60be1ba43c05ejpg|2021-02-21 06:13:14|2018-08-21 03:13:14|38
60be1ba43c060jpg|2018-06-28 04:36:20|2020-09-28 15:36:20|39
60be1ba43c063jpg|2017-09-22 23:39:53|2021-05-27 20:39:53|39
60be1ba43c065jpg|2018-05-09 16:54:13|2021-06-03 16:54:13|39
60be1ba43c068jpg|2019-01-04 00:19:33|2017-05-18 08:19:33|40
60be1ba43c06bjpg|2019-04-12 21:19:37|2017-06-23 04:19:37|40
60be1ba43c06ejpg|2019-09-22 20:44:20|2021-10-23 12:44:20|40
60be1ba43c070jpg|2021-10-23 07:57:35|2019-02-23 21:57:35|40
60be1ba43c073jpg|2021-09-17 19:13:53|2020-05-27 06:13:53|40
60be1ba43c075jpg|2017-08-13 23:06:53|2019-08-03 00:06:53|41
60be1ba43c078jpg|2017-11-02 04:47:11|2018-07-05 03:47:11|42
60be1ba43c07bjpg|2017-05-11 23:04:25|2021-09-22 04:04:25|42
60be1ba43c07ejpg|2018-04-06 21:19:03|2021-09-16 21:19:03|42
60be1ba43c080jpg|2019-03-16 01:56:36|2021-04-03 18:56:36|42
60be1ba43c083jpg|2019-02-24 13:21:29|2019-10-28 09:21:29|43
60be1ba43c086jpg|2020-03-08 10:50:00|2018-06-22 15:50:00|44
60be1ba43c088jpg|1989-06-10 16:34:03|2020-11-10 18:34:03|45
60be1ba43c08bjpg|2018-04-20 10:29:46|2021-01-24 08:29:46|46
60be1ba43c08ejpg|2019-02-13 18:27:24|2021-10-16 07:27:24|47
60be1ba43c090jpg|2019-09-16 21:54:43|2018-07-21 22:54:43|48
60be1ba43c094jpg|2021-05-23 17:25:18|2019-06-18 00:25:18|49
60be1ba43c097jpg|2020-09-15 06:42:26|2019-03-25 13:42:26|50
60be1ba43c09ajpg|2020-05-02 20:14:35|2020-06-13 08:14:35|50
60be1ba43c09djpg|1989-02-25 15:11:22|2017-08-28 03:11:22|51
60be1ba43c09fjpg|2018-07-13 15:42:14|2019-02-23 14:42:14|52
60be1ba43c0a2jpg|2020-09-09 08:58:49|2019-07-11 18:58:49|53
60be1ba43c0a4jpg|1989-12-17 02:36:08|2021-10-04 10:36:08|54
60be1ba43c0a7jpg|2020-03-11 08:41:55|2017-10-16 17:41:55|55
60be1ba43c0aajpg|2020-12-18 19:59:08|2017-12-02 03:59:08|55
60be1ba43c0adjpg|2019-06-20 09:48:27|2020-11-10 17:48:27|56
60be1ba43c0b0jpg|2020-01-05 15:49:18|1989-11-27 05:49:18|56
60be1ba43c0b2jpg|1989-06-23 23:50:52|2017-09-08 02:50:52|56
60be1ba43c0b5jpg|2019-09-01 04:29:25|2020-10-25 00:29:25|56
60be1ba43c0b8jpg|2020-08-08 07:08:47|2021-05-22 20:08:47|57
60be1ba43c0bbjpg|2018-04-11 07:32:17|2018-06-21 12:32:17|58
60be1ba43c0bdjpg|2021-05-26 08:32:28|1989-02-04 12:32:28|58
60be1ba43c0c0jpg|1989-11-25 22:22:37|2019-07-16 04:22:37|58
60be1ba43c0c4jpg|2018-02-03 10:37:57|2019-08-02 08:37:57|58
60be1ba43c0c7jpg|2018-08-18 06:36:04|1989-03-17 08:36:04|58
60be1ba43c0cajpg|2019-02-12 23:31:52|2020-06-17 13:31:52|59",
sep="|",stringsAsFactors=TRUE, na.strings="unknown");
# Drop repeated visual groups: only the first row of each Visual.Group is kept
df <- df[!duplicated(df[["Visual.Group"]]), ]

# Derive the year of each timestamp as a four-digit string
df[["Created.Date.Year"]] <- format(
  as.Date(df[["Created.Date"]], format = "%Y-%m-%d"),
  format = "%Y"
)
df[["Last.Accessed.Year"]] <- format(
  as.Date(df[["Last.Accessed"]], format = "%Y-%m-%d"),
  format = "%Y"
)

# Switch to a data.table and give the two date columns the names that should
# appear as group labels in the legend
dt <- as.data.table(df)
setnames(
  dt,
  old = c("Created.Date", "Last.Accessed"),
  new = c("Created date", "Last Accessed")
)

# Stack the two date columns: "Legend Title" records which column a value came
# from (it is the fill variable, and thus the legend title, below) and "Date"
# holds the value itself
dt <- pivot_longer(
  dt,
  cols = c("Created date", "Last Accessed"),
  names_to = "Legend Title",
  values_to = "Date"
)

# Dodged bar chart of the number of rows per Date value, with the count
# printed above each bar
ggplot(dt, aes(x = Date, fill = `Legend Title`)) +
  geom_bar(position = "dodge") +
  geom_text(
    stat = "count",
    aes(label = after_stat(count)),
    position = position_dodge(width = 1),
    vjust = -1
  ) +
  theme_bw()
```
Perhaps something like this? I switched to points and lines for better visualization, and used facets to get one panel per year.
library(tidyverse)

# Count the rows per calendar date for each date type, then draw one panel per
# year with its own x range.
df %>%
  # Select the two timestamp columns by name rather than by position, so the
  # reshape does not silently pick other columns if the column order changes.
  pivot_longer(cols = c(Created.Date, Last.Accessed)) %>%
  mutate(value = as.Date(value)) %>%
  count(name, value) %>%
  mutate(year = lubridate::year(value)) %>%
  ggplot(aes(x = value, y = n, color = name)) +
  geom_point() +
  geom_line() +
  scale_x_date(date_breaks = "3 month", date_labels = "%b") +
  facet_grid(~year, scales = "free") +
  theme_bw()

More than one expression parsed error ggplot2

I have the following data frame:
df.test <- data.frame(
id = c("EIF3H", "USP9X", "USP44", "USP51", "USP15",
"USP48", "USP47", "USP43", "USPL1", "UCHL5", "USP50", "USP7",
"UCHL1", "USP11", "USP26", "PAN2", "VCPIP1", "USP46", "USP29",
"USP22", "USP49", "ZRANB1", "OTUD4", "OTUD7B", "USP54", "PSMD14",
"USP20", "USP6", "OTUD3", "USP39", "UCHL3", "USP19", "USP21",
"USP30", "TNFAIP3", "USP17L2", "USP32", "JOSD2", "PSMD7", "ATXN3L",
"SENP2", "STAMBPL1", "USP37", "USP35", "USP3", "ALG13", "USP45",
"Control", "USP9Y", "ATXN3", "OTUD6A", "USP42", "USP12", "MPND",
"USP40", "OTUD1", "USP31", "USP8", "USP13", "USP53", "USP34",
"USP17L5", "MYSM1", "USP36", "OTUD7A", "USP10", "USP2", "USP18",
"OTUB1", "EIF3F", "USP1", "USP14", "COPS5", "USP24", "USP4",
"CYLD", "COPS6", "STAMBP", "USP5", "OTUD6B", "BAP1", "USP25",
"YOD1", "USP28", "USP38", "USP41", "JOSD1", "UCK2", "USP16",
"USP27X", "BRCC3", "USP33", "OTUD5", "OTUB2"),
log.score = c(4.22265293851218, 3.03983376346562,
2.4139305569695, 2.32586482009754, 2.30391458369018, 2.19017103893211,
2.10803347738743, 2.10011933499842, 1.82596928196197, 1.79890343496053,
1.78330640083025, 1.58384231036782, 1.4480988629484, 1.4331502122056,
1.41965675282741, 1.37552194849409, 1.37548070593268, 1.3126672736385,
1.27123241483349, 1.25213781606166, 1.1643918571801, 1.14738583497561,
1.0423927129399, 1.03157776352028, 1.0279685056071, 0.953426802337995,
0.94104282122269, 0.929925173732472, 0.886424283199432, 0.886123467368948,
0.815961921373111, 0.811437095842094, 0.767054687254773,
0.754314635766764, 0.750654863646671, 0.728646377897516,
0.707899061519581, 0.703532261199885, 0.692546751828376,
0.684554481775416, 0.652104306506768, 0.642046105413661,
0.630116510664521, 0.62908000782908, 0.619354680809075, 0.614876544107784,
0.61293067306798, 0.606898831140113, 0.603504247802433, 0.578642901486857,
0.576246380387172, 0.549612309171809, 0.53101794103743, 0.513442014568548,
0.506304999011214, 0.492144128304169, 0.462596515841992,
0.454185884038717, 0.450163300207299, 0.434529992991809,
0.429725658566606, 0.42864060724616, 0.419896514762075, 0.409715596281838,
0.365946146577929, 0.363963683646553, 0.357614629472314,
0.352851847129221, 0.343470593766502, 0.313051079788499,
0.304614649499993, 0.291604597354374, 0.287030586811975,
0.272263598289704, 0.27175988000523, 0.265200170411153, 0.264528852761016,
0.244704590019742, 0.179680291853473, 0.154102353851514,
0.147800680553723, 0.127575655021633, 0.126051956011554,
0.1207205737776, 0.118712371231544, 0.11046860245595, 0.0939775902962627,
0.0673791277640148, 0.066320409857141, 0.0582650179118847,
0.0548860857591892, 0.0374554663486737, 0.0147532091971383,
0.0134163514896924),
neg.rank = 1:94)
From this data frame I made this plot:
library(ggplot2)
# Name of the x column, kept as a string for use with aes_string().
x <- "neg.rank"
# NOTE: `y` is given the numeric vector df.test$log.score itself rather than a
# column-name string.
p <- ggplot(df.test, aes_string(x = x, y = df.test$log.score)) +
geom_point()
I want to add labels to the top10 ids and I tried the following:
library(ggrepel)
library(dplyr)
# Label only the rows whose value in the column named by `x` is between 1 and 10.
# NOTE: `y` and `label` are passed as vectors, not column-name strings, and
# `df` is not defined anywhere in this snippet — presumably `df.test` was meant.
p + geom_label_repel(data = df.test[df.test[[x]] %in% 1:10, ], aes_string(x = x, y = df$log.score, label = df.test$id))
But this gives me a More than one expression parsed error:
More than one expression parsed
Backtrace:
█
1. ├─ggrepel::geom_label_repel(...)
2. │ └─ggplot2::layer(...)
3. └─ggplot2::aes_string(x = x, y = df$log.score, label = df.test$id)
4. └─base::lapply(...)
5. └─ggplot2:::FUN(X[[i]], ...)
6. └─rlang::parse_expr(x)
I have no clue what is wrong with the code.
It is not working as you are inserting the vectors directly into your aes_string.
If you want yours to be working you need to be strict with your aes_string and really should only use strings:
# Label the rows ranked 1 to 10. Every aesthetic handed to aes_string() is a
# column-name string (or, like `x`, a variable holding one). The stray trailing
# comma after the mapping has been removed so no empty argument is passed.
p +
  geom_label_repel(
    data = df.test[df.test[[x]] %in% 1:10, ],
    aes_string(x = x, y = "log.score", label = "id")
  )
I also added a "cleaner" solution. I changed your subsetting logic to use dplyr, as you are already loading the package anyway and changed all your aes_string() to aes().
library(ggplot2)
library(ggrepel)
library(dplyr)

# Scatter of score against rank. The ten rows with the smallest neg.rank get
# labels; the label options restrict them to x >= 10 and y >= 3 and to
# repulsion along the x direction.
ggplot(df.test, aes(x = neg.rank, y = log.score)) +
  geom_point() +
  geom_label_repel(
    aes(label = id),
    data = slice_min(df.test, neg.rank, n = 10),
    direction = "x",
    xlim = c(10, NA),
    ylim = c(3, NA),
    max.overlaps = 10
  )
Cheers
Hannes

How to edit the labels of a facet_wrap/grid if there are two variables?

In ggplot I have faceted by two variables (tau and z) but can only change the label of the first:
# Toy data: 1000 random points; `tau` and `z` each take two values, repeated
# in blocks of 500.
df<-data.frame(x=runif(1e3),y=runif(1e3),tau=rep(c("A","aBc"),each=500),z=rep(c("DDD","EEE"),each=500))
# Display labels keyed by the values of `tau`.
tauNames <- c(
`A` = "10% load",
`aBc` = "40% load"
)
# NOTE: the labeller is built from a single lookup table that only has entries
# for the `tau` values; nothing is supplied for the values of `z`.
df%>%
ggplot(aes(x=x,y=y))+
geom_point(alpha=0.4)+
xlab(label = "Time[s]")+
ylab(label = "Dose")+
facet_grid(tau~z,labeller = as_labeller(tauNames))+
ggpubr::theme_pubclean()
As you can see I can change one of the labels but not both. Any thoughts are much appreciated
In the documentation of ?as_labeller you can find in the examples how you get the labels for multiple faceting variables.
library(tidyverse)

# Toy data: 1000 random points; `tau` and `z` each take two values, repeated
# in blocks of 500.
df <- data.frame(
  x = runif(1e3),
  y = runif(1e3),
  tau = rep(c("A", "aBc"), each = 500),
  z = rep(c("DDD", "EEE"), each = 500)
)

# Display labels keyed by the values of `tau`.
tauNames <- c(A = "10% load", aBc = "40% load")

# labeller() takes one lookup table per faceting variable, so the rows (tau)
# and the columns (z) can each be relabelled.
ggplot(df, aes(x = x, y = y)) +
  geom_point(alpha = 0.4) +
  labs(x = "Time[s]", y = "Dose") +
  facet_grid(
    tau ~ z,
    labeller = labeller(
      tau = tauNames,
      z = c("DDD" = "D", "EEE" = "E")
    )
  ) +
  ggpubr::theme_pubclean()

Plot multiple rows as columns with ggplotly

I have the following data
dput(head(new_data))
structure(list(series = c("serie1", "serie2", "serie3",
"serie4"), Chr1_Coverage = c(0.99593043561, 0.995148711122,
0.996666194154, 1.00012127128), Chr2_Coverage = c(0.998909597935,
0.999350808049, 0.999696737431, 0.999091916132), Chr3_Coverage = c(1.0016871729,
1.00161108919, 0.997719609642, 0.999887319775), Chr4_Coverage = c(1.00238874787,
1.00024296426, 1.0032143002, 1.00118558895), Chr5_Coverage = c(1.00361001984,
1.00233184803, 1.00250793369, 1.00019989912), Chr6_Coverage = c(1.00145962318,
1.00085036645, 0.999767433622, 1.00018523387), Chr7_Coverage = c(1.00089620637,
1.00201715802, 1.00430458519, 1.00027257509), Chr8_Coverage = c(1.00130277775,
1.00332841536, 1.0027493578, 0.998107829176), Chr9_Coverage = c(0.998473062701,
0.999400379593, 1.00130178863, 0.9992796405), Chr10_Coverage = c(0.996508132358,
0.999973856701, 1.00180072957, 1.00172163916), Chr11_Coverage = c(1.00044015107,
0.998982489577, 1.00072330837, 0.998947935281), Chr12_Coverage = c(0.999707836898,
0.996654676531, 0.995380321719, 1.00116773966), Chr13_Coverage = c(1.00199118466,
0.99941499519, 0.999850500793, 0.999717689167), Chr14_Coverage = c(1.00133747054,
1.00232593477, 1.00059139379, 1.00233368187), Chr15_Coverage = c(0.997036875653,
1.0023727983, 1.00020943048, 1.00089130742), Chr16_Coverage = c(1.00527426537,
1.00318861724, 1.0004269482, 1.00471256502), Chr17_Coverage = c(0.995530811404,
0.995103514254, 0.995135851149, 0.99992196636), Chr18_Coverage = c(0.99893371568,
1.00452723685, 1.00006262572, 1.00418478844), Chr19_Coverage = c(1.00510422346,
1.00711968194, 1.00552123413, 1.00527171097), Chr20_Coverage = c(1.00113612137,
1.00130658886, 0.999390191542, 1.00178637085), Chr21_Coverage = c(1.00368753618,
1.00162782873, 1.00056883447, 0.999797571642), Chr22_Coverage = c(0.99677846234,
1.00168287612, 0.997645576841, 0.999297594524), ChrX_Coverage = c(1.04015901555,
0.934772492047, 0.98981339011, 0.999960536561), ChrY_Coverage = c(9.61374227868e-09,
2.50609172398e-07, 8.30448295172e-08, 1.23741398572e-08)), .Names = c("series",
"Chr1_Coverage", "Chr2_Coverage", "Chr3_Coverage", "Chr4_Coverage",
"Chr5_Coverage", "Chr6_Coverage", "Chr7_Coverage", "Chr8_Coverage",
"Chr9_Coverage", "Chr10_Coverage", "Chr11_Coverage", "Chr12_Coverage",
"Chr13_Coverage", "Chr14_Coverage", "Chr15_Coverage", "Chr16_Coverage",
"Chr17_Coverage", "Chr18_Coverage", "Chr19_Coverage", "Chr20_Coverage",
"Chr21_Coverage", "Chr22_Coverage", "ChrX_Coverage", "ChrY_Coverage"
), row.names = c(NA, -4L), class = c("tbl_df", "tbl", "data.frame"
))
and I would like to plot it as this
I thought of transposing the data starting from the second column and name the new transposed data by the first column in the initial data with the following code:
# Shiny output: transpose the coverage table and hand the result to plotly.
output$Plot_1 <- renderPlotly({
# Coerce columns 2 to 24 to numeric.
# NOTE(review): the sample data shown above has 25 columns (series plus 24
# coverage columns), so 2:24 would leave the last one out — confirm.
Plot_1_new_data[,2:24] <- lapply(Plot_1_new_data[,2:24], as.numeric)
# first remember the names
# NOTE: `n` is a one-column data frame here, not a character vector.
n <- as.data.frame(Plot_1_new_data[0:nrow(Plot_1_new_data),1])
# transpose all but the first column (name)
Plot_1_new_data_T <- as.data.frame(t(Plot_1_new_data[,-1]))
colnames(Plot_1_new_data_T) <- n
#plot data
library(reshape)
# NOTE(review): the first column was dropped before transposing, so the
# transposed table presumably has no `series` column to use as id — confirm.
melt_Transposed_Plot_1_new_data <- melt(Plot_1_new_data_T,id="series")
# NOTE(review): the data and aes() are passed to ggplotly() directly instead of
# to ggplot() — verify this is what was intended.
ggplotly(melt_Transposed_Plot_1_new_data,aes(x=series,y=value,colour=variable,group=variable)) + geom_line()
})
However, when I check the "Plot_1_new_data_T" it seems that the first column is named as c("serie1","serie2",..."serie14") and the rest is named as NA.
Any idea how to proceed because I am new to both R and shiny.
Something like this?
# Long format: one row per (series, coverage column). `variable` holds the
# former column name as a factor and `value` the coverage. The id column is
# named explicitly instead of being guessed by melt().
xm <- melt(x, id.vars = "series")

# ChrX/ChrY are on a very different scale from the other columns, so they are
# left out of this plot.
sex_chr <- c("ChrX_Coverage", "ChrY_Coverage")

# One axis break per factor level (the integer code of each level), rather than
# one per data row.
chr_levels <- levels(xm$variable)

ggplot(
  xm[!xm$variable %in% sex_chr, ],
  aes(as.integer(variable), value, color = series)
) +
  geom_line() +
  scale_x_continuous(breaks = seq_along(chr_levels), labels = chr_levels) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# With no arguments, ggplotly() converts the most recently created ggplot.
ggplotly()
Note that the last two columns were removed from this plot, because they are of such a different scale that including them masks any variation in the other columns. If you want to include all the columns, you could use this instead:
ggplot(xm, aes(as.integer(variable), value, color=series)) +
geom_line() +
...

Resources