How to change range of heatmap using gheatmap in R - r

Im trying to add a heatmap to my phylogenetic tree. The range of the heatmap should be from 0 to 100 instead it only covers the the min and max of the values. Can I reset the range of the heatmap?
thanks.
library(ggtree)
library(ggplot2)
library(ggstance)
df1 <- structure(
list(id = structure(
c(5L, 15L, 29L, 18L, 24L, 21L,
13L, 11L, 8L, 25L, 23L, 9L, 16L, 3L, 6L, 2L, 20L, 27L, 30L, 17L,
14L, 4L, 1L, 7L, 22L, 28L, 10L, 12L, 26L, 19L),
.Label = c("t1",
"t10", "t11", "t12", "t13", "t14", "t15", "t16", "t17", "t18",
"t19", "t2", "t20", "t21", "t22", "t23", "t24", "t25", "t26",
"t27", "t28", "t29", "t3", "t30", "t4", "t5", "t6", "t7", "t8",
"t9"), class = "factor"),
location = structure(c(1L, 3L, 2L,
1L, 2L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 2L, 2L, 1L, 1L, 3L, 2L, 1L,
1L, 3L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 1L, 1L),
.Label = c("CZ", "GZ", "HK"), class = "factor"),
Value = c(22L, 10L, 33L, 12L, NA,
NA, NA, NA, NA, NA, NA, NA, 45L, 89L, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, 80L, NA, NA, NA, NA)),
class = "data.frame", row.names = c(NA,
-30L))
tr <- rtree(30)
p <- ggtree(tr)
#df1<- your_example_data
p1 <- p %<+% df1 + geom_tippoint(aes(color=location))+ guides(color = "none")
d2 <- data.frame( val=rnorm(30, mean= 50, sd=20))
rownames(d2)<- tr$tip.label
library(ggnewscale)
p1 <- p1 + new_scale_fill()
p2<- gheatmap(p1, d2 ,offset=0.015, width=0.05,
colnames_angle=45, colnames_offset_y = 0.25,colnames_offset_x =0.001, colnames=TRUE,
colnames_position='top',font.size = 3)+
scale_fill_viridis_c(option="A", name="query\ncoverage\npercentage")
p2

Try using scale_fill_gradientn. I don't have ggtree in my library collection, but it should work with it too. data$Z are the values used in the legend.
min(data$Z)
[1] 10.43507
# using geom_tile instead
ggplot(data, aes(X, Y, fill= Z)) +
geom_tile() +
scale_fill_gradientn(limits = c(0,max(data$Z)),
colours=viridis(10,o="A"),
breaks=c(0,max(data$Z)),
labels=c(0,max(data$Z)))
Data
data <- structure(list(X = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L), .Label = c("A", "B", "C", "D", "E", "F", "G", "H",
"I", "J"), class = "factor"), Y = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L), .Label = c("var1", "var2", "var3", "var4", "var5",
"var6", "var7", "var8", "var9", "var10"), class = "factor"),
Z = c(33.991562910378, 35.5263787321746, 24.5632742531598,
18.0254957079887, 45.778294513002, 38.0070783570409, 38.8778781332076,
13.9182714093477, 13.2864724285901, 12.3245238792151, 45.4634746629745,
43.5207717958838, 14.6174691990018, 14.6395265311003, 16.3748204801232,
37.5898649636656, 46.154183940962, 21.7670671269298, 45.9928634669632,
15.2300526481122, 42.3459290526807, 36.1509132292122, 13.004608694464,
17.2632187511772, 24.1008642502129, 21.0504860430956, 47.8412099648267,
22.8905160259455, 26.2689692527056, 42.2642367053777, 49.7228981740773,
18.5286565497518, 19.9640860501677, 19.8192273359746, 46.2587429210544,
45.3112288471311, 14.0251182205975, 46.5721819829196, 19.2603973485529,
11.8241156637669, 43.5814412590116, 12.3338401783258, 34.6708638872951,
16.535308547318, 12.5870429351926, 17.7716215513647, 38.3571200724691,
40.5572446156293, 38.3018106594682, 36.1261784471571, 23.6329158209264,
38.2715854980052, 31.8956978339702, 19.8036628682166, 41.236245688051,
42.5284101255238, 47.3572976142168, 10.9305525757372, 41.5727174282074,
39.237065333873, 41.6476187948138, 43.6902561411262, 39.2061061505228,
18.3187866955996, 42.8791201952845, 33.8544269837439, 17.3525733780116,
14.5423825085163, 46.209614733234, 24.5643785689026, 35.3784507885575,
44.3101883865893, 45.7905176281929, 36.0531417001039, 44.190902383998,
32.4274326208979, 33.8546730671078, 43.7150628026575, 44.4308217708021,
27.6862936094403, 39.8551124054939, 10.4350713547319, 35.6894047465175,
28.6168400477618, 18.5768875014037, 17.1367645263672, 30.369380293414,
17.7864238992333, 36.1986118741333, 43.2466325163841, 49.581032032147,
49.736803509295, 40.3205085452646, 27.0655540842563, 42.9749015253037,
30.9310132544488, 23.7332978192717, 35.1737863756716, 40.4224442131817,
15.6103290617466)), out.attrs = list(dim = c(X = 10L, Y = 10L
), dimnames = list(X = c("X=A", "X=B", "X=C", "X=D", "X=E", "X=F",
"X=G", "X=H", "X=I", "X=J"), Y = c("Y=var1", "Y=var2", "Y=var3",
"Y=var4", "Y=var5", "Y=var6", "Y=var7", "Y=var8", "Y=var9", "Y=var10"
))), row.names = c(NA, -100L), class = "data.frame")

Related

How to filter out duplicates in 2-column data frame but in reverse orientation?

I have a data frame of pairs of genes. There are some pairs which are listed twice but in reverse orientation. How do I remove those pairs which are duplicates (but in reverse orientation)? Thanks!
> dput(all_pairs)
structure(list(gene1 = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor"), gene2 = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor")), out.attrs = list(dim = c(10L, 10L
), dimnames = list(Var1 = c("Var1=ASXL1", "Var1=BICRA", "Var1=CCDC168",
"Var1=HRAS", "Var1=MUC16", "Var1=NOTCH1", "Var1=OBSCN", "Var1=PLEC",
"Var1=RREB1", "Var1=TTN"), Var2 = c("Var2=ASXL1", "Var2=BICRA",
"Var2=CCDC168", "Var2=HRAS", "Var2=MUC16", "Var2=NOTCH1", "Var2=OBSCN",
"Var2=PLEC", "Var2=RREB1", "Var2=TTN"))), class = "data.frame", row.names = c(NA,
-90L))
This keeps only one copy of each pair, no matter what the orientation/order is:
all_pairs[!duplicated(t(apply(all_pairs, 1, sort))), ]

Ggplot2 graph in R Shiny with line according to user input

I want to make a graph in my Shiny App using ggplot2 that plots the line regarding the user selection by topic. In this question I was told how to add those 0s in case given topic didn't have any item (using ankrun's answer). Now I'm looking for fitting this graph in a shiny App and thought about adding a sliderInput where you could choose the topic for the line. This is what I've tried:
# # # global # # #
#1. App
if("shiny" %in% rownames(installed.packages()) == FALSE){ install.packages("shiny") }
library(shiny)
#2. Easier data handling
if("dplyr" %in% rownames(installed.packages()) == FALSE){ install.packages("dplyr") }
library(dplyr)
#3. Graphs
if("ggplot2" %in% rownames(installed.packages()) == FALSE){ install.packages("ggplot2") }
library(ggplot2)
#4. Completion in graphs
if("tidyr" %in% rownames(installed.packages()) == FALSE){ install.packages("tidyr") }
library(tidyr)
# # # ui # # #
ui <- fluidPage(
sidebarPanel(
selectInput("select_topic_timeline", "What topic?",
choices = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"),
selected = c("1", "2", "3", "4","5", "6", "7", "8", "9", "10", "11", "12"),
multiple = T)
),#sidebarPanel
mainPanel(
plotOutput("per_topic_timeline")
) #mainPanel
) #fluidPage
# # # server # # #
server <- function(input, output, session) {
# TIMELINE PER TOPIC PER YEAR
output$per_topic_timeline <- renderPlot({
dtd2 <- structure(list(Topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Topic 1",
"Topic 10", "Topic 11", "Topic 12", "Topic 2", "Topic 3", "Topic 4",
"Topic 5", "Topic 6", "Topic 7", "Topic 8", "Topic 9"), class = "factor"),
Year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("2011",
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019"
), class = "factor"), Count = c(3L, 3L, 3L, 5L, 5L, 11L,
17L, 14L, 4L, 1L, 1L, 4L, 2L, 3L, 9L, 4L, 2L, 1L, 3L, 4L,
5L, 18L, 23L, 19L, 15L, 1L, 5L, 6L, 8L, 11L, 17L, 7L, 1L,
3L, 6L, 4L, 20L, 21L, 18L, 12L, 3L, 1L, 1L, 2L, 5L, 5L, 11L,
5L, 2L, 1L, 1L, 2L, 2L, 5L, 7L, 23L, 9L, 1L, 1L, 2L, 3L,
6L, 4L, 9L, 8L, 1L, 1L, 6L, 2L, 3L, 3L, 1L, 3L, 2L, 5L, 7L,
11L, 11L, 28L, 11L, 2L, 1L, 2L, 2L, 5L, 6L, 5L, 16L, 3L,
4L, 2L, 2L, 7L, 6L, 8L, 6L)), row.names = c(NA, -96L), class = "data.frame")
dtd2 %>%
expand(Topic = factor(Topic, levels = gtools::mixedsort(levels(c(input$select_topic_timeline)))) ,
Year = unique(Year)) %>%
left_join(dtd2) %>%
mutate(Count = replace_na(Count, 0)) %>%
ggplot(aes(x = Year, y = Count), colour = c(input$select_topic_timeline), group = Topic) +
geom_point() +
geom_line() +
labs(x = "Year", y = NULL, title = "Timeline")
})
}
shinyApp(ui,server)
It's not printing any error out, but it's saying Warning: Column "Topic" joining factors with different levels, coercing to character vector and it's not giving any graph out, only an empty gray box. I'm not sure about how to fit the input in the graph code and I must've changed it wrong!
This issue is that you are basically refactoring the dtd2$Topic based on what's in the selectInput, but since the Topic only has 9 levels, and you can select up to 12 levels with selectInput, you are getting errors.
For this reason, I'd suggest instead using #Ronak Shah's answer with tidyr::complete instead.
Once you've completed the data, you should then dplyr::filter by Topic to get the what I believe to be the desired result:
# # # global # # #
#1. App
if("shiny" %in% rownames(installed.packages()) == FALSE){ install.packages("shiny") }
library(shiny)
#2. Easier data handling
if("dplyr" %in% rownames(installed.packages()) == FALSE){ install.packages("dplyr") }
library(dplyr)
#3. Graphs
if("ggplot2" %in% rownames(installed.packages()) == FALSE){ install.packages("ggplot2") }
library(ggplot2)
#4. Completion in graphs
if("tidyr" %in% rownames(installed.packages()) == FALSE){ install.packages("tidyr") }
library(tidyr)
# # # ui # # #
ui <- fluidPage(
sidebarPanel(
selectInput("select_topic_timeline",
label = "What topic?",
choices = as.character(1:12),
selected = as.character(1:12),
multiple = TRUE),
),#sidebarPanel
mainPanel(
plotOutput("per_topic_timeline")
) #mainPanel
) #fluidPage
# # # server # # #
server <- function(input, output, session) {
# TIMELINE PER TOPIC PER YEAR
output$per_topic_timeline <- renderPlot({
dtd2 <- structure(list(Topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Topic 1",
"Topic 10", "Topic 11", "Topic 12", "Topic 2", "Topic 3", "Topic 4",
"Topic 5", "Topic 6", "Topic 7", "Topic 8", "Topic 9"), class = "factor"),
Year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("2011",
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019"
), class = "factor"), Count = c(3L, 3L, 3L, 5L, 5L, 11L,
17L, 14L, 4L, 1L, 1L, 4L, 2L, 3L, 9L, 4L, 2L, 1L, 3L, 4L,
5L, 18L, 23L, 19L, 15L, 1L, 5L, 6L, 8L, 11L, 17L, 7L, 1L,
3L, 6L, 4L, 20L, 21L, 18L, 12L, 3L, 1L, 1L, 2L, 5L, 5L, 11L,
5L, 2L, 1L, 1L, 2L, 2L, 5L, 7L, 23L, 9L, 1L, 1L, 2L, 3L,
6L, 4L, 9L, 8L, 1L, 1L, 6L, 2L, 3L, 3L, 1L, 3L, 2L, 5L, 7L,
11L, 11L, 28L, 11L, 2L, 1L, 2L, 2L, 5L, 6L, 5L, 16L, 3L,
4L, 2L, 2L, 7L, 6L, 8L, 6L)), row.names = c(NA, -96L), class = "data.frame")
dtd2 %>%
complete(Topic, Year = unique(Year), fill = list(Count = 0)) %>%
filter(Topic %in% paste("Topic", input$select_topic_timeline)) %>%
ggplot(aes(x = Year, y = Count, colour = Topic, group = Topic)) +
geom_point() +
geom_line() +
labs(x = "Year", y = NULL, title = "Timeline")
})
}
shinyApp(ui,server)
As a side note, I'd also recommend using the shinyWidgets package for this particular input, specifically shinyWidgets::pickerInput, rather than the vanilla shiny::selectInput. I'll leave that decision up to you though.

Facets: organising their order and organising the levels within facets

I would like to please organise the following plots so that facets are printed out from most to least busy (i.e. Hemiptera, Coleoptera, Hymenoptera, Siphonaptera, Lepidoptera, etc.)
I would also like to order the levels within each facet like in Coleoptera. I realise that the X-labels will change order too so I need each facet to print out its own X-label according the level order.
I have already read many threads and that's how I was able to organise Coleoptera. But now I want it to be more tidy.
This is the data (let me know if this format is ok, if not I can try another way):
structure(list(Order = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L), .Label = c("Coleoptera",
"Dermaptera", "Dictyoptera", "Diptera", "Hemiptera", "Hymenoptera",
"Lepidoptera", "Phthiraptera", "Psocoptera", "Siphonaptera",
"Thysanoptera"), class = "factor"), Nrange = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 14L, 14L, 1L, 10L, 1L,
3L, 4L, 6L, 7L, 10L, 11L, 12L, 14L, NA, 1L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 14L, NA, 1L, 4L, 5L, 6L, 7L, 8L, 10L, 11L,
12L, 14L, 15L, NA, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L,
13L, 14L, 4L, 10L, 11L, 12L, 14L, 1L, 4L, 10L, 11L, 12L, 13L,
14L, 1L, 5L, 10L, 1L, 4L, 6L, 7L, 10L, 11L, 12L, 14L), .Label = c("Africa",
"Africa, Asia", "Americas", "Asia", "Asia-Temp", "Asia-Trop",
"Australasia", "C&S America", "Cosmopolitan", "Cryptogenic",
"N America", "S America", "Trop", "Trop, SubTrop", "Unknown"), class = "factor"),
Records = c(16L, 1L, 9L, 7L, 11L, 17L, 1L, 15L, 8L, 8L, 5L,
1L, 2L, 1L, 5L, 1L, 1L, 1L, 1L, 9L, 9L, 2L, 1L, 4L, 11L,
10L, 30L, 15L, 9L, 2L, 2L, 2L, 34L, 11L, 21L, 1L, 21L, 16L,
8L, 1L, 14L, 3L, 5L, 25L, 4L, 2L, 1L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 8L, 5L, 2L, 1L, 2L, 2L, 9L, 1L, 2L, 1L, 3L, 1L,
12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L,
3L, 2L)), .Names = c("Order", "Nrange", "Records"), row.names = c(NA,
-83L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = "Order", drop = TRUE)
This is the reordering that I guess is affecting only Coleoptera.
xy<-x%>%
mutate(Nrange=reorder(Nrange,-Records,sum))
This is the plot:
to_plot<-xy %>%
filter(!is.na(Nrange))
ggplot(to_plot,aes(x=Nrange,y=Records,fill=Nrange))+
geom_col()+
theme(axis.text.x = element_text(angle=90, vjust=0.7), legend.position = "none") +
facet_wrap(~Order,ncol=3)+
labs(title="Insects recorded as alien-invasive to mainland Spain",
subtitle="Native ranges vs number of records",
caption="Data source: DAISIE (http://www.europe-aliens.org/)")
And this is the plot:
enter image description here
Assuming you're using the tidyverse (based on your code):
library(tidyverse)
xy <- x %>%
ungroup() %>%
mutate(
Order = fct_reorder(Order, Records, sum, .desc = TRUE)
)
xy %>%
filter(!is.na(Nrange)) %>%
ggplot() +
aes(x = Nrange, y = Records, fill = Nrange) +
geom_col() +
facet_wrap(~Order, ncol = 3)
fct_reorder comes from the forcats package, which I believe is now a part of the tidyverse.
Or, using base R, something like this:
xy <- x
record_sums <- tapply(xy$Records, xy$Order, sum)
levels(xy$Order) <- levels(xy$Order)[order(record_sums, decreasing = TRUE)]

Merge and edit multiple legends when facets and geom_line are plotted separately

Following this guide I have plotted the following graph using the following code. I did split my dataset into one that contains the data that goes in all plots 'control', and the rest 'dfnocontrol'.
ggplot(dfnocontrol,aes(y=value,x=year)) + geom_line(data=dfnocontrol,
aes(color=survivorship),size=1.5) + facet_wrap(~density,nrow=2) +
geom_line(data=dfcontrol,aes(linetype=simulname),color='grey',size=1.5)
I have tried many ways to have only one legend, or to edit the existing two legend but nothing seems to work. scale_fill_manual() seems to be ignored, even though I don't get any error message. I was forced to use linetype to make the 'control' appear in the legend. How can I merge these two legends?
edit: these are the data for control
structure(list(year = 1:2, psize = structure(c(6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(392.884450281975,
392.76842677951), simulname = structure(c(1L, 1L), .Label = c("control",
"d02s70", "d02s80", "d02s90", "d05s70", "d05s80", "d05s90", "d07s70",
"d07s80", "d07s90", "d1s70", "d1s80", "d1s90", "d2s70", "d2s80",
"d2s90", "d3s70", "d3s80", "d3s90", "d4s70", "d4s80", "d4s90",
"d5s70", "d5s80", "d5s90"), class = "factor"), survivorship = structure(c(1L,
1L), .Label = c("control", "s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "survivorship"), row.names = 2501:2502, class = "data.frame")
and data for the rest
structure(list(year = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L), psize = structure(c(6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(391.933827876557,
390.784233661738, 391.931768654094, 390.777949423224, 391.930831801103,
390.775125884957, 391.904131913644, 390.671681105517, 391.903377880798,
390.669377819171, 391.902842713777, 390.667498067697, 391.874743014214,
390.557893743236, 391.874006362415, 390.555639401299, 391.8735511448,
390.554149478021, 391.84367266143, 390.443618794749, 391.843064602404,
390.442149462261, 391.842594963982, 390.440725187945, 391.72267802326,
388.998242801555, 391.722309813432, 388.996838950063, 391.721745089041,
388.995715149179, 384.967818982887, 383.215849576989, 384.967407490871,
383.214728664341, 384.96689031843, 383.213390281481, 391.897592532656,
389.445606459513, 391.897234485415, 389.444632515097, 391.89681267375,
389.443358475326, 391.402389493961, 388.987279260992, 391.401979078947,
388.985920091544, 391.401583421483, 388.984891027315), simulname = structure(c(2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L,
10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L,
23L, 24L, 24L, 25L, 25L), .Label = c("control", "d02s70", "d02s80",
"d02s90", "d05s70", "d05s80", "d05s90", "d07s70", "d07s80", "d07s90",
"d1s70", "d1s80", "d1s90", "d2s70", "d2s80", "d2s90", "d3s70",
"d3s80", "d3s90", "d4s70", "d4s80", "d4s90", "d5s70", "d5s80",
"d5s90"), class = "factor"), density = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("control",
"d02", "d05", "d07", "d1", "d2", "d3", "d4", "d5"), class = "factor"),
survivorship = structure(c(2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("control",
"s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "density", "survivorship"), row.names = c(6081L,
6082L, 9845L, 9846L, 14345L, 14346L, 17985L, 17986L, 21797L,
21798L, 26297L, 26298L, 30567L, 30568L, 34528L, 34529L, 38744L,
38745L, 43144L, 43145L, 47519L, 47520L, 51983L, 51984L, 56483L,
56484L, 60983L, 60984L, 65483L, 65484L, 69983L, 69984L, 74483L,
74484L, 78983L, 78984L, 83483L, 83484L, 87983L, 87984L, 92483L,
92484L, 96983L, 96984L, 101483L, 101484L, 105983L, 105984L), class = "data.frame")
Since you provided no data, I will give you an example using the economics data set.
library(wesanderson) # for the colours
library(tidyverse)
data("economics")
We will need two data sets for this task. Variable unemploy will serve as our 'control' (6th column). All variables will be scaled.
First data set:
economics_gathered <- economics[, 1:5] %>% # exclude unemploy
modify_if(is.numeric, scale) %>%
gather(key, value, -date)
Second data set:
economics_control <- economics[, c(1, 6)] %>%
dplyr::rename(control = unemploy) %>%
gather(some_other_key, value, 2) %>%
mutate(value = scale(value))
Now we can plot:
ggplot() +
geom_line(data = economics_control, aes(x = date, y = value, col = some_other_key)) +
geom_line(data = economics_gathered, aes(date, value, col = key)) +
scale_colour_manual(values = c("grey", wes_palette("GrandBudapest"))) +
facet_wrap(~key, scales = "free_y")
To which the result is the plot below.
EDIT
With the data provided by the OP the following code
ggplot() +
geom_line(data = dfcontrol, aes(year, value, col = survivorship), size = 1.5) +
geom_line(data = dfnocontrol, aes(year, value, col = survivorship), size = 1.5) +
facet_wrap( ~ density, nrow = 2) +
scale_colour_manual(values = c("grey", "forestgreen", "red", "blue"))
gives this plot:
DATA
1)
dfcontrol <- structure(list(year = 1:2, psize = structure(c(6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(392.884450281975,
392.76842677951), simulname = structure(c(1L, 1L), .Label = c("control",
"d02s70", "d02s80", "d02s90", "d05s70", "d05s80", "d05s90", "d07s70",
"d07s80", "d07s90", "d1s70", "d1s80", "d1s90", "d2s70", "d2s80",
"d2s90", "d3s70", "d3s80", "d3s90", "d4s70", "d4s80", "d4s90",
"d5s70", "d5s80", "d5s90"), class = "factor"), survivorship = structure(c(1L,
1L), .Label = c("control", "s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "survivorship"), row.names = 2501:2502, class = "data.frame")
2)
dfnocontrol <- structure(list(year = c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L), psize = structure(c(6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("all plants",
"all plants no-seedl", "seedlings", "SmallerT10", "SmallerT10 no-seedl",
"LargerT10", "10-30", "30-50", "50+"), class = "factor"), value = c(391.933827876557,
390.784233661738, 391.931768654094, 390.777949423224, 391.930831801103,
390.775125884957, 391.904131913644, 390.671681105517, 391.903377880798,
390.669377819171, 391.902842713777, 390.667498067697, 391.874743014214,
390.557893743236, 391.874006362415, 390.555639401299, 391.8735511448,
390.554149478021, 391.84367266143, 390.443618794749, 391.843064602404,
390.442149462261, 391.842594963982, 390.440725187945, 391.72267802326,
388.998242801555, 391.722309813432, 388.996838950063, 391.721745089041,
388.995715149179, 384.967818982887, 383.215849576989, 384.967407490871,
383.214728664341, 384.96689031843, 383.213390281481, 391.897592532656,
389.445606459513, 391.897234485415, 389.444632515097, 391.89681267375,
389.443358475326, 391.402389493961, 388.987279260992, 391.401979078947,
388.985920091544, 391.401583421483, 388.984891027315), simulname = structure(c(2L,
2L, 3L, 3L, 4L, 4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L,
10L, 11L, 11L, 12L, 12L, 13L, 13L, 14L, 14L, 15L, 15L, 16L, 16L,
17L, 17L, 18L, 18L, 19L, 19L, 20L, 20L, 21L, 21L, 22L, 22L, 23L,
23L, 24L, 24L, 25L, 25L), .Label = c("control", "d02s70", "d02s80",
"d02s90", "d05s70", "d05s80", "d05s90", "d07s70", "d07s80", "d07s90",
"d1s70", "d1s80", "d1s90", "d2s70", "d2s80", "d2s90", "d3s70",
"d3s80", "d3s90", "d4s70", "d4s80", "d4s90", "d5s70", "d5s80",
"d5s90"), class = "factor"), density = structure(c(2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L), .Label = c("control",
"d02", "d05", "d07", "d1", "d2", "d3", "d4", "d5"), class = "factor"),
survivorship = structure(c(2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L,
4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L, 2L, 2L,
3L, 3L, 4L, 4L, 2L, 2L, 3L, 3L, 4L, 4L), .Label = c("control",
"s70", "s80", "s90"), class = "factor")), .Names = c("year",
"psize", "value", "simulname", "density", "survivorship"), row.names = c(6081L,
6082L, 9845L, 9846L, 14345L, 14346L, 17985L, 17986L, 21797L,
21798L, 26297L, 26298L, 30567L, 30568L, 34528L, 34529L, 38744L,
38745L, 43144L, 43145L, 47519L, 47520L, 51983L, 51984L, 56483L,
56484L, 60983L, 60984L, 65483L, 65484L, 69983L, 69984L, 74483L,
74484L, 78983L, 78984L, 83483L, 83484L, 87983L, 87984L, 92483L,
92484L, 96983L, 96984L, 101483L, 101484L, 105983L, 105984L), class = "data.frame")

Showing data.frame as table or matrix in R

I want to show the following data.frame
df <- structure(list(Variety = structure(c(2L, 3L, 4L, 5L, 6L, 7L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 1L), .Label = c("F2022",
"F9917", "Hegari", "JS2002", "JS263", "PC1", "Sadabahar"), class = "factor"),
Priming = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 1L), .Label = c("CaCl2",
"Dry", "Hydro", "KCL", "KNO3", "NaCl", "Onfarm"), class = "factor"),
Letters = structure(c(1L, 3L, 10L, 11L, 10L, 19L, 27L, 5L,
28L, 11L, 18L, 20L, 9L, 1L, 22L, 14L, 30L, 26L, 24L, 3L,
22L, 9L, 16L, 10L, 15L, 25L, 6L, 7L, 17L, 30L, 18L, 13L,
20L, 27L, 19L, 29L, 23L, 2L, 8L, 12L, 6L, 31L, 8L, 22L, 4L,
32L, 21L, 33L, 2L), .Label = c("a", "at", "bcd", "bclq",
"bcq", "bd", "bds", "chlq", "ds", "e", "efg", "efgmnor",
"efgnor", "efgnr", "efgr", "eg", "fgmnor", "fmnor", "hijkl",
"hijkp", "hikl", "hklq", "ijkmp", "ijmop", "jmop", "mno",
"mnop", "mnor", "su", "t", "uv", "v", "w"), class = "factor")), .Names = c("Variety",
"Priming", "Letters"), row.names = c(NA, -49L), class = "data.frame")
as Table or matrix with Ordered Variety names along rows and Ordered Priming names along columns and showing Letter column in the main body of the table in R.
I could not figure out how to do this. Any help will be highly appreciated. Thanks in advance.
This should do it.
d <- d[order(d$Variety,d$Priming),]
dw <- reshape(data = d, idvar = 'Variety', timevar = 'Priming', direction = 'wide')
dw
You might want to edit the column names.
names(dw) <- gsub('Letters.', '', names(dw), fixed = TRUE)
Simple one
library(reshape2)
acast(data=df, formula=Variety~Priming)
CaCl2 Dry Hydro KCL KNO3 NaCl Onfarm
F2022 at mnop a hklq bds hijkl uv
F9917 a bcq hklq ds fgmnor su chlq
Hegari bcd mnor efgnr eg t ijkmp hklq
JS2002 e efg t e fmnor at bclq
JS263 efg fmnor mno efgr efgnor chlq v
PC1 e hijkp ijmop jmop hijkp efgmnor hikl
Sadabahar hijkl ds bcd bd mnop bd w

Resources