Related
I have a data frame of pairs of genes. There are some pairs which are listed twice but in reverse orientation. How do I remove those pairs which are duplicates (but in reverse orientation)? Thanks!
> dput(all_pairs)
structure(list(gene1 = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor"), gene2 = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor")), out.attrs = list(dim = c(10L, 10L
), dimnames = list(Var1 = c("Var1=ASXL1", "Var1=BICRA", "Var1=CCDC168",
"Var1=HRAS", "Var1=MUC16", "Var1=NOTCH1", "Var1=OBSCN", "Var1=PLEC",
"Var1=RREB1", "Var1=TTN"), Var2 = c("Var2=ASXL1", "Var2=BICRA",
"Var2=CCDC168", "Var2=HRAS", "Var2=MUC16", "Var2=NOTCH1", "Var2=OBSCN",
"Var2=PLEC", "Var2=RREB1", "Var2=TTN"))), class = "data.frame", row.names = c(NA,
-90L))
This keeps only one copy of each pair, no matter what the orientation/order is:
# Sort the two gene names within each row so that (A, B) and (B, A) collapse to
# the same key, then keep only the first row seen for each key.
all_pairs[
  !duplicated(t(apply(all_pairs, MARGIN = 1, FUN = sort))),
  ,
  drop = FALSE
]
I want to make a graph in my Shiny App using ggplot2 that plots the line regarding the user selection by topic. In this question I was told how to add those 0s in case given topic didn't have any item (using ankrun's answer). Now I'm looking for fitting this graph in a shiny App and thought about adding a sliderInput where you could choose the topic for the line. This is what I've tried:
# # # global # # #
# Each package is installed only when it cannot be found, and is then attached.
# requireNamespace() looks up a single package directly, instead of listing
# the whole library with installed.packages() just to test one name.
#1. App
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny")
}
library(shiny)
#2. Easier data handling
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
#3. Graphs
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
#4. Completion in graphs
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(tidyr)
# # # ui # # #
# Page layout: a sidebar holding the topic selector and a main panel holding
# the plot that the server assigns to output$per_topic_timeline.
ui <- fluidPage(
  sidebarPanel(
    # Multi-select of topic numbers; all twelve are selected on start-up.
    selectInput("select_topic_timeline", "What topic?",
      choices = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"),
      selected = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"),
      # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
      multiple = TRUE
    )
  ), # sidebarPanel
  mainPanel(
    plotOutput("per_topic_timeline")
  ) # mainPanel
) # fluidPage
# # # server # # #
# Server logic: builds the example counts and renders the timeline plot into
# output$per_topic_timeline.
server <- function(input, output, session) {
# TIMELINE PER TOPIC PER YEAR
output$per_topic_timeline <- renderPlot({
# Example data: one row per observed Topic/Year combination with its Count.
# Topic (labels "Topic 1" .. "Topic 12") and Year ("2011" .. "2019") are factors.
dtd2 <- structure(list(Topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Topic 1",
"Topic 10", "Topic 11", "Topic 12", "Topic 2", "Topic 3", "Topic 4",
"Topic 5", "Topic 6", "Topic 7", "Topic 8", "Topic 9"), class = "factor"),
Year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 2L,
3L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 6L, 7L, 8L,
9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("2011",
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019"
), class = "factor"), Count = c(3L, 3L, 3L, 5L, 5L, 11L,
17L, 14L, 4L, 1L, 1L, 4L, 2L, 3L, 9L, 4L, 2L, 1L, 3L, 4L,
5L, 18L, 23L, 19L, 15L, 1L, 5L, 6L, 8L, 11L, 17L, 7L, 1L,
3L, 6L, 4L, 20L, 21L, 18L, 12L, 3L, 1L, 1L, 2L, 5L, 5L, 11L,
5L, 2L, 1L, 1L, 2L, 2L, 5L, 7L, 23L, 9L, 1L, 1L, 2L, 3L,
6L, 4L, 9L, 8L, 1L, 1L, 6L, 2L, 3L, 3L, 1L, 3L, 2L, 5L, 7L,
11L, 11L, 28L, 11L, 2L, 1L, 2L, 2L, 5L, 6L, 5L, 16L, 3L,
4L, 2L, 2L, 7L, 6L, 8L, 6L)), row.names = c(NA, -96L), class = "data.frame")
# Build every Topic/Year combination, join the observed counts back on, and
# replace the missing counts with 0 before plotting.
# NOTE(review): input$select_topic_timeline is presumably a character vector
# such as c("1", "2"); levels() of a character vector is NULL, so the factor()
# call below likely ends up without usable levels -- verify. The selectable
# values ("1" .. "12") also differ from the labels stored in Topic
# ("Topic 1" .. "Topic 12").
# NOTE(review): gtools is called via `::` but is not installed or attached in
# the setup section of this script.
dtd2 %>%
expand(Topic = factor(Topic, levels = gtools::mixedsort(levels(c(input$select_topic_timeline)))) ,
Year = unique(Year)) %>%
left_join(dtd2) %>%
mutate(Count = replace_na(Count, 0)) %>%
# NOTE(review): `colour` and `group` are given to ggplot() itself, outside
# aes(), so they are not part of the aesthetic mapping.
ggplot(aes(x = Year, y = Count), colour = c(input$select_topic_timeline), group = Topic) +
geom_point() +
geom_line() +
labs(x = "Year", y = NULL, title = "Timeline")
})
}
# Start the app with the ui and server defined in this script.
shinyApp(ui,server)
It's not printing any error out, but it's saying Warning: Column "Topic" joining factors with different levels, coercing to character vector and it's not giving any graph out, only an empty gray box. I'm not sure about how to fit the input in the graph code and I must've changed it wrong!
The issue is that you are basically re-levelling dtd2$Topic based on what's in the selectInput, but since the Topic only has 9 levels, and you can select up to 12 levels with selectInput, you are getting errors.
For this reason, I'd suggest using @Ronak Shah's answer with tidyr::complete instead.
Once you've completed the data, you should then dplyr::filter by Topic to get what I believe to be the desired result:
# # # global # # #
# Each package is installed only when it cannot be found, and is then attached.
# requireNamespace() looks up a single package directly, instead of listing
# the whole library with installed.packages() just to test one name.
#1. App
if (!requireNamespace("shiny", quietly = TRUE)) {
  install.packages("shiny")
}
library(shiny)
#2. Easier data handling
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
library(dplyr)
#3. Graphs
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
#4. Completion in graphs
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
library(tidyr)
# # # ui # # #
# Page layout: a sidebar holding the topic selector and a main panel holding
# the plot that the server assigns to output$per_topic_timeline.
ui <- fluidPage(
  sidebarPanel(
    # Multi-select of topic numbers "1" .. "12"; all are selected on start-up.
    # No comma follows this call: a trailing comma would hand sidebarPanel()
    # an empty argument.
    selectInput("select_topic_timeline",
      label = "What topic?",
      choices = as.character(1:12),
      selected = as.character(1:12),
      multiple = TRUE
    )
  ), # sidebarPanel
  mainPanel(
    plotOutput("per_topic_timeline")
  ) # mainPanel
) # fluidPage
# # # server # # #
# Server logic: prepares the example counts once and renders the timeline for
# the topics currently selected in input$select_topic_timeline.
server <- function(input, output, session) {
  # Example data: one row per observed Topic/Year combination with its Count.
  # It does not depend on any input, so it is built here, when the server
  # function runs, rather than inside renderPlot() on every redraw.
  dtd2 <- structure(list(Topic = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
  1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
  3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
  5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
  7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
  10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L, 11L,
  11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("Topic 1",
  "Topic 10", "Topic 11", "Topic 12", "Topic 2", "Topic 3", "Topic 4",
  "Topic 5", "Topic 6", "Topic 7", "Topic 8", "Topic 9"), class = "factor"),
  Year = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 2L,
  3L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
  3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
  9L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
  8L, 9L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 6L, 7L, 8L,
  9L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 4L, 5L,
  6L, 7L, 8L, 9L, 2L, 3L, 4L, 5L, 6L, 7L, 8L), .Label = c("2011",
  "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019"
  ), class = "factor"), Count = c(3L, 3L, 3L, 5L, 5L, 11L,
  17L, 14L, 4L, 1L, 1L, 4L, 2L, 3L, 9L, 4L, 2L, 1L, 3L, 4L,
  5L, 18L, 23L, 19L, 15L, 1L, 5L, 6L, 8L, 11L, 17L, 7L, 1L,
  3L, 6L, 4L, 20L, 21L, 18L, 12L, 3L, 1L, 1L, 2L, 5L, 5L, 11L,
  5L, 2L, 1L, 1L, 2L, 2L, 5L, 7L, 23L, 9L, 1L, 1L, 2L, 3L,
  6L, 4L, 9L, 8L, 1L, 1L, 6L, 2L, 3L, 3L, 1L, 3L, 2L, 5L, 7L,
  11L, 11L, 28L, 11L, 2L, 1L, 2L, 2L, 5L, 6L, 5L, 16L, 3L,
  4L, 2L, 2L, 7L, 6L, 8L, 6L)), row.names = c(NA, -96L), class = "data.frame")

  # Add a Count = 0 row for every Topic/Year combination missing from dtd2,
  # so each topic's line covers all years. This is also input-independent.
  dtd2_complete <- dtd2 %>%
    complete(Topic, Year = unique(Year), fill = list(Count = 0))

  # TIMELINE PER TOPIC PER YEAR
  output$per_topic_timeline <- renderPlot({
    dtd2_complete %>%
      # The input holds bare numbers ("1", "2", ...) while Topic holds
      # "Topic 1", "Topic 2", ..., hence the paste().
      filter(Topic %in% paste("Topic", input$select_topic_timeline)) %>%
      ggplot(aes(x = Year, y = Count, colour = Topic, group = Topic)) +
      geom_point() +
      geom_line() +
      labs(x = "Year", y = NULL, title = "Timeline")
  })
}
# Start the app with the ui and server defined in this script.
shinyApp(ui, server)
As a side note, I'd also recommend using the shinyWidgets package for this particular input, specifically shinyWidgets::pickerInput, rather than the vanilla shiny::selectInput. I'll leave that decision up to you though.
I have a nice DCA ordination of some of my data, but can't produce an ordiellipse for each of the different data clusters. My data can be found at the bottom of this post.
What happens now: I have four different groups (Block A-D) with three clusters per group (three year categories). So, I want to end up with different delineated groups. When I run ordiellipse() I get the following error message:
Error in pts[gr, , drop = FALSE] : subscript out of bounds
In addition: Warning message:
In complete.cases(pts) & !is.na(groups) :
longer object length is not a multiple of shorter object length
and a graph as follows:
DCA ordination of four blocks of vegetation
This piece of code reproduces the error, but due to the reduced dataset presented here, the graph looks a bit different:
# Install and attach the vegan package used for the ordination plot below.
install.packages("vegan")
library(vegan)
# Draw the plot in three steps inside one braced group: an empty frame
# (type = "n"), the site points, and then the ellipses grouped by Years_KIKKER.
# NOTE(review): the grouping data set in this post is labelled "Years_Kikker"
# while the code refers to Years_KIKKER; R names are case-sensitive -- confirm
# the object name matches.
{plot(site_scr_kikker, type="n", main="Kikkervalleien", xlab="DCA1 Eigenvalue = 0.62",
ylab="DCA2 Eigenvalue = 0.39")
points(site_scr_kikker, display = "sites", cex = 0.8, pch=10)
# NOTE(review): the grouping vector presumably needs one entry per row of
# site_scr_kikker. The posted scores appear to have 219 rows (there is no
# "18B01" row) while the posted factor appears to have 220 entries -- TODO
# confirm. A length mismatch would be consistent with the quoted warning
# "longer object length is not a multiple of shorter object length".
ordiellipse(site_scr_kikker, Years_KIKKER, kind="se", conf=0.95, lwd=2,
draw = "polygon", col=1:4, border=1:4,
alpha=63)}
DCA Axes Dataset:
structure(list(DCA1 = c(-0.554410061801955, 2.68272013411215,
2.68697635940812, 2.82668169800565, 2.80053527027075, 2.23642581481516,
2.35425133786973, 2.52415368531054, 2.83239838572004, 2.84069370354046,
2.77338234239721, 2.81710120200121, 3.02325331285456, 2.53697043507954,
3.05037536310673, 3.32304086730676, 2.94495328416423, 3.15022598269494,
3.39489992455406, 3.28769160043834, -0.350924337608413, 0.275699505382009,
0.297344502163647, 0.240762119868438, 0.228861788615913, 0.314964666243383,
0.371085287846039, 0.455145784364889, 0.652221371003541, 0.499839296442089,
0.379360398080226, 0.549933370572594, 0.399966004306952, 0.500218697886041,
0.441564088620194, 0.374702692230443, 0.382333410536051, 0.43285459912782,
0.428611459750847, 0.349092514843647, -0.888853907037661, -1.28333663263808,
-1.40792331844972, -1.38537198615101, -1.38995889090796, -1.3655773745443,
-1.31803656153966, -1.34826448701426, -1.34653537792753, -1.49305269877646,
-1.50814236008689, -1.41827597394111, -1.39602666811321, -1.4148816003514,
-1.49783699791751, -1.47003691605731, -1.42755467648435, -1.30533485632748,
-1.36950094020217, -1.23477912087743, 3.23114892464093, 2.9350886798946,
3.14124836945073, 3.26161277365282, 3.09515391638416, 3.1529521123077,
3.06459587965894, 3.10368520711438, 3.22697584876561, 3.53654928835111,
2.98450087615265, 3.270797532973, 3.26776719866589, 3.49199289032157,
3.22923990263853, 3.25429242878212, 3.04740856725947, 3.0826704683258,
3.13214804334072, 3.02742007198209, -0.117264033632094, -0.16440126600505,
-0.0448538849517754, -0.0426633870391433, 0.0330104299718532,
-0.0752808949299411, 0.117242046915944, 0.0416044435035445, 0.124146770645119,
0.0523946356429974, -0.110261999817611, -0.228252641183511, -0.188814210123203,
-0.290927018876809, -0.248633979863795, -0.0903889717097015,
-0.123459222045697, -0.149699086185127, -0.150112841061331, 3.01689076683526,
2.01577708020474, 2.03044077034707, 2.07207139315213, 2.12441461917371,
2.03701011931199, 2.01252790874418, 1.83219506720427, 2.04345013029757,
2.15504917885961, 2.06913115663176, 1.98989149024749, 1.99123174245595,
1.96507730677135, 1.95295285738276, 2.04095710166195, 1.84679490913208,
1.83479477688629, 2.06370280057877, 2.09660967186289, 0.541840589690319,
0.103220405988339, 0.145850580989204, 0.171702980416538, 0.0991444115624873,
0.163980634495489, 0.0100630096884953, 0.00653099371627297, -0.049057450717042,
-0.0731989798652191, 0.0484957737765508, -0.0813429375561661,
0.226394829075491, 0.118747426326434, 0.0785696207929674, 0.372080921641888,
0.228084973201013, 0.436449500065551, 0.380195760092951, 0.421054280535058,
-0.3407891239866, -0.770535673192646, -0.78726979249955, -0.605034153126869,
-0.79603463000109, -0.611191548761836, -0.479087063427777, -0.431712806416684,
-0.442179135680639, -0.359040655364315, -0.387751952086651, -0.333064178275891,
-0.245245634230479, -0.294664916205089, -0.325293571885643, -0.371714350289459,
-0.384076243072539, -0.364275416660051, -0.492176029276133, -0.360665042070641,
1.97498200909125, 1.71918456504906, 1.65998788992634, 1.66434225634425,
1.56633028293729, 1.74620235786651, 1.62590128379407, 1.50258825353478,
1.48820880624004, 1.42926003809109, 1.45513337793396, 1.42592371006012,
1.4963606424124, 1.44021703608174, 1.44438380462437, 1.47109090679392,
1.82139520526838, 1.43656718298432, 1.44873704214624, 1.6139306940386,
0.329534864476447, 0.242211052748716, 0.235001084932526, 0.203151203202996,
0.0621389966258401, 0.0944651233344451, 0.335947463398379, 0.34920131294113,
0.356337550057783, 0.413800173211847, 0.475501084593146, 0.636972497835927,
0.378416570342712, 0.405927373162309, 0.483958766985421, 0.313492417628128,
0.18082570013463, 0.213448692873988, 0.175969392011173, 0.306174433718341,
-0.661344430804266, -0.36312534912334, -0.531638029637394, -0.323308841681458,
-0.1705480775506, -0.320797820641974, -0.0112455616689928, 0.0058094693123143,
-0.173103348877858, -0.187484910613069, -0.140328782633759, -0.262935718115112,
-0.213706195115846, -0.201623466852359, -0.176562229774177, -0.129977719792298,
-0.214157064357283, -0.312304712680445, -0.321801942265119, -0.447307072541585
), DCA2 = c(1.55135681949654, 0.390676820301294, 0.298911889220322,
0.263998071977169, 0.318540344211798, 0.261720092088233, 0.185092505227324,
0.266125079431566, 0.394828240097056, 0.302396200887096, 0.427178178571868,
0.362329582087479, 0.329300702637127, 0.106852609024896, 0.0916401140801768,
-0.0498768808296606, 0.0568755736541453, 0.0409183688588972,
-0.00982842960758612, -0.0532614523308772, 2.37879922539826,
0.870845236307184, 0.875448127097767, 0.641275864686684, 0.642137889278431,
0.61287181240447, 0.46096369228661, 0.353139355245069, 0.30571197713629,
0.127480232335107, 0.155591712070341, 0.201701485575426, 0.164465659451652,
0.053079369473755, 0.0208974057538049, 0.146542798250278, 0.133527092556681,
0.0558014251324042, 0.0947450033654067, 0.146527814538444, 1.5634624218799,
-0.045338607959831, 0.0921067787998133, 0.100136516321785, 0.176555931155601,
0.1779356816878, 0.169352553487154, 0.159084219879744, 0.1416643202517,
0.046751076432749, -0.0143690327219694, 0.0854961342502074, 0.0502099136978105,
0.0730195528098192, 0.0853374008019263, 0.115531044767214, 0.0847573955605063,
0.163097640034325, 0.134198472975748, 0.275479651900967, 0.0460034929226141,
0.560715956164233, 0.37831200537774, 0.258027386145382, 0.388149229049795,
0.321257843490554, 0.403942482899889, 0.195339552141307, 0.151011302110764,
0.0876417694236817, 0.0587161407304979, 0.0994546033680268, 0.251510488850064,
0.122130974589908, 0.111911790245653, 0.165535261771949, 0.060970314956561,
0.225723170237567, 0.313941078588394, 0.231918137883269, 0.993679773684799,
0.881292795126892, 0.949549576326203, 0.820143650778247, 0.967230951435818,
0.913507935790706, 0.987962294037885, 0.89747403569919, 1.0281502304616,
1.03056849037379, 0.985558206829436, 0.956118614451869, 0.990861510942461,
1.03853618229401, 0.76894643786781, 0.71956843396122, 0.895677149723554,
1.04202078104011, 0.994362394242357, 0.45816044069548, -0.256799924265915,
-0.219215409286906, -0.274974314031124, 0.00673120866587418,
-0.34588695905374, -0.330796391785146, -0.284953089585678, -0.358994836114401,
-0.0877152907820218, -0.0179836181616615, -0.0514356092538941,
-0.0631722426274615, -0.321764014760995, -0.292880797095688,
-0.124966216219314, 0.0448721698628494, -0.0122687592139075,
-0.0293240055712474, 0.241689548511685, 0.258771228150735, -0.243978231909183,
-0.273670301716394, -0.346381197575676, -0.540924824745573, -0.578466142473874,
-0.881698449269004, -0.988487876600371, -0.874559759965791, -0.898043753863041,
-0.65177624643986, -0.897172266653606, -0.428027378766614, -0.618350571130815,
-0.650486911424929, -0.522529645458612, -0.540007295687359, -0.56048323820591,
-0.318120499195913, -0.233107811772576, 1.11444300342379, -0.464311522843928,
-0.671043500267456, -0.293709784912165, -0.48957037940714, -0.303799057505386,
-0.5014139212286, -0.446968540045644, -0.584723850846212, -0.768962102318167,
-0.473903387692755, -0.476071214131476, -0.738718937014587, -0.802748557174088,
-0.878862063849493, -1.03232927446183, -0.901938937530595, -0.90685531694932,
-0.835172924486567, -0.444400981243365, 0.0711913939922195, -0.376272209371,
-0.278328148225639, -0.37229823300335, -0.158396017104884, -0.221206427389147,
-0.356652754022269, -0.130851791393296, -0.208569352651987, -0.12330848067377,
0.119039186900003, -0.145975049001435, -0.0110773787283525, 0.154455358806736,
0.186221284305681, 0.0518734671667143, 0.0410707622863646, 0.295096579413462,
0.277622386022512, -0.0377429837590535, -0.126848197591401, -0.0574585504960616,
-0.250845634712495, -0.0177800130809138, -0.107737216176091,
-0.0631643821637247, 0.1010605032824, -0.0442202733629364, -0.372070473916875,
-0.533311401539873, -0.724584176353283, -0.865166680824871, -0.87656068793911,
-0.813421991975295, -0.839998556813832, -0.655707249050569, -0.534597066763741,
-0.378820955906015, -0.0722774697143169, -0.109467994974947,
-0.331582307211823, -1.28959124402666, -1.37962362889618, -1.43451046953702,
-1.38447488090246, -1.69236882979906, -1.44344360082209, -1.3915281556235,
-1.58096147044615, -1.68132043125815, -2.20367091829309, -2.5599499288299,
-2.31297384112025, -2.38435599310711, -2.20768782296035, -1.65607037944418,
-1.64014952994504, -1.69013789782066, -1.7017681151936, -1.61898692370139
)), .Names = c("DCA1", "DCA2"), row.names = c("01A01", "01A02",
"01A03", "01A04", "01A05", "01A06", "01A07", "01A08", "01A09",
"01A10", "01A11", "01A12", "01A13", "01A14", "01A15", "01A16",
"01A17", "01A18", "01A19", "01A20", "08A01", "08A02", "08A03",
"08A04", "08A05", "08A06", "08A07", "08A08", "08A09", "08A10",
"08A11", "08A12", "08A13", "08A14", "08A15", "08A16", "08A17",
"08A18", "08A19", "08A20", "18A01", "18A02", "18A03", "18A04",
"18A05", "18A06", "18A07", "18A08", "18A09", "18A10", "18A11",
"18A12", "18A13", "18A14", "18A15", "18A16", "18A17", "18A18",
"18A19", "18A20", "01B01", "01B02", "01B03", "01B04", "01B05",
"01B06", "01B07", "01B08", "01B09", "01B10", "01B11", "01B12",
"01B13", "01B14", "01B15", "01B16", "01B17", "01B18", "01B19",
"01B20", "18B02", "18B03", "18B04", "18B05", "18B06", "18B07",
"18B08", "18B09", "18B10", "18B11", "18B12", "18B13", "18B14",
"18B15", "18B16", "18B17", "18B18", "18B19", "18B20", "01C01",
"01C02", "01C03", "01C04", "01C05", "01C06", "01C07", "01C08",
"01C09", "01C10", "01C11", "01C12", "01C13", "01C14", "01C15",
"01C16", "01C17", "01C18", "01C19", "01C20", "08C01", "08C02",
"08C03", "08C04", "08C05", "08C06", "08C07", "08C08", "08C09",
"08C10", "08C11", "08C12", "08C13", "08C14", "08C15", "08C16",
"08C17", "08C18", "08C19", "08C20", "18C01", "18C02", "18C03",
"18C04", "18C05", "18C06", "18C07", "18C08", "18C09", "18C10",
"18C11", "18C12", "18C13", "18C14", "18C15", "18C16", "18C17",
"18C18", "18C19", "18C20", "01D01", "01D02", "01D03", "01D04",
"01D05", "01D06", "01D07", "01D08", "01D09", "01D10", "01D11",
"01D12", "01D13", "01D14", "01D15", "01D16", "01D17", "01D18",
"01D19", "01D20", "08D01", "08D02", "08D03", "08D04", "08D05",
"08D06", "08D07", "08D08", "08D09", "08D10", "08D11", "08D12",
"08D13", "08D14", "08D15", "08D16", "08D17", "08D18", "08D19",
"08D20", "18D01", "18D02", "18D03", "18D04", "18D05", "18D06",
"18D07", "18D08", "18D09", "18D10", "18D11", "18D12", "18D13",
"18D14", "18D15", "18D16", "18D17", "18D18", "18D19", "18D20"
), class = "data.frame")
Years_Kikker Dataset
structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L), .Label = c("2001_A", "2001_B",
"2001_C", "2001_D", "2008_A", "2008_C", "2008_D", "2018_A", "2018_B",
"2018_C", "2018_D"), class = "factor")
I know you usually should use a barplot for categorical variables but in my case somebody split the continuous variable into groups and it would be nice to have a histogram anyway.
This is what I want to get (except as a histogram):
# Reserve two lines of outer margin at the bottom so the rotated category
# labels are not cut off.
par(oma = c(2, 0, 0, 0))
# One bar per income bracket, with the frequency of each bracket as its height.
barplot(
  height = table(hhincome),
  main = "Netto houshold income",
  ylab = "Frequency",
  col = "grey",
  border = "black",
  las = 2
)
(Note: A histogram has no space between bars and an x-axis)
Data:
hhincome <- structure(c(4L, 4L, 1L, 6L, 8L, 1L, 4L, 5L, 2L, 3L, 1L, 5L, 1L, 7L, 6L, 7L, 3L, 2L, 6L, 7L, 8L, 4L, 7L, 8L, 7L, 4L, 5L, 5L, 5L, 9L, 7L, 5L, 8L, 8L, 6L, 5L, 5L, 3L, 5L, 4L, 3L, 5L, 3L, 5L, 4L, 4L, 5L, 7L, 6L, 7L, 2L, 6L, 1L, 7L, 4L, 4L, 5L, 2L, 4L, 6L, 6L, 8L, 6L, 7L, 4L, 7L, 9L, 1L, 4L, 6L, 2L, 6L, 8L, 6L, 5L, 8L, 7L, 9L, 7L, 9L, 8L, 5L, 5L, 7L, 6L, 2L, 7L, 6L, 6L, 1L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 5L, 2L, 2L, 9L, 6L, 7L, 7L, 5L, 6L, 6L, 5L, 5L, 7L, 8L, 6L, 6L, 3L, 7L, 6L, 4L, 5L, 5L, 4L, 8L, 3L, 4L, 6L, 5L, 7L, 3L, 4L, 7L, 5L, 3L, 6L, 2L, 2L, 5L, 2L, 4L, 8L, 4L, 3L, 2L, 7L, 2L, 5L, 2L, 1L, 8L, 7L, 3L, 6L, 6L, 7L, 2L, 9L, 3L, 3L, 5L, 7L, 7L, 5L, 6L, 8L, 5L, 6L, 5L, 5L, 7L, 6L, 5L, 5L, 6L, 10L, 3L, 6L, 6L, 3L, 2L, 4L, 9L, 2L, 6L, 7L, 1L, 5L, 6L, 5L, 4L, 7L, 5L, 2L, 6L, 3L, 3L, 2L, 7L, 6L, 6L, 5L, 7L, 6L, 1L, 7L, 3L, 2L, 5L, 5L, 3L, 3L, 3L, 4L, 1L, 7L, 5L, 3L, 3L, 3L, 8L, 6L, 3L, 2L, 5L, 5L, 4L, 1L, 4L, 1L, 2L, 6L, 4L, 5L, 5L, 8L, 3L, 7L, 7L, 3L, 4L, 4L, 4L, 3L, 4L, 6L, 3L, 3L, 4L, 7L, 2L, 6L, 8L, 5L, 3L, 3L, 6L, 2L, 3L, 4L, 3L, 5L, 5L, 7L, 8L, 6L, 6L, 8L, 4L, 7L, 9L, 1L, 5L, 3L, 2L, 3L, 6L, 3L, 4L, 6L, 3L, 7L, 3L, 1L, 6L, 8L, 4L, 4L, 5L, 6L, 8L, 4L, 4L, 2L, 8L, 6L, 5L, 1L, 4L, 6L, 3L, 5L, 6L, 6L, 4L, 4L, 7L, 8L, 3L, 3L, 4L, 6L, 1L, 6L, 7L, 7L, 1L, 3L, 5L, 6L, 7L, 2L, 3L, 6L, 3L, 2L, 7L, 9L, 3L, 10L, 6L, 9L, 3L, 5L, 11L, 10L, 7L, 8L, 8L, 5L, 5L, 3L, 5L, 8L, 9L, 3L, 2L, 6L, 7L, 5L, 5L, 7L, 5L, 8L, 7L, 11L, 7L, 3L, 3L, 5L, 6L, 8L, 2L, 5L, 6L, 6L, 9L, 4L, 5L, 6L, 7L, 6L, 3L, 8L, 7L, 6L, 9L, 7L, 7L, 4L, 7L, 9L, 3L, 9L, 6L, 11L, 6L, 9L, 4L, 7L, 2L, 7L, 8L, 6L, 8L, 6L, 6L, 6L, 5L, 5L, 2L, 4L, 9L, 7L, 6L, 9L, 5L, 3L, 8L, 2L, 5L, 4L, 7L, 4L, 8L, 6L, 1L, 6L, 5L, 9L, 6L, 7L, 1L, 1L, 4L, 3L, 11L, 3L, 6L, 5L, 2L, 7L, 5L, 6L, 8L, 8L, 3L, 4L, 9L, 6L, 5L, 7L, 8L, 8L, 6L, 8L, 1L, 3L, 5L, 8L, 1L, 6L, 7L, 9L, 8L, 4L, 4L, 6L, 5L, 7L, 6L, 7L, 7L, 3L, 9L, 5L, 8L, 11L, 3L, 7L, 6L, 7L, 8L, 8L, 2L, 2L, 3L, 2L, 5L, 6L, 5L, 7L, 4L, 7L, 2L, 7L, 2L, 2L, 4L, 7L, 6L, 9L, 8L, 5L, 1L, 6L, 
3L, 10L, 1L, 7L, 4L, 7L, 5L, 6L, 8L, 4L, 8L, 4L, 5L, 8L, 6L, 7L, 7L, 8L, 7L, 7L, 6L, 7L, 5L, 7L, 9L, 5L, 7L, 4L, 2L, 7L, 3L, 6L, 3L, 8L, 5L, 2L, 6L, 7L, 7L), .Label = c("Less than 500 €", "500-900 €", "900-1300 €", "1300-1500 €", "1500-2000 €", "2000-2600 €", "2600-3500 €", "3500-4500 €", "4500-6000 €", "6000-8000 €", "8000€ or more"), class = "factor")
Thanks to Zheyuan Li I already have my answer. Instead of forcing a histogram, I can simply create a barplot that looks exactly like a histogram:
par(oma = c(2, 0, 0, 0)) # so labels are not cut off
# Uses `hhincome`, the factor defined in the Data section of this post.
barplot(table(hhincome),
  space = 0, # set space between bars to zero
  ylab = "Frequency", main = "Netto houshold income",
  border = "black", col = "grey", las = 2
)
# With space = 0 and the default bar width of 1, bar i covers [i - 1, i], so
# the category borders lie at 0, 1, ..., nlevels(hhincome). Passing the
# distinct borders (rather than the whole observation vector) draws each tick
# once and includes the left-most border at 0.
axis(1, at = 0:nlevels(hhincome), labels = FALSE)
box()
Edit: I just found an alternative way:
# Histogram of the factor's integer level codes; uses `hhincome`, the factor
# defined in the Data section of this post.
h <- hist(as.numeric(hhincome), # as.numeric converts factor levels to their integer codes
  # One unit-wide bin per level, centred on its code. The default breaks fall
  # on the integer codes themselves, which puts the two lowest codes into the
  # same first bin.
  breaks = seq(0.5, nlevels(hhincome) + 0.5, by = 1),
  xlab = "", ylab = "Frequency",
  main = "Netto houshold income \n(with normal disttribution curve)",
  border = "black", col = "grey", las = 2,
  xaxt = "n" # suppress the default x-axis, which would show codes instead of level names
)
# One tick per level, placed at the bin centre and labelled with the level name
# (instead of passing every observation as a tick position and label).
axis(1, at = seq_len(nlevels(hhincome)), labels = levels(hhincome), las = 2)
box()
This way it's possible to add a normal distribution curve as well:
# Overlay a normal curve on the histogram stored in `h`. Uses `hhincome`, the
# factor defined in the Data section of this post, via its integer level codes.
income_codes <- as.numeric(hhincome)
# 1100 evenly spaced x values across the observed range of codes
# (`length.out` spelled out instead of relying on partial argument matching).
xfit <- seq(min(income_codes), max(income_codes), length.out = 1100)
yfit <- dnorm(xfit, mean = mean(income_codes), sd = sd(income_codes))
# Rescale from density to counts: bin width (distance between the first two
# bin midpoints of `h`) times the number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(income_codes)
lines(xfit, yfit, col = "black", lwd = 2)
I know you usually should use a barplot for categorical variables but in my case somebody split the continuous variable into groups and it would be nice to have a histogram anyway.
This is what I want to get (except as a histogram):
# Reserve two lines of outer margin at the bottom so the rotated category
# labels are not cut off.
par(oma = c(2, 0, 0, 0))
# One bar per income bracket, with the frequency of each bracket as its height.
barplot(
  height = table(hhincome),
  main = "Netto houshold income",
  ylab = "Frequency",
  col = "grey",
  border = "black",
  las = 2
)
(Note: A histogram has no space between bars and an x-axis)
Data:
hhincome <- structure(c(4L, 4L, 1L, 6L, 8L, 1L, 4L, 5L, 2L, 3L, 1L, 5L, 1L, 7L, 6L, 7L, 3L, 2L, 6L, 7L, 8L, 4L, 7L, 8L, 7L, 4L, 5L, 5L, 5L, 9L, 7L, 5L, 8L, 8L, 6L, 5L, 5L, 3L, 5L, 4L, 3L, 5L, 3L, 5L, 4L, 4L, 5L, 7L, 6L, 7L, 2L, 6L, 1L, 7L, 4L, 4L, 5L, 2L, 4L, 6L, 6L, 8L, 6L, 7L, 4L, 7L, 9L, 1L, 4L, 6L, 2L, 6L, 8L, 6L, 5L, 8L, 7L, 9L, 7L, 9L, 8L, 5L, 5L, 7L, 6L, 2L, 7L, 6L, 6L, 1L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 5L, 2L, 2L, 9L, 6L, 7L, 7L, 5L, 6L, 6L, 5L, 5L, 7L, 8L, 6L, 6L, 3L, 7L, 6L, 4L, 5L, 5L, 4L, 8L, 3L, 4L, 6L, 5L, 7L, 3L, 4L, 7L, 5L, 3L, 6L, 2L, 2L, 5L, 2L, 4L, 8L, 4L, 3L, 2L, 7L, 2L, 5L, 2L, 1L, 8L, 7L, 3L, 6L, 6L, 7L, 2L, 9L, 3L, 3L, 5L, 7L, 7L, 5L, 6L, 8L, 5L, 6L, 5L, 5L, 7L, 6L, 5L, 5L, 6L, 10L, 3L, 6L, 6L, 3L, 2L, 4L, 9L, 2L, 6L, 7L, 1L, 5L, 6L, 5L, 4L, 7L, 5L, 2L, 6L, 3L, 3L, 2L, 7L, 6L, 6L, 5L, 7L, 6L, 1L, 7L, 3L, 2L, 5L, 5L, 3L, 3L, 3L, 4L, 1L, 7L, 5L, 3L, 3L, 3L, 8L, 6L, 3L, 2L, 5L, 5L, 4L, 1L, 4L, 1L, 2L, 6L, 4L, 5L, 5L, 8L, 3L, 7L, 7L, 3L, 4L, 4L, 4L, 3L, 4L, 6L, 3L, 3L, 4L, 7L, 2L, 6L, 8L, 5L, 3L, 3L, 6L, 2L, 3L, 4L, 3L, 5L, 5L, 7L, 8L, 6L, 6L, 8L, 4L, 7L, 9L, 1L, 5L, 3L, 2L, 3L, 6L, 3L, 4L, 6L, 3L, 7L, 3L, 1L, 6L, 8L, 4L, 4L, 5L, 6L, 8L, 4L, 4L, 2L, 8L, 6L, 5L, 1L, 4L, 6L, 3L, 5L, 6L, 6L, 4L, 4L, 7L, 8L, 3L, 3L, 4L, 6L, 1L, 6L, 7L, 7L, 1L, 3L, 5L, 6L, 7L, 2L, 3L, 6L, 3L, 2L, 7L, 9L, 3L, 10L, 6L, 9L, 3L, 5L, 11L, 10L, 7L, 8L, 8L, 5L, 5L, 3L, 5L, 8L, 9L, 3L, 2L, 6L, 7L, 5L, 5L, 7L, 5L, 8L, 7L, 11L, 7L, 3L, 3L, 5L, 6L, 8L, 2L, 5L, 6L, 6L, 9L, 4L, 5L, 6L, 7L, 6L, 3L, 8L, 7L, 6L, 9L, 7L, 7L, 4L, 7L, 9L, 3L, 9L, 6L, 11L, 6L, 9L, 4L, 7L, 2L, 7L, 8L, 6L, 8L, 6L, 6L, 6L, 5L, 5L, 2L, 4L, 9L, 7L, 6L, 9L, 5L, 3L, 8L, 2L, 5L, 4L, 7L, 4L, 8L, 6L, 1L, 6L, 5L, 9L, 6L, 7L, 1L, 1L, 4L, 3L, 11L, 3L, 6L, 5L, 2L, 7L, 5L, 6L, 8L, 8L, 3L, 4L, 9L, 6L, 5L, 7L, 8L, 8L, 6L, 8L, 1L, 3L, 5L, 8L, 1L, 6L, 7L, 9L, 8L, 4L, 4L, 6L, 5L, 7L, 6L, 7L, 7L, 3L, 9L, 5L, 8L, 11L, 3L, 7L, 6L, 7L, 8L, 8L, 2L, 2L, 3L, 2L, 5L, 6L, 5L, 7L, 4L, 7L, 2L, 7L, 2L, 2L, 4L, 7L, 6L, 9L, 8L, 5L, 1L, 6L, 
3L, 10L, 1L, 7L, 4L, 7L, 5L, 6L, 8L, 4L, 8L, 4L, 5L, 8L, 6L, 7L, 7L, 8L, 7L, 7L, 6L, 7L, 5L, 7L, 9L, 5L, 7L, 4L, 2L, 7L, 3L, 6L, 3L, 8L, 5L, 2L, 6L, 7L, 7L), .Label = c("Less than 500 €", "500-900 €", "900-1300 €", "1300-1500 €", "1500-2000 €", "2000-2600 €", "2600-3500 €", "3500-4500 €", "4500-6000 €", "6000-8000 €", "8000€ or more"), class = "factor")
Thanks to Zheyuan Li I already have my answer. Instead of forcing a histogram, I can simply create a barplot that looks exactly like a histogram:
par(oma = c(2, 0, 0, 0)) # so labels are not cut off
# Uses `hhincome`, the factor defined in the Data section of this post.
barplot(table(hhincome),
  space = 0, # set space between bars to zero
  ylab = "Frequency", main = "Netto houshold income",
  border = "black", col = "grey", las = 2
)
# With space = 0 and the default bar width of 1, bar i covers [i - 1, i], so
# the category borders lie at 0, 1, ..., nlevels(hhincome). Passing the
# distinct borders (rather than the whole observation vector) draws each tick
# once and includes the left-most border at 0.
axis(1, at = 0:nlevels(hhincome), labels = FALSE)
box()
Edit: I just found an alternative way:
# Histogram of the factor's integer level codes; uses `hhincome`, the factor
# defined in the Data section of this post.
h <- hist(as.numeric(hhincome), # as.numeric converts factor levels to their integer codes
  # One unit-wide bin per level, centred on its code. The default breaks fall
  # on the integer codes themselves, which puts the two lowest codes into the
  # same first bin.
  breaks = seq(0.5, nlevels(hhincome) + 0.5, by = 1),
  xlab = "", ylab = "Frequency",
  main = "Netto houshold income \n(with normal disttribution curve)",
  border = "black", col = "grey", las = 2,
  xaxt = "n" # suppress the default x-axis, which would show codes instead of level names
)
# One tick per level, placed at the bin centre and labelled with the level name
# (instead of passing every observation as a tick position and label).
axis(1, at = seq_len(nlevels(hhincome)), labels = levels(hhincome), las = 2)
box()
This way it's possible to add a normal distribution curve as well:
# Overlay a normal curve on the histogram stored in `h`. Uses `hhincome`, the
# factor defined in the Data section of this post, via its integer level codes.
income_codes <- as.numeric(hhincome)
# 1100 evenly spaced x values across the observed range of codes
# (`length.out` spelled out instead of relying on partial argument matching).
xfit <- seq(min(income_codes), max(income_codes), length.out = 1100)
yfit <- dnorm(xfit, mean = mean(income_codes), sd = sd(income_codes))
# Rescale from density to counts: bin width (distance between the first two
# bin midpoints of `h`) times the number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(income_codes)
lines(xfit, yfit, col = "black", lwd = 2)