How do I plot a sequence of numbers using ggplot2 - r

Suppose I have a sequence of numbers:
> dput(firstGrade_count)
c(4L, 346L, 319L, 105L, 74L, 5L, 124L, 2L, 10L, 35L, 6L, 206L,
7L, 8L, 6L, 9L, 26L, 1L, 35L, 18L, 4L, 4L, 2L, 63L, 6L, 23L,
6L, 82L, 10L, 17L, 45L, 74L, 10L, 8L, 14L, 23L, 26L, 53L, 55L,
16L, 2L, 141L, 113L, 98L, 179L, 13L, 34L, 16L, 8L, 144L, 2L,
141L, 26L, 9L, 125L, 201L, 32L, 452L, 179L, 30L, 4L, 141L, 5L,
40L, 7L, 255L, 120L, 223L, 28L, 252L, 21L, 8L, 362L, 4L, 5L,
2L, 285L, 18L, 76L, 5L, 73L, 11L, 367L, 7L, 50L, 6L, 37L, 15L,
48L, 5L, 12L, 7L, 96L)
I want to plot it using ggplot2 so the result would be something similar to:
barplot(firstGrade_count)
How would I define the aesthetics in ggplot2?
Here is the plot produced by base plot that I mentioned above:

firstGrade_count <- c(4,346,319,105,74,5,124,2,10,35,6,206,7,8,6,9,26,1,35,18,4,4,2,63,6,23,6,
82,10,17,45,74,10,8,14,23,26,53,55,16,2,141,113,98,179,13,34,16,8,144,2,141,26,9,
125,201,32,452,179,30,4,141,5,40,7,255,120,223,28,252,21,8,362,4,5,2,285,18,76,5,73,
11,367,7,50,6,37,15,48,5,12,7,96)
You can optionally make this into a data frame to make it easier to work with and to hold any additional features, though it's not required.
library(ggplot2)
# Optional transformation to data.frame. The plotting call below refers to the
# resulting column by the name `firstGrade_count`.
firstGrade_count <- as.data.frame(firstGrade_count)
# Index for x-coordinates: one position per observation. seq_len() is also
# correct for zero rows, whereas 1:nrow() would yield c(1, 0).
firstGrade_count$index <- seq_len(nrow(firstGrade_count))
# Plotting. The plot object is called `p` so that base::c() is not masked.
p <- ggplot(firstGrade_count, aes(index, firstGrade_count))
# stat = "identity" makes each bar as tall as its y value instead of a count.
p + geom_bar(stat = "identity")
Or,
# Wrap the values in a data frame (nothing changes if it already is one).
firstGrade_count <- as.data.frame(firstGrade_count)
# Treat every distinct value as a category; with its default stat, geom_bar()
# draws one bar per category whose height is the number of occurrences.
# The plot object is named `p` rather than `c` so base::c() is not masked.
p <- ggplot(firstGrade_count, aes(factor(firstGrade_count)))
p + geom_bar()
The way I set it up shows you the count of each unique value. There are many variations and additional formats you could add:
If you don't want the count you can add the stat = option and change it from the default to something else.

Without creating any dataframe:
# Plot straight from the vector: x is the position of each value, y the value.
# seq_along() stays correct for an empty vector, where 1:length(x) would
# produce c(1, 0). The empty xlab hides the otherwise auto-generated x title.
ggplot() +
  geom_bar(
    aes(seq_along(firstGrade_count), firstGrade_count),
    stat = 'identity'
  ) +
  xlab('')

Related

Loop with multiple subset of data frame

I have a data.frame fish.test0 from which I want to grep specific variables (listed in varlist) by matching the group column, to create a sub-data.frame that will undergo a statistical test. The results of the test are saved in tests.res.t. I want to loop over varlist so that I get one result for each entry in varlist.
Script:
# Taxa to process; the loop below is meant to produce one result per entry.
varlist <- c("Abiotrophia","Alphatorquevirus")
for (i in varlist) {
# NOTE(review): "i" is a quoted literal, so grep() looks for the letter "i"
# in the group labels rather than for the current value of the loop variable.
fish.test <- fish.test0[grep("i",fish.test0$group),]
# NOTE(review): the fish.test0 sample shown with this code has a column named
# "ABCD", not "ACDC" - confirm which name is intended.
column <- c("ACDC")
# tests is created here but not used again in the code shown.
tests <- list()
# For every name in `column` and every distinct Merge value, run fisher.test()
# on a two-column table: column a comes from the second row with that Merge
# value (Present minus colx, then colx), column b from the first such row
# (NotPresent minus colx, then colx).
dat_test <- sapply( column, function(colx)
lapply( unique(fish.test$Merge), function(x)
fisher.test( data.frame(
a=c(( fish.test[ which(fish.test$Merge %in% x)[2],"Present"] -
fish.test[ which(fish.test$Merge %in% x)[2], colx] ),fish.test[ which(fish.test$Merge %in% x)[2], colx]
),
b=c(( fish.test[ which(fish.test$Merge %in% x)[1],"NotPresent"] -
fish.test[ which(fish.test$Merge %in% x)[1], colx] ), fish.test[ which(fish.test$Merge %in% x)[1], colx]))) #,alternative = "greater"
) )
# Label the results: one row per Merge value, one column per tested column.
rownames(dat_test) <- unique(fish.test$Merge )
colnames(dat_test) <- column
# From each test in the first column, pull the first two elements of the
# estimate, both confidence-interval bounds and the p-value.
tests.res <- sapply(dat_test[1:dim(dat_test)[1],1], function(x) {
c(x$estimate[1],
x$estimate[2],
ci.lower = x$conf.int[1],
ci.upper = x$conf.int[2],
p.value = x$p.value)
})
# Transpose so that each test becomes a row.
# NOTE(review): tests.res.t is reassigned on every iteration, so only the
# result of the last varlist entry is left once the loop has finished.
tests.res.t <- as.data.frame(t(tests.res))
}
test-data:
fish.test0 <- structure(list(Present = c(4L, 4L, 9L, 9L, 57L, 57L, 146L, 146L,
91L, 91L, 26L, 26L, 6L, 6L, 12L, 12L, 33L, 33L, 10L, 10L, 66L,
66L, 4L, 4L, 4L, 4L, 9L, 9L, 18L, 18L, 19L, 19L, 51L, 51L, 50L,
50L, 12L, 12L, 7L, 7L, 14L, 14L, 27L, 27L, 9L, 9L, 5L, 5L, 6L,
6L, 22L, 22L, 3L, 3L, 14L, 14L, 4L, 4L, 15L, 15L, 6L, 6L, 8L,
8L, 4L, 4L), NotPresent = c(11L, 11L, 44L, 44L, 126L, 126L, 532L,
532L, 382L, 382L, 97L, 97L, 14L, 14L, 43L, 43L, 85L, 85L, 41L,
41L, 336L, 336L, 19L, 19L, 27L, 27L, 67L, 67L, 108L, 108L, 81L,
81L, 240L, 240L, 258L, 258L, 47L, 47L, 31L, 31L, 82L, 82L, 110L,
110L, 63L, 63L, 178L, 178L, 672L, 672L, 451L, 451L, 120L, 120L,
104L, 104L, 47L, 47L, 387L, 387L, 94L, 94L, 300L, 300L, 133L,
133L), group = c("G__Abiotrophia_NotPresent_Anus", "G__Abiotrophia_Present_Anus",
"G__Abiotrophia_NotPresent_Bile duct", "G__Abiotrophia_Present_Bile duct",
"G__Abiotrophia_NotPresent_Bone/Soft tissue", "G__Abiotrophia_Present_Bone/Soft tissue",
"G__Abiotrophia_NotPresent_Breast", "G__Abiotrophia_Present_Breast",
"G__Abiotrophia_NotPresent_Colorectum", "G__Abiotrophia_Present_Colorectum",
"G__Abiotrophia_NotPresent_Esophagus", "G__Abiotrophia_Present_Esophagus",
"G__Abiotrophia_NotPresent_Gallbladder", "G__Abiotrophia_Present_Gallbladder",
"G__Abiotrophia_NotPresent_Head and neck", "G__Abiotrophia_Present_Head and neck",
"G__Abiotrophia_NotPresent_Kidney", "G__Abiotrophia_Present_Kidney",
"G__Abiotrophia_NotPresent_Liver", "G__Abiotrophia_Present_Liver",
"G__Abiotrophia_NotPresent_Lung", "G__Abiotrophia_Present_Lung",
"G__Abiotrophia_NotPresent_Lymphoid tissue", "G__Abiotrophia_Present_Lymphoid tissue",
"G__Abiotrophia_NotPresent_Mesothelium", "G__Abiotrophia_Present_Mesothelium",
"G__Abiotrophia_NotPresent_Nervous system", "G__Abiotrophia_Present_Nervous system",
"G__Abiotrophia_NotPresent_Ovary", "G__Abiotrophia_Present_Ovary",
"G__Abiotrophia_NotPresent_Pancreas", "G__Abiotrophia_Present_Pancreas",
"G__Abiotrophia_NotPresent_Prostate", "G__Abiotrophia_Present_Prostate",
"G__Abiotrophia_NotPresent_Skin", "G__Abiotrophia_Present_Skin",
"G__Abiotrophia_NotPresent_Small intestine", "G__Abiotrophia_Present_Small intestine",
"G__Abiotrophia_NotPresent_Stomach", "G__Abiotrophia_Present_Stomach",
"G__Abiotrophia_NotPresent_Unknown", "G__Abiotrophia_Present_Unknown",
"G__Abiotrophia_NotPresent_Urothelial tract", "G__Abiotrophia_Present_Urothelial tract",
"G__Abiotrophia_NotPresent_Uterus", "G__Abiotrophia_Present_Uterus",
"G__Alphatorquevirus_NotPresent_Bone/Soft tissue", "G__Alphatorquevirus_Present_Bone/Soft tissue",
"G__Alphatorquevirus_NotPresent_Breast", "G__Alphatorquevirus_Present_Breast",
"G__Alphatorquevirus_NotPresent_Colorectum", "G__Alphatorquevirus_Present_Colorectum",
"G__Alphatorquevirus_NotPresent_Esophagus", "G__Alphatorquevirus_Present_Esophagus",
"G__Alphatorquevirus_NotPresent_Kidney", "G__Alphatorquevirus_Present_Kidney",
"G__Alphatorquevirus_NotPresent_Liver", "G__Alphatorquevirus_Present_Liver",
"G__Alphatorquevirus_NotPresent_Lung", "G__Alphatorquevirus_Present_Lung",
"G__Alphatorquevirus_NotPresent_Pancreas", "G__Alphatorquevirus_Present_Pancreas",
"G__Alphatorquevirus_NotPresent_Skin", "G__Alphatorquevirus_Present_Skin",
"G__Alphatorquevirus_NotPresent_Urothelial tract", "G__Alphatorquevirus_Present_Urothelial tract"
), ABCD = c(3L, 2L, 17L, 6L, 34L, 18L, 240L, 53L, 321L, 73L,
87L, 25L, 6L, 3L, 20L, 8L, 15L, 7L, 19L, 4L, 265L, 42L, 6L, 1L,
4L, 2L, 22L, 4L, 70L, 13L, 54L, 12L, 116L, 33L, 58L, 11L, 6L,
2L, 26L, 6L, 42L, 8L, 74L, 18L, 19L, 3L, 52L, 0L, 288L, 5L, 377L,
17L, 110L, 2L, 19L, 3L, 21L, 2L, 298L, 9L, 60L, 6L, 68L, 1L,
89L, 3L), Total = c(15L, 15L, 53L, 53L, 183L, 183L, 678L, 678L,
473L, 473L, 123L, 123L, 20L, 20L, 55L, 55L, 118L, 118L, 51L,
51L, 402L, 402L, 23L, 23L, 31L, 31L, 76L, 76L, 126L, 126L, 100L,
100L, 291L, 291L, 308L, 308L, 59L, 59L, 38L, 38L, 96L, 96L, 137L,
137L, 72L, 72L, 183L, 183L, 678L, 678L, 473L, 473L, 123L, 123L,
118L, 118L, 51L, 51L, 402L, 402L, 100L, 100L, 308L, 308L, 137L,
137L), Merge = c("Abiotrophia_Anus", "Abiotrophia_Anus", "Abiotrophia_Bile duct",
"Abiotrophia_Bile duct", "Abiotrophia_Bone/Soft tissue", "Abiotrophia_Bone/Soft tissue",
"Abiotrophia_Breast", "Abiotrophia_Breast", "Abiotrophia_Colorectum",
"Abiotrophia_Colorectum", "Abiotrophia_Esophagus", "Abiotrophia_Esophagus",
"Abiotrophia_Gallbladder", "Abiotrophia_Gallbladder", "Abiotrophia_Head and neck",
"Abiotrophia_Head and neck", "Abiotrophia_Kidney", "Abiotrophia_Kidney",
"Abiotrophia_Liver", "Abiotrophia_Liver", "Abiotrophia_Lung",
"Abiotrophia_Lung", "Abiotrophia_Lymphoid tissue", "Abiotrophia_Lymphoid tissue",
"Abiotrophia_Mesothelium", "Abiotrophia_Mesothelium", "Abiotrophia_Nervous system",
"Abiotrophia_Nervous system", "Abiotrophia_Ovary", "Abiotrophia_Ovary",
"Abiotrophia_Pancreas", "Abiotrophia_Pancreas", "Abiotrophia_Prostate",
"Abiotrophia_Prostate", "Abiotrophia_Skin", "Abiotrophia_Skin",
"Abiotrophia_Small intestine", "Abiotrophia_Small intestine",
"Abiotrophia_Stomach", "Abiotrophia_Stomach", "Abiotrophia_Unknown",
"Abiotrophia_Unknown", "Abiotrophia_Urothelial tract", "Abiotrophia_Urothelial tract",
"Abiotrophia_Uterus", "Abiotrophia_Uterus", "Alphatorquevirus_Bone/Soft tissue",
"Alphatorquevirus_Bone/Soft tissue", "Alphatorquevirus_Breast",
"Alphatorquevirus_Breast", "Alphatorquevirus_Colorectum", "Alphatorquevirus_Colorectum",
"Alphatorquevirus_Esophagus", "Alphatorquevirus_Esophagus", "Alphatorquevirus_Kidney",
"Alphatorquevirus_Kidney", "Alphatorquevirus_Liver", "Alphatorquevirus_Liver",
"Alphatorquevirus_Lung", "Alphatorquevirus_Lung", "Alphatorquevirus_Pancreas",
"Alphatorquevirus_Pancreas", "Alphatorquevirus_Skin", "Alphatorquevirus_Skin",
"Alphatorquevirus_Urothelial tract", "Alphatorquevirus_Urothelial tract"
)), row.names = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 9L, 12L,
11L, 13L, 14L, 16L, 15L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 28L, 27L, 29L, 30L, 31L, 32L, 34L, 33L, 35L, 36L, 38L,
37L, 40L, 39L, 42L, 43L, 45L, 44L, 47L, 46L, 1011L, 1012L, 1014L,
1013L, 1015L, 1016L, 1017L, 1018L, 1019L, 1020L, 1022L, 1021L,
1023L, 1024L, 1026L, 1025L, 1027L, 1028L, 1029L, 1030L), class = "data.frame")
This is probably not an answer, but it should help to improve your code. If I'm terribly wrong, I'll remove my answer right away. I have left out the test business, which I don't understand, but your problem seems to be the extraction.
The first thing is that you need to remove the quotation marks in your grep command, try:
varlist <- c("Abiotrophia", "Alphatorquevirus")
for (taxon in varlist) {
  # Keep only the rows whose group label mentions the current taxon.
  matching_rows <- grep(taxon, fish.test0$group)
  fish.test <- fish.test0[matching_rows, ]
  # Show the first rows so the extraction can be checked by eye.
  print(head(fish.test))
}
From what I understand, you need to define column and tests outside your loop. Does that give you more of what you want:
varlist <- c("Abiotrophia", "Alphatorquevirus")
column <- "ACDC"
# One slot per taxon, allocated up front and named, so a result can be looked
# up by position (tests[[1]]) or by taxon (tests[["Abiotrophia"]]).
tests <- setNames(vector("list", length(varlist)), varlist)
# seq_along() gives an empty sequence for an empty varlist, whereas
# 1:length(varlist) would iterate over c(1, 0).
for (i in seq_along(varlist)) {
  # extract rows which contain the variable
  fish.test <- fish.test0[grep(varlist[i], fish.test0$group), ]
  # add a column with your name of choice; seq_len() keeps this valid even
  # when no row matched (1:0 would supply two values for zero rows)
  fish.test[[column]] <- seq_len(nrow(fish.test))
  # write each result into your defined list
  tests[[i]] <- fish.test
}

Adding a year to a time series with only months and days

I have a df like this
month day x
1 1 1 84
2 1 2 43
3 1 3 49
4 1 4 67
5 1 5 59
......
366 12 31 97
The year should be 2019 from Oct-Dec and 2020 from Jan-Sep
I tried to use
df$year<-as.date(df,origin='2019-01-01')
But I am not sure how to make an argument.
I want the year column to get a date column and try then
df$date<-as.date(with(paste("???",month,day,sep="-"), %Y-%m-%d,origin ="2019-01-01")
but again I don't know how to make an argument for year
Please any help would save me a lot of time because doing it manually seems impossible
We could use an ifelse statement with the make_date function from lubridate:
library(dplyr)
library(lubridate)
# Oct-Dec belong to 2019 and every other month to 2020; the date is then
# assembled from the year, month and day columns.
df %>%
  mutate(year = ifelse(month %in% 10:12, 2019, 2020)) %>%
  mutate(date = make_date(year, month, day))
output:
month day x year date
1 1 1 84 2020 2020-01-01
2 1 2 43 2020 2020-01-02
3 11 3 49 2019 2019-11-03
4 1 4 67 2020 2020-01-04
5 1 5 59 2020 2020-01-05
366 12 31 97 2019 2019-12-31
You could use something like below. If you need a fixed variable instead of 2019/2020 you can use something like var-1 when it is oct-dec and var when it is jan - sep.
library(dplyr)
library(lubridate)
# Oct-Dec are dated 2019, Jan-Sep 2020. The columns are referenced directly
# (not as df1$month / df1$day), so the same pipeline keeps working when it is
# applied to grouped, filtered or differently named data, and each row's date
# is parsed only once.
df1 %>%
  mutate(
    date = ymd(paste(if_else(month %in% 10:12, 2019, 2020), month, day, sep = "-"))
  )
data:
df1 <- data.frame(month = c(1:12), day = 1, x = 5)
Using base features you could use rowSums to identify the 31st of October, then ISOdate.
# Locate 31 October: in a table ordered by calendar date, month + day == 41 is
# first reached on that row (no earlier date sums to 41).
# Assumes one row per day, in date order, with all of October present.
is_oct31 <- rowSums(d[1:2]) == 31 + 10
stopifnot(any(is_oct31))
w <- which.max(is_oct31)
# October has 31 days, so the last September row is w - 31. Rows up to there
# (Jan-Sep) get 2020 and the remaining rows (Oct-Dec) get 2019. nrow(d) is
# used instead of a fixed 365 so a table that includes 29 February also works.
n_2020 <- w - 31L
d$year <- rep(c(2020, 2019), c(n_2020, nrow(d) - n_2020))
# Build the Date column; do.call() passes the columns of d as named arguments
# and `...` absorbs the ones that are not needed (such as x).
d$date <- do.call(\(year, month, day, ...) as.Date(ISOdate(year, month, day)), d)
Result
head(d, 3)
# month day x year date
# 1 1 1 58 2020 2020-01-01
# 2 1 2 74 2020 2020-01-02
# 3 1 3 43 2020 2020-01-03
tail(d, 3)
# month day x year date
# 363 12 29 46 2019 2019-12-29
# 364 12 30 82 2019 2019-12-30
# 365 12 31 63 2019 2019-12-31
Note:
R.version.string
# [1] "R version 4.1.1 (2021-08-10)"
Data:
d <- structure(list(month = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), day = c(1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L,
31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L,
26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L,
22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L,
1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L,
15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L,
28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L,
21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L,
30L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L,
27L, 28L, 29L, 30L, 31L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L,
23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 30L, 31L),
x = c(72L, 95L, 95L, 76L, 84L, 64L, 85L, 84L, 70L, 95L, 75L,
64L, 72L, 48L, 68L, 68L, 44L, 53L, 46L, 49L, 62L, 53L, 74L,
86L, 58L, 63L, 85L, 85L, 81L, 44L, 66L, 82L, 86L, 90L, 75L,
54L, 53L, 52L, 47L, 48L, 61L, 95L, 96L, 73L, 59L, 57L, 94L,
70L, 81L, 68L, 83L, 83L, 95L, 55L, 73L, 51L, 50L, 83L, 58L,
45L, 74L, 64L, 54L, 60L, 77L, 94L, 90L, 47L, 44L, 50L, 70L,
69L, 76L, 69L, 62L, 63L, 62L, 55L, 47L, 43L, 71L, 47L, 66L,
69L, 74L, 53L, 85L, 62L, 53L, 57L, 52L, 65L, 85L, 68L, 62L,
43L, 72L, 69L, 79L, 71L, 95L, 45L, 96L, 70L, 96L, 51L, 48L,
67L, 52L, 48L, 72L, 54L, 64L, 79L, 49L, 55L, 90L, 57L, 51L,
63L, 79L, 69L, 48L, 52L, 89L, 70L, 95L, 64L, 75L, 95L, 70L,
94L, 95L, 43L, 87L, 56L, 46L, 53L, 60L, 91L, 61L, 88L, 83L,
89L, 45L, 87L, 69L, 83L, 71L, 44L, 93L, 96L, 80L, 46L, 80L,
66L, 80L, 59L, 86L, 51L, 48L, 80L, 81L, 79L, 65L, 80L, 72L,
84L, 61L, 55L, 49L, 54L, 60L, 44L, 44L, 84L, 49L, 94L, 45L,
80L, 79L, 51L, 70L, 48L, 66L, 89L, 60L, 57L, 76L, 86L, 88L,
71L, 79L, 94L, 74L, 93L, 80L, 75L, 90L, 91L, 77L, 95L, 48L,
90L, 77L, 50L, 49L, 56L, 71L, 73L, 62L, 85L, 90L, 76L, 67L,
44L, 96L, 52L, 73L, 85L, 44L, 44L, 79L, 89L, 93L, 58L, 57L,
75L, 48L, 58L, 59L, 51L, 64L, 89L, 82L, 76L, 51L, 56L, 46L,
82L, 48L, 76L, 93L, 60L, 52L, 75L, 77L, 53L, 52L, 56L, 50L,
66L, 70L, 67L, 87L, 90L, 50L, 80L, 54L, 81L, 54L, 73L, 88L,
64L, 52L, 64L, 73L, 79L, 68L, 53L, 86L, 94L, 56L, 62L, 65L,
85L, 61L, 54L, 93L, 60L, 69L, 82L, 83L, 56L, 51L, 82L, 71L,
76L, 77L, 60L, 79L, 61L, 83L, 87L, 43L, 74L, 76L, 63L, 59L,
54L, 93L, 82L, 65L, 89L, 68L, 62L, 61L, 91L, 89L, 79L, 59L,
52L, 80L, 71L, 96L, 46L, 84L, 47L, 92L, 80L, 86L, 64L, 88L,
56L, 93L, 94L, 66L, 46L, 87L, 63L, 89L, 92L, 88L, 65L, 90L,
71L, 53L, 91L, 61L, 91L, 62L, 62L, 48L, 80L, 73L, 62L, 75L,
59L, 72L, 61L, 90L, 51L, 66L, 74L, 58L, 73L, 89L, 50L, 79L,
90L, 94L, 59L, 47L, 88L, 83L)), row.names = c(NA, -365L), class = "data.frame")
Base R option -
# Oct-Dec are dated 2019 and Jan-Sep 2020; the pieces are pasted into
# "year-month-day" text, which as.Date() then parses into a new date column.
within(df, {
  date <- as.Date(paste(ifelse(month %in% 10:12, 2019, 2020), month, day, sep = '-'))
})
# month day x date
#1 1 1 84 2020-01-01
#2 1 2 43 2020-01-02
#3 11 3 49 2019-11-03
#4 1 4 67 2020-01-04
#5 1 5 59 2020-01-05

Problem with Shiny filtered selectinput and ggplot graph

I have a shinyscript prepared where i want to show a graph based on two widgets.
The first widget(selectInput) controls for the area i want to show a diagnostic plot for.
The second widget (checkboxGroupInput) controls for the amount of data i want to show for the area selected with the first widget. So, the options for the checkboxes depend on what is selected with the selectInput.
I solved this with a htmlOutput("") in the UI and a corresponding renderUI in the server.
Everything works fine, but when i proceed to the plotting, something weird happens.
I can use a reactive filter to control for the area as selected with selectInput, but when I extend the filter to also work with the checkboxGroupInput, I get the following error when I run the app:
Warning: Error in : Problem with filter() input ..2.
x Input ..2 must be of size 611 or 1, not size 0.
i Input ..2 is Code == input$code.
202:
This only shows when all the checkboxes are unchecked and no graph is visible. I Can plot the graph that corresponds with the checkboxes, but it only shows 5 barcharts (when for example ten are to be plotted) and the error is given.
Can someone tell me if there is something wrong with my code? And how can I resolve the error and work with these dependent widgets?
Below my code and data
Code
#libraries needed
library(shiny)
library(ggplot2)
library(dplyr)
#data needed
df <- "load in data"
# user interface ----
# Page layout: a single tab with a sidebar (terrain selector plus a
# server-rendered placeholder) and a main panel that holds the plot.
ui <- fluidPage(
tabsetPanel(
tabPanel("diagnostische tabellen",fluid = TRUE,
titlePanel("PQ analyse"),
sidebarLayout(
sidebarPanel(
helpText("selecteer terrein waar je de PQ-data van wil bekijken"),
#make first dropdownmenu for area
selectInput("terrein",
label = "Kies een terrein",
choices = sort((unique(df$Terrein))),
selected = 1),
# Placeholder that the server fills through output$code.
# NOTE(review): the checkbox group rendered into it is also created with the
# id "code", so an output and an input share the same id.
htmlOutput("code")
),
mainPanel(plotOutput("map1"))))
)
)
# Server logic ----------------------------
# Server logic: renders the PQ-code checkboxes for the chosen terrain, filters
# df by both widgets and draws the bar chart.
server <- function(input, output){
# create a reactive list of PQ-codes based on previous selection for area
output$code <- renderUI({
data_available <- df[df$Terrein == input$terrein, "Code"]
checkboxGroupInput("code",
label = "PQ-code",
choices = sort(unique(data_available)),
selected = unique(data_available))
})
## filter the data for the graph
# NOTE(review): input$code can hold several codes (or none), and == compares
# it element-wise with the Code column; this is the expression named in the
# error message quoted in the question.
filtered_data <- reactive({
filter(df, Terrein == input$terrein, Code == input$code)
})
## GGplot graph
# One bar per Code, stacked and coloured by Jaar.
# NOTE(review): the df sample shown with this code has only Terrein and Code
# columns; Jaar presumably exists in the full data - confirm.
output$map1 <- renderPlot({
ggplot(filtered_data(), aes( x = Code, fill = as.character(Jaar))) +
geom_bar(position = position_stack(reverse = TRUE))+
theme(axis.text.x = element_text(angle = 45, size = 15))+
scale_fill_brewer()+
labs(fill='Jaar')+
ggtitle(paste("Aantal herhalingen PQ's op",input$terrein))
})
}
# Run app
shinyApp(ui, server)
df
structure(list(Terrein = structure(c(25L, 25L, 25L, 25L, 1L,
1L, 1L, 1L, 1L, 1L, 29L, 29L, 13L, 13L, 13L, 7L, 7L, 7L, 7L,
7L, 7L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 8L, 8L, 8L, 13L, 8L, 8L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L,
13L, 13L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L,
16L, 16L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 13L, 13L,
13L, 13L, 24L, 24L, 24L, 24L, 28L, 28L, 28L, 28L, 2L, 2L, 2L,
2L, 2L, 2L, 23L, 23L, 23L, 23L, 23L, 22L, 21L, 21L, 21L, 21L,
21L, 7L, 7L, 7L, 7L, 7L, 7L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
14L, 14L, 14L, 14L, 14L, 14L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
18L, 18L, 18L, 18L, 30L, 30L, 30L, 30L, 20L, 10L, 10L, 10L, 10L,
10L, 13L, 13L, 13L, 6L, 6L, 6L, 6L, 6L, 5L, 5L, 5L, 3L, 3L, 3L,
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 25L, 1L, 1L, 17L, 17L, 17L, 17L,
13L, 13L, 13L, 13L, 13L, 13L, 23L, 23L, 23L, 23L, 23L, 3L, 3L,
3L, 13L, 3L, 10L, 10L, 25L, 25L, 25L, 25L, 14L, 14L, 14L, 14L,
14L, 14L, 23L, 23L, 23L, 23L, 23L, 15L, 15L, 15L, 15L, 16L, 16L,
16L, 5L, 5L, 5L, 5L, 5L, 12L, 12L, 12L, 12L, 12L, 19L, 15L, 15L,
15L, 15L, 9L, 16L, 16L, 16L, 8L, 19L, 16L, 19L, 8L, 8L, 16L,
16L, 16L, 8L, 8L, 8L, 8L, 8L, 19L, 16L, 19L, 8L, 16L, 16L, 16L,
8L, 16L, 25L, 15L, 15L, 15L, 15L, 15L, 15L, 25L, 21L, 21L, 21L,
7L, 7L, 7L, 12L, 12L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 11L,
11L, 11L, 11L, 11L, 11L, 11L, 10L, 10L, 10L, 15L, 15L, 28L, 28L,
28L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 25L, 25L, 25L, 7L, 7L,
7L, 22L, 23L, 23L, 23L, 23L, 23L, 1L, 1L, 1L, 1L, 1L, 23L, 23L,
23L, 23L, 15L, 15L, 15L, 15L, 29L, 29L, 26L, 26L, 26L, 26L, 26L,
26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L,
26L, 7L, 7L, 7L, 5L, 5L, 5L, 5L, 5L, 20L, 12L, 12L, 8L, 20L,
20L, 20L, 20L, 7L, 7L, 7L, 12L, 25L, 25L, 25L, 24L, 24L, 24L,
20L, 20L, 15L, 15L, 15L, 15L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 15L, 15L,
15L, 15L, 14L, 14L, 14L, 14L, 14L, 14L, 12L, 8L, 8L, 8L, 8L,
21L, 21L, 21L, 12L, 10L, 2L, 1L, 1L, 1L, 1L, 1L, 10L, 10L, 15L,
15L, 15L, 15L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L,
26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 6L, 6L, 6L, 6L,
6L, 14L, 14L, 14L, 14L, 23L, 23L, 23L, 23L, 15L, 15L, 15L, 15L,
15L, 15L, 15L, 15L, 21L, 21L, 21L, 26L, 26L, 26L, 25L, 25L, 23L,
23L, 23L, 23L, 26L, 26L, 26L, 13L, 15L, 15L, 15L, 15L, 10L, 10L,
10L, 10L, 26L, 26L, 26L, 13L, 13L, 13L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 23L, 23L, 23L, 23L, 23L, 1L, 1L, 1L, 1L,
1L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 15L, 15L, 15L, 15L, 23L,
23L, 23L, 23L, 23L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 13L,
25L, 25L, 21L, 21L, 21L, 12L, 13L, 13L, 13L, 13L, 2L), .Label = c("Arnhemse Heide",
"ASK Doornspijkse Heide", "ASK Oldenbroekse Heide", "Balloërveld",
"Convooi AOCS Nieuw-Milligen", "De Dellen", "de Kom", "De Stompert & Vlasakkers",
"Deelen, VB", "Eder- en Ginkelse Heide", "Ermelosche Heide",
"Havelte", "ISK Harskamp", "Joost Dourleinkazerne", "Kruispeel en Achterbroek",
"Leusderheide", "Luitenant-Generaal Best Kazerne (vml. VB de Peel)",
"Olst-Welsum", "Oude Kamp", "Oude Molen", "Radiostation Noordwijk",
"Rucphense Heide", "Schinveldse Bossen", "Stroese Zand", "Uilenbosch (Waalsdorp)",
"Vliehors", "Vughtse Heide", "Weerter- en Bosoverheide", "Woensdrechtse Heide",
"Zwaluwenberg"), class = "factor"), Code = structure(c(230L,
228L, 228L, 231L, 4L, 5L, 6L, 1L, 2L, 3L, 239L, 240L, 100L, 101L,
102L, 116L, 117L, 118L, 119L, 120L, 121L, 10L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 26L, 27L, 28L, 30L, 29L, 14L, 15L, 16L, 23L, 24L,
25L, 17L, 18L, 19L, 20L, 21L, 22L, 44L, 45L, 46L, 47L, 48L, 49L,
216L, 217L, 218L, 102L, 214L, 215L, 31L, 42L, 35L, 36L, 37L,
38L, 43L, 32L, 33L, 34L, 39L, 40L, 41L, 71L, 71L, 72L, 59L, 60L,
61L, 62L, 57L, 65L, 63L, 64L, 58L, 55L, 56L, 67L, 68L, 68L, 69L,
70L, 70L, 91L, 92L, 78L, 79L, 80L, 73L, 74L, 75L, 76L, 77L, 103L,
100L, 105L, 108L, 102L, 101L, 104L, 109L, 107L, 106L, 94L, 95L,
93L, 96L, 99L, 97L, 98L, 122L, 123L, 124L, 125L, 135L, 136L,
225L, 222L, 219L, 220L, 221L, 223L, 226L, 224L, 227L, 106L, 105L,
107L, 104L, 188L, 189L, 186L, 187L, 236L, 235L, 237L, 238L, 55L,
56L, 57L, 58L, 59L, 60L, 176L, 177L, 178L, 179L, 180L, 175L,
143L, 144L, 145L, 146L, 147L, 116L, 119L, 117L, 118L, 121L, 120L,
163L, 165L, 160L, 161L, 162L, 164L, 166L, 111L, 110L, 112L, 113L,
114L, 115L, 84L, 85L, 86L, 87L, 88L, 89L, 90L, 167L, 168L, 169L,
170L, 241L, 242L, 242L, 242L, 160L, 73L, 74L, 77L, 80L, 75L,
103L, 105L, 107L, 50L, 51L, 52L, 53L, 54L, 138L, 139L, 140L,
151L, 152L, 153L, 158L, 159L, 1L, 2L, 3L, 4L, 5L, 6L, 231L, 1L,
2L, 171L, 172L, 173L, 174L, 100L, 102L, 108L, 101L, 109L, 104L,
176L, 177L, 178L, 179L, 180L, 154L, 155L, 156L, 106L, 157L, 79L,
78L, 230L, 229L, 228L, 230L, 115L, 114L, 113L, 112L, 110L, 111L,
176L, 177L, 178L, 179L, 180L, 122L, 123L, 124L, 125L, 137L, 135L,
136L, 141L, 142L, 138L, 139L, 140L, 97L, 95L, 96L, 99L, 98L,
150L, 126L, 127L, 128L, 129L, 190L, 133L, 134L, 132L, 213L, 148L,
131L, 149L, 211L, 212L, 133L, 134L, 132L, 210L, 213L, 210L, 212L,
211L, 148L, 131L, 149L, 210L, 134L, 133L, 132L, 213L, 130L, 231L,
125L, 128L, 129L, 127L, 126L, 124L, 231L, 145L, 144L, 143L, 118L,
120L, 117L, 93L, 94L, 160L, 161L, 166L, 165L, 164L, 163L, 162L,
89L, 88L, 85L, 84L, 90L, 86L, 87L, 79L, 78L, 91L, 123L, 122L,
238L, 237L, 235L, 92L, 80L, 75L, 74L, 76L, 77L, 73L, 232L, 233L,
234L, 119L, 121L, 116L, 175L, 176L, 177L, 179L, 180L, 178L, 2L,
3L, 5L, 4L, 1L, 176L, 178L, 179L, 180L, 126L, 127L, 128L, 129L,
239L, 240L, 191L, 192L, 193L, 194L, 195L, 196L, 197L, 198L, 199L,
200L, 201L, 202L, 203L, 204L, 205L, 206L, 207L, 208L, 209L, 116L,
121L, 119L, 138L, 142L, 141L, 139L, 140L, 161L, 94L, 95L, 183L,
166L, 165L, 160L, 163L, 117L, 120L, 118L, 93L, 233L, 234L, 232L,
189L, 187L, 186L, 162L, 164L, 128L, 126L, 129L, 127L, 74L, 75L,
80L, 76L, 77L, 73L, 79L, 78L, 91L, 92L, 100L, 103L, 108L, 101L,
109L, 106L, 105L, 104L, 123L, 124L, 125L, 122L, 115L, 114L, 113L,
112L, 111L, 110L, 97L, 182L, 184L, 185L, 181L, 145L, 144L, 143L,
96L, 82L, 66L, 2L, 3L, 4L, 5L, 1L, 83L, 81L, 128L, 129L, 126L,
127L, 209L, 206L, 207L, 208L, 191L, 192L, 193L, 194L, 203L, 204L,
205L, 198L, 197L, 196L, 195L, 202L, 201L, 199L, 200L, 52L, 51L,
53L, 50L, 54L, 112L, 115L, 114L, 110L, 180L, 179L, 176L, 178L,
122L, 124L, 126L, 127L, 128L, 129L, 123L, 125L, 145L, 144L, 143L,
192L, 195L, 195L, 233L, 234L, 178L, 176L, 180L, 179L, 191L, 194L,
197L, 103L, 128L, 129L, 126L, 127L, 80L, 76L, 79L, 78L, 193L,
198L, 200L, 101L, 100L, 108L, 81L, 83L, 82L, 73L, 74L, 75L, 77L,
91L, 92L, 176L, 177L, 178L, 180L, 179L, 1L, 2L, 3L, 4L, 5L, 93L,
94L, 95L, 96L, 99L, 98L, 97L, 128L, 129L, 126L, 127L, 176L, 178L,
177L, 179L, 180L, 94L, 97L, 95L, 96L, 105L, 107L, 106L, 109L,
104L, 233L, 234L, 143L, 144L, 145L, 93L, 108L, 101L, 100L, 103L,
58L), .Label = c("AhQ001", "AhQ002", "AhQ003", "AhQ004", "AhQ005",
"AhQ006", "BvB001", "BvB002", "BvB003", "BvB028", "BvB029", "BvB033",
"BvB034", "BvExA1", "BvExA2", "BvExA3", "BvExB1", "BvExB2", "BvExB3",
"BvExC1", "BvExC2", "BvExC3", "BvExD1", "BvExD2", "BvExD3", "BvQ004",
"BvQ005", "BvQ006", "BvQ008", "BvQ009", "BvQ028", "BvQ029", "BvQ030",
"BvQ031", "BvQ056", "BvQ057", "BvQ061", "BvQ062", "BvQ074", "BvQ075",
"BvQ076", "BvQ077", "BvQ078", "BvQ104", "BvQ105", "BvQ120", "BvQ121",
"BvQ182", "BvQ183", "DeQ001", "DeQ002", "DeQ003", "DeQ004", "DeQ005",
"DsQ001", "DsQ002", "DsQ003", "DsQ004", "DsQ005", "DsQ006", "DsQ007",
"DsQ008", "DsQ009", "DsQ010", "DsQ011", "DsQ023", "DsQB01", "DsQB02",
"DsQB03", "DsQB04", "DsQB05", "DsQB06", "EhQ001", "EhQ002", "EhQ003",
"EhQ004", "EhQ005", "EhQ006", "EhQ007", "EhQ008", "EhQJ01", "EhQJ02",
"EhQJ03", "ErQ001", "ErQ002", "ErQ003", "ErQ004", "ErQ005", "ErQ006",
"ErQ007", "GiQ001", "GiQ002", "HaQ001", "HaQ002", "HaQ003", "HaQ004",
"HaQ005", "HaQ006", "HaQ007", "HkQ001", "HkQ002", "HkQ003", "HkQ004",
"HkQ005", "HkQ006", "HkQ007", "HkQ008", "HkQ009", "HkQ010", "JdQ001",
"JdQ002", "JdQ003", "JdQ004", "JdQ005", "JdQ006", "KoQ001", "KoQ002",
"KoQ003", "KoQ004", "KoQ005", "KoQ006", "KrQ001", "KrQ002", "KrQ003",
"KrQ004", "KrQ005", "KrQ006", "KrQ007", "KrQ008", "LhH004", "LhPro1",
"LhPro2", "LhPro4", "LhPRro3", "LhQ001", "LhQ002", "LhX031",
"NmQ001", "NmQ002", "NmQ003", "NmQ004", "NmQ005", "NrQ001", "NrQ002",
"NrQ003", "NrQ004", "NrQ005", "OkPro1", "OkPro2", "OkQ001", "OlQ001",
"OlQ002", "OlQ003", "OlQ004", "OlQ005", "OlQ006", "OlQ007", "OlR001",
"OlR002", "OmQ001", "OmQ002", "OmQ003", "OmQ004", "OmQ005", "OmQ006",
"OmQ007", "OwQ001", "OwQ002", "OwQ003", "OwQ004", "PeH011", "PeH012",
"PeH013", "PeH014", "RhQ001", "SbQ001", "SbQ002", "SbQ003", "SbQ004",
"SbQ005", "StQ001", "StQ002", "StQ003", "StQ004", "StQ005", "SzQ001",
"SzQ002", "SzQ003", "SzQ004", "VdR070", "VhQ001", "VhQ002", "VhQ003",
"VhQ004", "VhQ005", "VhQ006", "VhQ007", "VhQ008", "VhQ009", "VhQ010",
"VhQ011", "VhQ012", "VhQ013", "VhQ014", "VhQ015", "VhQ016", "VhQ017",
"VhQ018", "VhQ019", "VlPro1", "VlPro2", "VlPro3", "VlPro4", "VlQ001",
"VlQ002", "VlQ003", "VlQ004", "VlQ005", "VuQ001", "VuQ002", "VuQ003",
"VuQ004", "VuQ005", "VuQ006", "VuT001", "VuT002", "VuT003", "WaQ001",
"WaQ002", "WaQ003", "WaQ004", "WaQ005", "WaQ006", "WaQ007", "WeQ001",
"WeQ002", "WeQ003", "WeQ004", "WhQ001", "WhQ002", "ZwQ001", "ZwQ002"
), class = "factor")), row.names = c(NA, -611L), class = "data.frame")
As you have multiple Codes for each Terrein, you should use %in%. Also, you need to define each bar count. Try this
# user interface ----
# Page layout: a single tab with a sidebar (terrain selector plus a
# server-rendered placeholder) and a main panel that holds the plot.
ui <- fluidPage(
tabsetPanel(
tabPanel("diagnostische tabellen",fluid = TRUE,
titlePanel("PQ analyse"),
sidebarLayout(
sidebarPanel(
helpText("selecteer terrein waar je de PQ-data van wil bekijken"),
#make first dropdownmenu for area
selectInput("terrein",
label = "Kies een terrein",
choices = sort((unique(df$Terrein))),
selected = 1),
# Placeholder with the output id "mycode"; this id differs from the
# input ids "terrein" and "map1" used elsewhere in this layout.
uiOutput("mycode")
),
mainPanel(plotOutput("map1"))))
)
)
# Server logic ----------------------------
server <- function(input, output) {
  # Build the PQ-code checkbox group for the currently selected terrain;
  # every code available for that terrain starts out ticked.
  output$mycode <- renderUI({
    in_terrain <- df$Terrein == input$terrein
    codes_here <- unique(df[in_terrain, "Code"])
    checkboxGroupInput(
      "code",
      label = "PQ-code",
      choices = sort(codes_here),
      selected = codes_here
    )
  })

  # Rows for the chosen terrain and the ticked codes, reduced to one row per
  # Code with the number of records in column n.
  filtered_data <- reactive({
    df %>%
      filter(Terrein == input$terrein & Code %in% input$code) %>%
      group_by(Code) %>%
      dplyr::summarise(n = n())
  })

  # Bar chart of the record count per PQ-code, one fill colour per code.
  output$map1 <- renderPlot({
    plot_title <- paste("Aantal herhalingen PQ's op", input$terrein)
    ggplot(filtered_data(), aes(x = Code, y = n, fill = Code)) +
      geom_bar(stat = "identity", position = position_stack(reverse = TRUE)) +
      theme(axis.text.x = element_text(angle = 45, size = 15)) +
      scale_fill_brewer() +
      labs(fill = NULL) +
      ggtitle(plot_title)
  })
}
# Run app
shinyApp(ui, server)
You will get this output:
Please note that there is no Jaar defined, so you may need to define it.

Tabu search in R

Good evening,
As part of a data analysis course we have been thrown into the Metaheuristics realm.....and I am really struggling to understand how to implement a Tabu search in R since my background in programming is rather limited.
I haven't found any R or Python example on Google or youtube either so I'm really praying I'll find something here.
The problem I have is similar to the "location problem" in optimisation. I need to find the best combination of Hubs that minimizes the total distance between Hubs and nodes.
I need to find 5 hubs, and the total capacity for each one is 120
# Example input: a 50-row data frame of nodes with columns
# node_number (1:50), x, y and node_demand (all integer).
nodes <- structure(list(node_number = 1:50,
x = c(2L, 80L, 36L, 57L, 33L, 76L, 77L, 94L,
89L, 59L, 39L, 87L, 44L, 2L, 19L, 5L,
58L, 14L, 43L, 87L, 11L, 31L, 51L, 55L,
84L, 12L, 53L, 53L, 33L, 69L, 43L, 10L,
8L, 3L, 96L, 6L, 59L, 66L, 22L, 75L, 4L,
41L, 92L, 12L, 60L, 35L, 38L, 9L, 54L, 1L),
y = c(62L, 25L, 88L, 23L, 17L, 43L, 85L, 6L, 11L,
72L, 82L, 24L, 76L, 83L, 43L, 27L, 72L, 50L,
18L, 7L, 56L, 16L, 94L, 13L, 57L, 2L, 33L, 10L,
32L, 67L, 5L, 75L, 26L, 1L, 22L, 48L, 22L, 69L,
50L, 21L, 81L, 97L, 34L, 64L, 84L, 100L, 2L, 9L, 59L, 58L),
node_demand = c(3L, 14L, 1L, 14L, 19L, 2L, 14L, 6L,
7L, 6L, 10L, 18L, 3L, 6L, 20L, 4L,
14L, 11L, 19L, 15L, 15L, 4L, 13L,
13L, 5L, 16L, 3L, 7L, 14L, 17L,
3L, 3L, 12L, 14L, 20L, 13L, 10L,
9L, 6L, 18L, 7L, 20L, 9L, 1L, 8L,
5L, 1L, 7L, 9L, 2L)),
.Names = c("node_number", "x", "y", "node_demand"),
class = "data.frame", row.names = c(NA, -50L))
# Problem parameters: number of hubs to select and the capacity limit per hub.
hubs_required <- 5
total_capacity <- 120
My strategy was to create a distance matrix, then create another 50 x 50 matrix to represent whether a node becomes a hub or not, and finally multiply both and add up all the distances to get the total distance.
I created the dataframe:
# Keep only the three columns that are needed. `nodes` also carries
# `node_number`, and assigning three names to four columns would shift every
# label by one position and leave the last column named NA.
nodes_df <- as.data.frame(nodes)[, c("x", "y", "node_demand")]
rownames(nodes_df) <- paste("Node", 1:50)
I created the distance matrix
# Compute the distances from the coordinates only; handing every column to
# dist() would mix the demand (and any id column) into the Euclidean distance.
node_labels <- paste("Node", seq_len(nrow(nodes)))
distance_df <- as.data.frame(as.matrix(round(
  dist(nodes[, c("x", "y")], method = "euclidean", diag = TRUE, upper = TRUE)
)))
# Label both dimensions so rows and columns read "Node 1" ... "Node 50".
dimnames(distance_df) <- list(node_labels, node_labels)
I created the node demand matrix:
# Read the demand straight from `nodes` so the values do not depend on how the
# columns of an intermediate copy were renamed.
demand <- rep(nodes$node_demand, times = 50)
# Filled by row: every row repeats the full demand vector, so entry [i, j]
# holds the demand of node j.
demand_matrix <- matrix(demand, nrow = 50, ncol = 50, byrow = TRUE)
# Zero the diagonal (a node paired with itself).
diag(demand_matrix) <- 0
demand_matrix <- as.data.frame(demand_matrix)
I created an empty matrix to show whether a node becomes a hub "1" or not "0"
# 50 x 50 all-zero indicator matrix: rows are labelled "Node i", columns "Hub j".
hubs_matrix <- matrix(
  0,
  nrow = 50,
  ncol = 50,
  dimnames = list(paste("Node", 1:50), paste("Hub", 1:50))
)
Then to create the initial solution I randomly assign Hubs and calculate the distance and demand.
# Initial solution: each of the 50 columns is a random shuffle of one 1 and
# 49 zeros, drawn in column order after fixing the seed.
set.seed(37)
hubs_matrix <- vapply(
  1:50,
  function(col) sample(c(1, rep(0, 49)), 50),
  numeric(50)
)
# Element-wise product keeps only the distances at positions holding a 1.
sum_distances <- hubs_matrix * distance_df
# Grand total of the retained distances.
sum(rowSums(sum_distances))
The idea is to try different combinations of '1' and '0' so as to minimise the total distance, but I am having the following issues:
I got no idea how to do the local search and do the permutations from the initial solution.
I have no idea how to prevent R from reusing the best solution for a certain period of time, i.e. the Tabu list.
I got no idea how to deal with the supply restriction for each node ( total demand from each node < 120), I could do it with a loop but since in this case I'm multiplying matrices I'm pretty lost.
Anybody could give me a hand???
Many thanks!

Subsetting from a dataframe in R

I have sampled 'n' rows from a dataframe called nodes:
# 50-row data frame of nodes: node_number (1:50) plus integer
# x, y and node_demand columns.
nodes <- structure(list(node_number = 1:50,
x = c(2L, 80L, 36L, 57L, 33L, 76L, 77L, 94L,
89L, 59L, 39L, 87L, 44L, 2L, 19L, 5L,
58L, 14L, 43L, 87L, 11L, 31L, 51L, 55L,
84L, 12L, 53L, 53L, 33L, 69L, 43L, 10L,
8L, 3L, 96L, 6L, 59L, 66L, 22L, 75L, 4L,
41L, 92L, 12L, 60L, 35L, 38L, 9L, 54L, 1L),
y = c(62L, 25L, 88L, 23L, 17L, 43L, 85L, 6L, 11L,
72L, 82L, 24L, 76L, 83L, 43L, 27L, 72L, 50L,
18L, 7L, 56L, 16L, 94L, 13L, 57L, 2L, 33L, 10L,
32L, 67L, 5L, 75L, 26L, 1L, 22L, 48L, 22L, 69L,
50L, 21L, 81L, 97L, 34L, 64L, 84L, 100L, 2L, 9L, 59L, 58L),
node_demand = c(3L, 14L, 1L, 14L, 19L, 2L, 14L, 6L,
7L, 6L, 10L, 18L, 3L, 6L, 20L, 4L,
14L, 11L, 19L, 15L, 15L, 4L, 13L,
13L, 5L, 16L, 3L, 7L, 14L, 17L,
3L, 3L, 12L, 14L, 20L, 13L, 10L,
9L, 6L, 18L, 7L, 20L, 9L, 1L, 8L,
5L, 1L, 7L, 9L, 2L)),
.Names = c("node_number", "x", "y", "node_demand"),
class = "data.frame", row.names = c(NA, -50L))
To sample I use this code:
# Draw `hubs_required` distinct row positions (sampling without replacement is
# the default) and keep those rows of `nodes`.
hubs <- nodes[sample(1:total_nodes, size = hubs_required), ]
Which returns :
node_number x y node_demand
33 33 8 26 12
14 14 2 83 6
42 42 41 97 20
13 13 44 76 3
10 10 59 72 6
I would like to return all the rows that haven't been selected so that I can perform a series of calculations on them.
I thought that using something like data[-sample,] would work but I get the following error
Error in xj[i] : invalid subscript type 'list'.
Does anybody know how I could get these values?
It would be easier to keep the list of indexes that were selected. Something like
# Store the sampled row positions once, then reuse them for both subsets.
keep <- sample(1:total_nodes, hubs_required, replace = FALSE)
hubs <- nodes[keep, ]
# Negative positions drop the sampled rows, leaving the unselected ones.
other_hubs <- nodes[-keep, ]
Otherwise, if your data has some sort of key/ID, you can do something like
# Keep the rows whose node_number does NOT appear among the selected hubs.
# (`$` extracts the column; the negation is what yields the unselected rows.)
other_hubs <- nodes[!(nodes$node_number %in% hubs$node_number), ]
or with dplyr, this can be an anti-join
# Rows of `nodes` that have no matching node_number in `hubs`.
anti_join(nodes, hubs, by = "node_number")

Resources