Averaging dataframe based on current row-value and preceding rows - r

I have a simple data set with the following form
df <- data.frame(c(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20),
                 c(80, 80, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90, 80, 80, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90),
                 c(1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, 2, 3, 3, 4, 4, 1, 1, 2, 2, 3, 3, 4, 4),
                 c(25, 75, 20, 40, 60, 50, 20, 10, 20, 30, 40, 60, 25, 75, 20, 40, 5, 5, 2, 4, 6, 5, 2, 1, 2, 3, 4, 6, 2, 7, 2, 4))
colnames(df) <- c("car_number", "year", "marker", "val")
What I am trying to do is quite simple, actually: per car_number, I want to find the average of the values associated with a marker value and the preceding 3 marker values. So for the example data above, the output I want is
car=10, year=80 1: 50
car=10, year=80 2: 40
car=10, year=80 3: 45
car=10, year=80 4: 37.5
car=10, year=90 1: 31.25
car=10, year=90 2: 36.25
car=10, year=90 3: 35
car=10, year=90 4: 38.75
car=20, year=80 1: 5
car=20, year=80 2: 4
car=20, year=80 3: 4.5
car=20, year=80 4: 3.75
car=20, year=90 1: 3.125
car=20, year=90 2: 3.625
car=20, year=90 3: 3.375
car=20, year=90 4: 3.750
Note that, for simplicity of the example, the markers above come in pairs. That is not the case with the real data, so I am thinking a general solution will involve some sort of group_by (?)
Any efficient solution is welcome!
Here is a second example data set and output:
df <- data.frame(c(10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20),
                 c(80, 80, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90, 80, 80, 80, 80, 80, 80, 80, 80, 90, 90, 90, 90, 90, 90, 90, 90),
                 c(1, 2, 2, 2, 3, 3, 4, 4, 1, 1, 2, 2, 3, 3, 3, 4, 1, 1, 1, 2, 3, 3, 4, 4, 4, 1, 2, 2, 3, 3, 3, 4),
                 c(25, 75, 20, 40, 60, 50, 20, 10, 20, 30, 40, 60, 25, 75, 20, 40, 5, 5, 2, 4, 6, 5, 2, 1, 2, 3, 4, 6, 2, 7, 2, 4))
colnames(df) <- c("car_number", "year", "marker", "val")
And the output is (based on the rules above)
car=10, year=80 1: Mean[{25}] = 25
car=10, year=80 2: Mean[{40, 20, 75, 25}] = 40
car=10, year=80 3: Mean[{50, 60, 40, 20, 75, 25}] = 45
car=10, year=80 4: Mean[{10, 20, 50, 60, 40, 20, 75, 25}] = 37.5
car=10, year=90 1: Mean[{30, 20, 10, 20, 50, 60, 40, 20, 75}] = 36.11
car=10, year=90 2: Mean[{60, 40, 30, 20, 10, 20, 50, 60}] = 36.25
car=10, year=90 3: Mean[{20, 75, 25, 60, 40, 30, 20, 10, 20}] = 33.33
car=10, year=90 4: Mean[{40, 20, 75, 25, 60, 40, 30, 20}] = 38.75
car=20, year=80 1: Mean[{2, 5, 5}] = 4
car=20, year=80 2: Mean[{4, 2, 5, 5}] = 4
car=20, year=80 3: Mean[{5, 6, 4, 2, 5, 5}] = 4.5
car=20, year=80 4: Mean[{2, 1, 2, 5, 6, 4, 2, 5, 5}] = 3.55
car=20, year=90 1: Mean[{3, 2, 1, 2, 5, 6, 4}] = 3.29
car=20, year=90 2: Mean[{6, 4, 3, 2, 1, 2, 5, 6}] = 3.625
car=20, year=90 3: Mean[{2, 7, 2, 6, 4, 3, 2, 1, 2}] = 3.22
car=20, year=90 4: Mean[{4, 2, 7, 2, 6, 4, 3}] = 4

A first group_by computes the mean by car_number, year, and marker, and retains its weight (number of rows).
A second group_by on car_number lets us retrieve the lagged means and weights to compute the desired mean:
library(purrr)
library(dplyr)
df %>%
  arrange(car_number, year, marker) %>%
  group_by(car_number, year, marker) %>%
  summarise(mean_1 = mean(val, na.rm = TRUE), weight = n()) %>%
  group_by(car_number) %>%
  mutate(mean_2 = pmap_dbl(
    list(mean_1, lag(mean_1), lag(mean_1, 2), lag(mean_1, 3),
         weight, lag(weight), lag(weight, 2), lag(weight, 3)),
    ~ weighted.mean(c(..1, ..2, ..3, ..4),
                    c(..5, ..6, ..7, ..8),
                    na.rm = TRUE)
  )) %>%
  ungroup()
Result:
# A tibble: 16 × 6
# car_number year marker mean_1 weight mean_2
# <dbl> <dbl> <dbl> <dbl> <int> <dbl>
# 1 10 80 1 50.0 2 50.000
# 2 10 80 2 30.0 2 40.000
# 3 10 80 3 55.0 2 45.000
# 4 10 80 4 15.0 2 37.500
# 5 10 90 1 25.0 2 31.250
# 6 10 90 2 50.0 2 36.250
# 7 10 90 3 50.0 2 35.000
# 8 10 90 4 30.0 2 38.750
# 9 20 80 1 5.0 2 5.000
# 10 20 80 2 3.0 2 4.000
# 11 20 80 3 5.5 2 4.500
# 12 20 80 4 1.5 2 3.750
# 13 20 90 1 2.5 2 3.125
# 14 20 90 2 5.0 2 3.625
# 15 20 90 3 4.5 2 3.375
# 16 20 90 4 3.0 2 3.750
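As a sanity check of the weighted mean against the second example (hand arithmetic only, using numbers already shown above): for car=10, year=90, marker=1 the current and three preceding group means are 25, 15, 55, 45 with weights 2, 2, 2, 3, so
weighted.mean(c(25, 15, 55, 45), c(2, 2, 2, 3))
# [1] 36.11111
which matches the 36.11 computed by hand in the second example.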
Edit: Alternative syntax for purrr versions prior to 0.2.2.9000:
df %>%
  arrange(car_number, year, marker) %>%
  group_by(car_number, year, marker) %>%
  summarise(mean_1 = mean(val, na.rm = TRUE), weight = n()) %>%
  group_by(car_number) %>%
  mutate(mean_2 = pmap_dbl(
    list(mean_1, lag(mean_1), lag(mean_1, 2), lag(mean_1, 3),
         weight, lag(weight), lag(weight, 2), lag(weight, 3)),
    function(a, b, c, d, e, f, g, h)
      weighted.mean(c(a, b, c, d),
                    c(e, f, g, h),
                    na.rm = TRUE)
  )) %>%
  ungroup()

Just throwing a base R solution in the mix. We can make a custom function using Reduce with accumulate = TRUE and tail(x, 4) to ensure that only the current value and the preceding three are included. All this after we average the data set by car_number, year, and marker, i.e.
f1 <- function(x){
  sapply(Reduce(c, x, accumulate = TRUE), function(i) mean(tail(i, 4)))
}
dd <- aggregate(val ~ car_number + year + marker, df, mean)
dd <- dd[order(dd$car_number, dd$year, dd$marker), ]
dd$new_avg <- with(dd, ave(val, car_number, FUN = f1))
dd
# car_number year marker val new_avg
#1 10 80 1 50.0 50.000
#5 10 80 2 30.0 40.000
#9 10 80 3 55.0 45.000
#13 10 80 4 15.0 37.500
#3 10 90 1 25.0 31.250
#7 10 90 2 50.0 36.250
#11 10 90 3 50.0 35.000
#15 10 90 4 30.0 38.750
#2 20 80 1 5.0 5.000
#6 20 80 2 3.0 4.000
#10 20 80 3 5.5 4.500
#14 20 80 4 1.5 3.750
#4 20 90 1 2.5 3.125
#8 20 90 2 5.0 3.625
#12 20 90 3 4.5 3.375
#16 20 90 4 3.0 3.750
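To see the mechanics of f1 (an illustration of mine, not part of the original answer): Reduce(c, x, accumulate = TRUE) builds the growing prefixes of x, and tail(i, 4) then keeps at most the current value plus the three before it.
Reduce(c, c(50, 30, 55, 15), accumulate = TRUE)
# [[1]] 50
# [[2]] 50 30
# [[3]] 50 30 55
# [[4]] 50 30 55 15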

Here is a method with data.table that modifies Frank's suggestion in David Arenburg's answer here.
# aggregate data by car_number, year, and marker
dfNew <- setDT(df)[, .(val = mean(val)), by = .(car_number, year, marker)]
# calculate average of current and previous three values
dfNew[, val := rowMeans(dfNew[, shift(val, 0:3), by = car_number][, -1], na.rm = TRUE)]
The first line is a standard aggregation call. The second line makes some changes to the rowMeans method in the linked answer: rowMeans is fed a data.table of the shifted values, where the shift occurs by car_number (thanks to sotos for catching this). That call is chained to a statement that drops the first column (using -1), which is the car_number column returned by the first part of the chain.
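As a quick illustration of what shift(val, 0:3) produces (a sketch on made-up values, not part of the original answer): given a vector of lags, shift() returns a list holding the input lagged by 0 through 3 positions, with NAs filling the gaps, which is exactly the shape rowMeans can consume.
library(data.table)
shift(c(50, 30, 55, 15), 0:3)
# [[1]] 50 30 55 15   (lag 0: the original values)
# [[2]] NA 50 30 55   (lag 1)
# [[3]] NA NA 50 30   (lag 2)
# [[4]] NA NA NA 50   (lag 3)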
The full call returns
car_number year marker val
1: 10 80 1 50.000
2: 10 80 2 40.000
3: 10 80 3 45.000
4: 10 80 4 37.500
5: 10 90 1 31.250
6: 10 90 2 36.250
7: 10 90 3 35.000
8: 10 90 4 38.750
9: 20 80 1 5.000
10: 20 80 2 4.000
11: 20 80 3 4.500
12: 20 80 4 3.750
13: 20 90 1 3.125
14: 20 90 2 3.625
15: 20 90 3 3.375
16: 20 90 4 3.750

You can do it this way:
df %>%
  group_by(car_number, year, marker) %>%
  summarise(s = sum(val), w = n()) %>%                       # sum and number of values
  group_by(car_number) %>%
  mutate(S = cumsum(s) - cumsum(lag(s, 4, default = 0))) %>% # sum of last four s
  mutate(W = cumsum(w) - cumsum(lag(w, 4, default = 0))) %>% # same for the weights
  mutate(result = S/W)
Output of your second example:
# Source: local data frame [16 x 8]
# Groups: car_number [2]
#
# car_number year marker s w S W result
# <dbl> <dbl> <dbl> <dbl> <int> <dbl> <int> <dbl>
# 1 10 80 1 25 1 25 1 25.000000
# 2 10 80 2 135 3 160 4 40.000000
# 3 10 80 3 110 2 270 6 45.000000
# 4 10 80 4 30 2 300 8 37.500000
# 5 10 90 1 50 2 325 9 36.111111
# 6 10 90 2 100 2 290 8 36.250000
# 7 10 90 3 120 3 300 9 33.333333
# 8 10 90 4 40 1 310 8 38.750000
# 9 20 80 1 12 3 12 3 4.000000
# 10 20 80 2 4 1 16 4 4.000000
# 11 20 80 3 11 2 27 6 4.500000
# 12 20 80 4 5 3 32 9 3.555556
# 13 20 90 1 3 1 23 7 3.285714
# 14 20 90 2 10 2 29 8 3.625000
# 15 20 90 3 11 3 29 9 3.222222
# 16 20 90 4 4 1 28 7 4.000000
Edit:
It's probably more efficient with the RcppRoll package; you can try S = roll_sum(c(0, 0, 0, s), 4) (and the same for W).
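A sketch of how that could slot into the pipeline (assuming the RcppRoll package is installed; the three leading zeros pad each series so every row gets a full window of four):
library(dplyr)
library(RcppRoll)
df %>%
  group_by(car_number, year, marker) %>%
  summarise(s = sum(val), w = n()) %>%
  group_by(car_number) %>%
  mutate(S = roll_sum(c(0, 0, 0, s), 4),  # rolling sums over the zero-padded series
         W = roll_sum(c(0, 0, 0, w), 4),
         result = S / W)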

Considering df as your input, you can use dplyr and zoo, grouping only over car_number:
df %>%
  group_by(car_number, year, marker) %>%
  summarise(mm = mean(val)) %>%
  group_by(car_number) %>%
  mutate(rM = rollapply(mm, if_else(row_number() < 4, marker, 4), FUN = mean, align = "right")) %>%
  select(year, rM)
which gives:
Source: local data frame [16 x 3]
Groups: car_number [2]
car_number year rM
<dbl> <dbl> <dbl>
1 10 80 50.000
2 10 80 40.000
3 10 80 45.000
4 10 80 37.500
5 10 90 31.250
6 10 90 36.250
7 10 90 35.000
8 10 90 38.750
9 20 80 5.000
10 20 80 4.000
11 20 80 4.500
12 20 80 3.750
13 20 90 3.125
14 20 90 3.625
15 20 90 3.375
16 20 90 3.750
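A possibly simpler variant (my assumption, not part of the original answer): zoo's rollapplyr() with partial = TRUE shrinks the window at the start of each group automatically, so the if_else() width vector is unnecessary:
library(dplyr)
library(zoo)
df %>%
  group_by(car_number, year, marker) %>%
  summarise(mm = mean(val)) %>%
  group_by(car_number) %>%
  mutate(rM = rollapplyr(mm, 4, mean, partial = TRUE))  # right-aligned window of at most 4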

Related

Mix "color_bar" and "style" in formattable package

I'm using the formattable package and I want to customize my table, but I can't get it the way I want.
Here is my table
structure(list(PJ = c(4, 4, 4, 4, 4, 4), V = c(4, 2, 2, 2, 1, 1),
               E = c(0, 0, 0, 0, 0, 0), D = c(0, 2, 2, 2, 3, 3),
               GF = c(182, 91, 92, 185, 126, 119), GC = c(84, 143, 144, 115, 141, 168),
               Dif = c(98, -52, -52, 70, -15, -49), Pts = c(12, 6, 6, 6, 3, 3)),
          class = "data.frame",
          row.names = c("Player1", "Player2", "Player3", "Player4", "Player5", "Player6"))
It looks like this:
PJ V E D GF GC Dif Pts
Player1 4 4 0 0 182 84 98 12
Player2 4 2 0 2 91 143 -52 6
Player3 4 2 0 2 92 144 -52 6
Player4 4 2 0 2 185 115 70 6
Player5 4 1 0 3 126 141 -15 3
Player6 4 1 0 3 119 168 -49 3
If I want the column GF in bold, I use
formattable(TAB.df, list(
  GF = formatter("span", style = style("font.weight" = "bold"))
))
If I want a color_bar I run this code:
formattable(TAB.df, list(
  GF = color_bar("lightgreen")
))
Nevertheless, I don't know how to combine them and get the "color_bar" with "bold" numbers.
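For what it's worth, here is one plausible way to combine them, on the assumption that color_bar() is internally just a span formatter with a width-varying background, so the bar can be rebuilt with formatter() and a font weight added (proportion(), percent(), and csscolor() are formattable helpers; treat this as a sketch, not a confirmed recipe):
library(formattable)
formattable(TAB.df, list(
  GF = formatter("span",
    style = x ~ style(
      display = "inline-block",
      "border-radius" = "4px",
      "padding-right" = "2px",
      "background-color" = csscolor("lightgreen"),  # the color_bar background
      width = percent(proportion(x)),               # bar width scales with the value
      "font-weight" = "bold"))                      # the bold numbers
))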

Trying to label sequentially within groups of dataframe R

I have a subset of my dataframe:
df = data.frame(retailer_id = c(1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
                store_id = c(166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167),
                quad_id = c(2017010104, 2017012904, 2017010104, 2017012904, 2017022604, 2017032604, 2017042304, 2017052104, 2017061804,
                            2017071604, 2017081304, 2017091004, 2017100804, 2017110504, 2017120304, 2017123104, 2018012804, 2018022504, 2018032504, 2018042204))
where 2017010104 corresponds to the date 01/01/2017 and so on. I am trying to label these different quad_ids sequentially with reference to the year. So for example I am trying to get the output:
df = data.frame(retailer_id = c(1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
                store_id = c(166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167),
                quad_id = c(2017010104, 2017012904, 2017010104, 2017012904, 2017022604, 2017032604, 2017042304, 2017052104, 2017061804,
                            2017071604, 2017081304, 2017091004, 2017100804, 2017110504, 2017120304, 2017123104, 2018012804, 2018022504, 2018032504, 2018042204),
                Snum = c(1, 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1, 2, 3, 4))
where you can see that for retailer_id = 2 and store_id = 167 the weeks for the year 2017 are labeled 1-14, and once a week begins with 2018 the count restarts from 1, continuing until a week starting with 2019 is reached within this grouping.
I tried:
DT <- data.table(df)
DT[, Snum := seq_len(.N), by = list(retailer_id, store_id)]
However, this is not labeling sequentially by year; instead it is labeling sequentially by store_id. Is there a way to fix this? (This example only shows two retailers and two stores, whereas my actual dataframe has hundreds of different retailers and stores.)
Here's a solution using tidyverse
df = data.frame(retailer_id = c(1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2),
                store_id = c(166, 166, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167, 167),
                quad_id = c(2017010104, 2017012904, 2017010104, 2017012904, 2017022604, 2017032604, 2017042304, 2017052104, 2017061804,
                            2017071604, 2017081304, 2017091004, 2017100804, 2017110504, 2017120304, 2017123104, 2018012804, 2018022504, 2018032504, 2018042204))
library(tidyverse)
getYear = function(x) {
  x %>%
    str_extract("^\\d{4}") %>%
    as.integer() %>%
    return()
}
tmp = df %>%
  mutate(year = getYear(quad_id)) %>%
  group_by(year, retailer_id, store_id) %>%
  mutate(Snum = 1:n())
> tmp
# A tibble: 20 x 5
# Groups: year, retailer_id, store_id [3]
retailer_id store_id quad_id year Snum
<dbl> <dbl> <dbl> <int> <int>
1 1 166 2017010104 2017 1
2 1 166 2017012904 2017 2
3 2 167 2017010104 2017 1
4 2 167 2017012904 2017 2
5 2 167 2017022604 2017 3
6 2 167 2017032604 2017 4
7 2 167 2017042304 2017 5
8 2 167 2017052104 2017 6
9 2 167 2017061804 2017 7
10 2 167 2017071604 2017 8
11 2 167 2017081304 2017 9
12 2 167 2017091004 2017 10
13 2 167 2017100804 2017 11
14 2 167 2017110504 2017 12
15 2 167 2017120304 2017 13
16 2 167 2017123104 2017 14
17 2 167 2018012804 2018 1
18 2 167 2018022504 2018 2
19 2 167 2018032504 2018 3
20 2 167 2018042204 2018 4
Note that if your data isn't sorted by retailer_id, store_id, and year, that would cause an issue.
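The year can also be recovered without stringr (my addition, not part of either answer): quad_id has the form YYYYMMDDNN, so integer division by 1e6 drops the trailing six digits, and the original data.table attempt works once that expression joins the by:
library(data.table)
DT <- data.table(df)
# 2017010104 %/% 1e6 == 2017; expressions are allowed inside .()
DT[, Snum := seq_len(.N), by = .(retailer_id, store_id, year = quad_id %/% 1e6)]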
We could use str_match from the stringr package together with the regex '^[[:digit:]]{4}' to match the first four digits:
library(dplyr)
library(stringr)
df %>%
  group_by(Snum = str_match(quad_id, '^[[:digit:]]{4}')) %>%
  mutate(Snum = row_number())
output:
retailer_id store_id quad_id Snum
<dbl> <dbl> <dbl> <int>
1 1 166 2017010104 1
2 1 166 2017012904 2
3 2 167 2017010104 3
4 2 167 2017012904 4
5 2 167 2017022604 5
6 2 167 2017032604 6
7 2 167 2017042304 7
8 2 167 2017052104 8
9 2 167 2017061804 9
10 2 167 2017071604 10
11 2 167 2017081304 11
12 2 167 2017091004 12
13 2 167 2017100804 13
14 2 167 2017110504 14
15 2 167 2017120304 15
16 2 167 2017123104 16
17 2 167 2018012804 1
18 2 167 2018022504 2
19 2 167 2018032504 3
20 2 167 2018042204 4

Find maximum in a group, subset by a subset from a different dataframe, to select other values

I have two data.frames: df1 with raw data, and df2 with information on where to look in df1.
df1 has groups, defined by "id". Within each group, a subset is defined by df2$value_a1 and df2$value_a2, which give the range of value_a to look at. In that subset group I want to find the row with the maximum value_b, to select its value_a and value_c (see df3 below).
code for df1 and df2
df1 <- data.frame("id" = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
                  "value_a" = c(0, 10, 21, 30, 43, 53, 69, 81, 93, 5, 16, 27, 33, 45, 61, 75, 90, 2, 11, 16, 24, 31, 40, 47, 60, 75, 88),
                  "value_b" = c(100, 101, 100, 95, 90, 104, 88, 84, 75, 110, 105, 106, 104, 95, 109, 96, 89, 104, 104, 104, 103, 106, 103, 101, 99, 98, 97),
                  "value_c" = c(0, -1, -2, -2, -2, -2, -1, -1, 0, 0, 0, 0, 1, 1, 2, 2, 1, -1, 0, 0, 1, 1, 2, 2, 1, 1, 0),
                  "value_d" = c(1:27))
df2 <- data.frame("id" = c(1, 2, 3), "value_a1" = c(21, 33, 16), "value_a2" = c(69, 75, 60))
This is df1
id value_a value_b value_c value_d
1 1 0 100 0 1
2 1 10 101 -1 2
3 1 21 100 -2 3
4 1 30 95 -2 4
5 1 43 90 -2 5
6 1 53 104 -2 6
7 1 69 88 -1 7
8 1 81 84 -1 8
9 1 93 75 0 9
10 2 5 110 0 10
11 2 16 105 0 11
12 2 27 106 0 12
13 2 33 104 1 13
14 2 45 95 1 14
15 2 61 109 2 15
16 2 75 96 2 16
17 2 90 89 1 17
18 3 2 104 -1 18
19 3 11 104 0 19
20 3 16 104 0 20
21 3 24 103 1 21
22 3 31 106 1 22
23 3 40 103 2 23
24 3 47 101 2 24
25 3 60 99 1 25
26 3 75 98 1 26
27 3 88 97 0 27
This is df2
id value_a1 value_a2
1 1 21 69
2 2 33 75
3 3 16 60
My result would be df3, which would look like this
id value_a value_c
1 1 53 -2
2 2 61 2
3 3 31 1
I wrote this code to show my line of thinking.
df3 <- df1 %>%
  group_by(id) %>%
  filter(value_a >= df2$value_a1 & value_a <= df2$value_a2) %>%
  filter(value_a == max(value_a)) %>%
  pull(value_b)
This however generates a vector with three entries:
[1] 88 95 99
These are not the maximum value_b's...
Perhaps by() would work, but this gets stuck on using a function on two different df's.
It feels like I'm almost there, but still far away...
You can try this. I hope this helps.
df1 %>%
  left_join(df2) %>%
  mutate(val = ifelse(value_a > value_a1 & value_a < value_a2, value_b, NA)) %>%
  group_by(id) %>%
  summarise(val = max(val, na.rm = TRUE))
# A tibble: 3 x 2
id val
<dbl> <dbl>
1 1 104
2 2 109
3 3 106
Why don't you try a merge?
Then with data.table syntax:
library(data.table)
df3 <- merge(df1, df2, by = "id", all.x = TRUE)
max_values <- df3[value_a > value_a1 & value_a < value_a2, max(value_b), by = "id"]
max_values
# id V1
# 1: 1 104
# 2: 2 109
# 3: 3 106
I would do this using the data.table package since it is just what I'm used to
library(data.table)
dt.1 <- data.table("id" = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3),
                   "value_a" = c(0, 10, 21, 30, 43, 53, 69, 81, 93, 5, 16, 27, 33, 45, 61, 75, 90, 2, 11, 16, 24, 31, 40, 47, 60, 75, 88),
                   "value_b" = c(100, 101, 100, 95, 90, 104, 88, 84, 75, 110, 105, 106, 104, 95, 109, 96, 89, 104, 104, 104, 103, 106, 103, 101, 99, 98, 97),
                   "value_c" = c(0, -1, -2, -2, -2, -2, -1, -1, 0, 0, 0, 0, 1, 1, 2, 2, 1, -1, 0, 0, 1, 1, 2, 2, 1, 1, 0),
                   "value_d" = c(1:27))
dt.2 <- data.table("id" = c(1, 2, 3), "value_a1" = c(21, 33, 16), "value_a2" = c(69, 75, 60))
dt.3 <- dt.1[id %in% dt.2[,id],max(value_b), by="id"]
setnames(dt.3, "V1", "max_value_b")
dt.3
To get the corresponding line where value_b is the maximum, there are several ways; here's one where I only modified a line from the previous code
dt.1[id %in% dt.2[,id],.SD[which.max(value_b), .(value_a, value_b, value_c, value_d)], by="id"]
.SD is the sub-table you already selected with by, so for each id it selects the local maximum of value_b and returns a table: which.max() picks the row, and .() is an alias for list(), so it lists the columns you wish to keep from that table.
Perhaps a more readable approach is to first select the desired rows
max.b.rows <- dt.1[id %in% dt.2[,id], which.max(value_b), by="id"][,V1]
dt.3 <- dt.1[max.b.rows,]
BTW, the id %in% dt.2[,id] part is just there to make sure you only select maxima for those ids in table 2
Best
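For completeness, a dplyr sketch (my addition, not from the answers above; slice_max() requires dplyr >= 1.0.0) that returns exactly the df3 the question asked for, i.e. value_a and value_c at the row holding the maximal value_b within each id's range:
library(dplyr)
df1 %>%
  inner_join(df2, by = "id") %>%
  filter(value_a >= value_a1, value_a <= value_a2) %>%  # keep rows inside each id's range
  group_by(id) %>%
  slice_max(value_b, n = 1) %>%                         # row with the maximal value_b
  ungroup() %>%
  select(id, value_a, value_c)
# id value_a value_c: (1, 53, -2), (2, 61, 2), (3, 31, 1)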

Why can't I plot my model selection plot correctly?

I am working with multiple regression models. After running the dredge function, I got approximately 54 000 different combinations. I selected the first 300 models and ran this code:
par(mar = c(1, 4, 10, 3))
plot(fitt, labels = c("Intercept",
                      "YOFE",
                      "'RW Closeness'",
                      "'LW Closeness'",
                      "Age",
                      "SES",
                      "'GAD-7 Score'",
                      "Fantasy",
                      "'Personal Distress'",
                      "'Empathic Concern'",
                      "'Perspective Taking'",
                      "'PHQ-9 Score'",
                      "'Religious Affinity'",
                      "'Agreement with IH'",
                      "'Moral Judgement of IH'",
                      "'Harm Assessment of IH'",
                      "'Agreement with IB'",
                      "'Moral Judgement of IB'",
                      "RMET",
                      "Sex"),
     ylab = expression("Cumulative" ~ italic(w[i]*(AICc))),
     col = c(colfunc(1)), border = "gray30", labAsExpr = TRUE)
10 minutes later, I got this error:
Error in (function (text, side = 3, line = 0, outer = FALSE, at = NA, :
zero-length 'text' specified
In addition: Warning message:
In max(strwidth(arg[["text"]], cex = arg$cex, units = "in")) :
no non-missing arguments to max; returning -Inf
And this is the output plot:
I've tried plotting only the first model and the same error appears:
This also happens when using the whole model selection table (54 000 combinations).
What is a solution to this?
I'm running the latest version of R and RStudio on my 2016 12 inch Macbook.
Note: I've tried increasing the plot-window size manually by dragging the edges without any improvement.
This is what I'd like my plot to look like:
EDIT: Here is the data file data and the code.
modeloglobal <- lm(PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS ~ Edad + Sex +
                     AnEdu +
                     Estrato_1 +
                     GAD_TOTAL +
                     PHQ_TOTAL +
                     PracticRel_2 +
                     CercanPolDer_1 +
                     CercanPolIz_1 +
                     RMET_TOTAL +
                     IRI_PREOCUPACIÓN_EMPATICA +
                     IRI_FANTASÍA +
                     IRI_MALESTAR_PERSONAL +
                     IRI_TOMA_DE_PERSPECTIVA +
                     PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS +
                     PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS +
                     PROMEDIO_DI_SINPOL_ACTUARIGUAL +
                     PROMEDIO_DI_SINPOL_BIENOMAL +
                     PROMEDIO_DI_SINPOL_DANO,
                   data = fake_news, na.action = "na.fail")
library(MuMIn)
fitt <- dredge(modeloglobal, trace = 2)
m.sel <- model.sel(fitt)
m.sel2 <- m.sel[1:300, ]
library(binovisualfields)
And the code that runs the error (using a subset of the first 300 rows):
par(mar = c(1, 4, 10, 3))
plot(m.sel2, labels = c("Intercept",
                        "YOFE",
                        "'RW Closeness'",
                        "'LW Closeness'",
                        "Age",
                        "SES",
                        "'GAD-7 Score'",
                        "Fantasy",
                        "'Personal Distress'",
                        "'Empathic Concern'",
                        "'Perspective Taking'",
                        "'PHQ-9 Score'",
                        "'Religious Affinity'",
                        "'Agreement with IH'",
                        "'Moral Judgement of IH'",
                        "'Harm Assessment of IH'",
                        "'Agreement with IB'",
                        "'Moral Judgement of IB'",
                        "RMET",
                        "Sex"),
     ylab = expression("Cumulative" ~ italic(w[i]*(AICc))),
     col = c(colfunc(1)), border = "gray30", labAsExpr = TRUE)
EDIT 2: Here's the head of the data frame (the raw dput() output follows further below).
ResponseId Edad Sex Genero Nacion Resid Estrato_1 Gastos salud
1 R_25GEak825Ohmb9G 18 Female Femenino Colombia Colombia 7 Seguro privado
2 R_1kT7u0PALDHV8H6 20 Female Femenino Colombia Colombia 5 Seguro privado
3 R_2cpBb5Ifzj7lVGs 21 Female Femenino Colombia Colombia 6 Seguro privado
4 R_sGqNUMTXTJzwC09 20 Male Masculino Colombia Colombia 5 Seguro del Estado
5 R_2Cpixt9Z5FJkhg1 36 Male Masculino Colombia Colombia 6 Otro (especifique)
6 R_3QFq50SZNs6CePA 18 Female Femenino Colombia Colombia 7 Seguro privado
Relig PracticRel_2 AnEdu Q161 Ecron Epsiq Q183 Eneu Q184
1 Ninguna 0 15 Estudiante 1 0 <NA> 0 <NA>
2 Cristianismo (Catolicismo) 2 15 Estudiante 0 0 <NA> 0 <NA>
3 Cristianismo (Catolicismo) 2 19 Estudiante 0 0 <NA> 0 <NA>
4 Cristianismo (Catolicismo) 2 15 Estudiante 0 0 <NA> 0 <NA>
5 Cristianismo (Catolicismo) 1 17 Empleado de tiempo completo 0 0 <NA> 0 <NA>
6 Cristianismo (Catolicismo) 4 15 Estudiante 0 0 <NA> 0 <NA>
NPviven Sustancias Pviven AdhAS LevantarAS_1 CumplimAS_1 HorasFuera
1 1 1 Padres 1 5 6 Menos de una hora
2 3 0 Padres,Hermanos 1 1 6 Menos de una hora
3 4 0 Padres,Hermanos 1 2 6 Menos de una hora
4 4 0 Padres,Hermanos 1 2 6 Menos de una hora
5 3 0 Pareja,Hijos 1 2 3 Entre cuatro y seis horas
6 3 0 Padres,Hermanos 1 2 6 Entre una y tres horas
Apoyo CV19_1 ContagUd ContagEC Prob_1_contagio Prob_2_familiar_contagio
1 1 No 0 81 100
2 4 No 0 81 35
3 6 No 0 60 80
4 4 No 0 4 15
5 5 No 0 40 40
6 6 No 0 79 86
Prob_3_contagio_poblaciongeneral Caract_1 Caract_2 Inv_3 Caract_3 Caract_4 Caract_5 Caract_6 Caract_8
1 87 4 2 1 6 4 5 4 5
2 81 5 4 3 4 4 5 2 3
3 80 4 4 1 6 6 6 1 2
4 20 6 5 5 2 1 5 1 5
5 60 2 1 2 5 4 3 2 3
6 70 5 4 2 5 6 2 5 6
Caract_9 Caract_11 Caract_14 INV_15 Caract_15 Caract_16 Caract_17 CompPan_1 CompPan_2 CompPan_3
1 5 3 2 4 3 5 5 1 6 1
2 4 5 4 5 2 3 3 4 5 8
3 6 1 6 6 1 6 6 1 1 1
4 5 5 2 6 1 3 1 1 3 2
5 4 1 1 5 2 2 2 2 2 2
6 6 2 3 5 2 6 5 2 7 3
CompPan_4 CompPan_5 CompPan_6 CercanPolDer_1 CercanPolIz_1 IDpol_1 PHQ_TOTAL GAD_TOTAL
1 5 5 7 8 2 5 8 6
2 8 8 8 7 3 5 4 3
3 3 2 4 6 3 4 2 3
4 4 3 3 5 5 4 3 3
5 3 3 2 5 5 4 2 2
6 6 2 7 3 8 3 7 7
INTEROCEPCION_TOTAL BIS BAS_FUN_SEEKING BAS_REWARD_RESPONSIVENESS BAS_DRIVE BAS_TOTAL
1 45 19 14 19 11 44
2 44 20 10 17 14 41
3 24 17 10 19 13 42
4 17 17 9 14 8 31
5 36 21 10 17 11 38
6 41 25 6 17 13 36
IRI_TOMA_DE_PERSPECTIVA IRI_MALESTAR_PERSONAL IRI_FANTASÍA IRI_PREOCUPACIÓN_EMPATICA RMET_TOTAL
1 14 13 14 19 7
2 18 11 14 20 4
3 17 4 10 20 10
4 16 9 11 12 7
5 10 11 7 10 10
6 16 11 16 18 8
PROMEDIO_TIEMPO_REACCION_RMET PROMEDIO_CREENCIA_NFALSA_TODAS PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS
1 2.411750 2.8 2.666667
2 3.348500 2.8 2.333333
3 3.261083 2.4 2.000000
4 6.390500 2.2 1.666667
5 13.212667 1.8 1.333333
6 4.218583 3.6 2.666667
PROMEDIO_CREENCIA_NFALSA_OTRO PROMEDIO_TIEMPOREACCION_NFALSA PROMEDIO_CREENCIA_NVERDADERA_TODAS
1 3.0 4.3438 3.333333
2 3.5 9.4222 3.000000
3 3.0 5.9734 3.666667
4 3.0 10.1448 2.666667
5 2.5 16.3196 1.333333
6 5.0 7.1954 3.333333
PROMEDIO_CREENCIA_NVERDADERA_CORONAVIRUS PROMEDIO_CREENCIA_NVERDADERA_OTRO
1 5 5
2 4 5
3 6 5
4 5 3
5 1 3
6 6 4
PROMEDIO_TIEMPOREACCION_NVERDADERA PROMEDIO_CREENCIA_NMISLEADING_TODAS
1 5.6440 2.666667
2 7.0430 2.666667
3 8.0265 3.666667
4 4.0495 3.000000
5 32.2400 1.666667
6 9.5830 4.333333
PROMEDIO_TIEMPOREACCION_NMISLEADING PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS
1 5.726667 1.000000
2 12.012333 4.000000
3 5.753000 4.333333
4 4.969667 1.333333
5 15.233000 0.000000
6 30.045667 3.666667
PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS DILEMA_BI_CONTROL_BIENOMAL DILEMA_BI_CONTROL_ACTUARIGUAL
1 5.666667 4 7
2 7.666667 5 4
3 9.666667 2 6
4 4.333333 0 2
5 3.666667 -3 2
6 9.333333 4 10
PROMEDIO_DILEMAS_BI_BIENOMAL_JUNTOS PROMEDIO_DILEMAS_BI_ACTUARIGUAL_JUNTOS
1 1.75 6.00
2 4.25 6.75
3 3.75 8.75
4 1.00 3.75
5 -0.75 3.25
6 3.75 9.50
PROMEDIO_DILEMAS_DI_BIENOMAL PROMEDIO_DILEMAS_DI_ACTUARIGUAL PROMEDIO_DILEMAS_DI_DANO
1 0.5000000 6.666667 5.666667
2 1.8333333 7.666667 6.166667
3 0.5000000 5.666667 5.333333
4 1.6666667 5.000000 5.500000
5 0.8333333 4.833333 5.666667
6 0.1666667 5.166667 7.000000
TIEMPOREACCION_DILEMAS_DI TIEMPOREACCION_DILEMAS_BI PROMEDIO_DI_SINPOL_BIENOMAL
1 12.140500 7.89900 0.2
2 9.130667 9.99550 1.2
3 6.998333 9.25175 -1.0
4 1.857833 2.84125 0.4
5 19.014333 32.82850 0.8
6 11.633667 16.92000 0.2
PROMEDIO_DI_SINPOL_ACTUARIGUAL PROMEDIO_DI_SINPOL_DANO COMPRAS_COVID19 PERCEPCION_RIESGO_TOTAL
1 7.00 7.25 4.166667 39
2 8.00 6.75 6.833333 37
3 4.25 7.25 2.000000 42
4 4.50 7.00 2.666667 38
5 5.00 7.75 2.333333 26
6 5.50 7.75 4.500000 46
PERCEPCION_RIESGO_INDICE PROB_CONTAGIO_TOTAL PROMEDIO_DILEMASPOLITICOS_BIENOMAL
1 3.9 89.33333 1.0
2 3.7 65.66667 2.5
3 4.2 73.33333 4.0
4 3.8 13.00000 4.0
5 2.6 46.66667 0.5
6 4.6 78.33333 0.0
PROMEDIO_DILEMASPOLITICOS_ACTUARIGUAL PROMEDIO_DILEMASPOLITICOS_DANO D31_1_DI D32_2_DI D33_3_DI
1 6.0 2.5 -2 4 9
2 7.0 5.0 3 9 7
3 8.5 1.5 -3 3 8
4 6.0 2.5 0 3 8
5 4.5 1.5 -2 4 8
6 4.5 5.5 4 9 7
D41_1_DI D42_2_DI D43_3_DI D51_1_DI D52_2_DI D53_3_DI D61_1_DI D62_2_DI D63_3_DI D71_1_DIP D72_2_DIP
1 -1 7 7 5 10 4 -1 7 9 0 4
2 1 8 9 0 7 4 2 8 7 3 7
3 0 6 7 1 5 6 -3 3 8 3 7
4 0 5 8 4 7 3 -2 3 9 4 3
5 3 7 9 1 3 7 2 6 7 -2 2
6 1 8 6 0 4 9 -4 1 9 -4 1
D73_3_DIP D81_1_DIP D82_2_DIP D83_3_DIP D91_1_BI D92_2_BI D101_1_BI D102_2_BI D111_1_BI D112_2_BI
1 3 2 8 2 -3 4 3 9 3 4
2 6 2 7 4 3 8 5 8 4 7
3 2 5 10 1 5 10 5 10 3 9
4 2 4 9 3 4 9 0 2 0 2
5 2 3 7 1 -1 3 3 6 -2 2
6 8 4 8 3 4 9 5 10 2 9
D121_1_BI D122_2_BI total_iri promedio_falsaymisleading prediccioncompraspercprob
1 4 7 60 2.750 4.249759
2 5 4 63 2.750 4.404450
3 2 6 51 2.875 4.431635
4 0 2 48 2.500 5.143974
5 -3 2 38 1.750 3.765907
6 4 10 61 3.875 4.893797
prediccioncomprasperc
1 4.474456
2 4.439994
3 4.521980
4 4.689385
5 3.762449
6 4.967286
Here is the raw dput() output:
structure(list(ResponseId = c("R_25GEak825Ohmb9G", "R_1kT7u0PALDHV8H6",
"R_2cpBb5Ifzj7lVGs", "R_sGqNUMTXTJzwC09", "R_2Cpixt9Z5FJkhg1",
"R_3QFq50SZNs6CePA"), Edad = c(18, 20, 21, 20, 36, 18), Sex = structure(c(2L,
2L, 2L, 1L, 1L, 2L), .Label = c("Male", "Female"), class = "factor"),
Genero = c("Femenino", "Femenino", "Femenino", "Masculino",
"Masculino", "Femenino"), Nacion = c("Colombia", "Colombia",
"Colombia", "Colombia", "Colombia", "Colombia"), Resid = c("Colombia",
"Colombia", "Colombia", "Colombia", "Colombia", "Colombia"
), Estrato_1 = c(7, 5, 6, 5, 6, 7), `Gastos salud` = c("Seguro privado",
"Seguro privado", "Seguro privado", "Seguro del Estado",
"Otro (especifique)", "Seguro privado"), Relig = c("Ninguna",
"Cristianismo (Catolicismo)", "Cristianismo (Catolicismo)",
"Cristianismo (Catolicismo)", "Cristianismo (Catolicismo)",
"Cristianismo (Catolicismo)"), PracticRel_2 = c(0, 2, 2,
2, 1, 4), AnEdu = c(15, 15, 19, 15, 17, 15), Q161 = c("Estudiante",
"Estudiante", "Estudiante", "Estudiante", "Empleado de tiempo completo",
"Estudiante"), Ecron = c(1, 0, 0, 0, 0, 0), Epsiq = c(0,
0, 0, 0, 0, 0), Q183 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Eneu = c(0,
0, 0, 0, 0, 0), Q184 = c(NA_character_, NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), NPviven = c("1",
"3", "4", "4", "3", "3"), Sustancias = c(1, 0, 0, 0, 0, 0
), Pviven = c("Padres", "Padres,Hermanos", "Padres,Hermanos",
"Padres,Hermanos", "Pareja,Hijos", "Padres,Hermanos"), AdhAS = c(1,
1, 1, 1, 1, 1), LevantarAS_1 = c(5, 1, 2, 2, 2, 2), CumplimAS_1 = c(6,
6, 6, 6, 3, 6), HorasFuera = c("Menos de una hora", "Menos de una hora",
"Menos de una hora", "Menos de una hora", "Entre cuatro y seis horas",
"Entre una y tres horas"), `Apoyo CV19_1` = c(1, 4, 6, 4,
5, 6), ContagUd = c("No", "No", "No", "No", "No", "No"),
ContagEC = c(0, 0, 0, 0, 0, 0), Prob_1_contagio = c(81, 81,
60, 4, 40, 79), Prob_2_familiar_contagio = c(100, 35, 80,
15, 40, 86), Prob_3_contagio_poblaciongeneral = c(87, 81,
80, 20, 60, 70), Caract_1 = c(4, 5, 4, 6, 2, 5), Caract_2 = c(2,
4, 4, 5, 1, 4), Inv_3 = c(1, 3, 1, 5, 2, 2), Caract_3 = c(6,
4, 6, 2, 5, 5), Caract_4 = c(4, 4, 6, 1, 4, 6), Caract_5 = c(5,
5, 6, 5, 3, 2), Caract_6 = c(4, 2, 1, 1, 2, 5), Caract_8 = c(5,
3, 2, 5, 3, 6), Caract_9 = c(5, 4, 6, 5, 4, 6), Caract_11 = c(3,
5, 1, 5, 1, 2), Caract_14 = c(2, 4, 6, 2, 1, 3), INV_15 = c(4,
5, 6, 6, 5, 5), Caract_15 = c(3, 2, 1, 1, 2, 2), Caract_16 = c(5,
3, 6, 3, 2, 6), Caract_17 = c(5, 3, 6, 1, 2, 5), CompPan_1 = c(1,
4, 1, 1, 2, 2), CompPan_2 = c(6, 5, 1, 3, 2, 7), CompPan_3 = c(1,
8, 1, 2, 2, 3), CompPan_4 = c(5, 8, 3, 4, 3, 6), CompPan_5 = c(5,
8, 2, 3, 3, 2), CompPan_6 = c(7, 8, 4, 3, 2, 7), CercanPolDer_1 = c(8,
7, 6, 5, 5, 3), CercanPolIz_1 = c(2, 3, 3, 5, 5, 8), IDpol_1 = c(5,
5, 4, 4, 4, 3), PHQ_TOTAL = c(8, 4, 2, 3, 2, 7), GAD_TOTAL = c(6,
3, 3, 3, 2, 7), INTEROCEPCION_TOTAL = c(45, 44, 24, 17, 36,
41), BIS = c(19, 20, 17, 17, 21, 25), BAS_FUN_SEEKING = c(14,
10, 10, 9, 10, 6), BAS_REWARD_RESPONSIVENESS = c(19, 17,
19, 14, 17, 17), BAS_DRIVE = c(11, 14, 13, 8, 11, 13), BAS_TOTAL = c(44,
41, 42, 31, 38, 36), IRI_TOMA_DE_PERSPECTIVA = c(14, 18,
17, 16, 10, 16), IRI_MALESTAR_PERSONAL = c(13, 11, 4, 9,
11, 11), IRI_FANTASÍA = c(14, 14, 10, 11, 7, 16), IRI_PREOCUPACIÓN_EMPATICA = c(19,
20, 20, 12, 10, 18), RMET_TOTAL = c(7, 4, 10, 7, 10, 8),
PROMEDIO_TIEMPO_REACCION_RMET = c(2.41175, 3.3485, 3.26108333333333,
6.3905, 13.2126666666667, 4.21858333333333), PROMEDIO_CREENCIA_NFALSA_TODAS = c(2.8,
2.8, 2.4, 2.2, 1.8, 3.6), PROMEDIO_CREENCIA_NFALSA_CORONAVIRUS = c(2.66666666666667,
2.33333333333333, 2, 1.66666666666667, 1.33333333333333,
2.66666666666667), PROMEDIO_CREENCIA_NFALSA_OTRO = c(3, 3.5,
3, 3, 2.5, 5), PROMEDIO_TIEMPOREACCION_NFALSA = c(4.3438,
9.4222, 5.9734, 10.1448, 16.3196, 7.1954), PROMEDIO_CREENCIA_NVERDADERA_TODAS = c(3.33333333333333,
3, 3.66666666666667, 2.66666666666667, 1.33333333333333,
3.33333333333333), PROMEDIO_CREENCIA_NVERDADERA_CORONAVIRUS = c(5,
4, 6, 5, 1, 6), PROMEDIO_CREENCIA_NVERDADERA_OTRO = c(5,
5, 5, 3, 3, 4), PROMEDIO_TIEMPOREACCION_NVERDADERA = c(5.644,
7.043, 8.0265, 4.0495, 32.24, 9.583), PROMEDIO_CREENCIA_NMISLEADING_TODAS = c(2.66666666666667,
2.66666666666667, 3.66666666666667, 3, 1.66666666666667,
4.33333333333333), PROMEDIO_TIEMPOREACCION_NMISLEADING = c(5.72666666666667,
12.0123333333333, 5.753, 4.96966666666667, 15.233, 30.0456666666667
), PROMEDIO_DILEMAS_BI_BIENOMAL_CORONAVIRUS = c(1, 4, 4.33333333333333,
1.33333333333333, 0, 3.66666666666667), PROMEDIO_DILEMAS_BI_ACTUARIGUAL_CORONAVIRUS = c(5.66666666666667,
7.66666666666667, 9.66666666666667, 4.33333333333333, 3.66666666666667,
9.33333333333333), DILEMA_BI_CONTROL_BIENOMAL = c(4, 5, 2,
0, -3, 4), DILEMA_BI_CONTROL_ACTUARIGUAL = c(7, 4, 6, 2,
2, 10), PROMEDIO_DILEMAS_BI_BIENOMAL_JUNTOS = c(1.75, 4.25,
3.75, 1, -0.75, 3.75), PROMEDIO_DILEMAS_BI_ACTUARIGUAL_JUNTOS = c(6,
6.75, 8.75, 3.75, 3.25, 9.5), PROMEDIO_DILEMAS_DI_BIENOMAL = c(0.5,
1.83333333333333, 0.5, 1.66666666666667, 0.833333333333333,
0.166666666666667), PROMEDIO_DILEMAS_DI_ACTUARIGUAL = c(6.66666666666667,
7.66666666666667, 5.66666666666667, 5, 4.83333333333333,
5.16666666666667), PROMEDIO_DILEMAS_DI_DANO = c(5.66666666666667,
6.16666666666667, 5.33333333333333, 5.5, 5.66666666666667,
7), TIEMPOREACCION_DILEMAS_DI = c(12.1405, 9.13066666666666,
6.99833333333333, 1.85783333333333, 19.0143333333333, 11.6336666666667
), TIEMPOREACCION_DILEMAS_BI = c(7.899, 9.9955, 9.25175,
2.84125, 32.8285, 16.92), PROMEDIO_DI_SINPOL_BIENOMAL = c(0.2,
1.2, -1, 0.4, 0.8, 0.2), PROMEDIO_DI_SINPOL_ACTUARIGUAL = c(7,
8, 4.25, 4.5, 5, 5.5), PROMEDIO_DI_SINPOL_DANO = c(7.25,
6.75, 7.25, 7, 7.75, 7.75), COMPRAS_COVID19 = c(4.16666666666667,
6.83333333333333, 2, 2.66666666666667, 2.33333333333333,
4.5), PERCEPCION_RIESGO_TOTAL = c(39, 37, 42, 38, 26, 46),
PERCEPCION_RIESGO_INDICE = c(3.9, 3.7, 4.2, 3.8, 2.6, 4.6
), PROB_CONTAGIO_TOTAL = c(89.3333333333333, 65.6666666666667,
73.3333333333333, 13, 46.6666666666667, 78.3333333333333),
PROMEDIO_DILEMASPOLITICOS_BIENOMAL = c(1, 2.5, 4, 4, 0.5,
0), PROMEDIO_DILEMASPOLITICOS_ACTUARIGUAL = c(6, 7, 8.5,
6, 4.5, 4.5), PROMEDIO_DILEMASPOLITICOS_DANO = c(2.5, 5,
1.5, 2.5, 1.5, 5.5), D31_1_DI = c(-2, 3, -3, 0, -2, 4), D32_2_DI = c(4,
9, 3, 3, 4, 9), D33_3_DI = c(9, 7, 8, 8, 8, 7), D41_1_DI = c(-1,
1, 0, 0, 3, 1), D42_2_DI = c(7, 8, 6, 5, 7, 8), D43_3_DI = c(7,
9, 7, 8, 9, 6), D51_1_DI = c(5, 0, 1, 4, 1, 0), D52_2_DI = c(10,
7, 5, 7, 3, 4), D53_3_DI = c(4, 4, 6, 3, 7, 9), D61_1_DI = c(-1,
2, -3, -2, 2, -4), D62_2_DI = c(7, 8, 3, 3, 6, 1), D63_3_DI = c(9,
7, 8, 9, 7, 9), D71_1_DIP = c(0, 3, 3, 4, -2, -4), D72_2_DIP = c(4,
7, 7, 3, 2, 1), D73_3_DIP = c(3, 6, 2, 2, 2, 8), D81_1_DIP = c(2,
2, 5, 4, 3, 4), D82_2_DIP = c(8, 7, 10, 9, 7, 8), D83_3_DIP = c(2,
4, 1, 3, 1, 3), D91_1_BI = c(-3, 3, 5, 4, -1, 4), D92_2_BI = c(4,
8, 10, 9, 3, 9), D101_1_BI = c(3, 5, 5, 0, 3, 5), D102_2_BI = c(9,
8, 10, 2, 6, 10), D111_1_BI = c(3, 4, 3, 0, -2, 2), D112_2_BI = c(4,
7, 9, 2, 2, 9), D121_1_BI = c(4, 5, 2, 0, -3, 4), D122_2_BI = c(7,
4, 6, 2, 2, 10), total_iri = c(60, 63, 51, 48, 38, 61), promedio_falsaymisleading = c(2.75,
2.75, 2.875, 2.5, 1.75, 3.875), prediccioncompraspercprob = c(`1` = 4.24975892576113,
`2` = 4.40445037029013, `3` = 4.43163539588384, `4` = 5.14397435590305,
`5` = 3.76590707825915, `6` = 4.8937968160894), prediccioncomprasperc = c(`1` = 4.47445595202732,
`2` = 4.4399943212902, `3` = 4.52198006754018, `4` = 4.68938453833302,
`5` = 3.7624488758014, `6` = 4.96728571465517)), row.names = c(NA,
6L), class = c("tbl_df", "tbl", "data.frame"))

2 columns into list and sort in R

Let's say we have two lists
x <- c(1, 3, 4, 2, 6, 5)
y <- c(12, 14, 15, 61, 71, 21)
I want to combine them into a list so that we have 2 columns, x and y, and the values should be in the same order.
After we have that list, I want to sort it on y so that the final list looks like
x <- c(1, 3, 4, 5, 2, 6)
y <- c(12, 14, 15, 21, 61, 71)
I am really new to R.
I tried list(x,y) but it seems to make a
list(1, 3, 4, 2, 6, 5, 12, 14, 15, 61, 71, 21)
so I was wondering if someone could help me.
You need to put them in a data.frame first and then use order:
x <- c(1, 3, 4, 2, 6, 5)
y <- c(-12, 14, 15, 61, 71, 21)
DF <- data.frame(x, y)
> DF[order(DF$y),]
x y
1 1 -12
2 3 14
3 4 15
6 5 21
4 2 61
5 6 71
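The same ordering permutation can also be applied to the two vectors directly, without any container (a minimal sketch, not part of the original answer), since order(y) just returns indices:
x <- c(1, 3, 4, 2, 6, 5)
y <- c(12, 14, 15, 61, 71, 21)
o <- order(y)  # the permutation that sorts y ascending
x[o]           # [1] 1 3 4 5 2 6
y[o]           # [1] 12 14 15 21 61 71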
keeping as a list, using lapply:
x <- c(1, 3, 4, 2, 6, 5)
y <- c(12, 14, 15, 61, 71, 21)
l <- list(x = x, y = y)
## thelatemail
lapply(l, `[`, order(l$y))
# $x
# [1] 1 3 4 5 2 6
#
# $y
# [1] 12 14 15 21 61 71
A more explicit version of the short one given by @thelatemail above, though it doesn't preserve the names:
lapply(seq_along(l), function(x) l[[x]][order(l$y)])
# [[1]]
# [1] 1 3 4 5 2 6
#
# [[2]]
# [1] 12 14 15 21 61 71
or rapply:
rapply(l, function(x) x[order(l$y)], how = 'list')
# $x
# [1] 1 3 4 5 2 6
#
# $y
# [1] 12 14 15 21 61 71
