Graph doesn't look correct after running rpart - r

The goal of this code is to run rpart to create a model that predicts wine quality. I then tried to graph the in-sample and out-of-sample loss from validating the model. The resulting plot has the number of leaves on the x-axis and the loss on the y-axis.
Here is my code:
# rm(list=ls())
### Load the dataset -- change the path below to match your own directory.
wine_quality <- read.csv(
  file = "C:/Users/machu/Documents/data_analytics_program/wine_quality.csv"
)
### Some values are missing. Two ways of handling that are given below; each
### has pros and cons, so pick whichever suits you.
## Option 1 (active): discard every row that contains an NA.
wine_quality <- na.omit(wine_quality)
## Option 2 (disabled): fill each NA with the mean of its column.
# for(i in 1:ncol(wine_quality)){
# wine_quality[is.na(wine_quality[,i]), i] <- mean(wine_quality[,i], na.rm = TRUE)
# }
### Turn quality and type into factors so rpart treats them as categories.
wine_quality[["quality"]] <- factor(wine_quality[["quality"]])
wine_quality[["type"]] <- factor(wine_quality[["type"]])
### rpart
library(rpart)
library(tree)
library(RColorBrewer)
library(rattle)
# Train / validation / test split: half of the rows for training, a quarter
# for validation, and whatever remains for testing.
set.seed(99) # fixed seed so the random split is reproducible
n <- nrow(wine_quality)
n1 <- floor(n / 2) # number of training rows
n2 <- floor(n / 4) # number of validation rows
n3 <- n - n1 - n2 # number of test rows
ii <- sample(seq_len(n), n) # random permutation of the row indices

# seq_len() is used instead of 1:k so that a zero-sized split yields zero
# rows (1:0 would silently select rows), and the offsets are written so they
# do not depend on `:` binding tighter than `+`.
# The first column is dropped from every split (in the posted sample data it
# is `X`, a running row number, which must not be used as a predictor).
train <- wine_quality[ii[seq_len(n1)], -1, drop = FALSE]
val <- wine_quality[ii[n1 + seq_len(n2)], -1, drop = FALSE]
test <- wine_quality[ii[n1 + n2 + seq_len(n3)], -1, drop = FALSE]
### Grow a deliberately large classification tree first (tiny cp, small
### minsplit); it is pruned back afterwards.
big.tree <- rpart(
  formula = quality ~ .,
  data = train,
  method = "class",
  control = rpart.control(cp = .0001, minsplit = 5)
)
# Each training row is assigned to one terminal node in `$where`, so the
# number of distinct values is the number of leaves.
nbig <- length(unique(big.tree$where))
cat('Number of leaf nodes: ', nbig, '\n')
# Evaluate one pruned tree per complexity parameter in the big tree's cp
# table, recording its size and its in-sample / validation error.
cpvec <- big.tree$cptable[, "CP"] # cp values to try
ntree <- length(cpvec) # number of cp values = number of trees fit
iltree <- numeric(ntree) # in-sample misclassification rate
oltree <- numeric(ntree) # out-of-sample (validation) misclassification rate
sztree <- numeric(ntree) # size (number of leaves) of each tree

for (i in seq_len(ntree)) {
  if ((i %% 5) == 0) cat('tree i: ', i, "out of", ntree, '\n')
  temptree <- prune(big.tree, cp = cpvec[i]) # tree pruned at this cp
  sztree[i] <- length(unique(temptree$where)) # number of leaves

  # Ask rpart for the predicted class label of every row. The previous
  # version looped with `for (k in nrow(...))`, which visits only the single
  # value nrow(...), so one prediction (the last row's) was recycled against
  # all labels; it also compared a column index from which.max() with the
  # quality labels (adding 2 inside which.max() does not change the result).
  pred_train <- predict(temptree, newdata = train, type = "class")
  pred_val <- predict(temptree, newdata = val, type = "class")

  # Compare as character so the check does not depend on factor level sets.
  iltree[i] <- mean(as.character(pred_train) != as.character(train$quality))
  oltree[i] <- mean(as.character(pred_val) != as.character(val$quality))
}

# The loss is the share of misclassified rows. No square root is taken:
# an RMSE is not meaningful for class labels.
rgl <- range(c(iltree, oltree))
plot(range(sztree), rgl,
  type = "n", # empty frame; the points are added below
  xlab = 'Number of Leaves', ylab = 'Loss'
)
points(sztree, iltree,
  pch = 15, # filled squares
  col = 'red'
)
points(sztree, oltree,
  pch = 16, # filled circles
  col = 'blue'
)
legend("center", # position of the legend
  legend = c('in-sample', 'out-of-sample'), # text in the legend
  pch = c(15, 16), # point types, matching the two series
  col = c('red', 'blue') # point colours, matching the two series
)
Here is an example of the data:
structure(list(X = c(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,
47, 48, 49, 50, 51, 52, 53, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96,
97, 99, 100, 101, 102, 103, 104), type = structure(c(2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L), .Label = c("red", "white"), class = "factor"), fixed.acidity = c(7,
6.3, 8.1, 7.2, 7.2, 8.1, 6.2, 7, 6.3, 8.1, 8.1, 8.6, 7.9, 6.6,
8.3, 6.6, 6.3, 7.4, 6.5, 6.2, 6.4, 6.8, 7.6, 6.6, 7, 6.9, 7,
7.4, 7.2, 8.5, 8.3, 7.4, 5.8, 7.3, 6.5, 7, 7.3, 7.3, 6.7, 6.7,
7, 6.6, 6.7, 7.4, 6.2, 6.2, 7, 6.9, 7.2, 6.6, 6.2, 6.4, 6.9,
7.2, 6, 6.6, 7.4, 6.8, 6, 7, 6.6, 7.2, 6.2, 6.4, 6.7, 6.7, 7.4,
6.2, 6.8, 6, 8.6, 6.7, 7.4, 7.1, 7, 7.4, 6.7, 6.8, 7.1, 7.1,
7.3, 7.1, 7.1, 6.8, 7.1, 7.1, 7.2, 6.1, 6.9, 6.9, 7.5, 7.1, 6,
8.6, 6, 7.4, 7.1, 6, 7.5, 7.4), volatile.acidity = c(0.27, 0.3,
0.28, 0.23, 0.23, 0.28, 0.32, 0.27, 0.3, 0.22, 0.27, 0.23, 0.18,
0.16, 0.42, 0.17, 0.48, 0.34, 0.31, 0.66, 0.31, 0.26, 0.67, 0.27,
0.25, 0.24, 0.28, 0.27, 0.32, 0.24, 0.14, 0.25, 0.27, 0.28, 0.39,
0.33, 0.24, 0.24, 0.23, 0.24, 0.31, 0.24, 0.23, 0.18, 0.45, 0.46,
0.31, 0.19, 0.19, 0.25, 0.16, 0.18, 0.25, 0.21, 0.19, 0.38, 0.2,
0.22, 0.19, 0.47, 0.38, 0.24, 0.35, 0.26, 0.25, 0.23, 0.24, 0.27,
0.3, 0.27, 0.23, 0.23, 0.24, 0.18, 0.32, 0.18, 0.54, 0.22, 0.2,
0.34, 0.22, 0.43, 0.44, 0.25, 0.43, 0.44, 0.39, 0.27, 0.24, 0.21,
0.17, 0.26, 0.34, 0.265, 0.34, 0.25, 0.12, 0.21, 0.305, 0.25),
citric.acid = c(0.36, 0.34, 0.4, 0.32, 0.32, 0.4, 0.16, 0.36,
0.34, 0.43, 0.41, 0.4, 0.37, 0.4, 0.62, 0.38, 0.04, 0.42,
0.14, 0.48, 0.38, 0.42, 0.14, 0.41, 0.32, 0.35, 0.39, 0.48,
0.36, 0.39, 0.34, 0.36, 0.2, 0.43, 0.23, 0.32, 0.39, 0.39,
0.39, 0.39, 0.26, 0.27, 0.26, 0.31, 0.26, 0.25, 0.26, 0.35,
0.31, 0.29, 0.33, 0.35, 0.35, 0.34, 0.26, 0.15, 0.36, 0.24,
0.26, 0.07, 0.15, 0.27, 0.03, 0.24, 0.13, 0.31, 0.29, 0.43,
0.23, 0.28, 0.46, 0.31, 0.29, 0.36, 0.34, 0.3, 0.28, 0.31,
0.34, 0.2, 0.3, 0.61, 0.62, 0.31, 0.61, 0.62, 0.63, 0.43,
0.33, 0.33, 0.32, 0.29, 0.66, 0.36, 0.66, 0.37, 0.32, 0.24,
0.4, 0.37), residual.sugar = c(20.7, 1.6, 6.9, 8.5, 8.5,
6.9, 7, 20.7, 1.6, 1.5, 1.45, 4.2, 1.2, 1.5, 19.25, 1.5,
1.1, 1.1, 7.5, 1.2, 2.9, 1.7, 1.5, 1.3, 9, 1, 8.7, 1.1, 2,
10.4, 1.1, 2.05, 14.95, 1.7, 5.4, 1.2, 17.95, 17.95, 2.5,
2.9, 7.4, 1.4, 1.4, 1.4, 4.4, 4.4, 7.4, 5, 1.6, 1.1, 1.1,
1, 1.3, 11.9, 12.4, 4.6, 1.2, 4.9, 12.4, 1.1, 4.6, 1.4, 1.2,
6.4, 1.2, 2.1, 10.1, 7.8, 4.6, 4.8, 1, 2.1, 10.1, 1.4, 1.3,
8.8, 5.4, 1.4, 16, 6.1, 8.2, 11.8, 11.8, 13.3, 11.8, 11.8,
11, 7.5, 1.7, 1.8, 1.7, 12.4, 15.9, 1.2, 15.9, 13.5, 9.6,
12.1, 18.9, 13.5), chlorides = c(0.045, 0.049, 0.05, 0.058,
0.058, 0.05, 0.045, 0.045, 0.049, 0.044, 0.033, 0.035, 0.04,
0.044, 0.04, 0.032, 0.046, 0.033, 0.044, 0.029, 0.038, 0.049,
0.074, 0.052, 0.046, 0.052, 0.051, 0.047, 0.033, 0.044, 0.042,
0.05, 0.044, 0.08, 0.051, 0.053, 0.057, 0.057, 0.172, 0.173,
0.069, 0.057, 0.06, 0.058, 0.063, 0.066, 0.069, 0.067, 0.062,
0.068, 0.057, 0.045, 0.039, 0.043, 0.048, 0.044, 0.038, 0.092,
0.048, 0.035, 0.044, 0.038, 0.064, 0.04, 0.041, 0.046, 0.05,
0.056, 0.061, 0.063, 0.054, 0.046, 0.05, 0.043, 0.042, 0.064,
0.06, 0.053, 0.05, 0.063, 0.047, 0.045, 0.044, 0.05, 0.045,
0.044, 0.044, 0.049, 0.035, 0.034, 0.04, 0.044, 0.046, 0.034,
0.046, 0.06, 0.054, 0.05, 0.059, 0.06), free.sulfur.dioxide = c(45,
14, 30, 47, 47, 30, 30, 45, 14, 28, 11, 17, 16, 48, 41, 28,
30, 17, 34, 29, 19, 41, 25, 16, 56, 35, 32, 17, 37, 20, 7,
31, 22, 21, 25, 38, 45, 45, 63, 63, 28, 33, 33, 38, 63, 62,
28, 32, 31, 39, 21, 39, 29, 37, 50, 25, 44, 30, 50, 17, 25,
31, 29, 27, 81, 30, 21, 48, 50.5, 31, 9, 30, 21, 31, 20,
26, 21, 34, 51, 47, 42, 54, 52, 69, 54, 52, 55, 65, 47, 48,
51, 62, 26, 15, 26, 52, 64, 55, 44, 52), total.sulfur.dioxide = c(170,
132, 97, 186, 186, 97, 136, 170, 132, 129, 63, 109, 75, 143,
172, 112, 99, 171, 133, 75, 102, 122, 168, 142, 245, 146,
141, 132, 114, 142, 47, 100, 179, 123, 149, 138, 149, 149,
158, 157, 160, 152, 154, 167, 206, 207, 160, 150, 173, 124,
82, 108, 191, 213, 147, 78, 111, 123, 147, 151, 78, 122,
120, 124, 174, 96, 105, 244, 238.5, 201, 72, 96, 105, 87,
69, 103, 105, 114, 166, 164, 207, 155, 152, 202, 155, 152,
156, 243, 136, 136, 148, 240, 164, 80, 164, 192, 162, 164,
170, 192), density = c(1.001, 0.994, 0.9951, 0.9956, 0.9956,
0.9951, 0.9949, 1.001, 0.994, 0.9938, 0.9908, 0.9947, 0.992,
0.9912, 1.0002, 0.9914, 0.9928, 0.9917, 0.9955, 0.9892, 0.9912,
0.993, 0.9937, 0.9951, 0.9955, 0.993, 0.9961, 0.9914, 0.9906,
0.9974, 0.9934, 0.992, 0.9962, 0.9905, 0.9934, 0.9906, 0.9999,
0.9999, 0.9937, 0.9937, 0.9954, 0.9934, 0.9934, 0.9931, 0.994,
0.9939, 0.9954, 0.995, 0.9917, 0.9914, 0.991, 0.9911, 0.9908,
0.9962, 0.9972, 0.9931, 0.9926, 0.9951, 0.9972, 0.991, 0.9931,
0.9927, 0.9934, 0.9903, 0.992, 0.9926, 0.9962, 0.9956, 0.9958,
0.9964, 0.9941, 0.9926, 0.9962, 0.9898, 0.9912, 0.9961, 0.9949,
0.9929, 0.9985, 0.9946, 0.9966, 0.9974, 0.9975, 0.9972, 0.9974,
0.9975, 0.9974, 0.9957, 0.99, 0.9899, 0.9916, 0.9969, 0.9979,
0.9913, 0.9979, 0.9975, 0.9962, 0.997, 1, 0.9975), pH = c(3,
3.3, 3.26, 3.19, 3.19, 3.26, 3.18, 3, 3.3, 3.22, 2.99, 3.14,
3.18, 3.54, 2.98, 3.25, 3.24, 3.12, 3.22, 3.33, 3.17, 3.47,
3.05, 3.42, 3.25, 3.45, 3.38, 3.19, 3.1, 3.2, 3.47, 3.19,
3.37, 3.19, 3.24, 3.13, 3.21, 3.21, 3.11, 3.1, 3.13, 3.22,
3.24, 3.16, 3.27, 3.25, 3.13, 3.36, 3.35, 3.34, 3.32, 3.31,
3.13, 3.09, 3.3, 3.11, 3.36, 3.03, 3.3, 3.02, 3.11, 3.15,
3.22, 3.22, 3.14, 3.33, 3.13, 3.1, 3.32, 3.69, 2.95, 3.33,
3.13, 3.26, 3.31, 2.94, 3.27, 3.39, 3.21, 3.17, 3.33, 3.11,
3.12, 3.22, 3.11, 3.12, 3.09, 3.12, 3.26, 3.25, 3.21, 3.04,
3.14, 2.95, 3.14, 3, 3.4, 3.34, 2.99, 3), sulphates = c(0.45,
0.49, 0.44, 0.4, 0.4, 0.44, 0.47, 0.45, 0.49, 0.45, 0.56,
0.53, 0.63, 0.52, 0.67, 0.55, 0.36, 0.53, 0.5, 0.39, 0.35,
0.48, 0.51, 0.47, 0.5, 0.44, 0.53, 0.49, 0.71, 0.53, 0.4,
0.44, 0.37, 0.42, 0.35, 0.28, 0.36, 0.36, 0.36, 0.34, 0.46,
0.56, 0.56, 0.53, 0.52, 0.52, 0.46, 0.48, 0.44, 0.58, 0.46,
0.35, 0.52, 0.5, 0.36, 0.38, 0.34, 0.46, 0.36, 0.34, 0.38,
0.46, 0.54, 0.49, 0.42, 0.64, 0.35, 0.51, 0.6, 0.71, 0.49,
0.64, 0.35, 0.37, 0.65, 0.56, 0.37, 0.77, 0.6, 0.42, 0.46,
0.45, 0.46, 0.48, 0.45, 0.46, 0.44, 0.47, 0.4, 0.41, 0.44,
0.42, 0.5, 0.36, 0.5, 0.44, 0.41, 0.39, 0.46, 0.44), alcohol = c(8.8,
9.5, 10.1, 9.9, 9.9, 10.1, 9.6, 8.8, 9.5, 11, 12, 9.7, 10.8,
12.4, 9.7, 11.4, 9.6, 11.3, 9.5, 12.8, 11, 10.5, 9.3, 10,
10.4, 10, 10.5, 11.6, 12.3, 10, 10.2, 10.8, 10.2, 12.8, 10,
11.2, 8.6, 8.6, 9.4, 9.4, 9.8, 9.5, 9.5, 10, 9.8, 9.8, 9.8,
9.8, 11.7, 11, 10.9, 10.9, 11, 9.6, 8.9, 10.2, 9.9, 8.6,
8.9, 10.5, 10.2, 10.3, 9.1, 12.6, 9.8, 10.7, 9.5, 9, 9.5,
10, 9.1, 10.7, 9.5, 12.7, 12, 9.3, 9, 10.6, 9.2, 10, 9.5,
8.7, 8.7, 9.7, 8.7, 8.7, 8.7, 9, 12.6, 12.6, 11.5, 9.2, 8.8,
11.4, 8.8, 9.1, 9.4, 9.4, 9, 9.1), quality = structure(c(4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 3L, 3L, 5L, 3L, 5L,
4L, 4L, 3L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 4L, 4L,
4L, 3L, 3L, 3L, 4L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 2L, 3L,
4L, 3L, 4L, 5L, 5L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 3L, 4L,
4L, 3L, 5L, 3L, 6L, 3L, 4L, 3L, 3L, 4L, 6L, 3L, 5L, 5L, 3L,
3L, 4L, 4L, 3L, 4L, 3L, 4L, 4L, 3L, 4L, 4L, 3L, 5L, 5L, 5L,
4L, 4L, 5L, 4L, 3L, 3L, 3L, 3L, 3L), .Label = c("3", "4",
"5", "6", "7", "8", "9"), class = "factor")), row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13",
"14", "15", "16", "17", "19", "20", "21", "22", "23", "24", "25",
"26", "27", "28", "29", "30", "31", "32", "33", "35", "36", "37",
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48",
"49", "50", "51", "52", "53", "54", "56", "57", "58", "59", "60",
"61", "62", "63", "64", "65", "66", "67", "68", "69", "70", "71",
"72", "73", "74", "75", "76", "77", "78", "79", "80", "81", "82",
"83", "84", "85", "86", "88", "89", "90", "91", "92", "93", "94",
"95", "96", "97", "98", "100", "101", "102", "103", "104", "105"
), class = "data.frame")
The result I'm getting on the chart doesn't seem right based on what I'm used to. I think the points on the graph should follow a more rounded curve. At the moment, it just doesn't look right to me. Does anyone have any idea what might be going on? Any feedback is very much appreciated.

Related

Is there a way to have the y-axis in the middle of a mirror plot?

I currently have this graph:
I'd like to know if it is possible to have the y-axis in between the blue part and the red part. I tried using facet_grid at first, but couldn't get any result.
Does anyone have a solution or an idea please?
Here's my graph code:
# Mirror plot: one sex is stored with negative values, so the two areas grow
# in opposite directions from zero; coord_flip() puts age on the vertical axis.
ggplot(
  data = test,
  mapping = aes(x = age, y = agriculteurs_exploitants, fill = sexe)
) +
  geom_area() +
  geom_line(mapping = aes(y = ensemble_des_personnes_en_emploi)) +
  scale_x_continuous(
    labels = c(seq(15, 70, 5), "75 ou +"),
    breaks = seq(15, 75, 5)
  ) +
  # Negative breaks are labelled with their absolute value.
  scale_y_continuous(
    labels = c("3", "2", "1", "0", "1", "2", "3"),
    breaks = seq(-3, 3, by = 1),
    limits = c(-3, 3)
  ) +
  coord_flip()
Because of the coord_flip, the variable I would like to have in between the two sides is the x axis, the variable age.
Here's the data:
structure(list(age = c(15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
73, 74, 75, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27,
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
), sexe = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L
), .Label = c("Femme", "Homme"), class = "factor"), agriculteurs_exploitants = c(0,
0, 0.01, 0.01, 0.01, 0.03, 0.03, 0.05, 0.06, 0.1, 0.1, 0.13,
0.18, 0.2, 0.24, 0.22, 0.24, 0.27, 0.3, 0.3, 0.37, 0.36, 0.39,
0.37, 0.4, 0.45, 0.45, 0.5, 0.6, 0.65, 0.61, 0.72, 0.73, 0.7,
0.81, 0.82, 0.92, 1, 1.03, 1.03, 1.16, 1.1, 1.1, 1.12, 1.12,
0.92, 0.78, 0.52, 0.43, 0.33, 0.31, 0.25, 0.21, 0.16, 0.17, 0.12,
0.07, 0.07, 0.06, 0.05, 0.49, -0.01, -0.02, -0.03, -0.06, -0.11,
-0.18, -0.24, -0.33, -0.4, -0.46, -0.6, -0.64, -0.75, -0.87,
-0.89, -1.07, -1.14, -1.21, -1.25, -1.32, -1.32, -1.32, -1.37,
-1.44, -1.47, -1.49, -1.62, -1.67, -1.77, -1.89, -2.01, -2.11,
-2.19, -2.23, -2.35, -2.48, -2.7, -2.72, -2.84, -2.82, -2.84,
-2.75, -2.76, -2.72, -2.56, -1.99, -1.58, -1.08, -0.83, -0.66,
-0.51, -0.45, -0.35, -0.29, -0.22, -0.18, -0.11, -0.1, -0.08,
-0.06, -0.56), ensemble_des_personnes_en_emploi = c(0.02, 0.05,
0.07, 0.17, 0.29, 0.42, 0.52, 0.65, 0.75, 0.85, 0.94, 0.99, 1.03,
1.06, 1.09, 1.1, 1.11, 1.13, 1.16, 1.18, 1.19, 1.21, 1.2, 1.19,
1.19, 1.2, 1.23, 1.27, 1.32, 1.34, 1.36, 1.35, 1.32, 1.33, 1.33,
1.32, 1.32, 1.31, 1.28, 1.26, 1.23, 1.18, 1.12, 1.06, 0.97, 0.74,
0.57, 0.34, 0.26, 0.2, 0.12, 0.08, 0.06, 0.05, 0.04, 0.03, 0.03,
0.02, 0.02, 0.01, 0.14, -0.06, -0.13, -0.17, -0.29, -0.4, -0.54,
-0.63, -0.72, -0.81, -0.9, -0.99, -1.04, -1.09, -1.12, -1.15,
-1.18, -1.19, -1.21, -1.24, -1.26, -1.28, -1.29, -1.29, -1.28,
-1.28, -1.28, -1.32, -1.35, -1.4, -1.42, -1.44, -1.43, -1.41,
-1.39, -1.39, -1.39, -1.38, -1.37, -1.35, -1.32, -1.27, -1.22,
-1.16, -1.08, -0.97, -0.64, -0.49, -0.35, -0.27, -0.21, -0.15,
-0.12, -0.09, -0.08, -0.06, -0.05, -0.04, -0.03, -0.02, -0.02,
-0.14)), row.names = c(NA, -122L), class = c("tbl_df", "tbl",
"data.frame"))
Thanks!
One option to get a shared axis which I borrowed from here would be to make use of ggpol::facet_share:
library(ggpol)
#> Loading required package: ggplot2
# Fix the level order of sexe (Homme, then Femme) before faceting on it.
test$sexe <- factor(test$sexe, levels = c("Homme", "Femme"))
ggplot(
  data = test,
  mapping = aes(x = age, y = agriculteurs_exploitants, fill = sexe)
) +
  geom_area() +
  geom_line(mapping = aes(y = ensemble_des_personnes_en_emploi)) +
  # Invisible points at -3 (Homme) and 3 (Femme) -- presumably there to make
  # each free panel scale reach the same outer limit.
  geom_point(
    mapping = aes(y = ifelse(sexe == "Homme", -3, 3)),
    color = "transparent"
  ) +
  scale_x_continuous(
    labels = c(seq(15, 70, 5), "75 ou +"),
    breaks = seq(15, 75, 5)
  ) +
  # No padding around the data; negative breaks are shown as absolute values.
  scale_y_continuous(
    labels = abs(-3:3),
    breaks = -3:3,
    expand = expansion(mult = c(0, 0))
  ) +
  facet_share(~sexe, scales = "free_x") +
  coord_flip() +
  labs(x = NULL)
Maybe something like this, if you're willing to convert the y axis from continuous to discrete.
# This snippet uses %>% / mutate() from dplyr and the ggplot2 layers, so load
# both explicitly rather than relying on an earlier snippet having done so.
library(dplyr)
library(ggplot2)

dat <- test
# Bin age into 11 left-closed intervals; the bin labels are drawn down the
# middle of the plot in place of a conventional axis.
dat$cut_age <- cut(dat$age, breaks = 11, right = FALSE)
dat %>%
  # Shift every value 0.2 further away from zero (presumably to leave a gap
  # around y = 0 for the bin labels). Columns are referenced directly rather
  # than through `dat$`, so the expressions always use the piped data.
  mutate(
    agriculteurs_exploitants = ifelse(
      agriculteurs_exploitants >= 0,
      agriculteurs_exploitants + .2,
      agriculteurs_exploitants - .2
    ),
    ensemble_des_personnes_en_emploi = ifelse(
      ensemble_des_personnes_en_emploi >= 0,
      ensemble_des_personnes_en_emploi + .2,
      ensemble_des_personnes_en_emploi - .2
    )
  ) %>%
  ggplot() +
  #geom_area(aes(x = age, y = agriculteurs_exploitants, fill = sexe)) +
  geom_boxplot(
    aes(x = cut_age, y = agriculteurs_exploitants, fill = sexe),
    alpha = .3
  ) +
  geom_point(
    aes(x = cut_age, y = ensemble_des_personnes_en_emploi, color = sexe)
  ) +
  # Bin labels written at y = 0, i.e. between the two halves.
  geom_text(aes(x = cut_age, y = 0, label = cut_age), size = 2.5) +
  coord_flip() +
  scale_y_continuous(
    limits = c(-3, 3),
    breaks = c(-3, -2, -1, 0, 1, 2, 3),
    labels = c("3", "2", "1", "0", "1", "2", "3")
  ) +
  scale_color_manual(values = c("firebrick", "steelblue3")) +
  theme_classic() +
  # Hide the regular age axis entirely; the geom_text() labels replace it.
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y.left = element_blank(),
    axis.line.y = element_blank(),
    axis.title.y = element_blank()
  )
Also tells a bit more of the story...

which() and intersect() in order to get certain variables out of a dataset

I have a dataset I am trying to get only a certain portion of according to specific criteria but am stuck on how to do so. I think that it has something to do with using the which() and intersect() functions, but I am unfamiliar with them and don't know how to use them.
I have a dataset with a bunch of states, the amounts of gun deaths in the state and their "Brady Score". I am trying to get the states with gun deaths under 4 (per 100,000) and Brady Scores under 0, and then arrange them in a table.
This is the data I am working with:
dput(Guns)
structure(list
(Jurisdiction = structure(1:51, .Label = c("Alabama",
"Alaska", "Arizona", "Arkansas", "California", "Colorado", "Connecticut",
"D.C.", "Delaware", "Florida", "Georgia", "Hawaii", "Idaho",
"Illinois", "Indiana", "Iowa", "Kansas", "Kentucky", "Louisiana",
"Maine", "Maryland", "Massachusetts", "Michigan", "Minnesota",
"Mississippi", "Missouri", "Montana", "Nebraska", "Nevada", "New Hampshire",
"New Jersey", "New Mexico", "New York", "North Carolina", "North Dakota",
"Ohio", "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island",
"South Carolina", "South Dakota", "Tennessee", "Texas", "Utah",
"Vermont", "Virginia", "Washington", "West Virginia", "Wisconsin",
"Wyoming"), class = "factor"),
Homicide.rate = c(7.1, 4.1, 5.5,
5.9, 5, 3.1, 4.1, 13.9, 6.2, 5.2, 5.9, 2.1, 1.8, 5.8, 4.7, 1.5,
2.9, 4.5, 10.8, 1.9, 6.3, 1.8, 7, 1.8, 7.4, 6.5, 2.7, 2.9, 4.5,
1.1, 4.4, 5.6, 3.5, 4.9, 4, 4.3, 5.7, 2.4, 5.4, 3.2, 6.9, 3,
6, 4.4, 1.8, 1.3, 3.8, 3, 3.9, 3, 2.4),
Gun.accident.rate = c(0.44,
0, 0.1, 0.41, 0.08, 0.18, 0, 0, 0, 0.13, 0.29, 0, 0.38, 0.14,
0.22, 0.11, 0.2, 0.4, 0.75, 0, 0.07, 0, 0.09, 0.08, 0.65, 0.33,
0.43, 0.29, 0.13, 0, 0, 0, 0.02, 0.29, 0, 0.14, 0.39, 0.13, 0.28,
0, 0.41, 0, 0.37, 0.18, 0, 0, 0.13, 0.12, 0.23, 0.09, 0),
Sum = c(7.5,
4.1, 5.6, 6.3, 5.1, 3.3, 4.1, 13.9, 6.2, 5.3, 6.2, 2.1, 2.2,
5.9, 4.9, 1.6, 3.1, 4.9, 11.6, 1.9, 6.4, 1.8, 7.1, 1.9, 8.1,
6.8, 3.1, 3.2, 4.6, 1.1, 4.4, 5.6, 3.5, 5.2, 4, 4.4, 6.1, 2.5,
5.7, 3.2, 7.3, 3, 6.4, 4.6, 1.8, 1.3, 3.9, 3.1, 4.1, 3.1, 2.4
),
Brady.score = c(3.5, -7, -8, 1, 75, 14.5, 70, 50, 34.5, 3,
2, 58.5, 0, 45, 4.5, 14, -4, -3.5, -2, 3, 66.5, 60.5, 15, 19.5,
-4, -0.5, -3, 6.5, 1.5, 5.5, 68.5, 0, 65.5, 1.5, 2, 10, 1, 11,
20, 41.5, 1, -4.5, 2, 1.5, -2, -4, 7, 19.5, 3, 13, -5),
Brady.grade = structure(c(8L,
10L, 10L, 10L, 1L, 5L, 1L, 2L, 3L, 10L, 10L, 4L, 10L, 2L, 8L,
6L, 10L, 10L, 10L, 10L, 1L, 4L, 5L, 5L, 10L, 10L, 10L, 7L, 10L,
8L, 1L, 10L, 1L, 10L, 10L, 7L, 10L, 9L, 5L, 3L, 10L, 10L, 10L,
10L, 10L, 10L, 7L, 5L, 10L, 6L, 10L), .Label = c("A?", "B", "B?",
"B+", "C", "C?", "D", "D?", "D+", "F"), class = "factor")),
class = "data.frame", row.names = c(NA,
-51L))
So far I have this:
# Logical masks: TRUE for states with a negative Brady score / a Sum below 4.
LowB <- Guns$Brady.score < 0
LowD <- Guns$Sum < 4
# The values (not the rows) that pass each mask.
LowB1 <- Guns$Brady.score[LowB]
LowD1 <- Guns$Sum[LowD]
# Note: intersect() compares the numeric values of the two vectors with each
# other; it does not match them up by state.
intersect(LowB1, LowD1)
I have succeeded in converting the Brady Scores and Gun Deaths (Sum) into numerical variables, but now have no idea how to align them into a table where each state matches to its correspondent Brady Score and Gun Death sum.
To reiterate, what I want to get at the end is a table with the states that have both a Brady score below a certain number and a Sum below a certain number, where all three variables correspond to each other. Is there any way I can do this? Thank you.
We can use subset and select the columns that we need.
# Keep the rows where both conditions hold, and only the three columns of
# interest.
subset(
  Guns,
  subset = Brady.score < 0 & Sum < 4,
  select = c(Jurisdiction, Sum, Brady.score)
)
# Jurisdiction Sum Brady.score
#17 Kansas 3.1 -4.0
#27 Montana 3.1 -3.0
#42 South Dakota 3.0 -4.5
#45 Utah 1.8 -2.0
#46 Vermont 1.3 -4.0
#51 Wyoming 2.4 -5.0
In dplyr, we can use filter and select.
library(dplyr)
# Comma-separated conditions in filter() are combined with AND, so only rows
# meeting both are kept; select() then keeps the three columns of interest.
Guns %>%
  filter(Brady.score < 0, Sum < 4) %>%
  select(Jurisdiction, Sum, Brady.score)
An additional Base R method:
# which() turns the logical condition into row positions (dropping any NA),
# and those rows are returned with all columns. The data frame is `Guns`;
# the bare name `df` would refer to the function stats::df instead.
Guns[which(Guns$Brady.score < 0 & Guns$Sum < 4), ]

How to determine a value in a column immediately before a value in another column in R?

Plot
Following is a plot of speeds of two vehicles over time. The subject vehicle (blue) is following the lead vehicle (red) in the same lane. So, the speed profile of subject vehicle is very similar to lead vehicle's.
I have manually labelled the points where a vehicle changes its speed by acceleration/deceleration. Now, I want to determine these points from the data. Following are the sample data:
Data
> dput(veh)
structure(list(Time = c(287, 288, 289, 290, 291, 292, 293, 294,
295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307,
308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320,
321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331), fit_p = c(NA,
NA, NA, 8.86, 8.5, 8.15, 7.79, 7.44, 7.08, 6.73, 6.38, 6.1, 6.48,
6.86, 7.24, 7.63, 8.01, 8.38, 8.58, 8.68, 8.7, 8.53, 8.33, 8.12,
7.92, 7.71, 7.74, 8.1, 8.45, 8.8, 9.15, 9.29, 9.22, 9.16, 9.09,
9.13, 9.25, 9.37, 9.49, 9.51, 9.34, 9.17, NA, NA, NA), psi_p2 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 298, NA, NA, NA, NA,
NA, 304, 305, NA, 307, NA, NA, NA, NA, NA, 313, NA, NA, NA, 317,
NA, NA, NA, 321, NA, NA, NA, NA, 326, NA, NA, NA, NA, NA), slo_p = c(-0.35,
-0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35,
-0.35, 0.38, 0.38, 0.38, 0.38, 0.38, 0.38, 0.2, 0.02, 0.02, -0.2,
-0.2, -0.2, -0.2, -0.2, -0.2, 0.35, 0.35, 0.35, 0.35, -0.06,
-0.06, -0.06, -0.06, 0.12, 0.12, 0.12, 0.12, 0.12, -0.17, -0.17,
-0.17, -0.17, -0.17, -0.17), fit_v = c(NA, NA, NA, 9.16, 8.57,
7.99, 7.4, 7.23, 7.13, 7.04, 6.94, 6.85, 6.75, 6.66, 7.07, 7.57,
8.06, 8.56, 9.04, 9.15, 9.26, 9.37, 9.15, 8.92, 8.68, 8.45, 8.22,
7.99, 8.03, 8.24, 8.55, 8.87, 9.02, 8.96, 8.89, 8.82, 8.75, 8.99,
9.28, 9.47, 9.42, 9.37, NA, NA, NA), psi_v2 = c(NA, NA, NA, NA,
NA, NA, 293, NA, NA, NA, NA, NA, NA, 300, NA, NA, NA, NA, 305,
NA, NA, 308, NA, NA, NA, NA, NA, 314, 315, 316, NA, NA, 319,
NA, NA, NA, 323, NA, NA, 326, NA, NA, NA, NA, NA), slo_v = c(-0.59,
-0.59, -0.59, -0.59, -0.59, -0.59, -0.1, -0.1, -0.1, -0.1, -0.1,
-0.1, -0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.11, 0.11, 0.11, -0.23,
-0.23, -0.23, -0.23, -0.23, -0.23, 0.04, 0.16, 0.32, 0.32, 0.32,
-0.07, -0.07, -0.07, -0.07, 0.29, 0.29, 0.29, -0.05, -0.05, -0.05,
-0.05, -0.05, -0.05)), .Names = c("Time", "fit_p", "psi_p2",
"slo_p", "fit_v", "psi_v2", "slo_v"), row.names = c(NA, -45L), class = "data.frame")
In the column psi_v2, I have the time where subject vehicle changed the speed. These are all the S points. The points where the lead vehicle changed the speed are in the column psi_p2. But, I only want to determine the location of those points in psi_p2 which happened immediately before point S. These points are all the L points on the plot. For instance, S1 happened at psi_v2=300, therefore, L1 is 298 in psi_p2.
Question
I guess that I need to use which() to determine the relevant points from psi_p2. But I don't know how to code the part where only the "immediately before" point is picked.
Once the points are identified, I want to check if the subject vehicle accelerated in response to the lead vehicle's acceleration. The acceleration of the subject vehicle is in slo_v and that of the lead vehicle is in slo_p. Example: For S1, slo_v = 0.5, and for L1, slo_p = 0.38. Since the subject vehicle accelerated due to the acceleration of the lead vehicle, we call it "opening" (or "closing" in the opposite case).
So, my desired output is:
structure(list(Time = 287:331, fit_p = c(NA, NA, NA, 8.86, 8.5,
8.15, 7.79, 7.44, 7.08, 6.73, 6.38, 6.1, 6.48, 6.86, 7.24, 7.63,
8.01, 8.38, 8.58, 8.68, 8.7, 8.53, 8.33, 8.12, 7.92, 7.71, 7.74,
8.1, 8.45, 8.8, 9.15, 9.29, 9.22, 9.16, 9.09, 9.13, 9.25, 9.37,
9.49, 9.51, 9.34, 9.17, NA, NA, NA), psi_p2 = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 298L, NA, NA, NA, NA, NA, 304L, 305L,
NA, 307L, NA, NA, NA, NA, NA, 313L, NA, NA, NA, 317L, NA, NA,
NA, 321L, NA, NA, NA, NA, 326L, NA, NA, NA, NA, NA), slo_p = c(-0.35,
-0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35, -0.35,
-0.35, 0.38, 0.38, 0.38, 0.38, 0.38, 0.38, 0.2, 0.02, 0.02, -0.2,
-0.2, -0.2, -0.2, -0.2, -0.2, 0.35, 0.35, 0.35, 0.35, -0.06,
-0.06, -0.06, -0.06, 0.12, 0.12, 0.12, 0.12, 0.12, -0.17, -0.17,
-0.17, -0.17, -0.17, -0.17), fit_v = c(NA, NA, NA, 9.16, 8.57,
7.99, 7.4, 7.23, 7.13, 7.04, 6.94, 6.85, 6.75, 6.66, 7.07, 7.57,
8.06, 8.56, 9.04, 9.15, 9.26, 9.37, 9.15, 8.92, 8.68, 8.45, 8.22,
7.99, 8.03, 8.24, 8.55, 8.87, 9.02, 8.96, 8.89, 8.82, 8.75, 8.99,
9.28, 9.47, 9.42, 9.37, NA, NA, NA), psi_v2 = c(NA, NA, NA, NA,
NA, NA, 293L, NA, NA, NA, NA, NA, NA, 300L, NA, NA, NA, NA, 305L,
NA, NA, 308L, NA, NA, NA, NA, NA, 314L, 315L, 316L, NA, NA, 319L,
NA, NA, NA, 323L, NA, NA, 326L, NA, NA, NA, NA, NA), slo_v = c(-0.59,
-0.59, -0.59, -0.59, -0.59, -0.59, -0.1, -0.1, -0.1, -0.1, -0.1,
-0.1, -0.1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.11, 0.11, 0.11, -0.23,
-0.23, -0.23, -0.23, -0.23, -0.23, 0.04, 0.16, 0.32, 0.32, 0.32,
-0.07, -0.07, -0.07, -0.07, 0.29, 0.29, 0.29, -0.05, -0.05, -0.05,
-0.05, -0.05, -0.05), label = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 9L, 1L, 1L, 1L, 3L, 10L, 1L,
4L, 11L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 12L, 6L, 1L, 13L, 1L, 7L,
1L, 14L, 1L, 1L, 8L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "L1",
"L2", "L3", "L4", "L5", "L6", "L7&S7", "S1", "S2", "S3", "S4",
"S5", "S6"), class = "factor"), condition = structure(c(1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L,
3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 1L,
1L, 1L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L), .Label = c("", "closing",
"opening"), class = "factor")), .Names = c("Time", "fit_p", "psi_p2",
"slo_p", "fit_v", "psi_v2", "slo_v", "label", "condition"), class = "data.frame", row.names = c(NA,
-45L))
Kindly guide me what function should I use to identify these points? I prefer using dplyr because I have multiple pairs like this example. An operation for one data frame can then be used on all others using group_by().

Random effects model in R - error

I am running an econometric model with panel data in R. I am using the plm package, and the pooled model and the fixed effects model work great. But I get this error when trying to fit a random effects model and I don't know how to fix it.
There is my whole dataset and code:
auto <- structure(list(Country = structure(c(1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 11L, 11L, 11L, 11L), .Label = c("Bahrain", "Cuba",
"China", "Kuwait", "Lao PDR", "Qatar", "Saudi Arabia", "Swaziland",
"Syria", "United Arab Emirates", "Vietnam"), class = "factor"),
Year = structure(c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L,
3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L,
1L, 2L, 3L, 4L), .Label = c("1971", "1981", "1991", "2001"
), class = "factor"), AVG_GR_. = c(2.44, -2.93, 1.77, -1.04,
3.17, 3.5, -1.59, 5.13, 4.29, 7.51, 9.42, 9.83, -7.39, -5.52,
10.72, -0.14, 1.77, 3.38, 3.68, 5.33, -1.55, -5.72, 4.64,
1.5, 6.06, -5.25, 0.54, 2.28, 6.99, 2.82, 0.82, 1.12, 6.72,
-2, 3.09, 2.15, -1.06, -4.88, 0.2, -6.04, 1.61, 3.21, 5.88,
6.24), GDP_PC = c(17444.65, 19550.76, 15970.05, 18212.71,
2067.93, 3127.98, 3221.25, 3081.73, 153.5, 231.14, 491.26,
1207.52, 70184.35, 23911.92, 9559.35, 27681.03, 162.06, 212.46,
261.98, 386.38, 72617.74, 55370.39, 31970, 51090.02, 13752.55,
21124.79, 12891.51, 12446.49, 881.75, 1595.82, 1995.8, 2191.36,
738.63, 1349.2, 1057.84, 1380.2, 88377.72, 75348.77, 43306.13,
45038.43, 164.15, 194.45, 267.17, 481.92), POP_. = c(5.39,
3.26, 3.03, 6.49, 1.22, 0.75, 0.5, 0.13, 1.91, 1.71, 0.95,
0.6, 6.22, 4.16, -0.66, 4.61, 1.93, 2.7, 2.42, 1.73, 7.44,
7.9, 2.23, 11.57, 5.43, 5.12, 2.2, 3.08, 3.07, 3.64, 2.12,
1.16, 3.45, 3.35, 2.77, 2.78, 15.96, 5.94, 5.3, 10.95, 2.29,
2.3, 1.62, 0.97), CONSUMP_. = c(64.21, 52.81, 51.47, 40.51,
54.58, 54.96, 62.74, 54.02, 51.72, 51.01, 45.63, 39, 27.44,
48.61, 49.76, 35.74, 90.19, 90.65, 89.15, 70.38, 21.33, 26.27,
26.84, 16.81, 22.96, 46.85, 44.2, 31.61, 54.77, 74.9, 80.42,
79.36, 67.09, 69.71, 69.92, 61.26, 15.28, 33.07, 46.79, 59.97,
90, 89.89, 73.9, 65.33), GOV_CON_. = c(11.1, 19.55, 19.21,
14.27, 31.67, 31.66, 29.47, 34.91, 12.99, 14.11, 14.53, 14.1,
12.04, 23.7, 48.98, 18.45, 8.05, 8.29, 7.21, 8.96, 20.47,
36.49, 31.09, 14.5, 16.02, 30.12, 26.94, 22.53, 19.07, 17.11,
17.65, 14.76, 19.93, 19.6, 12.75, 12.67, 10.87, 19.27, 16.99,
7.66, 6.73, 6.85, 7.46, 6.19), CAP_FORM_. = c(34.15, 32.51,
24.24, 26.56, 25.94, 25.49, 10.76, 10.7, 34.57, 35.19, 37.79,
42.21, 13.55, 18.68, 17.9, 17.28, 7.57, 10.24, 16.68, 30.28,
22.49, 18.37, 26.13, 36.58, 22.59, 22.7, 20.49, 23.68, 30.77,
21.42, 17.65, 14.55, 25.34, 20.68, 22.53, 23.48, 29.93, 26.28,
27.29, 22.63, 14.45, 14.46, 25.22, 36.44), NAT_RES_. = c(27.42,
20.18, 17.52, 23.34, 1.81, 1.87, 2.5, 3.42, 41.09, 38.83,
40.09, 17.91, 66.53, 41.25, 35.94, 48.41, 5.28, 4.2, 3.01,
10.15, 63.5, 40.84, 39.7, 54.17, 57.89, 31.24, 32.74, 42.77,
6.47, 3.64, 2.25, 1.32, 9.55, 9.14, 14.19, 22.92, 51.04,
37.08, 27.99, 31.36, 3.95, 4.17, 8.39, 13.57), TRADE = c(1.69,
1.48, 1.37, 1.34, 0.77, 0.76, 0.33, 0.34, 0.11, 0.21, 0.35,
0.58, 1.03, 0.99, 1.09, 0.9, 0.15, 0.23, 0.63, 0.57, 0.95,
0.82, 0.85, 0.91, 0.89, 0.76, 0.66, 0.8, 1.47, 1.54, 1.42,
1.62, 0.51, 0.44, 0.66, 0.71, 1.1, 0.97, 1.37, 1.23, 0.62,
0.62, 0.86, 1.43), INFL_. = c(13.26, 3.24, 1.64, 5.65, 5.22,
0.11, 5.49, 2.44, 1.17, 5.72, 6.85, 4.2, 31.52, -0.47, 3.25,
7.29, 43.86, 56.9, 32.37, 7.95, 20.84, -1.59, 3.18, 8.65,
26.67, -1.16, 2.4, 5.73, 10.71, 11.36, 10.97, 8.04, 11.62,
17.43, 6.74, 6.78, 28.31, 1.25, 2.03, 6.94, 7.05, 156.6,
18.99, 9.45), LIFE_EXP = c(67.39, 71.47, 73.66, 75.55, 72.28,
74.46, 75.6, 77.81, 65.7, 68.43, 70.64, 73.99, 68.17, 71.25,
72.92, 73.79, 47.79, 51.39, 58.38, 64.68, 71.16, 74.31, 76.18,
77.53, 58.65, 66.77, 71.16, 74.03, 51.33, 57.45, 54.96, 46.81,
63.01, 68.42, 72.03, 74.56, 65.49, 70.19, 73.24, 75.66, 62.69,
69.09, 72.28, 74.66), EDU_T = c(0.68, 1.59, 2.63, 3.14, 0.75,
1.46, 2.81, 3.84, 0.37, 0.62, 1.08, 1.71, 1.41, 2.71, 3.53,
3.54, 0.16, 0.35, 0.65, 1, 1.61, 2.11, 2.5, 3.06, 1.06, 1.44,
2.13, 2.66, 0.35, 0.74, 1.07, 0.91, 0.34, 0.74, 1.27, 1.3,
1.14, 1.65, 2.61, 3.85, 0.67, 1.21, 0.67, 1.54)), .Names = c("Country",
"Year", "AVG_GR_.", "GDP_PC", "POP_.", "CONSUMP_.", "GOV_CON_.",
"CAP_FORM_.", "NAT_RES_.", "TRADE", "INFL_.", "LIFE_EXP", "EDU_T"
), row.names = c(1L, 2L, 3L, 4L, 9L, 10L, 11L, 12L, 5L, 6L, 7L,
8L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 25L, 26L, 27L, 28L,
29L, 30L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L,
42L, 43L, 44L, 45L, 46L, 47L, 48L), class = c("plm.dim", "data.frame"
))
# Response and regressor matrices. The model below no longer uses them (it
# names the columns directly), but they are still defined because the
# follow-up code further down refers to Y and X.
Y <- cbind(auto$AVG_GR_.)
X <- cbind(auto$GDP_PC, auto$POP_., auto$CONSUMP_., auto$GOV_CON_.,
  auto$CAP_FORM_., auto$NAT_RES_., auto$TRADE, auto$INFL_.,
  auto$LIFE_EXP, auto$EDU_T)

# Declare the panel structure (individual = Country, time = Year).
# pdata.frame() replaces the deprecated plm.data().
pdata <- pdata.frame(auto, index = c("Country", "Year"))

# Random-effects model. The formula names columns of `pdata`, so plm() works
# on the panel data it is given instead of on matrices looked up in the
# global environment.
# NOTE(review): there are 11 countries and 10 regressors plus an intercept.
# The default variance-components estimator may have no residual degrees of
# freedom left in its between regression, which would explain the
# "missing value where TRUE/FALSE needed" error -- if the error persists,
# try fewer regressors or another `random.method`; confirm against the plm
# documentation.
random <- plm(
  AVG_GR_. ~ GDP_PC + POP_. + CONSUMP_. + GOV_CON_. + CAP_FORM_. +
    NAT_RES_. + TRADE + INFL_. + LIFE_EXP + EDU_T,
  data = pdata,
  model = "random"
)
Everything is OK until the last row. I get this error:
Error in if (sigma2$id < 0) stop(paste("the estimated variance of the", :
missing value where TRUE/FALSE needed
Thanks for your help :)
I was looking for help myself, but solved your problem. The first column has row names automatically filled in. You need to delete the first column.
This worked:
# Console transcript: keep only columns 2 to 13 of pdata (i.e. drop column 1),
# then refit the random-effects model on the reduced data.
> pdata <- pdata[,2:13];
> random <- plm(Y~X, data=pdata, model="random")
Just replace last row of your code with the above two lines.

geom_line won't plot inside barplot

I want to make a barplot with two line graphs included. The dput of my data:
structure(list(month = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L), .Label = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"), class = "factor"), aantal = c(84, 85, 83, 97, 104, 97, 65, 69, 63, 55, 65, 77, 84, 85, 83, 97, 104, 97, 65, 69, 63, 55, 65, 77, 84, 85, 83, 97, 104, 97, 65, 69, 63, 55, 65, 77, 84, 85, 83, 97, 104, 97, 65, 69, 63, 55, 65, 77), gem.mag = c(1.36, 1.44, 1.21, 1.13, 1.25, 1.29, 1.34, 1.38, 1.33, 1.37, 1.31, 1.25, 1.36, 1.44, 1.21, 1.13, 1.25, 1.29, 1.34, 1.38, 1.33, 1.37, 1.31, 1.25, 1.36, 1.44, 1.21, 1.13, 1.25, 1.29, 1.34, 1.38, 1.33, 1.37, 1.31, 1.25, 1.36, 1.44, 1.21, 1.13, 1.25, 1.29, 1.34, 1.38, 1.33, 1.37, 1.31, 1.25), temp = c(3.1, 3.3, 6.2, 9.2, 13.1, 15.6, 17.9, 17.5, 14.5, 10.7, 6.7, 3.7, 3.1, 3.3, 6.2, 9.2, 13.1, 15.6, 17.9, 17.5, 14.5, 10.7, 6.7, 3.7, 3.1, 3.3, 6.2, 9.2, 13.1, 15.6, 17.9, 17.5, 14.5, 10.7, 6.7, 3.7, 3.1, 3.3, 6.2, 9.2, 13.1, 15.6, 17.9, 17.5, 14.5, 10.7, 6.7, 3.7), difftemp = c(14.9, 14.7, 11.8, 8.8, 4.9, 2.4, 0.1, 0.5, 3.5, 7.3, 11.3, 14.3, 14.9, 14.7, 11.8, 8.8, 4.9, 2.4, 0.1, 0.5, 3.5, 7.3, 11.3, 14.3, 14.9, 14.7, 11.8, 8.8, 4.9, 2.4, 0.1, 0.5, 3.5, 7.3, 11.3, 14.3, 14.9, 14.7, 11.8, 8.8, 4.9, 2.4, 0.1, 0.5, 3.5, 7.3, 11.3, 14.3), mag.cat = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4), value = c(19, 27, 32, 46, 40, 36, 23, 27, 22, 18, 19, 25, 58, 46, 45, 46, 58, 52, 35, 28, 34, 32, 39, 44, 7, 10, 6, 5, 6, 8, 6, 12, 7, 3, 7, 8, 0, 2, 0, 0, 0, 1, 1, 2, 0, 2, 0, 0)), .Names = c("month", "aantal", "gem.mag", "temp", "difftemp", "mag.cat", "value"), row.names = c(NA, 48L), class = "data.frame")
I tried to plot the two lines inside the barplot with the following code:
# Stacked bars of value per month (one colour per mag.cat), a red horizontal
# line at the mean of aantal, and a line layer for difftemp.
ggplot(data = maand.long) +
  geom_col(
    mapping = aes(x = month, y = value, fill = as.factor(mag.cat)),
    width = 0.7
  ) +
  geom_hline(
    color = "red",
    size = 1,
    yintercept = mean(maand.long$aantal)
  ) +
  geom_line(mapping = aes(x = month, y = difftemp))
The hline is plotted, but the normal line is not. How can I solve this problem?
ggplot tries to plot a line for each level of month since this variable is a factor. There are two possibilities to solve this problem:
Use as.numeric
# Option 1: convert the factor month to its numeric codes, so x is no longer
# a factor for this layer.
geom_line(aes(x = as.numeric(month), y = difftemp))
Use group = 1
# Option 2: keep month as a factor but put every row in the same group
# (group = 1), so the points are joined into a single line.
geom_line(aes(x = month, y = difftemp, group = 1))
The complete code:
# Complete plot: stacked bars of value per month (one colour per mag.cat), a
# red horizontal line at the mean of aantal, and the difftemp line drawn
# across the factor levels thanks to group = 1.
ggplot(data = maand.long) +
  geom_col(
    mapping = aes(x = month, y = value, fill = as.factor(mag.cat)),
    width = 0.7
  ) +
  geom_hline(
    color = "red",
    size = 1,
    yintercept = mean(maand.long$aantal)
  ) +
  geom_line(mapping = aes(x = month, y = difftemp, group = 1))

Resources