R - Easy significance test on 2 dataframes - r

I am stuck on a simple statistical comparison of two dataframes. Both dataframes consist of different kinds of observations (columns) and the observation days (rows). I counted the number of occurrences for each day and each case. I don't have the same number of observation days, the observations took place under different conditions, and I want to find out whether there is a significant difference between the two dataframes. So basically I want to compare Case1 of df1 with Case1 of df2. For that I calculated the number of occurrences per day for each dataframe and compared them (in %).
In reality I have thousands of these dataframes, and they all have different numbers of rows.
My problem now is: how can I get an idea of which of the results are significant? How can I tell whether only 9 days of observation are too few to give a significant result?
I tried to perform a Chi-Square test, is that the right thing to do?
Here is Dataframe 1:
structure(list(Case1 = c(17L, 9L, 4L, 3L, 5L, 4L, 5L, 4L, 6L, 13L,
7L, 17L, 9L, 11L, 10L, 8L, 7L, 22L, 7L, 14L, 15L, 13L, 17L, 7L,
13L, 12L, 10L, 16L, 7L, 6L, 13L, 10L, 12L, 12L, 11L, 13L, 12L,
9L, 11L, 12L, 14L, 10L, 11L, 14L, 15L, 9L, 12L, 13L, 19L, 14L,
10L, 10L, 4L, 10L, 9L, 11L, 10L, 4L, 6L, 3L, 11L, 10L, 7L, 8L,
12L, 8L, 7L, 3L, 5L, 5L, 6L, 5L, 8L, 10L, 9L, 3L, 5L, 9L, 9L,
4L, 9L, 7L, 8L, 6L, 4L, 7L, 6L, 9L, 4L, 17L, 16L, 9L, 16L, 12L,
9L, 10L, 14L, 6L, 17L, 14L, 14L, 11L, 10L, 11L, 15L, 12L, 11L,
15L, 10L, 12L, 12L, 5L, 7L, 7L, 15L, 9L, 8L, 14L, 15L, 20L, 8L,
12L, 12L, 19L, 10L, 18L, 6L, 14L, 17L, 17L, 17L, 13L, 12L, 10L,
15L, 11L, 17L, 12L, 8L, 15L, 9L, 9L, 13L, 14L, 9L, 6L, 18L, 5L,
8L, 8L, 5L, 7L, 4L, 6L, 4L, 6L, 4L, 7L, 7L, 8L, 4L, 6L, 9L, 4L,
4L, 5L, 9L, 2L, 4L, 4L, 7L, 10L, 7L, 8L, 4L), Case2 = c(17L, 9L,
4L, 3L, 5L, 4L, 4L, 3L, 6L, 11L, 6L, 10L, 9L, 7L, 9L, 6L, 7L,
20L, 7L, 11L, 12L, 12L, 15L, 6L, 10L, 10L, 9L, 14L, 6L, 6L, 12L,
9L, 10L, 10L, 9L, 10L, 11L, 7L, 10L, 12L, 14L, 8L, 9L, 10L, 15L,
9L, 11L, 10L, 14L, 13L, 10L, 8L, 4L, 9L, 8L, 11L, 6L, 4L, 6L,
2L, 8L, 6L, 7L, 8L, 12L, 6L, 7L, 2L, 4L, 4L, 5L, 4L, 8L, 8L,
8L, 3L, 4L, 8L, 8L, 4L, 9L, 5L, 7L, 6L, 3L, 6L, 6L, 9L, 4L, 15L,
12L, 8L, 15L, 11L, 7L, 9L, 13L, 6L, 12L, 12L, 14L, 10L, 10L,
9L, 14L, 11L, 10L, 11L, 9L, 11L, 9L, 4L, 7L, 7L, 14L, 8L, 8L,
13L, 13L, 16L, 7L, 10L, 10L, 13L, 10L, 16L, 6L, 14L, 16L, 16L,
17L, 10L, 10L, 7L, 15L, 10L, 17L, 12L, 8L, 12L, 8L, 9L, 13L,
12L, 9L, 6L, 13L, 5L, 7L, 8L, 5L, 3L, 2L, 6L, 4L, 5L, 4L, 7L,
6L, 6L, 4L, 6L, 7L, 3L, 3L, 4L, 5L, 1L, 4L, 3L, 6L, 8L, 7L, 7L,
3L), Case3 = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 2L, 1L, 7L, 0L,
4L, 1L, 2L, 0L, 2L, 0L, 3L, 3L, 1L, 2L, 1L, 3L, 2L, 1L, 2L, 1L,
0L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 2L, 1L, 0L, 0L, 2L, 2L, 4L, 0L,
0L, 1L, 3L, 5L, 1L, 0L, 2L, 0L, 1L, 1L, 0L, 4L, 0L, 0L, 1L, 3L,
4L, 0L, 0L, 0L, 2L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 2L, 1L, 0L, 1L,
1L, 1L, 0L, 0L, 2L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 2L, 4L, 1L, 1L,
1L, 2L, 1L, 1L, 0L, 5L, 2L, 0L, 1L, 0L, 2L, 1L, 1L, 1L, 4L, 1L,
1L, 3L, 1L, 0L, 0L, 1L, 1L, 0L, 1L, 2L, 4L, 1L, 2L, 2L, 6L, 0L,
2L, 0L, 0L, 1L, 1L, 0L, 3L, 2L, 3L, 0L, 1L, 0L, 0L, 0L, 3L, 1L,
0L, 0L, 2L, 0L, 0L, 5L, 0L, 1L, 0L, 0L, 4L, 2L, 0L, 0L, 1L, 0L,
0L, 1L, 2L, 0L, 0L, 2L, 1L, 1L, 1L, 4L, 1L, 0L, 1L, 1L, 2L, 0L,
1L, 1L)), .Names = c("Case1", "Case2", "Case3"), class = "data.frame", row.names = c(NA,
-175L))
Here is Dataframe 2:
structure(list(Case1 = c(9L, 11L, 10L, 4L, 9L, 6L, 4L, 7L, 13L),
Case2 = c(7L, 10L, 8L, 4L, 8L, 4L, 3L, 6L, 8L), Case3 = c(2L, 1L,
2L, 0L, 1L, 2L, 1L, 1L, 5L)), .Names = c("Case1", "Case2", "Case3"), class = "data.frame", row.names = c(NA,
-9L))

Related

Fitting zero inflated poisson to plot it in R

I have the following data
data<-c(1L, 4L, 5L, 10L, 13L, 8L, 3L, 5L, 13L, 9L, 5L, 10L, 9L, 4L,
4L, 13L, 10L, 10L, 7L, 7L, 3L, 1L, 11L, 4L, 5L, 9L, 10L, 3L,
2L, 7L, 8L, 4L, 5L, 6L, 3L, 4L, 13L, 7L, 8L, 6L, 5L, 3L, 10L,
4L, 8L, 8L, 2L, 9L, 5L, 2L, 8L, 7L, 6L, 6L, 6L, 4L, 3L, 9L, 11L,
6L, 7L, 7L, 3L, 4L, 18L, 14L, 8L, 9L, 5L, 3L, 7L, 3L, 8L, 3L,
9L, 3L, 4L, 7L, 7L, 5L, 8L, 7L, 10L, 9L, 9L, 11L, 8L, 3L, 9L,
10L, 11L, 9L, 12L, 13L, 9L, 15L, 11L, 13L, 3L, 24L, 11L, 13L,
14L, 14L, 5L, 10L, 6L, 10L, 8L, 9L, 13L, 5L, 8L, 8L, 6L, 17L,
11L, 11L, 8L, 2L, 14L, 6L, 1L, 7L, 5L, 3L, 12L, 6L, 10L, 7L,
15L, 9L, 7L, 3L, 9L, 11L, 3L, 5L, 14L, 7L, 3L, 20L, 17L, 14L,
7L, 11L, 11L, 2L, 4L, 9L, 5L, 10L, 7L, 10L, 13L, 7L, 18L, 13L,
18L, 20L, 16L, 9L, 5L, 13L, 16L, 11L, 9L, 7L, 12L, 13L, 21L,
9L, 7L, 13L, 4L, 7L, 5L, 13L, 19L, 17L, 8L, 7L, 4L, 18L, 14L,
8L, 8L, 16L, 13L, 9L, 14L, 8L, 20L, 7L, 12L, 14L, 8L, 16L, 10L,
9L, 20L, 5L, 7L, 8L, 16L, 11L, 10L, 12L, 20L, 5L, 2L, 21L, 16L,
18L, 0L, 16L, 4L, 6L, 16L, 6L, 15L, 15L, 10L, 8L, 13L, 22L, 14L,
5L, 8L, 11L, 14L, 7L, 9L, 7L, 7L, 8L, 5L, 12L, 6L, 20L, 10L,
17L, 9L, 7L, 13L, 9L, 13L, 15L, 18L, 10L, 8L, 10L, 12L, 16L,
16L, 11L, 13L, 8L, 8L, 20L, 16L, 11L, 14L, 18L, 10L, 8L, 17L,
24L, 8L, 15L, 16L, 9L, 10L, 22L, 15L, 16L, 16L, 20L, 16L, 7L,
12L, 10L, 16L, 16L, 17L, 16L, 13L, 4L, 14L, 14L, 18L, 11L, 4L,
3L, 10L, 19L, 9L, 9L, 10L, 4L, 9L, 9L, 5L, 6L, 13L, 7L, 4L, 2L,
7L, 13L, 6L, 4L, 3L, 6L, 5L, 2L, 9L, 6L, 10L, 9L, 3L, 2L, 7L,
12L, 14L, 12L, 12L, 2L, 4L, 7L, 5L, 7L, 9L, 5L, 6L, 6L, 9L, 10L,
6L, 11L, 4L, 6L, 3L, 5L, 3L, 5L, 4L, 10L, 7L, 4L, 6L, 9L, 11L,
6L, 10L, 3L, 1L, 9L, 9L, 11L, 8L, 3L, 5L, 7L, 6L, 8L, 8L, 9L,
4L, 2L, 5L, 7L, 13L, 6L, 12L, 3L, 9L, 7L, 4L, 6L, 8L, 11L, 9L,
4L, 5L, 10L, 11L, 17L, 15L, 3L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 2L, 1L, 2L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
3L, 16L, 17L, 6L, 6L, 9L, 6L, 12L, 6L, 13L, 6L, 5L, 9L, 6L, 14L,
2L, 17L, 4L, 10L, 6L, 1L, 15L, 8L, 8L, 5L, 7L, 7L, 8L, 12L, 2L,
3L, 7L, 11L, 6L, 9L, 10L, 11L, 11L, 4L, 12L, 1L, 7L, 6L, 3L,
8L, 11L, 7L, 6L, 5L, 5L, 11L, 7L, 7L, 6L, 7L, 5L, 7L, 10L, 5L,
4L, 7L, 5L, 9L, 7L, 14L, 10L, 4L, 9L, 5L, 10L, 12L, 14L, 6L,
5L, 12L, 5L, 3L, 8L, 8L, 4L, 9L, 9L, 12L, 2L, 8L, 5L, 4L, 5L,
1L, 4L, 4L, 7L, 6L, 8L, 10L, 13L, 9L, 4L, 8L, 8L, 9L, 12L, 4L,
7L, 6L, 5L, 5L, 7L, 2L, 5L, 10L, 0L, 4L, 6L, 5L, 3L, 8L, 2L,
1L, 1L, 6L, 6L, 1L, 2L, 5L, 9L, 10L, 7L, 10L, 3L, 12L, 7L, 4L,
1L, 5L, 6L, 6L, 5L, 4L, 1L, 5L, 0L, 8L, 6L, 4L, 1L, 7L, 5L, 3L,
8L, 3L, 0L, 3L, 2L, 0L, 6L, 10L, 0L, 8L, 3L, 0L, 1L, 1L, 5L,
7L, 0L, 1L, 0L, 3L, 1L, 9L, 2L, 8L, 1L, 0L, 0L, 5L, 1L, 0L, 2L,
1L, 0L, 7L, 1L, 2L, 0L, 0L, 4L, 4L, 10L, 0L, 6L, 4L, 3L, 0L,
4L, 1L, 3L, 1L, 0L, 0L, 0L, 5L, 0L, 6L, 6L, 3L, 5L, 0L, 4L, 0L,
2L, 3L, 5L, 2L, 4L, 3L, 1L, 1L, 0L, 2L, 0L, 3L, 0L, 3L, 4L, 4L,
7L, 0L, 0L, 1L, 9L, 0L, 3L, 0L, 4L, 0L, 3L, 4L, 5L, 0L, 0L, 4L,
3L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 0L, 0L, 0L, 2L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 2L, 0L, 0L,
0L, 0L, 2L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 13L, 10L, 13L, 10L, 11L,
8L, 27L, 8L, 12L, 20L, 15L, 9L, 10L, 3L, 8L, 13L, 16L, 13L, 12L,
13L, 10L, 14L, 14L, 10L, 10L, 7L, 13L, 12L, 12L, 23L, 7L, 12L,
6L, 7L, 10L, 8L, 13L, 16L, 10L, 11L, 18L, 7L, 15L, 18L, 10L,
9L, 15L, 4L, 3L, 9L, 12L, 2L, 6L, 4L, 4L, 8L, 4L, 7L, 11L, 9L,
7L, 9L, 15L, 7L, 7L, 14L, 15L, 6L, 3L, 7L, 6L, 22L, 7L, 8L, 6L,
12L, 7L, 11L, 10L, 6L, 10L, 6L, 5L, 16L, 11L, 11L, 6L, 9L, 10L,
4L, 14L, 7L, 6L, 4L, 9L, 4L, 7L, 10L, 11L, 8L, 6L, 7L, 3L, 8L,
8L, 12L, 7L, 13L, 5L, 4L, 10L, 6L, 8L, 7L, 11L, 3L, 3L, 5L, 4L,
4L, 11L, 3L, 3L, 3L, 3L, 7L, 4L, 5L, 3L, 5L, 1L, 5L, 2L, 5L,
6L, 6L, 4L, 3L, 6L, 7L, 3L, 8L, 1L, 3L, 5L, 9L, 9L, 10L, 6L,
9L, 7L, 5L, 5L, 10L, 6L, 9L, 2L, 6L, 6L, 1L, 6L, 4L, 5L, 3L,
3L, 3L, 3L, 3L, 2L, 6L, 1L, 5L, 3L, 4L, 9L, 3L, 8L, 5L, 7L, 5L,
10L, 5L, 4L, 0L, 8L, 6L, 4L, 6L, 7L, 4L, 3L, 1L, 3L, 3L, 6L,
5L, 7L, 3L, 7L, 2L, 2L, 6L, 4L, 3L, 3L, 2L, 2L, 4L, 2L, 5L, 5L,
7L, 3L, 5L, 2L, 2L, 1L, 5L, 1L, 3L, 2L, 5L, 3L, 1L, 4L, 0L, 1L,
4L, 3L, 2L, 2L, 2L, 6L, 3L, 4L, 2L, 2L, 8L, 4L, 3L, 6L, 6L, 2L,
4L, 11L, 3L, 4L, 4L, 5L, 5L, 1L, 5L, 2L, 7L, 3L, 2L, 4L, 2L,
3L, 6L, 3L, 11L, 7L, 5L, 9L, 5L, 6L, 5L, 9L, 6L, 5L, 7L, 1L,
14L, 7L, 7L, 7L, 2L, 5L, 5L, 9L, 2L, 9L, 2L, 6L, 2L, 9L, 4L,
3L, 4L, 9L, 7L, 6L, 5L, 4L, 5L, 6L, 4L, 5L, 2L, 5L, 4L, 7L, 3L,
9L, 6L, 9L, 7L, 2L, 7L, 6L, 7L, 3L, 4L, 8L, 3L, 8L, 10L, 3L,
3L, 5L, 4L, 8L, 6L, 5L, 4L, 5L, 1L, 6L, 6L, 8L, 9L, 5L, 10L,
1L, 8L, 7L, 7L, 6L, 5L, 1L, 5L, 8L, 11L, 2L, 6L, 7L, 6L, 5L,
20L, 8L, 10L, 7L, 5L, 2L, 5L, 3L, 17L, 6L, 5L, 0L, 1L, 1L, 9L,
1L)
I have run a ZINB model and I know that it is the best fit for my data. I want to demonstrate on a graph that this distribution is my best option. I am using fitdist
library(fitdistrplus)
library(gamlss)
# Fit three candidate count distributions to `data` with fitdist().
nb <- fitdist(data, "nbinom")
pois <- fitdist(data, "pois")
# "ZANBI" is the distribution name handed to fitdist(); mu and sigma are given
# explicit starting values.
zinb <- fitdist(
  data, "ZANBI",
  start = list(mu = 4, sigma = 0.2)
)

# 2 x 2 plotting grid, plus the legend labels for the three fits above.
par(mfrow = c(2, 2))
plot.legend <- c("Negative binomial", "Poisson", "ZINB")
My problem is that, although I can demonstrate that nbinom and pois are not the best fit, I can't do the same for the zero-inflated Poisson (ZIP).
I am using gamlss
# ZIP fit attempt with the starting values mu = 7.09, sigma = 4.5; this is the
# call that produces the error quoted below.
zip <- fitdist(
  data, "ZIP",
  start = list(mu = 7.09, sigma = 4.5)
)
Here I'm using the starting values suggested here, based on mean(data[data != 0]) and var(data[data != 0]). I always get:
Error in fitdist(data, "ZIP", start = list(mu = 7.09, sigma = 4.5)) :
the function mle failed to estimate the parameters,
with the error code 100
In addition: Warning messages:
1: In fitdist(data, "ZIP", start = list(mu = 7.09, sigma = 4.5)) :
The dZIP function should return a zero-length vector when input has length zero and not raise an error
2: In fitdist(data, "ZIP", start = list(mu = 7.09, sigma = 4.5)) :
The pZIP function should return a zero-length vector when input has length zero and not raise an error
How can I plot a ZIP fit of my values to demonstrate that it is not the best fit?
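The following settings for the ZIP fit worked for me:
A starting value for sigma below 1 (in gamlss's ZIP distribution, sigma is the probability of an extra zero, so it must lie between 0 and 1; a start of 4.5 is outside the parameter space).
The Nelder-Mead optimizer.
(lower, upper) bounds for the optimization parameters mu and sigma, set respectively to (0, Inf) and (0, 1).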
The result of running the following code on your data array is below, which confirms that the Zero-Inflated Negative Binomial is the best fit (based on AIC and BIC).
library(fitdistrplus)
library(gamlss)
# Fit the four candidate count distributions to `data`.
nb <- fitdist(data, "nbinom")
pois <- fitdist(data, "pois")
zinb <- fitdist(data, "ZANBI", start = list(mu = 4, sigma = 0.2))
# ZIP needs extra care: a start value for sigma below 1, the Nelder-Mead
# optimizer, and box constraints mu in (0, Inf), sigma in (0, 1).
zip <- fitdist(
  data, "ZIP",
  start = list(mu = 7.09, sigma = 0.5),
  discrete = TRUE,
  optim.method = "Nelder-Mead",
  lower = c(0, 0),
  upper = c(Inf, 1)
)

# Show the parameter estimates of every fit (nb, pois, zinb, zip, in that
# order).
for (fit in list(nb, pois, zinb, zip)) {
  print(fit)
}

# Compare the fits graphically (CDFs) and numerically (goodness-of-fit
# statistics); both calls receive the fits in the same order.
fits <- list(nb, zinb, pois, zip)
cdfcomp(fits)
gofstat(fits)
The only thing that worries me is that the standard errors of the estimated parameters for the ZIP fit are NA...
Partial OUTPUT
Fitting of the distribution ' nbinom ' by maximum likelihood
Parameters:
estimate Std. Error
size 1.007110 0.05297338
mu 5.548579 0.16643396
Fitting of the distribution ' pois ' by maximum likelihood
Parameters:
estimate Std. Error
lambda 5.548313 0.06522914
Fitting of the distribution ' ZANBI ' by maximum likelihood
Parameters:
estimate Std. Error
mu 6.8886199 0.1549058
sigma 0.3401722 0.0266448
Fitting of the distribution ' ZIP ' by maximum likelihood
Parameters:
estimate Std. Error
mu 7.0869552 NA
sigma 0.2171502 NA
Goodness-of-fit criteria
1-mle-nbinom 2-mle-ZANBI 3-mle-pois 4-mle-ZIP
Akaike's Information Criterion 7302.831 7141.004 10169.16 7981.985
Bayesian Information Criterion 7313.177 7151.350 10174.33 7992.331

Panel regression with cross sectional averages

I am estimating a panel regression model, and I need to add the cross sectional average of the dependent variable and regressors to the model.
I am struggling to implement the cross-sectional averages in R. Can anyone help me out?
So I have a panel regression code below - using plm package.
I need to add the cross-sectional averages of the variables A, B, C and D to the right-hand side of the regression:
library(plm)
# Panel regression of A on B, C and D, estimated with model = "fd".
panel_fe <- plm(
  A ~ B + C + D,
  data = PanelS,
  effect = "individual",
  model = "fd"
)
So my final regression model would look like this: A = B + C + D + A_bar + B_bar + C_bar + D_bar, where A_bar, B_bar, C_bar and D_bar are the cross-sectional averages of A, B, C and D, respectively.
My panel dataset, PanelS, is below.
structure(list(Country = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA", "CountryB",
"CountryC", "CountryD", "CountryE", "CountryF", "CountryG", "CountryH",
"CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor"), A = c(0.051539, 0.064525,
0.014292, 0.018774, 0.035449, 0.021988, 0.02396, 0.011415, 0.010358,
-0.029607, -0.020427, -0.012734, 0.006683, 0.007373, -0.039712,
-0.005499, 0.008682, 0.015326, 0.020524, 0.015101, 0.035355,
0.031157, 0.023387, 0.024198, 0.035353, 0.053873, 0.038743, 0.042338,
0.034935, 0.015377, 0.010599, 0.015154, 0.002919, 0.024291, 0.043819,
0.015901, 0.01897, 0.027767, 0.015992, 0.041976, 0.011223, 0.006144,
0.000778, 0.005873, 0.007194, -0.022017, -0.023338, -0.037765,
-0.049356, 0.026135, 0.035633, 0.015691, -0.006196, -0.00025,
0.001181, -0.001472, -0.009324, -0.022664, -0.022623, -0.019586,
-0.012207, -0.004603, -0.013073, -0.010771, -0.009882, -0.014417,
-0.031812, -0.043885, -0.050883, -0.039834, -0.020299, -0.000684,
0.011216, 0.005419, 0.000939, -0.005508, 0.006266, -0.008077,
-0.016137, -0.012681, 0.031612, 0.043729, 0.009314, 0.002734,
-0.012284, 0.002403, 0.016807, 0.019995, 0.033096, 0.024383,
0.010588, 0.019833, 0.031837, 0.03127, 0.029059, 0.020708, 0.019296,
0.017787, 0.032074, 0.027125, 0.005673, 0.003698, -5.3e-05, 0.001794,
-0.011977, -0.008686, -0.031588, -0.039411, -0.073931, -0.076715,
-0.039171, -0.025797, -0.007637, 0.00345, 0.009101, 0.01674,
-0.006968, -0.019178, -0.02438, -0.039663, 0.078313, 0.06707,
0.062822, 0.050771, 0.041274, 0.043921, 0.046429, 0.039418, 0.034671,
0.017356, 0.001054, 0.00414, 0.00226, 0.00275, 0.00085, 0.00495,
0.001276, -0.001446, -0.005771, -0.007513, 0.053734, 0.038679,
0.017375, 0.01438, 0.018403, 0.032943, 0.025539, 0.032463, 0.032267,
0.034009, 0.018229, 0.008958, 0.010079, 0.00749, 0.000604, 0.001948,
0.011782, 0.013253, 0.007898, 0.007546, 0.018052, -0.001123,
-0.012597, -0.042292, -0.058516, -0.022736, -0.03841, -0.050843,
-0.073979, -0.097242, -0.024712, 0.038037, 0.048685, -0.00624,
0.075575, 0.044947, 0.097171, 0.086809, 0.079856, 0.068521, 0.008062,
-0.00911, -0.010527, -4.3e-05, 0.002428, 0.004422, 0.008752,
0.019602, 0.01724, 0.01965, -0.008816, 0.011466, 0.020956, 0.021873,
0.021772, 0.024495, 0.021354, 0.015267, 0.018769, 0.016904),
C = c(0.75345, 0.70657, 0.645051, 0.510055, 0.433786, 0.35728,
0.265817, 0.208721, 0.163261, 0.130248, 0.136607, 0.153873,
0.152275, 0.166592, 0.170559, 0.27089, 0.259813, 0.292847,
0.253142, 0.222618, 0.56764082, 0.523543, 0.485083, 0.49081,
0.461501, 0.44156, 0.374122, 0.315494, 0.27346, 0.333132,
0.401818, 0.425879, 0.460709, 0.448942, 0.440456, 0.442703,
0.397737, 0.372338, 0.359446, 0.340254, 0.064305, 0.05107,
0.047682, 0.056584, 0.055981, 0.051134, 0.047025, 0.046318,
0.037655, 0.045041, 0.071989, 0.066074, 0.061057, 0.097641,
0.101621, 0.105545, 0.09996, 0.099131, 0.091119, 0.082012,
0.120817, 0.120871, 0.138383, 0.13023, 0.141247, 0.146088,
0.119133, 0.100396, 0.084592, 0.185873, 0.368416, 0.479167,
0.4367, 0.421837, 0.400428, 0.416259, 0.37072, 0.40398, 0.390126,
0.371126, 0.079576, 0.074647, 0.076712, 0.074295, 0.074504,
0.079053, 0.080224, 0.082991, 0.082006, 0.15357, 0.161465,
0.201522, 0.190049, 0.219974, 0.236873, 0.227428, 0.219862,
0.200938, 0.223426, 0.209529, 0.217219, 0.224867, 0.258694,
0.248207, 0.221093, 0.189452, 0.159052, 0.124236, 0.119492,
0.123362, 0.217807, 0.296186, 0.339882, 0.371345, 0.376212,
0.391509, 0.378059, 0.373931, 0.351043, 0.347354, 0.440547,
0.424547, 0.409236, 0.401795, 0.427482, 0.426416, 0.399297,
0.381117, 0.339041, 0.325607, 0.415314, 0.469047, 0.482712,
0.536225, 0.562292, 0.598259, 0.636417, 0.631764, 0.612668,
0.596271, 0.605061, 0.503479, 0.518971, 0.498057, 0.492731,
0.484527, 0.486885, 0.43596, 0.388967, 0.374978, 0.407324,
0.381025, 0.371731, 0.375149, 0.402248, 0.449982, 0.437387,
0.422554, 0.407331, 0.389125, 0.989067, 1.049344, 1.070812,
1.048631, 1.014561, 1.028734, 1.073949, 1.036117, 1.03103,
1.094155, 1.267447, 1.474942, 1.752192, 1.619444, 1.784347,
1.802256, 1.770079, 1.807951, 1.792139, 1.862386, 0.601394,
0.590658, 0.579365, 0.597035, 0.633089, 0.649877, 0.673465,
0.667047, 0.639942, 0.655222, 0.729901, 0.823816, 0.79801,
0.811354, 0.787169, 0.756694, 0.72207, 0.692768, 0.651024,
0.617801), B = c(0.147502302, 0.043680673, -0.212478849,
-0.266834333, -0.228099071, -0.199890362, -0.968175801, 1.047500546,
1.273127656, 1.227657506, -0.286068921, -1.356896168, -1.442625298,
-0.291748363, 2.029875219, 1.099611751, -1.112127832, -0.894025857,
0.103213651, 0.286801553, 0.756833023, 0.591945192, 0.525259532,
0.466656359, 0.706692697, -2.361722697, -2.777257989, -4.097114222,
-4.564987155, 2.317853991, 3.44030537, 3.034469093, 5.845290721,
0.403542521, 0.128582254, 0.817094156, -0.886707561, -2.998573025,
-0.491794488, -0.856367773, 0.023343476, -0.209503364, -0.084839186,
-0.146285026, -0.256672799, -0.093852713, 0.145824486, 0.434606031,
0.966980327, 0.67904687, -0.292659443, -0.487763914, -0.084930583,
-0.32722087, -0.442172133, -0.168366978, -0.186469629, 0.046322287,
0.181126569, 0.303486593, 0.171541123, -0.348150815, -0.407466419,
-0.624622679, -0.354132366, -0.15050691, 0.700892294, 0.67692383,
1.014111655, 0.862019536, 0.395600738, -0.256706715, -0.542246369,
-0.539422399, -0.405088653, -0.247954994, -0.497333992, -0.010723655,
0.393516751, 0.169750037, -0.581903347, -0.730163914, 0.351894514,
0.629568917, 0.882078894, 0.760041333, -0.564317727, -0.57799292,
-0.433736512, 0.513350369, 0.55464973, -0.224497194, -0.074326596,
-0.123301819, -0.432013928, -0.25316664, -0.374406673, 0.116449941,
0.308969388, 0.252824183, 2.398228162, -0.033362631, -1.681378615,
-3.655293426, -2.793256764, -3.636310622, 0.149490332, 3.951131246,
7.177449077, 4.831325877, 2.050070679, 1.314471427, -1.687424783,
-3.796189127, -3.329685346, -1.695252718, -3.010416797, -2.414597902,
1.199960369, 4.661041564, 0.531518012, -1.384184059, -0.64216453,
-0.13206166, 0.249287935, -0.153010531, -0.987952985, -1.71711917,
-0.678751076, 0.890062065, 1.663691535, 1.883735194, 2.171029985,
2.383501603, 1.490313839, -0.732542129, -0.291797363, -1.655272704,
-1.613245217, -1.275038743, -0.789256935, -3.589249982, 0.502475039,
1.840081099, 1.141218417, 3.130100399, 3.94751837, 0.97811035,
0.013586974, -3.245960526, -2.068241886, -1.82476664, -1.481654499,
0.37039449, -1.516414277, -1.722381744, 0.683458083, 0.153189319,
3.410781995, 0.067011953, -3.09418792, -4.09753755, -4.682167411,
-1.333607727, 2.505605899, -4.332639317, -2.190945016, 4.048457741,
11.60535564, 13.61047901, 5.145259686, -0.712611552, -3.385649938,
7.214394614, -10.34401695, -1.841542179, -6.437949187, -4.545422837,
-0.012548047, 2.881273043, 3.227611639, 10.96399365, 16.38843255,
14.72001327, -13.84595255, -10.51570643, -13.59695535, -36.70577424,
-12.07070647, 12.51742535, 52.88207865, 9.143152612, -7.818895359,
-15.57456939, -21.31957866, -23.55720863, -5.574415019, 5.783084584,
12.02189272, 22.93207708), D = c(0.77780751, 0.793229898,
0.80623893, 0.821155065, 0.836880111, 0.854312944, 0.873660631,
0.890537317, 0.907536298, 0.912375095, 0.929637942, 0.946439284,
0.965000087, 0.97726773, 0.986870808, 1, 1.019208507, 1.037842597,
1.054711181, 1.072171599, 0.534008473, 0.566583199, 0.58762954,
0.601043497, 0.63362178, 0.673913677, 0.719447102, 0.799187909,
0.864173776, 0.899162389, 0.909465125, 0.96350569, 0.978220642,
0.971679886, 0.976158221, 1, 1.025374896, 1.065804414, 1.108567186,
1.166769344, 0.588726028, 0.64526073, 0.733094431, 0.718268082,
0.746291144, 0.799900392, 0.846050389, 0.894179583, 1.015232882,
0.982856394, 1.012948099, 1.041332642, 1.032947106, 1.013566583,
0.980944689, 1, 1.020576612, 1.061740647, 1.117831183, 1.159906251,
0.750587042, 0.769670674, 0.790024355, 0.801712216, 0.817505148,
0.83991247, 0.856517319, 0.878345181, 0.914006005, 0.920044857,
0.949573071, 0.955207703, 0.978810398, 0.985618398, 0.996205139,
1, 1.004364708, 1.017159213, 1.021013703, 1.02682649, 0.825278825,
0.836048671, 0.847570474, 0.858769029, 0.86834942, 0.871868036,
0.875331803, 0.890827568, 0.898928134, 0.915485416, 0.921392822,
0.931246968, 0.945182975, 0.963702812, 0.981800571, 1, 1.013277522,
1.026999204, 1.044176589, 1.067069774, 0.490666665, 0.523850087,
0.54906662, 0.570457925, 0.597126217, 0.632406036, 0.689467717,
0.775073059, 0.828560075, 0.827109078, 0.842215091, 0.887572897,
0.923280339, 0.960610381, 0.988936452, 1, 1.022699304, 1.054533263,
1.098615084, 1.134067127, 0.757140805, 0.809228408, 0.851488047,
0.884918505, 0.889385715, 0.916751643, 0.948479832, 0.960072842,
0.956196673, 0.911566837, 0.884542463, 0.89644222, 0.917048164,
0.929279352, 0.929337342, 1, 1.010128912, 1.026719845, 1.029923385,
1.062349178, 0.786853444, 0.804351028, 0.831286834, 0.859995963,
0.886334727, 0.906191485, 0.937863282, 0.969963165, 1.012104032,
1.038112793, 1.036283847, 1.046222, 1.043339336, 1.02279939,
1.002888566, 1, 0.994233243, 0.998082845, 0.997049083, 0.998951287,
0.740171055, 0.770579402, 0.802054487, 0.833603662, 0.865965514,
0.90147914, 0.937354271, 0.969378485, 0.99123068, 0.992657113,
0.994179737, 0.993983379, 0.992844694, 0.99680058, 0.994574042,
1, 1.003228988, 1.016266499, 1.028341184, 1.04261954, 0.801617134,
0.817716283, 0.834621959, 0.850140657, 0.863935678, 0.880664424,
0.899645623, 0.9226463, 0.944486016, 0.945115307, 0.95522518,
0.964280334, 0.975483583, 0.983073825, 0.988745617, 1, 1.005225593,
1.010468623, 1.020086873, 1.032605559)), row.names = c("CountryA-2000",
"CountryA-2001", "CountryA-2002", "CountryA-2003", "CountryA-2004",
"CountryA-2005", "CountryA-2006", "CountryA-2007", "CountryA-2008",
"CountryA-2009", "CountryA-2010", "CountryA-2011", "CountryA-2012",
"CountryA-2013", "CountryA-2014", "CountryA-2015", "CountryA-2016",
"CountryA-2017", "CountryA-2018", "CountryA-2019", "CountryB-2000",
"CountryB-2001", "CountryB-2002", "CountryB-2003", "CountryB-2004",
"CountryB-2005", "CountryB-2006", "CountryB-2007", "CountryB-2008",
"CountryB-2009", "CountryB-2010", "CountryB-2011", "CountryB-2012",
"CountryB-2013", "CountryB-2014", "CountryB-2015", "CountryB-2016",
"CountryB-2017", "CountryB-2018", "CountryB-2019", "CountryC-2000",
"CountryC-2001", "CountryC-2002", "CountryC-2003", "CountryC-2004",
"CountryC-2005", "CountryC-2006", "CountryC-2007", "CountryC-2008",
"CountryC-2009", "CountryC-2010", "CountryC-2011", "CountryC-2012",
"CountryC-2013", "CountryC-2014", "CountryC-2015", "CountryC-2016",
"CountryC-2017", "CountryC-2018", "CountryC-2019", "CountryD-2000",
"CountryD-2001", "CountryD-2002", "CountryD-2003", "CountryD-2004",
"CountryD-2005", "CountryD-2006", "CountryD-2007", "CountryD-2008",
"CountryD-2009", "CountryD-2010", "CountryD-2011", "CountryD-2012",
"CountryD-2013", "CountryD-2014", "CountryD-2015", "CountryD-2016",
"CountryD-2017", "CountryD-2018", "CountryD-2019", "CountryE-2000",
"CountryE-2001", "CountryE-2002", "CountryE-2003", "CountryE-2004",
"CountryE-2005", "CountryE-2006", "CountryE-2007", "CountryE-2008",
"CountryE-2009", "CountryE-2010", "CountryE-2011", "CountryE-2012",
"CountryE-2013", "CountryE-2014", "CountryE-2015", "CountryE-2016",
"CountryE-2017", "CountryE-2018", "CountryE-2019", "CountryF-2000",
"CountryF-2001", "CountryF-2002", "CountryF-2003", "CountryF-2004",
"CountryF-2005", "CountryF-2006", "CountryF-2007", "CountryF-2008",
"CountryF-2009", "CountryF-2010", "CountryF-2011", "CountryF-2012",
"CountryF-2013", "CountryF-2014", "CountryF-2015", "CountryF-2016",
"CountryF-2017", "CountryF-2018", "CountryF-2019", "CountryG-2000",
"CountryG-2001", "CountryG-2002", "CountryG-2003", "CountryG-2004",
"CountryG-2005", "CountryG-2006", "CountryG-2007", "CountryG-2008",
"CountryG-2009", "CountryG-2010", "CountryG-2011", "CountryG-2012",
"CountryG-2013", "CountryG-2014", "CountryG-2015", "CountryG-2016",
"CountryG-2017", "CountryG-2018", "CountryG-2019", "CountryH-2000",
"CountryH-2001", "CountryH-2002", "CountryH-2003", "CountryH-2004",
"CountryH-2005", "CountryH-2006", "CountryH-2007", "CountryH-2008",
"CountryH-2009", "CountryH-2010", "CountryH-2011", "CountryH-2012",
"CountryH-2013", "CountryH-2014", "CountryH-2015", "CountryH-2016",
"CountryH-2017", "CountryH-2018", "CountryH-2019", "CountryI-2000",
"CountryI-2001", "CountryI-2002", "CountryI-2003", "CountryI-2004",
"CountryI-2005", "CountryI-2006", "CountryI-2007", "CountryI-2008",
"CountryI-2009", "CountryI-2010", "CountryI-2011", "CountryI-2012",
"CountryI-2013", "CountryI-2014", "CountryI-2015", "CountryI-2016",
"CountryI-2017", "CountryI-2018", "CountryI-2019", "CountryJ-2000",
"CountryJ-2001", "CountryJ-2002", "CountryJ-2003", "CountryJ-2004",
"CountryJ-2005", "CountryJ-2006", "CountryJ-2007", "CountryJ-2008",
"CountryJ-2009", "CountryJ-2010", "CountryJ-2011", "CountryJ-2012",
"CountryJ-2013", "CountryJ-2014", "CountryJ-2015", "CountryJ-2016",
"CountryJ-2017", "CountryJ-2018", "CountryJ-2019"), class = c("pdata.frame",
"data.frame"), index = structure(list(Country = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("CountryA",
"CountryB", "CountryC", "CountryD", "CountryE", "CountryF", "CountryG",
"CountryH", "CountryI", "CountryJ"), class = "factor"), Year = structure(c(1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L,
18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L,
19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 1L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 19L,
20L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 18L, 19L, 20L), .Label = c("2000", "2001",
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009",
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017",
"2018", "2019"), class = "factor")), class = c("pindex", "data.frame"
), row.names = c(NA, 200L)))
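You can use the function Between from the package plm to calculate the cross-sectional averages and add them to your data. Note that Between() defaults to effect = "individual", i.e. it returns each country's average over the years; if you need the average across countries within each year instead, call Between(x, effect = "time"):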
library(plm)
# PanelS must already be a pdata.frame; if you start from a plain data frame,
# convert it first with pdata.frame(your_data, index).
#
# Add one *_bar column per model variable, each computed by Between().
# NOTE(review): Between() is called with its default `effect` -- confirm that
# this is the averaging dimension intended for the *_bar terms.
PanelS$A_bar <- Between(PanelS$A)
PanelS$B_bar <- Between(PanelS$B)
PanelS$C_bar <- Between(PanelS$C)
PanelS$D_bar <- Between(PanelS$D)

# Pooled regression of A on the original regressors plus all four *_bar terms.
mod <- plm(
  A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
  data = PanelS,
  effect = "individual",
  model = "pooling"
)
summary(mod)
# Pooling Model
#
# Call:
# plm(formula = A ~ B + C + D + A_bar + B_bar + C_bar + D_bar,
# data = PanelS, effect = "individual", model = "pooling")
#
# Balanced Panel: n = 10, T = 20, N = 200
#
# Residuals:
# Min. 1st Qu. Median 3rd Qu. Max.
# -0.06143690 -0.01311792 0.00070253 0.01186605 0.05107105
#
# Coefficients:
# Estimate Std. Error t-value Pr(>|t|)
# (Intercept) -0.00000000000001042 0.03313743211380626 0.0000 1.000000
# B -0.00076930351859426 0.00020566635571130 -3.7405 0.000242 ***
# C 0.10827039012266901 0.00949296134830719 11.4053 < 0.00000000000000022 ***
# D -0.04222788490989914 0.01136058813979121 -3.7171 0.000264 ***
# A_bar 0.99999999999911215 0.09632471140222754 10.3816 < 0.00000000000000022 ***
# C_bar -0.10827039012256123 0.01033406661607372 -10.4770 < 0.00000000000000022 ***
# D_bar 0.04222788490990802 0.03874710199411169 1.0898 0.277145
# ---
# Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
#
# Total Sum of Squares: 0.17549
# Residual Sum of Squares: 0.07128
# R-Squared: 0.59382
# Adj. R-Squared: 0.58119
# F-statistic: 47.0268 on 6 and 193 DF, p-value: < 0.000000000000000222
Note that it seems like you want to estimate a fixed effects model but your estimation has model = "fd" to estimate a first-differenced model. Also note that the cross sectional averages will drop out of the estimation of a fixed effects model.

Facets: organising their order and organising the levels within facets

I would like to organise the following plots so that the facets are printed from most to least busy (i.e. Hemiptera, Coleoptera, Hymenoptera, Siphonaptera, Lepidoptera, etc.).
I would also like to order the levels within each facet, as in Coleoptera. I realise that the X-labels will change order too, so I need each facet to print its own X-labels according to its level order.
I have already read many threads and that's how I was able to organise Coleoptera. But now I want it to be more tidy.
This is the data (let me know if this format is ok, if not I can try another way):
structure(list(Order = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L,
9L, 10L, 10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L), .Label = c("Coleoptera",
"Dermaptera", "Dictyoptera", "Diptera", "Hemiptera", "Hymenoptera",
"Lepidoptera", "Phthiraptera", "Psocoptera", "Siphonaptera",
"Thysanoptera"), class = "factor"), Nrange = structure(c(1L,
3L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L, 14L, 14L, 1L, 10L, 1L,
3L, 4L, 6L, 7L, 10L, 11L, 12L, 14L, NA, 1L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 11L, 12L, 14L, NA, 1L, 4L, 5L, 6L, 7L, 8L, 10L, 11L,
12L, 14L, 15L, NA, 1L, 2L, 4L, 5L, 6L, 7L, 8L, 10L, 11L, 12L,
13L, 14L, 4L, 10L, 11L, 12L, 14L, 1L, 4L, 10L, 11L, 12L, 13L,
14L, 1L, 5L, 10L, 1L, 4L, 6L, 7L, 10L, 11L, 12L, 14L), .Label = c("Africa",
"Africa, Asia", "Americas", "Asia", "Asia-Temp", "Asia-Trop",
"Australasia", "C&S America", "Cosmopolitan", "Cryptogenic",
"N America", "S America", "Trop", "Trop, SubTrop", "Unknown"), class = "factor"),
Records = c(16L, 1L, 9L, 7L, 11L, 17L, 1L, 15L, 8L, 8L, 5L,
1L, 2L, 1L, 5L, 1L, 1L, 1L, 1L, 9L, 9L, 2L, 1L, 4L, 11L,
10L, 30L, 15L, 9L, 2L, 2L, 2L, 34L, 11L, 21L, 1L, 21L, 16L,
8L, 1L, 14L, 3L, 5L, 25L, 4L, 2L, 1L, 1L, 8L, 1L, 10L, 1L,
2L, 1L, 1L, 8L, 5L, 2L, 1L, 2L, 2L, 9L, 1L, 2L, 1L, 3L, 1L,
12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L, 1L, 3L,
3L, 2L)), .Names = c("Order", "Nrange", "Records"), row.names = c(NA,
-83L), class = c("grouped_df", "tbl_df", "tbl", "data.frame"), vars = "Order", drop = TRUE)
This is the reordering that I guess is affecting only Coleoptera.
# Reorder the Nrange levels by their summed Records; the values are negated
# so that the largest total comes first.
xy <- x %>%
  mutate(Nrange = reorder(Nrange, -Records, FUN = sum))
This is the code for the plot:
# Keep only the rows that have a native range.
to_plot <- filter(xy, !is.na(Nrange))
# One bar chart per Order, three panels per row; the legend is hidden because
# the fill colour repeats the x-axis variable.
ggplot(to_plot, aes(x = Nrange, y = Records, fill = Nrange)) +
  geom_col() +
  facet_wrap(~Order, ncol = 3) +
  labs(
    title = "Insects recorded as alien-invasive to mainland Spain",
    subtitle = "Native ranges vs number of records",
    caption = "Data source: DAISIE (http://www.europe-aliens.org/)"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.7),
    legend.position = "none"
  )
And this is the plot:
enter image description here
Assuming you're using the tidyverse (based on your code):
library(tidyverse)

# ungroup() removes the grouping on Order before mutate(); fct_reorder() then
# puts the Order levels in descending order of their summed Records, which is
# the order facet_wrap() uses to lay out the panels.
xy <- ungroup(x) %>%
  mutate(Order = fct_reorder(Order, Records, .fun = sum, .desc = TRUE))

xy %>%
  filter(!is.na(Nrange)) %>%
  ggplot(aes(x = Nrange, y = Records, fill = Nrange)) +
  geom_col() +
  facet_wrap(~Order, ncol = 3)
fct_reorder comes from the forcats package, which I believe is now a part of the tidyverse.
Or, using base R, something like this:
xy <- x
# Total Records per Order, in the current level order.
record_sums <- tapply(xy$Records, xy$Order, sum)
# Rebuild the factor with its levels sorted by descending total Records.
# Assigning the permuted names to levels(xy$Order) would instead rename the
# existing levels and attach the wrong Order label to every row.
xy$Order <- factor(
  xy$Order,
  levels = levels(xy$Order)[order(record_sums, decreasing = TRUE)]
)

Ggplot2 geom_line error

I have a dataset which consists of data points over a time series for the proportion of people living in urban/rural areas for a number of countries. Sadly, not all countries have data for the same years. I have been trying to produce a simple line plot to show the different proportions of people living in different locations by year, but as each country has a different number of data points I am running into trouble.
I think this is because some of the countries only have data for a single year and using geom_line from ggplot2 throws the following error:
geom_path: Each group consist of only one observation. Do you need to
adjust the group aesthetic?
I was hoping that there would be some way to override this, or perhaps just plot a single point where a COUNTRY only has data for a single year. Does anyone know if this is possible, or indeed, if this is actually what this error means?!!?
Any help greatly appreciated!!!
Thanks
Here is my data:
structure(list(COUNTRY = structure(c(1L, 2L, 2L, 3L, 3L, 3L,
4L, 4L, 4L, 4L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L,
8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 11L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 1L,
2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 11L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L,
14L, 14L, 14L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L,
6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L,
9L, 9L, 10L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L,
13L, 13L, 13L, 14L, 14L, 14L, 14L, 1L, 2L, 2L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 11L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 1L, 2L,
2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 10L, 11L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L,
14L, 14L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 5L, 5L, 6L,
6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 10L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L,
13L, 13L, 14L, 14L, 14L, 14L, 1L, 2L, 2L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 5L, 5L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L,
8L, 9L, 9L, 9L, 9L, 9L, 10L, 11L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L), class = "factor", .Label = c("Comoros",
"Eritrea", "Ethiopia", "Kenya", "Lesotho", "Madagascar", "Malawi",
"Namibia", "Rwanda", "South Africa", "Swaziland", "Tanzania",
"Zambia", "Zimbabwe")), Year = structure(c(5L, 12L, 4L, 25L,
16L, 9L, 22L, 13L, 7L, 2L, 23L, 15L, 22L, 14L, 6L, 1L, 24L, 15L,
9L, 1L, 13L, 6L, 19L, 9L, 1L, 24L, 21L, 16L, 9L, 1L, 7L, 19L,
24L, 13L, 8L, 5L, 1L, 18L, 10L, 4L, 20L, 11L, 5L, 1L, 24L, 17L,
8L, 3L, 5L, 12L, 4L, 25L, 16L, 9L, 22L, 13L, 7L, 2L, 23L, 15L,
22L, 14L, 6L, 1L, 24L, 15L, 9L, 1L, 13L, 6L, 19L, 9L, 1L, 24L,
21L, 16L, 9L, 1L, 7L, 19L, 24L, 13L, 8L, 5L, 1L, 18L, 10L, 4L,
20L, 11L, 5L, 1L, 24L, 17L, 8L, 3L, 5L, 12L, 4L, 25L, 16L, 9L,
22L, 13L, 7L, 2L, 23L, 15L, 22L, 14L, 6L, 1L, 24L, 15L, 9L, 1L,
13L, 6L, 19L, 9L, 1L, 24L, 21L, 16L, 9L, 1L, 7L, 19L, 24L, 13L,
8L, 5L, 1L, 18L, 10L, 4L, 20L, 11L, 5L, 1L, 24L, 17L, 8L, 3L,
5L, 12L, 4L, 25L, 16L, 9L, 22L, 13L, 7L, 2L, 23L, 15L, 22L, 14L,
6L, 1L, 24L, 15L, 9L, 1L, 13L, 6L, 19L, 9L, 1L, 24L, 21L, 16L,
9L, 1L, 7L, 19L, 24L, 13L, 8L, 5L, 1L, 18L, 10L, 4L, 20L, 11L,
5L, 1L, 24L, 17L, 8L, 3L, 5L, 12L, 4L, 25L, 16L, 9L, 22L, 13L,
7L, 2L, 23L, 15L, 22L, 14L, 6L, 1L, 24L, 15L, 9L, 1L, 13L, 6L,
19L, 9L, 1L, 24L, 21L, 16L, 9L, 1L, 7L, 19L, 24L, 13L, 8L, 5L,
1L, 18L, 10L, 4L, 20L, 11L, 5L, 1L, 24L, 17L, 8L, 3L, 5L, 12L,
4L, 25L, 16L, 9L, 22L, 13L, 7L, 2L, 23L, 15L, 22L, 14L, 6L, 1L,
24L, 15L, 9L, 1L, 13L, 6L, 19L, 9L, 1L, 24L, 21L, 16L, 9L, 1L,
7L, 19L, 24L, 13L, 8L, 5L, 1L, 18L, 10L, 4L, 20L, 11L, 5L, 1L,
24L, 17L, 8L, 3L, 5L, 12L, 4L, 25L, 16L, 9L, 22L, 13L, 7L, 2L,
23L, 15L, 22L, 14L, 6L, 1L, 24L, 15L, 9L, 1L, 13L, 6L, 19L, 9L,
1L, 24L, 21L, 16L, 9L, 1L, 7L, 19L, 24L, 13L, 8L, 5L, 1L, 18L,
10L, 4L, 20L, 11L, 5L, 1L, 24L, 17L, 8L, 3L), class = "factor", .Label = c("1992",
"1993", "1994", "1995", "1996", "1997", "1998", "1999", "2000",
"2000/1", "2001/2", "2002", "2003", "2003/4", "2004", "2005",
"2005/6", "2006", "2006/7", "2007", "2007/8", "2008/9", "2009",
"2010", "2011")), location = structure(c(1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L), .Label = c("Urban",
"Rural", "Total", "Capital.City", "Other.Cities.towns", "Urban.Non.slum",
"Urban.Slum"), class = "factor"), percent = c(63.0434782608696,
93.8, 87, 79.5642604795185, 65.4240807416892, 63.0791092522326,
90.448386469558, 85.9419999774024, 92.7603614781794, 84.0437368780105,
89.9792286718626, 91.0916571421351, 87.1132950026762, 73.8624315865239,
60.8311005575454, 66.7, 96, 86.8, 90.6243926153181, 90.6911141749493,
90.7602286016099, 93.0377175475414, 86.073106379954, 84.253722056373,
77.8178199148702, 97.3, 91.8332260789258, 89.612164524266, 89.9070989918367,
94.9, 85.1351949905457, 94.8358752154967, 92.9, 89.656599879838,
90.2634019334124, 94.4, 91.6241263241579, 76.7337303943862, 68.4233513070184,
74.15601627144, 88.4802888646634, 85.4643913454376, 89.7457528950664,
81.3025210084024, 83.0579155525397, 71.5857386620092, 86.2324062094295,
87.687478493975, 63.5379061371841, 78.5, 40.7, 51.7763728811622,
32.2441768813334, 22.3138981723172, 83.3699691175754, 69.6742912391579,
76.0526239692028, 83.7290062290807, 77.4758329101792, 83.8081963934296,
67.5805226154664, 55.8951299980461, 41.9921451192584, 52.2, 92.5,
77.6, 82.0322170392223, 85.2850090044269, 70.8031150919282, 47.108593681531,
82.2215412952297, 78.3643348536815, 74.4253468485616, 94.8, 90.1711142192198,
85.0338348718722, 86.3134329333052, 90.4, 79.2813256726705, 90.7077549957666,
82.5, 77.7236217339155, 75.3278238729086, 77.7, 78.4592126267142,
67.1145693585691, 55.3459024734839, 57.8463881286199, 83.5604620304044,
83.9259722574938, 84.4589780509803, 73.3992444632325, 77.544833952707,
63.0503715222555, 75.6808008503601, 85.6943513045284, 63.4, 84.2,
51, 55.7151220012609, 34.9, 26.6, 85, 72.5, 79.2, 83.8, 80.3,
84.9, 69.6, 59, 46, 54, 93, 78.7, 83.2, 85.9, 76.7, 57.5, 83.8,
80.4, 75.6, 95, 90.4, 85.6, 86.9, 90.6, 82.2, 91.5, 84.5, 79.9,
78.1, 80.9, 81.2, 68.1, 56.8, 59.6, 84.9, 84.4, 86.5, 77, 79.1337842548663,
65.6, 79.1, 86.3, 68.421052631579, 96.1, 93.3, 93.461209969107,
82.2712525836501, 88.2708936990495, 87.6298001816506, 87.6386027991385,
93.1818181818183, 86.6666666666668, 88.1030398041979, 90.4761904761904,
83.4297434324662, 86.3744073211853, 83.6107223166148, 78.3, NA,
72.8, 80.952380952381, 87.5, 96.9073193030442, 99.1348508752745,
85.5297651573129, 86.4793919321843, 79.4520547945208, 98.2, 92.4613307718678,
85.4590408924955, 83.9378238341966, 92.1, 81.1594202898552, 96.0232554251852,
NA, 88.0377726639494, 83.690767555447, 93.4, 90.0349966633017,
71.2508707571865, 72, 79.4082828804656, 91.8032786885246, 84.5238095238095,
87.8787878787881, 75.6097560975609, 81.0643061692494, 68.4708412135189,
84.9056603773584, 89.5522388059702, 61.6438356164384, 91.7, 79.5,
77.0004220956012, 61.061381883032, 58.756042602018, 91.2594694272412,
85.20149612163, 92.4956062313464, 82.622382662868, 91.4036416540165,
91.6169313256523, 89.2957214499669, 67.6757501795213, 48.1479760952102,
NA, NA, 94.2, 94.3553068539161, 91.8799748693178, 89.3739230258784,
92.1418739343887, 86.4757947454868, 81.0102236379536, 77.0100025126874,
NA, 91.3720851411616, 92.2, 92.5003150086683, 97.8260869565219,
87.1461797069698, 93.5168077834096, NA, 90.1780793791367, 92.9758067301415,
94.9, 91.8829499602467, 81.749280834314, 65.1853441661798, 69.0503609949116,
87.2562445664681, 85.8298270239758, 90.6673511683335, 83.2861189801694,
84.9006282245266, 73.65452177457, 87.3075692692965, 85.5310215524833,
83.3333333333333, NA, NA, 98.5990187756088, 84.4640706359058,
NA, 93.9158337759274, 91.5744358611439, 100, NA, NA, NA, 88.7824144772468,
85.1972665683085, 89.54493171236, NA, NA, 89.8, NA, 100, 97.6261376125643,
96.3196943955923, 92.0952338262334, 87.9266080431752, 80.9429968520701,
NA, NA, 92.8, 95.2886158200472, 100, 86.4199793410402, NA, NA,
89.9001648604344, NA, NA, 91.5033109800214, 83.8918470610424,
73.9339911532972, 88.6921281548131, 94.309068022859, 85.3299585067346,
93.7362934447331, 86.5384615384618, 83.7424288707868, NA, 86.3836615391687,
88.1866796344726, 58.1081081081081, NA, NA, 75.7976468146464,
62.1289432084197, NA, 88.1488735873722, 84.2108238885019, 89.8335978405451,
NA, NA, NA, 86.9222656846515, 70.3584041024493, 70.9023609260137,
NA, NA, 85.9, NA, 89.8689917369566, 90.3864925686512, 92.628169473785,
80.9468895007753, 78.7885741638367, 75.4005791241575, NA, NA,
88.4, 87.7139456942162, 92.3809523809525, 83.7645232075473, NA,
NA, 89.567507133125, NA, NA, 91.6433898994358, 73.6225283043976,
65.9223049858496, 72.3148320483822, 86.2596215693035, 85.6224026570651,
87.4940330171337, 78.7499999999997, 81.9949404453665, NA, 84.5563115043796,
87.0190820047277)), .Names = c("COUNTRY", "Year", "location",
"percent"), row.names = c(NA, -336L), class = "data.frame")
I want to produce a simple plot with ggplot2 that is facetted by COUNTRY. I can do this fine using geom_point:
# Scatter of percent against Year, coloured by location, one panel per COUNTRY.
ggplot(meas_melt, aes(x = Year, y = percent, colour = location)) +
  facet_wrap(~COUNTRY) +
  geom_point()
However, if I try and produce a line plot with geom_line (ggplot(meas_melt, aes(Year, percent, colour=location))+ geom_line() + facet_wrap(~COUNTRY))
I get the following error:
geom_path: Each group consist of only one observation. Do you need to
adjust the group aesthetic?
I had thought that this could be because a few of the countries have only one year's worth of data, so I subsetted the data to remove these three countries like so:
# Line version of the plot; geom_line() is given only the rows of countries
# other than the three listed here.
ggplot(meas_melt, aes(x = Year, y = percent, colour = location)) +
  geom_line(
    data = meas_melt[
      !(meas_melt$COUNTRY %in% c('Comoros', 'South Africa', 'Swaziland')),
    ]
  ) +
  facet_wrap(~COUNTRY)
However, I get the same error!
@Sven's answer is correct but fixes only part of the problem. Note how there's no plot for Comoros, South Africa, or Swaziland. This is because in your data the year is sometimes a single year, e.g. 2006 or 2007, and sometimes a split year such as "2006/7".
# Show the Swaziland rows; all of them carry the single Year label "2006/7".
# The data frame has to be indexed by name: `data` is a base R function.
meas_melt[meas_melt$COUNTRY == "Swaziland", ]
COUNTRY Year location percent
32 Swaziland 2006/7 Urban 94.83588
80 Swaziland 2006/7 Rural 90.70775
128 Swaziland 2006/7 Total 91.50000
176 Swaziland 2006/7 Capital.City 96.02326
224 Swaziland 2006/7 Other.Cities.towns 93.51681
272 Swaziland 2006/7 Urban.Non.slum NA
320 Swaziland 2006/7 Urban.Slum NA
Those countries really have only one "year" (hence, no line). More importantly, these odd year designations distort your x-axis. You can see that using the scales="free" argument to facet_wrap(...):
# group = location connects the points of each location into one line even
# though Year is a factor; scales = "free" gives every country panel its own
# axes.
ggplot(meas_melt, aes(x = Year, y = percent, colour = location)) +
  geom_line(aes(group = location)) +
  facet_wrap(~COUNTRY, scales = "free") +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(size = 8, angle = 90, vjust = 0.5)
  )
Which produces this:
You have to specify aes(group = location) inside geom_line:
library(ggplot2)

# Year is a factor, so geom_line() needs an explicit group to know which
# points to join; grouping by location draws one line per location in each
# country panel.
ggplot(
  data = meas_melt,
  mapping = aes(x = Year, y = percent, colour = location)
) +
  facet_wrap(~COUNTRY) +
  geom_line(mapping = aes(group = location))

geom.point in ggplot2, conditional shape

I'm putting together my first plot with ggplot2. I need to set a shape for values == 0. Here's my dataset and what I got so far :
structure(list(Var1 = structure(c(1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L,
12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L,
8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L, 2L, 3L,
4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L,
14L, 15L, 16L), .Label = c("MD-1", "MD-2", "MD-3", "MD-4", "ME-1",
"ME-2", "ME-3", "ME-4", "ME-5", "ME-6", "MF-1", "MF-2", "MF-4",
"MF-6", "MF-7", "MF-8"), class = "factor"), Var2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L,
8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 9L,
9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L,
11L, 11L, 11L, 11L, 11L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L,
12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L), .Label = c("FD-1", "FD-2",
"FD-5", "FD-6", "FD-7", "FE-2", "FE-3", "FE-4", "FE-5", "FE-6",
"FF-1", "FF-2"), class = "factor"), Freq = c(35L, 4L, 5L, 2L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 4L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
14L, 15L, 4L, 3L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 1L, 2L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 3L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 13L, 2L, 5L, 7L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 2L, 0L, 0L, 1L, 1L)), .Names = c("Var1",
"Var2", "Freq"), row.names = c(NA, -192L), class = "data.frame")
Here's the base of my plot
# Base layer: Var1 on the x axis, Var2 on the y axis.
p <- ggplot(data = mat.bub, mapping = aes(x = Var1, y = Var2))
# Bubble plot: the point size encodes Freq.
p + geom_point(mapping = aes(size = Freq))
Now, how can I make geom_point use a specific shape where Freq == 0? Here's what I tried so far:
# Failed attempt: Var1[Freq==0] and Var2[Freq==0] are shorter than the data
# frame, which triggers the "differing number of rows" error quoted below.
p <- ggplot(mat.bub, aes(Var1, Var2,size=Freq))
p + geom_point(aes(Var1[Freq==0], Var2[Freq==0]), colour="black", shape=3, size=5, na.rm = T)
Inspired from this answer :
Modifying the shape for a subset of points with ggplot2
But I get an "arguments imply differing number of rows: 162, 192" error. Of course Var1 and Var2 are not numerical, that's what's different from the mtcars example.
How could I achieve this conditional shaping ? What am I missing ?
Thanx for any help !
As per my note on the answer you link to, try this:
# Draw every point with its size mapped to Freq, then overlay a fixed-size
# black plus sign (shape 3) on the rows where Freq is zero by giving that
# layer its own subset of the data.
p <- ggplot(mat.bub, aes(Var1, Var2, size = Freq)) + geom_point()
p + geom_point(
  data = subset(mat.bub, Freq == 0),
  colour = "black", shape = 3, size = 5,
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  na.rm = TRUE
)
As an explanation, while subsetting in the variables is possible, I much prefer handing each geom the specific subset of whatever data frame I'm dealing with. I find that easier to keep straight in my head, and is apparently less confusing for ggplot as well.

Resources