Manual annotate a ggplot with different labels, in different facets - r

JD Long helped me with this: question about manual annotation.
But is it possible to do something similar on a faceted plot, such that the label style corresponds to the line style (aesthetics) and in a way that lets me annotate different facets individually?
Some data:
funny <- structure(list(Institution = structure(c(1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L,
3L, 3L, 3L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L), .Label = c("Q-branch",
"Some-Ville", "Spectre"), class = "factor"), Type = structure(c(5L,
6L, 1L, 3L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L,
6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L,
6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L, 5L, 6L, 2L, 4L), .Label = c("Korte videregående uddannelser",
"Mammas beer", "Mellemlange videregående uddannelser", "Tastes good",
"Unknown", "Your"), class = "factor"), År = c(2008L, 2008L,
2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L, 2008L,
2008L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L), Mndr = c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 15L, 15L,
15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 27L, 27L, 27L,
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L), Data = c(159L,
NA, NA, 23L, 204L, NA, NA, 12L, 256L, NA, NA, 24L, 166L, 6L,
NA, 43L, 228L, NA, NA, 20L, 196L, 11L, NA, 37L, 99L, 14L, 9L,
96L, 147L, 7L, 5L, 91L, 100L, 10L, 7L, 126L, 60L, 17L, 6L, 106L,
78L, 18L, 13L, 140L, 48L, 23L, 5L, 136L)), .Names = c("Institution",
"Type", "År", "Mndr", "Data"), class = "data.frame", row.names = c(NA,
-48L))
And a facetted plot:
ggplot(funny, aes(Mndr, y=Data, group=Type, col=Type)) +
geom_line() +
facet_grid(.~Institution)
Thanks in advance for your help!

The idea is that for each manual annotation you have to define not only the label, but all the variables that define the panel, color, etc. The following code adds two labels in different panels.
# Base plot: one line per Type, one panel per Institution.
# Every `+` has to END its line: a line that begins with `+` is parsed as a
# separate statement, so the facet specification would never be added.
pl <- ggplot(funny, aes(Mndr, y = Data, group = Type, col = Type)) +
  geom_line() +
  facet_grid(. ~ Institution) # your plot
# One row per label. Besides the text itself, each row carries the variables
# the plot facets on and maps to aesthetics, so every label is drawn in the
# intended panel and colour.
nd <- data.frame(
  Institution = c("Q-branch", "Some-Ville"), # panel
  Type = c("Unknown", "Tastes good"),        # color
  Mndr = c(7, 12),                           # x-coordinate of label
  Data = c(170, 50),                         # y-coordinate of label
  Text = c("Label 1", "Label 2")             # label text
)
# add labels to plot; show.legend = FALSE (the current name of the old
# legend = FALSE argument) keeps the text glyph out of the legend
pl <- pl +
  geom_text(aes(label = Text), data = nd, hjust = 0, show.legend = FALSE)
pl
The legend=FALSE option (spelled show.legend=FALSE in current ggplot2 releases) ensures that the small a's denoting the text are not added to the legend. You don't need a data frame for the labels; you could instead add a separate geom_text for each one, but I find this way simpler.

Related

Automate coding (sum) in R

First of all, I would like to apologise if I did not use the correct jargon.
I have the dataset as below which contains a wide range of categories
Here some excerpt from dput (using droplevels)
structure(list(
x = c(2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L, 2010L,
2010L, 2010L), *[ME: there are more years than 2010...]*
y = c(7.85986, 185.81068, 107.24097, 7094.74649,
1.4982, 185.77319, 5090.79354, 167.58584, 4189.64609, 157.08277,
3927.06932, 2.86732, 71.683, 4.70123, 117.53085, 2.93452, 73.36292,
1.4982, 18.18734, 901.14744, 0.90268, 13.77532, 613.38298, 0.01845,
0.0681, 7.19925, 3.75315, 0.14333, 136.54008, 0.04766, 0.59077,
28.97255, 0.38608, 115.05258, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
x1 = structure(c(4L, 2L, 3L, 1L, 4L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 4L, 2L, 1L, 4L, 2L, 1L, 4L, 2L,
1L, 2L, 4L, 1L, 4L, 2L, 1L, 4L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L), .Label = c("All greenhouse gases - (CO2 equivalent)",
"CH4", "CO2", "N2O"), class = "factor"),
x2 = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Austria",
class = "factor"),
x4 = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 3L, 3L, 4L,
4L, 5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 8L, 9L, 9L, 9L, 10L,
10L, 10L, 11L, 11L, 11L, 12L, 12L, 12L, 13L, 13L, 14L, 14L,
15L, 15L, 16L, 16L, 17L, 17L, 18L, 18L), .Label = c("3",
"3.1", "3.A", "3.A.1", "3.A.2", "3.A.3", "3.A.4", "3.B",
"3.B.1", "3.B.2", "3.B.3", "3.B.4", "3.B.5", "3.C", "3.C.1",
"3.C.2", "3.C.3", "3.C.4"), class = "factor")), class = "data.frame",
row.names = c(NA,
-44L))
I want to know whether the sum of the subcategories in x4 (e.g. 3.B.1+3.B.2+...+3.B.n) equals the figure stated in the parent category (e.g. 3.B), i.e. the sum stated in the CSV, for a given year and country. I want to verify the sums.
To get the sum of the subcategories I have this
# Sum of the 3.A sub-categories for one year and country. All filter columns
# are taken from df explicitly, so the expression does not depend on global
# objects named x / x2 (or on an attach()ed data frame).
sum(df$y[df$x4 %in% c("3.A.1", "3.A.2", "3.A.3", "3.A.4") &
  df$x == 2010L & df$x2 == "Austria"])
To receive the sum of the parent category I have this
# Stated total of the parent category for the same year and country; the
# filter columns are referenced through df$ rather than as bare globals.
sum(df$y[df$x4 %in% c("3.A") & df$x == 2010L & df$x2 == "Austria"])
Next I would need an operation which checks whether the results of both pieces of code are equal (TRUE/FALSE). However, I have more than 20 countries, 20 years, and dozens of categories to check. With my newbie approach I would be writing code for ages...
Is there any way to automate this? Basically, I am looking for code which is able to do the following
1) Run for one category, go to next one
2) once done with categories change year and start again with categories
3) ... same for countries....
Any sort of help would be appreciated and even a suggestions how to use the right jargon in the title. Thanks in any case
Here's a potential solution using dplyr (might require some tweaking based on the full dataset):
library(dplyr)
# For every parent category / year / country, compare the sum of the child
# rows with the value stated on the parent row.
df %>%
  # parent: the code truncated after its second dot-separated component
  # (e.g. "3.A.1" -> "3.A"); a code with fewer components is its own parent.
  # Note that the regex assumes each of the first two components is at most
  # one character long.
  # type: "Parent" when the row is the parent itself, otherwise "Child".
  mutate(
    parent = gsub("(.?\\..?)\\..*", "\\1", x4),
    type = ifelse(parent == x4, "Parent", "Child")
  ) %>%
  # Sum the y's by category, role (parent/child), year and country
  group_by(parent, type, x, x2) %>%
  summarise(total = sum(y)) %>%
  # drop the leftover grouping so the later steps work on plain rows
  ungroup() %>%
  # One row per category/year/country with Child and Parent side by side
  # (names_sort keeps the columns in the order Child, Parent)
  tidyr::pivot_wider(
    names_from = type, values_from = total, names_sort = TRUE
  ) %>%
  # See if the sum of the children is equal to the parent y. The comparison
  # is done row by row: all.equal() on whole columns would yield a single
  # verdict that gets recycled over every country in the table.
  mutate(equals = vapply(
    seq_along(Child),
    function(i) isTRUE(all.equal(Child[[i]], Parent[[i]])),
    logical(1)
  ))
Result using your (new) data:
parent x x2 Child Parent equals
<chr> <int> <fct> <dbl> <dbl> <lgl>
1 3 2010 Austria NA 7396. FALSE
2 3.1 2010 Austria NA 5278. FALSE
3 3.A 2010 Austria 4357. 4357. TRUE
4 3.B 2010 Austria 921. 921. TRUE
5 3.C 2010 Austria 0 0 TRUE
I can see from your new data that you have two levels of parents. My solution will only work for the second level (e.g. 3.1 and its children), but can be easily tweaked to also work for the top level.

dplyr data manipulation to multiply columns by a value

I have a dataframe that looks like the following (dput at the end):
region type age_group year value
AO1 p 0 1990 12
AO1 p 5 1990 10
AO1 p 10 1990 8
AO1 p 15 1990 14
AO1 p 20 1990 19
...
AO1 p 80 1990 12
AO1 p 1 1990 0.54
AO1 p 2 1990 0.46
AO1 p 3 1990 1
where the last three lines express the percentage of males (1) and female (2) and total (3).
What I would like to do is to produce two more variables value.m and value.f by multiplying value by the correct percentage
In this case, value.m would use 0.54 and value.f 0.46 for year 1990 in region AO1
dt$value.m <- dt %>%
group_by(region, type, age_num, year) %>%
mutate(value.m=value*???)
Any ideas?
dt <- structure(list(region = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L,
4L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 1L, 1L, 4L, 4L, 4L, 2L, 2L, 2L), .Label =
c("AO1", "AO11", "AO22", "AO3"), class = "factor"), age = structure(c(1L,
10L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 19L, 18L, 20L,
19L, 18L, 20L, 19L, 18L, 20L, 19L, 18L, 20L, 21L, 30L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 21L, 30L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 39L,
38L, 40L, 39L, 38L, 40L, 39L, 38L, 40L, 39L, 38L, 40L, 1L, 10L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L,
17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 11L, 12L, 13L,
14L, 15L, 16L, 17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L,
11L, 12L, 13L, 14L, 15L, 16L, 17L, 1L, 10L, 2L, 3L, 4L, 5L, 6L,
7L, 8L, 9L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 19L, 18L, 20L,
19L, 18L, 20L, 19L, 18L, 20L, 19L, 18L, 20L, 21L, 30L, 22L, 23L,
24L, 25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L,
21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L,
34L, 35L, 36L, 37L, 21L, 30L, 22L, 23L, 24L, 25L, 26L, 27L, 28L,
29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 21L, 30L, 22L, 23L, 24L,
25L, 26L, 27L, 28L, 29L, 31L, 32L, 33L, 34L, 35L, 36L, 37L, 39L,
38L, 40L, 39L, 38L, 40L, 39L, 38L, 40L, 39L, 38L, 40L), .Label = c("c_0_4",
"c_10_14", "c_15_19", "c_20_24", "c_25_29", "c_30_34", "c_35_39",
"c_40_44", "c_45_49", "c_5_9", "c_50_54", "c_55_59", "c_60_64",
"c_65_69", "c_70_74", "c_75_79", "c_80+", "c_f", "c_m", "c_total_sex",
"p_0_4", "p_10_14", "p_15_19", "p_20_24", "p_25_29", "p_30_34",
"p_35_39", "p_40_44", "p_45_49", "p_5_9", "p_50_54", "p_55_59",
"p_60_64", "p_65_69", "p_70_74", "p_75_79", "p_80+", "p_f", "p_m",
"p_total_sex"), class = "factor"), age_num = c(0L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 0L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 0L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 0L, 5L, 10L, 15L,
20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L,
0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L,
65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L, 25L, 30L, 35L, 40L,
45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 0L, 5L, 10L, 15L, 20L,
25L, 30L, 35L, 40L, 45L, 50L, 55L, 60L, 65L, 70L, 75L, 80L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L), year = c(2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L, 2006L,
2006L, 2006L, 2006L, 2006L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L,
2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L, 2007L), value
= c(79.6, 55.1, 44.6, 44.3,
26.8, 9.5, 7.2, 6.5, 5.6, 2.4, 0.6, 5.2, 7.6, 10.4, 12, 13.5,
13.5, 42.4, 23.1, 14.7, 12.5, 3.9, 1.4, 2.4, 5, 4.2, 7, 7.6,
10.2, 9.5, 11.1, 12.1, 13.8, 14.1, 30.5, 18.1, 14.6, 7.6, 1.4,
3.3, 4.1, 6.9, 8, 9.9, 9.8, 13.5, 13.1, 14.1, 14.2, 14.6, 14.6,
60.1, 52.1, 52.5, 64.1, 45.5, 26.9, 10.6, 7.7, 8.7, 0.4, 0.5,
4.1, 8.8, 9.9, 12.4, 13.3, 14, 216.8, 227.6, 459.7, 115.8, 112.3,
243.5, 85, 87.9, 188.2, 241.6, 253.9, 510.8, 0.2, 0.15, 0.13,
0.13, 0.09, 0.053, 0.05, 0.05, 0.04, 0.03, 0.03, 0.024, 0, 0.01,
0.016, 0, 0, 0.22, 0.15, 0.12, 0.11, 0.07, 0.05, 0.05, 0.04,
0.04, 0.03, 0.03, 0.02, 0.02, 0.02, 0.01, 0.01, 0, 0.2, 0.19,
0.15, 0.11, 0.07, 0.06, 0.06, 0.04, 0.04, 0.03, 0.03, 0.01, 0.01,
0.01, 0.01, 0, 0, 0.14, 0.13, 0.13, 0.15, 0.12, 0.08, 0.05, 0.04,
0.05, 0.03, 0.03, 0.02, 0.01, 0.01, 0.01, 0, 0, 0.49, 0.51, 1,
0.51, 0.49, 1, 0.49, 0.51, 1, 0.49, 0.51, 1, 241.9, 175.54, 146.5,
138.46, 108.14, 73.94, 66.58, 64.78, 58.9, 43.86, 49.1, 36.5,
33.38, 25.54, 21.66, 18.42, 18.58, 243.74, 163.86, 130.22, 121.42,
96.1, 80.3, 63.9, 55.02, 49.02, 41.78, 51.74, 35.22, 32.66, 25.78,
23.06, 18.66, 18.14, 152.5, 109.9, 93.34, 82.62, 61.7, 56.06,
44.38, 38.26, 33.02, 29.58, 30.86, 21.86, 21.18, 17.62, 17.86,
15.86, 15.58, 196.82, 175.74, 180.46, 182.3, 153.22, 118.18,
81.34, 70.46, 65.82, 47.7, 54.66, 38.54, 29.42, 25.58, 20.38,
18.18, 17.18, 547.58, 566.78, 1100.38, 519.1, 522.78, 1028.06,
310.54, 322.26, 618.82, 619.62, 647.02, 1252.66, 0.206, 0.15,
0.126, 0.122, 0.088, 0.052, 0.05, 0.05, 0.04, 0.03, 0.032, 0.02,
0.02, 0.01, 0.01, 0, 0.002, 0.222, 0.15, 0.118, 0.108, 0.074,
0.054, 0.05, 0.04, 0.038, 0.028, 0.032, 0.02, 0.02, 0.018, 0.01,
0.008, 0, 0.23, 0.158, 0.142, 0.11, 0.074, 0.064, 0.056, 0.04,
0.038, 0.028, 0.03, 0.012, 0.01, 0.01, 0.01, 0, 0, 0.144, 0.132,
0.134, 0.14, 0.118, 0.082, 0.054, 0.042, 0.046, 0.028, 0.032,
0.02, 0.01, 0.01, 0.008, 0, 0, 0.49, 0.51, 1, 0.57, 0.43, 1,
0.4, 0.6, 1, 0.3, 0.7, 1)), .Names = c("region", "age", "age_num",
"year", "value"), class = "data.frame", row.names = c(NA, -320L))
Step 1: merge year and region in one variable (I work on dt, that you've dput-ed)
new.dt <- dt %>% mutate(regyear = paste(region, year))
Step 2: create data.frame with your p_m's and regyear only:
p.m.s<-new.dt %>%
filter(age=='p_m') %>%
select(regyear, value) %>%
rename(pm=value) # to avoid duplicated names in new.df and p.m.s
Step 3: the same with p_f's:
p.f.s<-new.dt %>% filter(age=='p_f') %>% select(regyear, value) %>% rename(pf=value)
Step 4: get what you need :)
# Attach the male/female shares to every row through the regyear key, scale
# value by each share, then drop the helper columns. The join key is named
# explicitly so the joins cannot silently pick up any other shared column.
new.dt %>%
  left_join(p.m.s, by = "regyear") %>% # add p_m's
  left_join(p.f.s, by = "regyear") %>% # add p_f's
  mutate(value.m = value * pm, value.f = value * pf) %>%
  select(-c(regyear, pm, pf)) # clean up
Hope this helped!
Hi, in the data you gave, the variable type is called age, so be careful about this. With your data you can accomplish it like this:
# Attach the male (p_m) and female (p_f) shares to every row of the same
# region AND year, then scale value by each share. Matching on region alone
# would pair each row with the shares of every year present in dt.
# Each pipe ends its line so the whole chain parses as a single expression.
dt %>%
  left_join(
    dt %>% filter(age == "p_m") %>% select(region, year, p_m = value),
    by = c("region", "year")
  ) %>%
  left_join(
    dt %>% filter(age == "p_f") %>% select(region, year, p_f = value),
    by = c("region", "year")
  ) %>%
  mutate(value.m = value * p_m, value.f = value * p_f) %>%
  # the share columns were only needed for the multiplication
  select(-c(p_m, p_f))
This code filters the p_m and p_f rows for each region and joins them back to the original table.
It then uses mutate to calculate the values and finally drops the p_m and p_f columns.

Plot values in ggplot geom_lines

I have a dataframe like this one:
> dput(df)
structure(list(OBBLIGATORIO = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("no",
"yes"), class = "factor"), COUNTRY = structure(c(16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L,
16L, 16L, 16L, 16L), .Label = c("Austria", "Belgium", "Bulgaria",
"Croatia", "Cyprus", "Czech Republic", "Denmark", "Estonia",
"Finland", "France", "Germany", "Greece", "Hungary", "Iceland",
"Ireland", "Italy", "Latvia", "Lithuania", "Luxembourg", "Malta",
"Norway", "Poland", "Portugal", "Romania", "Slovakia", "Slovenia",
"Spain", "Sweden", "United Kingdom of Great Britain and Northern Ireland"
), class = "factor"), YEAR = c(2003L, 2006L, 2007L, 2008L, 2009L,
2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L,
2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L,
1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L, 2008L,
2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L, 2001L,
2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L, 2007L,
2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L, 2000L,
2001L, 2002L, 2003L, 2006L, 2007L, 2008L, 2009L, 2010L, 1995L,
1996L, 1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2006L,
2007L, 2008L, 2009L, 2010L, 1995L, 1996L, 1997L, 1998L, 1999L,
2000L, 2001L, 2002L), AGE = structure(c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "Total", class = "factor"),
`CAUSE OF DEATH` = c("Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Acute poliomyelitis", "Acute poliomyelitis", "Acute poliomyelitis",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria",
"Diphtheria", "Diphtheria", "Diphtheria", "Diphtheria", "Measles",
"Measles", "Measles", "Measles", "Measles", "Measles", "Measles",
"Measles", "Measles", "Measles", "Measles", "Measles", "Measles",
"Measles", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus",
"Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus", "Tetanus",
"Tetanus", "Tetanus", "Tetanus", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Tuberculosis", "Tuberculosis", "Tuberculosis", "Tuberculosis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Viral hepatitis",
"Viral hepatitis", "Viral hepatitis", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough",
"Whooping cough", "Whooping cough", "Whooping cough", "Whooping cough"
), VALUE = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 4L, 2L, 2L, 2L, 1L, 1L, 6L, 7L, 7L, 1L, 2L,
3L, 2L, 5L, 12L, 9L, 13L, 9L, 13L, 8L, 17L, 14L, 16L, 18L,
15L, 19L, 11L, 10L, 25L, 24L, 21L, 22L, 23L, 20L, 34L, 32L,
31L, 30L, 29L, 28L, 27L, 26L, 41L, 42L, 43L, 45L, 46L, 47L,
33L, 35L, 36L, 37L, 38L, 39L, 40L, 44L, 1L, 2L, 1L, 1L, 1L,
2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L, 1L), .Label = c("0", "1",
"2", "3", "6", "7", "9", "17", "18", "19", "21", "22", "27",
"28", "30", "31", "37", "41", "42", "301", "329", "333",
"344", "350", "396", "413", "415", "460", "517", "558", "597",
"609", "622", "647", "681", "1087", "1349", "1413", "1448",
"1499", "1576", "1654", "1725", "1948", "2531", "2665", "2757"
), class = "factor"), ID = 1:98), .Names = c("OBBLIGATORIO",
"COUNTRY", "YEAR", "AGE", "CAUSE OF DEATH", "VALUE", "ID"), row.names = c(NA,
-98L), class = "data.frame")
I want to obtain a chart that:
on x axis there are values from YEAR column
on y axis there are
values from VALUE column data are divided by CAUSE OF DEATH column
So something like:
I try:
x11()
ggplot(df, aes(x = df$`YEAR`, y = df$`VALUE`, fill = df$`CAUSE OF DEATH`, colour = df$`CAUSE OF DEATH`)) +
geom_density(alpha = 0.1) +
xlim(1995, 2010)
But the result is completely different from the one I want.
Thanks
I'm not sure what your actual question is, but one problem with your dataframe is that the VALUE column is currently defined as a factor, not as a numeric. I think that remedying this will go a long way to solving your problem. I do this post-facto below (i.e. after the dataframe is already created), but if you are getting the data into R via a read.table() or similar command, you can specify the class of your columns at data frame creation time, which is probably a better approach.
In my code below I use the dplyr package for manipulating dataframes. It's quite powerful, but for this particular example it isn't doing anything that base R couldn't do.
# library() stops with an error when a package is missing, instead of
# returning FALSE and failing later at the first use
library(ggplot2)
library(dplyr)
library(magrittr)
# df <- <YOUR dput output goes here>
# (kept as a comment: an unfinished `df <-` would swallow the next statement)
# fix the problem with the `VALUE` column: it is a factor, so go through
# as.character() to recover the numbers shown in the labels
df %<>% mutate(VALUE = VALUE %>% as.character %>% as.numeric)
# equivalent in base R:
# df$VALUE <- as.numeric(as.character(df$VALUE))
# make a graph (is it the one you want?): total VALUE per year, one coloured
# line per cause of death
df %>%
  group_by(YEAR, `CAUSE OF DEATH`) %>%
  summarize(value = sum(VALUE)) %>%
  ggplot(aes(x = YEAR, y = value, color = `CAUSE OF DEATH`)) +
  geom_line() +
  theme_bw() +
  geom_point()
# save graph for uploading to SO (ggsave() writes the last plot created)
ggsave('SO37230266.png')
The result is this graph:

How to change the order of the bars in accordance with the group variable in a barplot using lattice in R?

I am making a bar plot using lattice in R where I have data for 4 different years on sources of irrigation for different states. using my code, the bar plot is coming fine but I wish the bar corresponding to the year 1996 to be plotted first followed by the bar corresponding to year 2001 etc. so as to show the increasing area being irrigated by tube-wells. However, I am unable to change the ordering. Here is my data and the R code. Many thanks for your help.
# sample data
irr_atlas <- structure(list(state = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("ANDHRA PRADESH",
"KARNATAKA", "MADHYA PRADESH", "RAJASTHAN"), class = "factor"),
st_code = c(28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L, 28L,
28L, 28L, 28L, 28L, 28L, 28L, 28L, 29L, 29L, 29L, 29L, 29L,
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 23L,
23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L, 23L,
23L, 23L, 23L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 8L), year = c(1996L, 1996L, 1996L, 1996L,
2001L, 2001L, 2001L, 2001L, 2006L, 2006L, 2006L, 2006L, 2011L,
2011L, 2011L, 2011L, 1996L, 1996L, 1996L, 1996L, 2001L, 2001L,
2001L, 2001L, 2006L, 2006L, 2006L, 2006L, 2011L, 2011L, 2011L,
2011L, 1996L, 1996L, 1996L, 1996L, 2001L, 2001L, 2001L, 2001L,
2006L, 2006L, 2006L, 2006L, 2011L, 2011L, 2011L, 2011L, 1996L,
1996L, 1996L, 1996L, 2001L, 2001L, 2001L, 2001L, 2006L, 2006L,
2006L, 2006L, 2011L, 2011L, 2011L, 2011L), irr_area = c(1.84066,
0.942819, 0.82886, 0.853502, 1.54922, 0.825659, 0.542492,
1.53412, 1.72969, 0.70271, 0.637221, 1.53894, 1.99893, 0.678425,
0.819829, 1.70708, 0.921594, 0.231669, 0.316999, 0.358529,
0.91339, 0.207157, 0.426549, 0.481061, 0.921255, 0.18192,
0.426145, 0.547193, 0.930802, 0.148065, 0.377149, 1.51843,
1.59425, 0.112145, 2.67683, 0.540054, 1.48056, 0.030502,
1.63696, 0.563948, 1.12595, 0.058667, 2.46494, 1.15004, 1.10444,
0.157069, 2.64378, 2.14177, 1.55814, 0.106623, 2.71347, 0.644683,
1.35746, 0.030586, 2.41845, 0.935234, 1.76933, 0.054374,
2.46197, 1.76918, 1.62587, 0.050299, 2.14737, 2.82708),irr_source = structure(c(1L,2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L,
1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L,
3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L,
4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L, 2L, 4L, 3L, 1L,
2L, 4L, 3L), .Label = c("Canal", "Tank", "Tube", "Well"), class = "factor")), .Names = c("state","st_code", "year", "irr_area", "irr_source"), class = "data.frame", row.names = c(NA, -64L))
Code for plot...
library(lattice)
barchart(~irr_area | factor(state) + factor(irr_source),
group=year, data=irr_atlas, auto.key=list(space="right"))
As mentioned, ordering of groups in R graphics is usually determined by the ordering of the factor variable. So, you can reorder your factors with factor and its levels argument.
library(lattice)
# The order of the bars within a panel follows the level order of the
# grouping factor, so year is turned into a factor with explicit levels.
# `groups` is spelled out in full and TRUE is used instead of T, which is an
# ordinary variable that can be reassigned.
barchart(~ irr_area | factor(state) + factor(irr_source),
  groups = factor(year, levels = sort(unique(year), decreasing = TRUE)), # change the order of years
  data = irr_atlas, auto.key = list(space = "right")
)
You can switch it back the other way by setting decreasing=FALSE.

Looping subsets in plm

I'm trying to program something quite simple (I think) in R, but I can't seem to get it right. I have a dataset of 50 countries (1 to 50) for 15 years each and about 20 variables per country. For now I am only testing one variable (OS) on my dependent variable (SMD). I would like to do this with a loop country by country so I would get the output for each country in stead of the overall output.
I thought it would be wise to create a subset first (to be able to look at country 1 first, after which my loop should increase the number for country and test country 2). I believe my regression at the bottom of the page should give me the output for country 1 in stead of the overall score for the entire dataset. However I keep getting these errors:
> pdata <- plm.data(newdata, index=c("Country","Date"))
series are constants and have been removed
> pooling <- plm(Y ~ X, data=pdata, model= "pooling")
series Country, xRegion are constants and have been removed
Error in model.matrix.pFormula(formula, data, rhs = 1, model = model, :
NA in the individual index variable
> summary(pooling)
Error in summary(pooling) : object 'pooling' not found
I might be looking at this all wrong, but I believe that without getting this to work, there is no point in going further with programming the loop itself. Any advice on solving my errors, or other ways of programming a loop are really appreciated.
My code:
rm(list = ls())
mydata <- read.table(file = file.choose(), header = TRUE, dec = ",")
names(mydata)
attach(mydata)
Y <- cbind(SMD)
X <- cbind(OS)
newdata <- subset(mydata, Country %in% c(1))
newdata
pdata <- plm.data(newdata, index=c("Country","Date"))
pooling <- plm(Y ~ X, data=pdata, model= "pooling")
summary(pooling)
Edit: data sample of first 2 countries which causes same error
dput(mydata)
structure(list(Region = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("NAF", "SAME"), class = "factor"), Country = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), Date = c(1995L, 1996L, 1997L, 1998L,
1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L, 2006L, 2007L,
2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L, 1995L, 1996L,
1997L, 1998L, 1999L, 2000L, 2001L, 2002L, 2003L, 2004L, 2005L,
2006L, 2007L, 2008L, 2009L, 2010L, 2011L, 2012L, 2013L, 2014L
), OS = structure(c(19L, 25L, 27L, 15L, 22L, 20L, 23L, 9L, 7L,
5L, 2L, 1L, 4L, 3L, 6L, 10L, 11L, 13L, 11L, 8L, 26L, 25L, 31L,
29L, 28L, 21L, 30L, 24L, 24L, 16L, 11L, 14L, 12L, 17L, 18L, 29L,
32L, 32L, 33L, 34L), .Label = c("51.5", "52.2", "55.6", "56.4",
"56.7", "57.7", "57.8", "58.3", "59", "59.2", "59.6", "59.9",
"60.2", "60.4", "61.1", "61.2", "62.2", "62.3", "62.8", "63.2",
"63.3", "63.8", "63.9", "64.2", "64.3", "64.5", "64.7", "65.3",
"65.5", "65.6", "66.4", "68", "69.6", "70.7"), class = "factor"),
SMD = structure(c(7L, 12L, 20L, 21L, 17L, 15L, 13L, 10L,
14L, 22L, 23L, 33L, 1L, 32L, 29L, 34L, 28L, 25L, NA, NA,
9L, 6L, 8L, 4L, 2L, 35L, 3L, 36L, 5L, 11L, 16L, 18L, 24L,
19L, 26L, 31L, 27L, 30L, NA, NA), .Label = c("100.3565662",
"13.44788845", "13.45858747", "13.56815534", "15.05892471",
"17.63789658", "18.04088718", "18.3101351", "19.34226196",
"21.25530884", "21.54423145", "23.75898948", "24.08770926",
"26.39817342", "29.44079001", "31.40605191", "34.46667996",
"34.52913657", "35.66070947", "36.4419931", "39.16875621",
"44.0126137", "45.72949566", "49.13062679", "54.83730247",
"56.87886311", "59.80971583", "60.5658962", "69.20148901",
"70.91362874", "72.64845214", "73.97139238", "75.20140919",
"76.18378138", "9.570435019", "9.867635305"), class = "factor")), .Names = c("Region",
"Country", "Date", "OS", "SMD"), class = "data.frame", row.names = c(NA,
-40L))
Are you sure you need to use plm?? This produces a list of summaries by country.
# convert factors to numeric: go through as.character() first, because
# as.numeric() applied directly to a factor returns the integer level codes
# rather than the numbers shown in the labels
mydata$SMD <- as.numeric(as.character(mydata$SMD))
mydata$OS <- as.numeric(as.character(mydata$OS))
# Using lapply(...): one lm summary per country, in order of first appearance
smry <- lapply(
  unique(mydata$Country),
  function(cntry) {
    summary(lm(SMD ~ OS, data = mydata[mydata$Country == cntry, ]))
  }
)
# Same thing, using for loop. The result list is preallocated and filled by
# position; wrapping it as list(smry, ...) on every pass would nest the
# previous results one level deeper each time.
countries <- unique(mydata$Country)
smry <- vector("list", length(countries))
for (i in seq_along(countries)) {
  smry[[i]] <- summary(
    lm(SMD ~ OS, data = mydata[mydata$Country == countries[i], ])
  )
}
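In your dataset, SMD and OS are factors, which need to be converted to numeric first (via as.character(), because as.numeric() on a factor returns the internal level codes rather than the values).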

Resources