ggpredict error: $ operator not defined for this S4 class - r

I have formulated a mixed linear regression model that I would like to plot.
I wanted to do so by generating predicted values with ggPredict from the
ggiraphExtra package.
My model is as follows:
# Linear mixed model with a three-way interaction of Slope_degree, Path_dist
# and AS_sa as fixed effects and a random intercept for each Location.
# The response column has a non-syntactic name, so it must be wrapped in
# backticks; without them R would try to call a function named Log().
# REML = FALSE requests a maximum-likelihood fit instead of REML.
lme9v <- lmer(
  `Log(Visit)` ~ Slope_degree * Path_dist * AS_sa + (1 | Location),
  data = merged_dataset_05,
  REML = FALSE
)
* Note that `Log(Visit)` is written with backtick escapes in my actual code.
ggPredict(lme9v, interactive=TRUE)
However, I get the error:
Error: $ operator not defined for this S4 class
What causes the error to occur and what should I do differently?
This is my data (output of dput):
structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), Days_deployed = c(15L,
15L, 14L, 14L, 14L, 14L), Count = c(23L, 29L, 9L, 20L, 85L,
43L), Tottime = c(295, 318, 66, 199, 1386, 745), Path_dist = c(659.4047198,
1021.11078342, 516.51545352, 997.8758996, 988.18342935, 957.66932416
), Count_rate = c(9.2, 11.6, 4.10334346504559, 9.11854103343465,
42.358803986711, 17.3854447439353), Time_use = c(118, 127.2,
30.0911854103343, 90.7294832826748, 690.697674418605, 301.212938005391
), `Log(Time)` = c(4.77068462446567, 4.84576065090602, 3.40423228535731,
4.50788236805538, 6.53770220909723, 5.70781744986838), `Log(Visit)` = c(2.21920348405499,
2.45100509811232, 1.4118021206671, 2.21030981688487, 3.7461762858377,
2.85563334718238), Location = c(1, 1, 2, 2, 3, 3)), row.names = c(NA,
-6L), groups = structure(list(Station = structure(1:6, .Label = c("BL0102S",
"BL0102V", "BL01R", "BL01S", "BL0405-2R", "BL0405-2S", "BL0405R",
"BL0405S", "BL1112S", "BL1112V", "BL11R", "BL11S", "BL1314-2R",
"BL1314-2S", "BL1516S", "BL1516V", "D0405S", "D0405V", "DF0405S",
"DF0405V", "DF04S", "DF04V", "DF0708S", "DF0708V", "DF07R", "DF07S",
"DF0809R", "DF0809S", "DF12R", "DF12S", "DF14S", "DF14V", "FM06R",
"FM06S", "FM0708S", "FM0708V", "FM0910R", "FM0910S", "FM1415R",
"FM1415S", "TB0405S", "TB0405V", "TB12S", "TB12V", "WMW0102R",
"WMW0102S", "WMW0203S", "WMW0203V", "WMW02S", "WMW02V"), class = "factor"),
Geom = structure(c(2L, 1L, 1L, 2L, 1L, 2L), .Label = c("L",
"S"), class = "factor"), Slope_degree = c(24L, 5L, 0L, 23L,
5L, 35L), MDD = c(5, 5, 4.7, 4.7, 4.3, 5.3), CC = c(84L,
83L, 82L, 80L, 78L, 74L), CT = c(61L, 15L, 7L, 33L, 22L,
30L), Corrected_10m = c(10.9, 10, 10, 10.9, 10, 12.2), Av_litter_depth = c(2.89,
4, 7, 4.22, 3.83, 3.58), Content = c(20.34, 23.26, 23.23,
23.74, 17.5, 20.24), Slope_radian = c(0.41887902, 0.087266463,
0, 0.401425728, 0.087266463, 0.610865238), AB_a = c(0L, 0L,
0L, 0L, 1L, 0L), AB_sa = c(9L, 2L, 0L, 1L, 5L, 0L), AS_a = c(0L,
0L, 0L, 0L, 3L, 0L), AS_sa = c(2L, 1L, 0L, 0L, 2L, 0L), .rows = list(
1L, 2L, 3L, 4L, 5L, 6L)), row.names = c(NA, -6L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), na.action = structure(c(`31` = 31L,
`32` = 32L, `47` = 47L, `48` = 48L), class = "omit"))

Related

How to create a column in a data frame that bins n rows of the data frame consecutively

I have a data frame
structure(list(id = c(31068L, 4741L, 31068L, 48783L, 48783L,
4741L, 40866L, 602L, 43498L, 602L, 47365L, 27643L, 30657L, 47365L,
2456L, 30657L, 48783L, 40866L, 31068L, 31068L), chrom = structure(c(10L,
2L, 10L, 18L, 18L, 2L, 1L, 12L, 15L, 12L, 3L, 14L, 13L, 3L, 5L,
13L, 18L, 1L, 10L, 10L), .Label = c("chr1", "chr10", "chr11",
"chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18",
"chr19", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8",
"chr9", "chrM", "chrUn", "chrX", "chrY"), class = "factor"),
strand = structure(c(2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L), .Label = c("-",
"+"), class = "factor"), position = c(32233920L, 63320175L,
32233915L, 105252695L, 105252690L, 63320180L, 160222550L,
60750690L, 111386910L, 60750685L, 89019895L, 155820080L,
146439265L, 89019890L, 56738465L, 146439260L, 105252700L,
160222545L, 32233910L, 32233925L), state = c(1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L), probability = c(0.0609068958401659, 0.0652616906258002,
0.0692068771991058, 0.0692812165331229, 0.0701354955671266,
0.0703768347393804, 0.0711103024384338, 0.0741845396230543,
0.0743938664894217, 0.0749688062517259, 0.079612884618025,
0.0813520378059492, 0.0826542716746912, 0.0846949902404464,
0.0861396527789957, 0.0864410597153605, 0.0871058913414357,
0.087430325878555, 0.0878512688974869, 0.0880022007028806
), differential = c(-0.23435362433765, -0.23435362433765,
-0.171785943061141, 0.196483908128415, 0.251738193935959,
-0.23435362433765, -0.23435362433765, -0.278594311118605,
0.242648431724363, -0.278594311118605, -0.278594311118605,
0.209140348612477, -0.28152682524624, -0.234047089198976,
0.290374904597332, -0.242344380735883, 0.196483908128415,
-0.23435362433765, -0.171785943061141, -0.155502764975432
), tag1 = c(0L, 0L, 0L, 15L, 15L, 0L, 0L, 5L, 30L, 5L, 5L,
10L, 2L, 3L, 15L, 7L, 15L, 0L, 0L, 0L), mut1 = c(0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), tag2 = c(15L, 15L, 11L, 5L, 2L, 15L, 15L, 25L,
15L, 25L, 25L, 0L, 21L, 19L, 0L, 25L, 5L, 15L, 11L, 10L),
mut2 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), group = c(1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)), .Names = c("id",
"chrom", "strand", "position", "state", "probability", "differential",
"tag1", "mut1", "tag2", "mut2", "group"), class = c("data.table",
"data.frame"), row.names = c(NA, -20L), .internal.selfref = <pointer: 0x102819778>)
and I would like to create a new column df$bin such that the bin column creates bins of length n of consecutive values.
For example, if n = 2, I would want there to be 2 bins where the bin column looks like bin = c(1,1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,2)
such that the values of df$bin would be 1:n and each value would be repeated nrow(df)/n times.
You can use the base R function cut for this:
# Split the row indices into 2 equal-width intervals and store the integer
# interval number (labels = FALSE) as the bin; change `breaks` to get n bins.
df$bin <- cut(seq_len(nrow(df)), breaks = 2, labels = FALSE)

R: It's a named list but indexing by $name doesn't work?

I'm trying to learn R but I'm having problems understanding these data structures. I was under the impression that named lists were indexed by
'listName$index', but there seems to be some other form of named list. Look at the following example
> m <- mongo(collection = "games", db = "games")
> someGame = m$find(someQuery)
> typeof(someGame)
[1] "list"
> typeof(someGame$players)
[1] "list"
> someGame$players
benchmarks.tower_damage.raw benchmarks.tower_damage.pct kills_per_min
1 0 0.0608506 NA
2 105 0.2522049 0.07380074
3 1977 0.7609042 0.33210332
4 2042 0.9973893 0.07380074
So someGame is a named list, so we can access someGame$players. But that is also a list. But attempting someGame$players$kills_per_min, or someGame$players['kills_per_min'] just returns NULL.
So how do I access for example 'kills_per_min'?
dput output:
> dput(someGame$players)
list(structure(list(match_id = c(10216L, 10216L, 10216L, 10216L
), player_slot = c(0L, 1L, 128L, 129L), account_id = c(86480929L,
86480927L, 86480949L, 86502042L), assists = c(0L, 0L, 2L, 5L),
deaths = c(4L, 7L, 1L, 2L), denies = c(3L, 1L, 1L, 20L),
gold = c(374L, 697L, 412L, 2408L), gold_per_min = c(186L,
217L, 401L, 319L), gold_spent = c(4324L, 4825L, 10865L, 6488L
), hero_damage = c(1270L, 2494L, 9157L, 4108L), hero_healing = c(0L,
0L, 0L, 0L), hero_id = c(6L, 20L, 15L, 26L), item_0 = c(0L,
0L, 29L, 23L), item_1 = c(0L, 0L, 125L, 29L), item_2 = c(0L,
0L, 41L, 10L), item_3 = c(0L, 0L, 162L, 104L), item_4 = c(0L,
0L, 0L, 16L), item_5 = c(0L, 0L, 22L, 0L), kills = c(0L,
2L, 9L, 2L), last_hits = c(36L, 31L, 73L, 64L), leaver_status = c(0L,
0L, 0L, 0L), level = c(9L, 11L, 14L, 14L), tower_damage = c(0L,
105L, 1977L, 2042L), xp_per_min = c(176L, 241L, 431L, 388L
), personaname = c("dotatournament", "dotatournament", "dotatournament",
"dotatournament"), radiant_win = c(FALSE, FALSE, FALSE, FALSE
), start_time = c(1313270783L, 1313270783L, 1313270783L,
1313270783L), duration = c(1626L, 1626L, 1626L, 1626L), cluster = c(0L,
0L, 0L, 0L), lobby_type = c(1L, 1L, 1L, 1L), game_mode = c(0L,
0L, 0L, 0L), patch = c(2L, 2L, 2L, 2L), isRadiant = c(TRUE,
TRUE, FALSE, FALSE), win = c(0L, 0L, 1L, 1L), lose = c(1L,
1L, 0L, 0L), total_gold = c(5040L, 5880L, 10867L, 8644L),
total_xp = c(4769L, 6531L, 11680L, 10514L), kda = c(0L, 0L,
5L, 2L), abandons = c(0L, 0L, 0L, 0L), benchmarks = structure(list(
gold_per_min = structure(list(raw = c(186L, 217L, 401L,
319L), pct = c(0.019338422, 0.124976543, 0.525310411,
0.635813457)), .Names = c("raw", "pct"), class = "data.frame", row.names = c(NA,
4L)), xp_per_min = structure(list(raw = c(176L, 241L,
431L, 388L), pct = c(0.032788077, 0.14374179, 0.478828399,
0.653969384)), .Names = c("raw", "pct"), class = "data.frame", row.names = c(NA,
4L)), kills_per_min = structure(list(raw = c(0, 0.073800738,
0.332103321, 0.073800738), pct = c(0.059764432, 0.443610433,
0.897738133, 0.311854753)), .Names = c("raw", "pct"), class = "data.frame", row.names = c(NA,
4L)), last_hits_per_min = structure(list(raw = c(1.328413284,
1.143911439, 2.693726937, 2.361623616), pct = c(0.056347244,
0.490335898, 0.430710417, 0.973774772)), .Names = c("raw",
"pct"), class = "data.frame", row.names = c(NA, 4L)),
hero_damage_per_min = structure(list(raw = c(46.863468635,
92.029520295, 337.896678967, 151.586715867), pct = c(0.027991857,
0.124225934, 0.641605607, 0.419247656)), .Names = c("raw",
"pct"), class = "data.frame", row.names = c(NA, 4L)),
hero_healing_per_min = structure(list(raw = c(0L, 0L,
0L, 0L), pct = c(0.989966555, 0.839744793, 0.848677923,
0.8167794)), .Names = c("raw", "pct"), class = "data.frame", row.names = c(NA,
4L)), tower_damage = structure(list(raw = c(0L, 105L,
1977L, 2042L), pct = c(0.0608506, 0.252204916, 0.760904171,
0.997389344)), .Names = c("raw", "pct"), class = "data.frame", row.names = c(NA,
4L))), .Names = c("gold_per_min", "xp_per_min", "kills_per_min",
"last_hits_per_min", "hero_damage_per_min", "hero_healing_per_min",
"tower_damage"), class = "data.frame", row.names = c(NA,
4L)), kills_per_min = c(NA, 0.073800738, 0.332103321, 0.073800738
)), .Names = c("match_id", "player_slot", "account_id", "assists",
"deaths", "denies", "gold", "gold_per_min", "gold_spent", "hero_damage",
"hero_healing", "hero_id", "item_0", "item_1", "item_2", "item_3",
"item_4", "item_5", "kills", "last_hits", "leaver_status", "level",
"tower_damage", "xp_per_min", "personaname", "radiant_win", "start_time",
"duration", "cluster", "lobby_type", "game_mode", "patch", "isRadiant",
"win", "lose", "total_gold", "total_xp", "kda", "abandons", "benchmarks",
"kills_per_min"), class = "data.frame", row.names = c(NA, 4L)))

R software: error when using cozigam() function

I am modelling the potential distribution of a species using COZIGAM package. I have the response variable ("pb", which tells where the species is present) and the predictor variables (e.g. altitude, temperature, precipitation, etc).
When I run this formula:
# devtools::install_github('AndrewLJackson/COZIGAM')
coz.model <- cozigam(formula=pb ~ s(altitude) + s(combustible) + s(distribution) + s(e1) + s(e2) + s(e3) + s(euc.human) + s(euc.river) + s(fccarb) + s(fccmat) + s(forarb) + s(aspect) + s(slope) + s(precipitation) + s(radiation) + s(tipestr_class) + s(tipestr_forest) + s(tmean), data=sdmdata2, family=poisson)
I get the following error:
Error in as.matrix(x) : object 'altitude' not found
However, when I run as.matrix(sdmdata2), the 'altitude' variable exists in my matrix. The output of dput(head(sdmdata2)) is:
structure(list(X = 1:6, pb = c(2L, 2L, 2L, 2L, 2L, 2L), altitude = c(879L,
1094L, 1035L, 410L, 342L, 665L), combustible = c(6L, 6L, 3L,
0L, 3L, 3L), distribution = c(6L, 6L, 6L, 0L, 6L, 0L), e1 = c(4L,
4L, 2L, 0L, 4L, 0L), e2 = c(0L, 0L, 2L, 0L, 2L, 0L), e3 = c(0L,
0L, 4L, 0L, 2L, 0L), euc.human = c(790.569397, 3201.562012, 1750,
250, 250, 1952.562012), euc.river = c(0, 4069.705078, 353.5534058,
1030.776001, 559.0170288, 0), fccarb = c(90L, 70L, 40L, 0L, 30L,
0L), fccmat = c(5L, 10L, 35L, 0L, 60L, 80L), forarb = c(1L, 1L,
2L, 0L, 5L, 0L), aspect = c(6L, 8L, 6L, 4L, 3L, 3L), slope = c(5L,
3L, 5L, 2L, 6L, 5L), precipitation = c(87.01500702, 79.57628632,
81.86239624, 75.10630798, 49.58106995, 69.55927277), radiation = c(160.1408997,
163.4971008, 161.8542938, 157.9179993, 159.2113953, 160.6203003
), tipestr_class = c(1L, 1L, 1L, 7L, 1L, 2L), tipestr_forest = c(6L,
6L, 6L, 0L, 6L, 0L), tmean = c(141.7760925, 134.9530029, 141.9192047,
171.9972992, 186.2566986, 157.0391998)), .Names = c("X", "pb",
"altitude", "combustible", "distribution", "e1", "e2", "e3", "euc.human",
"euc.river", "fccarb", "fccmat", "forarb", "aspect", "slope",
"precipitation", "radiation", "tipestr_class", "tipestr_forest",
"tmean"), row.names = c(NA, 6L), class = "data.frame")
Does anyone know what the problem is?

Multiple NA's for the last variables of linear regression model in R

I am trying to run a linear regression model where I have dummy variables in my data to indicate if a certain predictor variable is not present. I have a total of 15 predictor variables.
No matter the order of my predictor variables, the last five variables always result in NA.
This problem is almost exactly the same as the one asked here: linear regression "NA" estimate just for last coefficient
I tried adding -1 or +0 to the code
lm(H~id11+id21+id22+id23+id24+id31+id41+id42+id43+id52+id71+id81+id82+id90+id95, data=macro.shed)
And that resulted in only one less value being NA. So now I have 4, instead of 5, predictor variables being NA.
I am reading in my data from csv documents.
This is my code:
# Read the two input tables from CSV.
watershed <- read.csv("nlcd_2000_watershed.csv")
macro_2000 <- read.csv("wapp_macro_2000.csv")
# Reshape the Area column row-wise into a 15-column matrix.
# NOTE(review): this assumes every WaterID contributes exactly 15 Area values,
# listed in the same Value order -- confirm against the CSV.
temp1 <- matrix(watershed$Area, ncol = 15, byrow = TRUE)
# One row per distinct WaterID, followed by its 15 area columns.
nlcd_watershed <- data.frame(cbind(unique(watershed$WaterID), temp1))
# Name the area columns "id<Value>" (e.g. id11, id21, ...).
names(nlcd_watershed) <- c("WaterID", paste0("id", unique(watershed$Value)))
# Attach the area columns to every row of macro_2000 with the same WaterID.
macro.shed <- merge(
  macro_2000, nlcd_watershed,
  by.x = "WaterID", by.y = "WaterID"
)
# Print the distinct Value codes next to the distinct NLCD entries.
data.frame(unique(watershed$Value), unique(watershed$NLCD))
This is my data for macro.shed:
dput(macro.shed)
structure(list(WaterID = c(1L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 10L, 10L, 10L, 10L, 11L), ID = structure(c(1L, 16L,
2L, 9L, 10L, 11L, 12L, 13L, 15L, 8L, 3L, 4L, 5L, 6L, 7L, 14L), .Label = c("L1",
"L10", "L11", "L12", "L13", "L14", "L15", "L16", "L2", "L3",
"L4", "L5", "L6", "L7", "L8", "L9"), class = "factor"), Date = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = "8/20/2001", class = "factor"),
UTMX = c(607308L, 607112L, 598526L, 592235L, 603094L, 597749L,
605523L, 608668L, 600517L, 601806L, 597548L, 593815L, 591453L,
607187L, 606851L, 589528L), UTMY = c(4639040L, 4643780L,
4622470L, 4608350L, 4629780L, 4623340L, 4634330L, 4636950L,
4628160L, 4630380L, 4621720L, 4611960L, 4607960L, 4636480L,
4636020L, 4605120L), Watershed = structure(c(1L, 1L, 2L,
3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 10L, 10L, 10L, 10L, 11L), .Label = c("Cold Spring Creek",
"Drake Brook", "Dutchess County Airport", "East Branch Wappinger",
"Great Spring Creek", "Grist Mill Creek", "Hunns Lake Creek",
"Little Wappinger", "Upton Lake Creek", "Wappinger Creek",
"Wappinger Falls"), class = "factor"), richness = c(37L,
20L, 32L, 14L, 23L, 20L, 23L, 28L, 25L, 32L, 31L, 30L, 23L,
33L, 19L, 19L), H = c(0.9, 1, 0.9, 0.8, 1, 0.8, 0.7, 1, 1,
1, 1, 1, 1, 1, 0.9, 1), EPT = c(18L, 14L, 13L, 3L, 15L, 12L,
15L, 19L, 15L, 21L, 17L, 16L, 13L, 20L, 13L, 12L), DOM = c(62.1,
61.5, 64.1, 73.7, 53.4, 74, 80.3, 59.2, 55.6, 56.8, 57.4,
59.4, 54.2, 59.8, 66, 52.2), PMA = c(58.1, 51, 59.3, 39.9,
58.4, 45.2, 54.5, 75.3, 56.2, 64.3, 66, 53.7, 55.6, 60.4,
52.3, 42.4), FBI = c(3.8, 3.4, 4, 3.9, 3.6, 4.2, 5.2, 3.8,
3.5, 4.1, 3.7, 3.7, 4, 3.8, 3.5, 3.6), BAP = c(8.3, 6.8,
7.8, 3.9, 7.4, 6, 6.8, 8.4, 7.5, 8.2, 8.3, 7.8, 6.8, 8.3,
6.6, 6), Insects.sample = c(7123L, 516L, 2061L, 1341L, 921L,
961L, 580L, 1567L, 1180L, 4226L, 4133L, 1400L, 2325L, 2596L,
687L, 609L), id11 = c(216900L, 216900L, 4923900L, 131400L,
1806300L, 0L, 41945400L, 250200L, 200700L, 1908000L, 4500L,
4500L, 4500L, 4500L, 4500L, 25427700L), id21 = c(83700L,
83700L, 1163700L, 1290600L, 0L, 0L, 11841300L, 2824200L,
110700L, 136800L, 9000L, 9000L, 9000L, 9000L, 9000L, 9145800L
), id22 = c(111600L, 111600L, 596700L, 7245000L, 63900L,
11700L, 7293600L, 5060700L, 323100L, 179100L, 55800L, 55800L,
55800L, 55800L, 55800L, 3876300L), id23 = c(413100L, 413100L,
611100L, 1817100L, 0L, 0L, 11107800L, 208800L, 1713600L,
33300L, 204300L, 204300L, 204300L, 204300L, 204300L, 6268500L
), id24 = c(239400L, 239400L, 4547700L, 193500L, 26100L,
10800L, 48636900L, 88200L, 1139400L, 41400L, 16200L, 16200L,
16200L, 16200L, 16200L, 14818500L), id31 = c(63900L, 63900L,
14319000L, 526500L, 139500L, 0L, 58785300L, 398700L, 1723500L,
73800L, 0L, 0L, 0L, 0L, 0L, 31161600L), id41 = c(384300L,
384300L, 4142700L, 0L, 86400L, 0L, 9641700L, 357300L, 3166200L,
392400L, 0L, 0L, 0L, 0L, 0L, 963900L), id42 = c(729000L,
729000L, 508500L, 209700L, 13500L, 0L, 4072500L, 682200L,
2137500L, 31500L, 10800L, 10800L, 10800L, 10800L, 10800L,
3993300L), id43 = c(1224000L, 1224000L, 1266300L, 1532700L,
0L, 418500L, 6607800L, 695700L, 1356300L, 10800L, 78300L,
78300L, 78300L, 78300L, 78300L, 5419800L), id52 = c(16200L,
16200L, 57600L, 600300L, 17100L, 0L, 1730700L, 958500L, 120600L,
101700L, 20700L, 20700L, 20700L, 20700L, 20700L, 0L), id71 = c(22500L,
22500L, 780300L, 208800L, 5400L, 0L, 1139400L, 533700L, 7085700L,
582300L, 0L, 0L, 0L, 0L, 0L, 198000L), id81 = c(221400L,
221400L, 3398400L, 0L, 1649700L, 0L, 287100L, 155700L, 6300900L,
1511100L, 13500L, 13500L, 13500L, 13500L, 13500L, 264600L
), id82 = c(665100L, 665100L, 1513800L, 41400L, 447300L,
0L, 3083400L, 132300L, 616500L, 53100L, 2943900L, 2943900L,
2943900L, 2943900L, 2943900L, 931500L), id90 = c(2142000L,
2142000L, 826200L, 215100L, 0L, 17705700L, 630000L, 1156500L,
590400L, 15300L, 4598100L, 4598100L, 4598100L, 4598100L,
4598100L, 311400L), id95 = c(4628700L, 4628700L, 113400L,
4897800L, 0L, 10526400L, 358200L, 2281500L, 1431900L, 33300L,
4982400L, 4982400L, 4982400L, 4982400L, 4982400L, 0L)), .Names = c("WaterID",
"ID", "Date", "UTMX", "UTMY", "Watershed", "richness", "H", "EPT",
"DOM", "PMA", "FBI", "BAP", "Insects.sample", "id11", "id21",
"id22", "id23", "id24", "id31", "id41", "id42", "id43", "id52",
"id71", "id81", "id82", "id90", "id95"), row.names = c(NA, -16L
), class = "data.frame")
How do I make it so that the last variables are not resulting in NAs?
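You're trying to fit 15 predictors (16 coefficients once you include the intercept) with only 16 observations.
That's not enough data to calculate that many parameters, which is why you're only getting estimates for some of them. It is actually worse than it looks: the rows for WaterID 1 and WaterID 10 repeat the same predictor values, so there are only 11 distinct predictor rows and at most 11 coefficients can be estimated. That is why 5 of the 16 coefficients are NA with an intercept, and 4 of the 15 without one.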
You'll need to use some sort of regularisation or model selection, but even then your estimates will be sensitive to the method you choose.
To add on to the answer provided by @Pete, many of your variables have high collinearity. To visualize this easily:
library(corrplot)
# Pairwise correlations between the 15 predictor columns of macro.shed
# (columns 15 to 29, i.e. id11 ... id95 in the dput output).
corPlot <- cor(macro.shed[, 15:29])
# Draw the correlation matrix, printing each coefficient as a number.
corrplot(corPlot, method = "number")

bar chart of constant height for factors in time series

I am a beginner trying to use R to make graphs. Please help me. I have data with multiple columns (time series). Each column holds factors (please see the one-column example data below). I would like to make a bar chart of the time series with bars of constant height (say 1 unit), showing the two factor levels (“R” and “L” in the example below) in different colors, with the DATE on the x axis. Any tips?
Thanking you in advance!
DATE GROUP
2011.06.18 00:00:00 R
2011.06.18 06:00:00 L
2011.06.18 12:00:00 R
2011.06.18 18:00:00 R
2011.06.19 00:00:00 L
2011.06.19 06:00:00 L
2011.06.19 12:00:00 R
2011.06.19 18:00:00 L
2011.06.20 00:00:00 L
2011.06.20 06:00:00 L
2011.06.20 12:00:00 R
2011.06.20 18:00:00 L
2011.06.21 00:00:00 R
2011.06.21 06:00:00 L
Assuming your data are in dat, but with an extra column:
dat <- structure(list(DATE = structure(list(sec = c(0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0), min = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), hour = c(0L, 6L, 12L, 18L, 0L, 6L,
12L, 18L, 0L, 6L, 12L, 18L, 0L, 6L), mday = c(18L, 18L, 18L,
18L, 19L, 19L, 19L, 19L, 20L, 20L, 20L, 20L, 21L, 21L), mon = c(5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), year = c(111L,
111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L, 111L,
111L, 111L), wday = c(6L, 6L, 6L, 6L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 2L, 2L), yday = c(168L, 168L, 168L, 168L, 169L, 169L,
169L, 169L, 170L, 170L, 170L, 170L, 171L, 171L), isdst = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("sec",
"min", "hour", "mday", "mon", "year", "wday", "yday", "isdst"
), class = c("POSIXlt", "POSIXt")), GROUP = structure(c(2L, 1L,
2L, 2L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 2L, 1L), .Label = c("L",
"R"), class = "factor"), GROUP2 = structure(c(1L, 2L, 2L, 1L,
2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L), .Label = c("L", "R"), class = "factor")), .Names = c("DATE",
"GROUP", "GROUP2"), row.names = c(NA, -14L), class = "data.frame")
Then I think this does what you want. First, count the elements in each row that are equal to one of the classes:
# For every row of dat, count how many of columns 2 and 3 equal "R".
counts <- apply(dat[, 2:3], 1, function(x) sum(x == "R"))
then compute the other count and bind to a matrix:
# Two-row matrix (rows "L" and "R", one column per row of dat). The "L" count
# is the number of non-DATE columns (NCOL(dat) - 1) minus the "R" count.
countmat <- t(cbind(L = (NCOL(dat) - 1) - counts, R = counts))
then we plot using barplot()
# Enlarge the bottom margin and draw axis labels perpendicular to the axis
# (las = 2) so the date labels fit; the previous settings are kept in `op`.
op <- par(mar = c(9,4,4,2) + 0.1, las = 2)
# One stacked bar per column of countmat; barplot() returns the bar midpoints.
mids <- barplot(countmat, ylim = c(0,2.5),
legend.text = c("L","R"),
args.legend = list(x = "top", bty = "n"))
# Label each bar with its date, placed at the bar midpoint.
axis(side = 1, at = mids, labels = as.character(dat$DATE))
# Restore the graphical parameters saved in `op`.
par(op)
which produces:
See the help pages of the individual functions for explanations on the arguments.
Edit: If you just want to do this for an individual column, then this isn't the most interesting graph, but...
# Logical vector: TRUE where the single GROUP column equals "R".
count2 <- with(dat, GROUP == "R")
# Two-row matrix (rows "R" and "L"); "L" is simply the negation of "R", so
# each column has exactly one TRUE.
countmat2 <- t(cbind(R = count2, L = !count2))
# Enlarge the bottom margin and draw axis labels perpendicular to the axis
# (las = 2) so the date labels fit; the previous settings are kept in `op`.
op <- par(mar = c(9,4,4,2) + 0.1, las = 2)
# One bar per column of countmat2; barplot() returns the bar midpoints.
mids <- barplot(countmat2, ylim = c(0, 1.5),
legend.text = c("R","L"),
args.legend = list(x = "top", bty = "n"))
# Label each bar with its date, placed at the bar midpoint.
axis(side = 1, at = mids, labels = as.character(dat$DATE))
# Restore the graphical parameters saved in `op`.
par(op)
which gives this figure:

Resources