I have data from a large experiment with more than 10 different agricultural traits (e.g. GPC, WGC, TW, TKW...). For every trait I use the same analysis (the same R scripts). My question is: is there a simple way to replace the response variable throughout the whole script at once, other than using the find/replace option?
This is part of the code that I'm using. GPC is the response variable, and after I do all the analysis on GPC, I have to do the same analysis for WGC, then TW and so on.
# Mixed model for a single trait (GPC): fixed terms env/rep and gen*env, plus
# a random intercept for every env:rep:row and every env:rep:col combination.
mod <- lmer(
  GPC ~ env/rep + gen*env + (1|env:rep:row) + (1|env:rep:col),
  data = df
)

# Genotype-by-environment table holding the mean GPC of each cell.
GEmean <- cast(
  gen ~ env,
  data = df,
  value = "GPC",
  fun.aggregate = mean
)
Is there a way, for example, of labelling the response variable in the whole script as V1 and then just setting, at the beginning of the script, what V1 really means, i.e. which trait it is?
Any suggestion (other than using the find/replace option :D) is very welcome!
Thank you!
You may define the variable as character, then define a RHS and use paste with as.formula,
# Name of the response column, kept as a string so that switching traits only
# requires changing this one line.
V1 <- "Y1"
# Right-hand side of the model formula, written once and reused for any response.
rhs <- "~ X1/X2 + X2*X1 + (1|X1:X2:X3) + (1|X1:X2:X4)"
# paste() glues response and right-hand side into one string; as.formula()
# converts that string into the formula object passed to lmer().
mod <- lme4::lmer(as.formula(paste(V1, rhs)), data=df)
and cast as usual.
# `value` takes the column name as a string, so the variable V1 can be passed
# directly; each X3-by-X2 cell is aggregated with mean().
reshape::cast(
  X3 ~ X2,
  data = df,
  value = V1,
  fun.aggregate = mean
)
# X3 2 3 4 5 6 7 8
# 1 1 NaN NaN 1.25829503 NaN NaN 0.24229907 NaN
# 2 2 NaN NaN NaN NaN -1.66993472 NaN NaN
# 3 3 0.69914031 0.52025487 -0.13089285 -0.45742957 0.68799299 -0.65833219 -0.8931447
# 4 4 0.07498591 -0.06759845 -0.15950271 0.47577484 -0.31345072 0.98613984 NaN
# 5 5 0.34686080 0.84569994 0.22288982 -0.09291759 0.07317659 0.48756834 1.0484134
# 6 6 NaN -0.47471895 -0.01001069 0.03145422 -0.35163411 -0.43344249 NaN
# 7 7 NaN 0.17256757 0.68457304 0.11125033 -0.30236288 0.06483394 NaN
# 8 8 NaN -0.12143693 0.11786080 -0.10686611 0.78027538 0.70950092 NaN
Data:
df <- structure(list(X1 = c(3L, 7L, 0L, 3L, 5L, 6L, 2L, 5L, 4L, 5L,
5L, 4L, 6L, 5L, 6L, 3L, 4L, 6L, 4L, 7L, 6L, 3L, 6L, 4L, 4L, 5L,
4L, 7L, 5L, 6L, 7L, 4L, 5L, 5L, 4L, 8L, 3L, 2L, 5L, 7L, 5L, 5L,
2L, 5L, 6L, 4L, 5L, 3L, 6L, 4L, 5L, 5L, 4L, 6L, 5L, 3L, 3L, 5L,
7L, 5L, 3L, 6L, 5L, 4L, 6L, 2L, 4L, 6L, 5L, 6L, 4L, 6L, 8L, 5L,
7L, 4L, 4L, 8L, 7L, 5L, 5L, 4L, 1L, 4L, 6L, 3L, 6L, 3L, 3L, 5L,
5L, 6L, 7L, 6L, 5L, 5L, 6L, 8L, 6L, 3L), X2 = c(3L, 4L, 6L, 6L,
6L, 5L, 6L, 3L, 3L, 6L, 4L, 2L, 1L, 7L, 6L, 4L, 6L, 5L, 7L, 6L,
5L, 2L, 5L, 8L, 5L, 5L, 3L, 5L, 5L, 5L, 3L, 4L, 9L, 2L, 5L, 5L,
4L, 5L, 5L, 6L, 8L, 9L, 6L, 4L, 10L, 2L, 3L, 5L, 6L, 6L, 4L,
6L, 5L, 4L, 6L, 4L, 6L, 6L, 4L, 3L, 4L, 5L, 4L, 6L, 5L, 6L, 5L,
7L, 9L, 6L, 6L, 5L, 3L, 3L, 2L, 5L, 8L, 5L, 4L, 5L, 8L, 8L, 4L,
3L, 4L, 4L, 5L, 4L, 5L, 3L, 3L, 7L, 5L, 2L, 6L, 5L, 6L, 6L, 5L,
5L), X3 = c(3L, 5L, 7L, 4L, 4L, 5L, 3L, 4L, 4L, 7L, 4L, 3L, 6L,
5L, 8L, 4L, 6L, 5L, 5L, 4L, 6L, 3L, 4L, 6L, 4L, 6L, 6L, 5L, 4L,
3L, 4L, 5L, 3L, 5L, 6L, 6L, 6L, 3L, 7L, 3L, 6L, 5L, 7L, 4L, 8L,
4L, 5L, 3L, 5L, 2L, 4L, 4L, 8L, 4L, 6L, 4L, 3L, 5L, 5L, 5L, 4L,
6L, 5L, 7L, 4L, 6L, 5L, 5L, 7L, 6L, 3L, 4L, 3L, 4L, 5L, 6L, 3L,
3L, 5L, 5L, 8L, 6L, 4L, 4L, 4L, 6L, 1L, 6L, 6L, 5L, 3L, 8L, 5L,
5L, 5L, 8L, 2L, 5L, 8L, 5L), X4 = c(3L, 7L, 6L, 6L, 4L, 6L, 3L,
5L, 4L, 6L, 5L, 6L, 5L, 4L, 5L, 8L, 6L, 7L, 3L, 2L, 2L, 8L, 4L,
5L, 3L, 5L, 5L, 5L, 6L, 5L, 4L, 6L, 4L, 3L, 2L, 5L, 5L, 7L, 4L,
5L, 6L, 7L, 5L, 6L, 7L, 2L, 2L, 6L, 6L, 7L, 4L, 5L, 3L, 4L, 4L,
4L, 5L, 3L, 4L, 8L, 6L, 5L, 2L, 5L, 5L, 3L, 5L, 4L, 8L, 7L, 6L,
3L, 3L, 3L, 3L, 6L, 5L, 7L, 6L, 4L, 6L, 6L, 6L, 5L, 5L, 3L, 8L,
5L, 4L, 3L, 5L, 4L, 5L, 6L, 3L, 4L, 5L, 4L, 7L, 6L), X5 = c(6L,
7L, 3L, 3L, 5L, 4L, 6L, 6L, 7L, 3L, 4L, 4L, 6L, 5L, 5L, 3L, 3L,
5L, 5L, 3L, 5L, 4L, 6L, 6L, 8L, 3L, 7L, 5L, 6L, 5L, 4L, 3L, 5L,
6L, 5L, 5L, 6L, 4L, 3L, 4L, 6L, 6L, 3L, 6L, 8L, 5L, 5L, 6L, 4L,
5L, 5L, 8L, 3L, 6L, 5L, 4L, 4L, 6L, 4L, 6L, 5L, 5L, 5L, 5L, 5L,
7L, 3L, 3L, 4L, 5L, 4L, 9L, 6L, 9L, 4L, 6L, 5L, 7L, 4L, 3L, 7L,
5L, 3L, 4L, 7L, 3L, 5L, 7L, 5L, 5L, 5L, 3L, 6L, 5L, 5L, 7L, 3L,
5L, 4L, 6L), X6 = c(4L, 6L, 3L, 5L, 5L, 4L, 3L, 7L, 4L, 7L, 3L,
7L, 6L, 5L, 4L, 4L, 5L, 5L, 5L, 6L, 5L, 6L, 4L, 5L, 7L, 5L, 3L,
3L, 4L, 4L, 4L, 6L, 3L, 7L, 4L, 5L, 5L, 3L, 1L, 4L, 4L, 2L, 7L,
6L, 5L, 6L, 3L, 6L, 5L, 3L, 2L, 7L, 4L, 5L, 4L, 7L, 2L, 2L, 5L,
4L, 6L, 4L, 7L, 2L, 7L, 6L, 4L, 4L, 6L, 6L, 4L, 8L, 5L, 5L, 4L,
5L, 5L, 3L, 7L, 4L, 5L, 6L, 4L, 5L, 2L, 3L, 5L, 3L, 8L, 2L, 4L,
4L, 4L, 3L, 7L, 5L, 8L, 3L, 4L, 5L), X7 = c(4L, 2L, 2L, 4L, 5L,
5L, 7L, 6L, 5L, 6L, 6L, 4L, 5L, 5L, 6L, 4L, 7L, 4L, 6L, 3L, 2L,
5L, 7L, 5L, 5L, 5L, 9L, 7L, 9L, 5L, 4L, 6L, 8L, 4L, 5L, 4L, 4L,
4L, 5L, 4L, 6L, 5L, 5L, 2L, 4L, 4L, 5L, 5L, 3L, 7L, 5L, 6L, 7L,
5L, 3L, 5L, 5L, 4L, 6L, 3L, 5L, 3L, 4L, 3L, 4L, 5L, 5L, 7L, 7L,
2L, 4L, 7L, 4L, 6L, 5L, 7L, 5L, 4L, 8L, 6L, 8L, 8L, 3L, 4L, 6L,
3L, 6L, 6L, 5L, 2L, 5L, 1L, 7L, 2L, 6L, 7L, 3L, 4L, 7L, 3L),
Y1 = c(1.51381660212862, 0.504478762772218, -2.65378574088482,
1.43068637272239, -1.47684324896984, 1.58472803945312, -0.0922193857071082,
-0.823622560562235, -1.64806412194411, 0.944309458265371,
-0.356936363717069, -0.162015258645447, 0.12477819226619,
-0.288471307345202, -1.18190619708113, -2.39292785982372,
0.851281094397896, 0.941804900216781, -0.285843452847721,
-0.637399290834473, 1.01562437034261, -0.762266388284812,
0.57482379687021, -1.35903991270023, 0.901967856080433, 0.29778523652812,
-0.00277763319220751, -0.231661006943, 1.17555880002615,
1.1776067492362, 1.2999024919628, 2.39038283158516, -0.650154555398476,
-0.953405711072244, 2.07296020843138, 0.186067377477045,
1.27493204613466, 0.233083651874369, -1.35590587652704, 1.08696059670825,
0.597820644193322, -1.19390157791241, -0.452467982110459,
-0.325648996612789, 0.0285958447740426, -0.550033308585066,
-0.541233659925362, 0.213664554548748, 0.0896115607405887,
-2.22657886242746, -0.835933260512375, -0.3410168395334,
0.50787381134198, 0.531258570313673, -0.40901169495662, -0.810128911311157,
-0.432058431678758, -0.10219858745039, -0.254360597321428,
-0.154028509912877, -0.154412368645572, -2.30203996365628,
1.46595811311111, -1.28869300561031, -1.21053214855949, -0.268363291058948,
0.683392481853822, -0.319669130901838, 0.645175024187205,
-0.411699887457752, -0.191809472206714, -0.862892281569729,
-0.496653266818923, -1.82777471788403, -0.316512824335762,
-1.92056252600781, 0.162658786066582, 2.80586044363066, -1.90191012546795,
-1.37325883300246, -0.293687705724852, -0.891121986533703,
0.236105127173931, 1.7496394242358, 1.30113476661601, -0.681215096624127,
1.19455442244611, 0.850906768469736, 0.555565335896252, -0.883430008481552,
0.824345772604795, 0.687913712610013, 1.33576821950678, -0.808845875084407,
-0.0994675295096023, -0.797674770999511, -1.7176681053504,
-0.312094291087325, 0.480884995744134, 0.3981995835995),
Y2 = c(-1.53285914020452, 1.29122978508191, 0.921606245351757,
-0.724085415151949, 0.573933753842347, -1.75990865905637,
-0.913661932893341, 0.843278144910845, -0.427945566890583,
-0.785931108082936, 0.18126807207226, -0.98233866170379,
-1.69942556383645, 0.161515532679225, -0.205045971136367,
-0.197708729131696, 0.761530789270692, -0.361650445168573,
-0.284077129912569, -1.4375454413273, -1.87641468387563,
-0.466142684630522, -1.32675782016916, 0.0766439433592152,
-0.345061291806643, 1.20492953816262, -0.498983588300403,
-0.39017069355203, -0.585536568506376, -0.268542109333508,
-0.304956745639016, 0.6672974421784, -1.33856731439226, 0.260863470997641,
-0.229908534712624, 1.87494902300616, -1.58721820502111,
-1.27757830338367, -0.164744698727624, 0.822502689857642,
0.487841959844427, -0.644277240904474, 0.0252163852645532,
-1.98028441396859, 0.303527219762318, 1.27316308828519, 0.0240727927001065,
-0.42804136638075, -1.20095767996442, -0.632935247840562,
1.3552165382172, -0.343889633450751, -0.62589000162234, 1.36744315735339,
-1.79969327521372, -0.31448607863348, 0.0738981401409877,
-0.699306260552063, -0.699259392290336, 0.312939740584129,
0.61517017083612, 0.159816208307784, -0.0454478235356775,
-0.84835290813545, 0.60453967324254, -0.933994665859771,
-0.122862888262648, 0.499170848210818, -1.04662060255234,
0.24416562707757, -0.239847908251422, 0.892507692301903,
1.52229141743761, -1.34797780685531, 0.720419062498517, 0.0347853939462854,
0.0636886378960745, 1.09832215170708, 0.526957518899046,
-0.0574283783582475, -0.293913833682497, -0.308886513025504,
-0.0843275557037345, -0.761732958784744, 1.20579213927605,
0.53737587291791, 0.669172456819161, 0.1720175113333, 0.207695456368839,
0.129107054181012, 1.22029130711617, -0.388441420234782,
-1.08556449077231, 0.612473701706874, 0.81564534791706, -1.32961150152966,
0.859407793312078, -0.412360608341253, -0.188597814992205,
0.605501867368945)), row.names = c(NA, -100L), class = "data.frame")
You can try lapply on the two functions. e.g.:
cols <- c("GPC", "WGC", "TW") # or use the column numbers

# Iterate over column NAMES rather than column values: cast() needs the name
# of the value column, and building the formula from the name keeps the real
# trait (not a placeholder `x`) as the response recorded in each fitted model.
trait_names <- if (is.numeric(cols)) names(df)[cols] else cols
# Name the vector by itself so both result lists are named by trait.
trait_names <- setNames(trait_names, trait_names)

# Right-hand side shared by every trait.
trait_rhs <- "~ env/rep + gen*env + (1|env:rep:row) + (1|env:rep:col)"

# One fitted mixed model per trait.
mod <- lapply(trait_names, function(trait) {
  lmer(as.formula(paste(trait, trait_rhs)), data = df)
})

# One genotype-by-environment table of means per trait.
GEmean <- lapply(trait_names, function(trait) {
  cast(gen ~ env, data = df, value = trait, fun.aggregate = mean)
})
The output should be lists of analytical results.
Related
I have a data frame of pairs of genes. There are some pairs which are listed twice but in reverse orientation. How do I remove those pairs which are duplicates (but in reverse orientation)? Thanks!
> dput(all_pairs)
structure(list(gene1 = structure(c(2L, 3L, 4L, 5L, 6L, 7L, 8L,
9L, 10L, 1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 4L, 5L,
6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 5L, 6L, 7L, 8L, 9L, 10L, 1L,
2L, 3L, 4L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 7L, 8L,
9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 8L, 9L, 10L, 1L, 2L, 3L, 4L,
5L, 6L, 7L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 10L, 1L,
2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor"), gene2 = structure(c(1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L,
8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L), .Label = c("ASXL1", "BICRA",
"CCDC168", "HRAS", "MUC16", "NOTCH1", "OBSCN", "PLEC", "RREB1",
"TTN"), class = "factor")), out.attrs = list(dim = c(10L, 10L
), dimnames = list(Var1 = c("Var1=ASXL1", "Var1=BICRA", "Var1=CCDC168",
"Var1=HRAS", "Var1=MUC16", "Var1=NOTCH1", "Var1=OBSCN", "Var1=PLEC",
"Var1=RREB1", "Var1=TTN"), Var2 = c("Var2=ASXL1", "Var2=BICRA",
"Var2=CCDC168", "Var2=HRAS", "Var2=MUC16", "Var2=NOTCH1", "Var2=OBSCN",
"Var2=PLEC", "Var2=RREB1", "Var2=TTN"))), class = "data.frame", row.names = c(NA,
-90L))
This keeps only one copy of each pair, no matter what the orientation/order is:
# Sort the two gene names within every row so that (A, B) and (B, A) produce
# the same key; apply() returns the keys column-wise, hence the transpose.
pair_keys <- t(apply(all_pairs, MARGIN = 1, FUN = sort))
# Keep the first row seen for each key and drop later repeats.
is_repeat <- duplicated(pair_keys)
all_pairs[!is_repeat, ]
I am currently working with a dataset with a binary response variable with 2 levels. I have approx 32 predictor variables - some factors and some numeric. I used glm and based on the p values removed some of the predictor variables that I thought were insignificant. However, when I run the deviance test I always get zero and my ROC curve is upside down - this can be corrected by putting the TPR on the x axis but I think this is incorrect.
Can anyone provide any suggestions on what I could potentially be doing wrong?
Thanks a million!
The code below represents the categories I think are significant. They are all categorical.
# Logistic regression of PainDiagnosis on the retained categorical predictors.
data_analysis <- glm(
  PainDiagnosis ~ PainLocation + Criterion2 + Criterion6 + Criterion8 +
    Criterion9 + Criterion13,
  data = dat, family = "binomial"
)
# Each inspection call must be its own statement; several calls on one line
# without a separator do not parse.
summary(data_analysis)
coef(data_analysis)
anova(data_analysis, test = "Chisq")

# The diagnostics below refer to the model as `fit_glm`; bind that name to the
# model fitted above so they do not depend on an object defined elsewhere.
fit_glm <- data_analysis

# Deviance goodness-of-fit statistic: sum of squared deviance residuals.
resDev_glm <- residuals(fit_glm, type = "deviance")
testDev_glm <- sum(resDev_glm^2)
modMat_glm <- model.matrix(fit_glm) # model matrix
NO_glm <- nrow(unique(modMat_glm)) # number of unique observations
m_glm <- length(fit_glm$coefficients) # number of parameters
nrow(dat)
NO_glm
testDev_glm
# Upper-tail probability computed directly: `1 - pchisq(...)` rounds to exactly
# 0 once the lower tail is within machine precision of 1.
pchisq(testDev_glm, df = NO_glm - m_glm, lower.tail = FALSE)

library(ROCR)
# NOTE(review): fitted() only covers the rows glm() actually used; if `dat`
# has missing values its length will differ from dat$PainDiagnosis - confirm.
predObj <- prediction(fitted(fit_glm), dat$PainDiagnosis)
perf <- performance(predObj, "tpr", "fpr")
plot(perf)
abline(0, 1, col = "darkorange2", lty = 2) # add bisect line
2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Female", "Male"), class = "factor"),
DurationCurrent = structure(c(5L, 4L, 5L, 6L, 2L, 3L, 6L,
6L, 6L, 2L, 3L, 6L, 2L, 4L, 1L, 4L, 3L, 2L, 4L, 6L, 6L, 6L,
6L, 4L, 6L, 6L, 3L, 5L, 3L, 3L, 4L, 5L, 6L, 6L, 2L, 3L, 5L,
4L, 6L, 5L, 4L, 5L, 6L, 6L, 5L, 6L, 3L, 6L, 4L, 6L, 2L, 4L,
2L, 6L, 3L, 2L, 5L, 3L, 3L, 6L, 2L, 5L, 4L, 6L, 2L, 1L, 4L,
6L, 6L, 2L, 6L, 3L, 4L, 4L, 3L, 2L, 3L, 3L, 3L, 5L, 6L, 5L,
2L, 6L, 5L, 6L, 5L, 5L, 4L, 3L, 5L, 6L, 6L, 3L, 3L, 3L, 3L,
6L, 4L, 5L, 2L, 3L, 5L, 4L, 4L, 4L, 6L, 6L, 2L, 6L, 4L, 3L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 4L, 5L, 3L,
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 5L, 6L, 5L, 6L,
4L, 4L, 4L, 1L, 5L, 1L, 6L, 2L, 1L, 2L, 6L, 6L, 5L, 4L, 3L,
6L, 2L, 2L, 2L, 1L, 6L, 6L, 6L, 2L, 6L, 3L, 6L, 6L, 2L, 6L,
1L, 3L, 3L, 5L, 3L, 1L, 2L, 3L, 6L, 6L, 6L, 6L, 6L, 6L, 6L,
6L, 1L, 6L, 5L, 6L, 6L, 6L, 1L, 2L, 2L, 5L, 6L, 2L, 6L, 6L,
6L, 4L, 6L, 3L, 2L, 6L, 6L, 6L, 6L, 1L, 1L, 2L, 6L, 6L, 6L,
3L, 6L, 6L, 3L, 4L, 6L, 6L, 1L, 3L, 6L, 4L, 2L, 6L, 4L, 6L,
6L, 2L, 3L, 6L, 3L, 2L, 2L, 6L, 3L, 6L, 6L, 5L, 1L, 3L, 1L,
4L, 4L, 6L, 6L, 1L, 6L, 1L, 4L, 6L, 6L, 6L, 6L, 6L, 6L, 5L,
6L, 3L, 6L, 6L, 3L, 6L, 6L, 5L, 6L, 6L, 5L, 6L, 5L, 6L, 6L,
3L, 6L, 6L, 6L, 4L, 6L, 6L, 6L, 6L, 6L, 3L, 5L, 6L, 5L, 4L,
6L, 6L, 6L, 6L, 5L, 4L, 5L, 6L, 2L, 2L, 4L, 6L, 6L, 4L, 6L,
4L, 6L, 4L, 6L, 6L, 3L, 4L, 2L, 3L, 5L, 6L, 2L, 6L, 2L, 3L,
2L, 2L, 4L, 2L, 5L, 4L, 4L, 5L, 6L, 3L, 5L, 3L, 1L, 6L, 6L,
4L, 2L, 4L, 4L, 6L, 6L, 5L, 1L, 6L, 2L, 6L, 2L, 1L), .Label = c("0-3 weeks",
"4-6 weeks", "7-12 weeks", "4-6 months", "7-12 months", "> 1 year"
How do I draw a line over my Poisson curve in R?
This is the code I used for my plot;
# Poisson probability of each observed daily count, drawn as points in the
# order the observations appear in the data.
bites <- dogbites$daily.dogbites
plot(
  x = bites,
  y = dpois(bites, lambda = dogbites_lambda),
  xlab = "dogbites$daily.dogbites",
  ylab = "prob(x)",
  main = "Poisson dog bites"
)
and this is the plot I got:
I'm hoping to get something like this:
May I know what code can I use for this?
Edit: I tried lines function and type = "o" but I got this instead
> dput(dogbites)
structure(list(daily.dogbites = c(1L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 1L, 3L, 0L, 6L, 9L, 15L, 3L, 4L, 3L,
7L, 6L, 1L, 2L, 3L, 4L, 2L, 5L, 3L, 1L, 6L, 2L, 0L, 0L, 3L, 3L,
6L, 1L, 3L, 2L, 2L, 5L, 6L, 7L, 4L, 10L, 4L, 18L, 4L, 3L, 2L,
5L, 4L, 3L, 2L, 6L, 4L, 6L, 6L, 1L, 2L, 5L, 10L, 4L, 4L, 3L,
0L, 3L, 4L, 2L, 3L, 3L, 5L, 5L, 5L, 8L, 13L, 10L, 12L, 4L, 5L,
3L, 3L, 5L, 4L, 2L, 6L, 4L, 2L, 1L, 3L, 3L, 7L, 5L, 3L, 2L, 5L,
6L, 5L, 3L, 6L, 5L, 3L, 6L, 5L, 9L, 7L, 8L, 12L, 5L, 2L, 6L,
8L, 4L, 2L, 3L, 6L, 6L, 7L, 6L, 5L, 3L, 3L, 6L, 4L, 3L, 6L, 2L,
2L, 6L, 2L, 4L, 5L, 3L, 4L, 5L, 9L, 12L, 9L, 16L, 7L, 3L, 2L,
3L, 0L, 1L, 1L, 2L, 2L, 2L, 3L, 1L, 2L, 3L, 6L, 4L, 6L, 2L, 6L,
5L, 8L, 3L, 3L, 6L, 7L, 5L, 9L, 18L, 22L, 0L, 7L, 5L, 7L, 1L,
5L, 2L, 4L, 1L, 4L, 5L, 3L, 9L, 5L, 4L, 2L, 4L, 4L, 0L, 4L, 4L,
5L, 4L, 9L, 8L, 9L, 7L, 4L, 13L, 12L, 24L, 7L, 4L, 5L, 10L, 2L,
2L, 3L, 8L, 8L, 4L, 6L, 6L, 3L, 7L, 6L, 2L, 6L, 5L, 2L, 1L, 7L,
0L, 8L, 11L, 2L, 10L, 3L, 7L, 9L, 10L, 7L, 2L, 2L, 5L, 2L, 1L,
8L, 4L, 4L, 5L, 3L, 3L, 2L, 4L, 7L, 3L, 2L, 1L, 3L, 7L, 9L, 8L,
2L, 4L, 8L, 7L, 4L, 9L, 21L, 3L, 2L, 1L, 5L, 3L, 4L, 3L, 3L,
4L, 4L, 2L, 5L, 5L, 2L, 3L, 1L, 4L, 4L, 0L, 1L, 7L, 4L, 2L, 2L,
1L, 5L, 6L, 3L, 7L, 7L, 14L, 4L, 1L, 4L, 6L, 6L, 1L, 2L, 3L,
2L, 0L, 8L, 3L, 1L, 5L, 1L, 4L, 3L, 5L, 7L, 0L, 3L, 3L, 5L, 2L,
4L, 7L, 6L, 7L, 9L, 19L, 5L, 0L, 3L, 0L, 1L, 3L, 4L, 1L, 5L,
2L, 4L, 3L, 6L, 3L, 4L, 7L, 5L, 9L, 3L, 7L, 6L, 5L, 3L, 6L, 5L,
3L, 5L, 8L, 12L, 5L, 17L, 3L, 3L, 2L, 4L, 5L, 4L, 2L, 2L, 1L,
3L, 5L, 4L, 3L, 2L, 1L, 2L, 4L, 2L, 1L, 1L, 1L, 2L, 2L, 1L, 1L
)), class = "data.frame", row.names = c(NA, -378L))
> dput(dogbites_lambda)
4.50529100529101
You need to sort the data by the x axis values
set.seed(42)
x <- sample(25) # the integers 1..25 in random order
y <- dpois(x, lambda = 5)
graphics.off() # close any graphics devices that are still open

# Reorder both vectors by increasing x so the connecting line runs left to
# right instead of jumping back and forth between points.
ord <- order(x)
plot(x[ord], y[ord], type = "o", xlab = "sort(x)", ylab = "y[order(x)]")
I know you usually should use a barplot for categorical variables but in my case somebody split the continuous variable into groups and it would be nice to have a histogram anyway.
This is what I want to get (except as a histogram):
# Two lines of outer margin at the bottom so the rotated labels are not cut off.
par(oma = c(2, 0, 0, 0))
# Number of observations in each income category.
income_counts <- table(hhincome)
barplot(
  income_counts,
  main = "Netto houshold income",
  ylab = "Frequency",
  col = "grey",
  border = "black",
  las = 2
)
(Note: A histogram has no space between the bars and has an x-axis)
Data:
hhincome <- structure(c(4L, 4L, 1L, 6L, 8L, 1L, 4L, 5L, 2L, 3L, 1L, 5L, 1L, 7L, 6L, 7L, 3L, 2L, 6L, 7L, 8L, 4L, 7L, 8L, 7L, 4L, 5L, 5L, 5L, 9L, 7L, 5L, 8L, 8L, 6L, 5L, 5L, 3L, 5L, 4L, 3L, 5L, 3L, 5L, 4L, 4L, 5L, 7L, 6L, 7L, 2L, 6L, 1L, 7L, 4L, 4L, 5L, 2L, 4L, 6L, 6L, 8L, 6L, 7L, 4L, 7L, 9L, 1L, 4L, 6L, 2L, 6L, 8L, 6L, 5L, 8L, 7L, 9L, 7L, 9L, 8L, 5L, 5L, 7L, 6L, 2L, 7L, 6L, 6L, 1L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 5L, 2L, 2L, 9L, 6L, 7L, 7L, 5L, 6L, 6L, 5L, 5L, 7L, 8L, 6L, 6L, 3L, 7L, 6L, 4L, 5L, 5L, 4L, 8L, 3L, 4L, 6L, 5L, 7L, 3L, 4L, 7L, 5L, 3L, 6L, 2L, 2L, 5L, 2L, 4L, 8L, 4L, 3L, 2L, 7L, 2L, 5L, 2L, 1L, 8L, 7L, 3L, 6L, 6L, 7L, 2L, 9L, 3L, 3L, 5L, 7L, 7L, 5L, 6L, 8L, 5L, 6L, 5L, 5L, 7L, 6L, 5L, 5L, 6L, 10L, 3L, 6L, 6L, 3L, 2L, 4L, 9L, 2L, 6L, 7L, 1L, 5L, 6L, 5L, 4L, 7L, 5L, 2L, 6L, 3L, 3L, 2L, 7L, 6L, 6L, 5L, 7L, 6L, 1L, 7L, 3L, 2L, 5L, 5L, 3L, 3L, 3L, 4L, 1L, 7L, 5L, 3L, 3L, 3L, 8L, 6L, 3L, 2L, 5L, 5L, 4L, 1L, 4L, 1L, 2L, 6L, 4L, 5L, 5L, 8L, 3L, 7L, 7L, 3L, 4L, 4L, 4L, 3L, 4L, 6L, 3L, 3L, 4L, 7L, 2L, 6L, 8L, 5L, 3L, 3L, 6L, 2L, 3L, 4L, 3L, 5L, 5L, 7L, 8L, 6L, 6L, 8L, 4L, 7L, 9L, 1L, 5L, 3L, 2L, 3L, 6L, 3L, 4L, 6L, 3L, 7L, 3L, 1L, 6L, 8L, 4L, 4L, 5L, 6L, 8L, 4L, 4L, 2L, 8L, 6L, 5L, 1L, 4L, 6L, 3L, 5L, 6L, 6L, 4L, 4L, 7L, 8L, 3L, 3L, 4L, 6L, 1L, 6L, 7L, 7L, 1L, 3L, 5L, 6L, 7L, 2L, 3L, 6L, 3L, 2L, 7L, 9L, 3L, 10L, 6L, 9L, 3L, 5L, 11L, 10L, 7L, 8L, 8L, 5L, 5L, 3L, 5L, 8L, 9L, 3L, 2L, 6L, 7L, 5L, 5L, 7L, 5L, 8L, 7L, 11L, 7L, 3L, 3L, 5L, 6L, 8L, 2L, 5L, 6L, 6L, 9L, 4L, 5L, 6L, 7L, 6L, 3L, 8L, 7L, 6L, 9L, 7L, 7L, 4L, 7L, 9L, 3L, 9L, 6L, 11L, 6L, 9L, 4L, 7L, 2L, 7L, 8L, 6L, 8L, 6L, 6L, 6L, 5L, 5L, 2L, 4L, 9L, 7L, 6L, 9L, 5L, 3L, 8L, 2L, 5L, 4L, 7L, 4L, 8L, 6L, 1L, 6L, 5L, 9L, 6L, 7L, 1L, 1L, 4L, 3L, 11L, 3L, 6L, 5L, 2L, 7L, 5L, 6L, 8L, 8L, 3L, 4L, 9L, 6L, 5L, 7L, 8L, 8L, 6L, 8L, 1L, 3L, 5L, 8L, 1L, 6L, 7L, 9L, 8L, 4L, 4L, 6L, 5L, 7L, 6L, 7L, 7L, 3L, 9L, 5L, 8L, 11L, 3L, 7L, 6L, 7L, 8L, 8L, 2L, 2L, 3L, 2L, 5L, 6L, 5L, 7L, 4L, 7L, 2L, 7L, 2L, 2L, 4L, 7L, 6L, 9L, 8L, 5L, 1L, 6L, 
3L, 10L, 1L, 7L, 4L, 7L, 5L, 6L, 8L, 4L, 8L, 4L, 5L, 8L, 6L, 7L, 7L, 8L, 7L, 7L, 6L, 7L, 5L, 7L, 9L, 5L, 7L, 4L, 2L, 7L, 3L, 6L, 3L, 8L, 5L, 2L, 6L, 7L, 7L), .Label = c("Less than 500 €", "500-900 €", "900-1300 €", "1300-1500 €", "1500-2000 €", "2000-2600 €", "2600-3500 €", "3500-4500 €", "4500-6000 €", "6000-8000 €", "8000€ or more"), class = "factor")
Thanks to Zheyuan Li I already have my answer. Instead of forcing a histogram, I can simply create a barplot that looks exactly like a histogram:
par(oma = c(2, 0, 0, 0)) # so labels are not cut off
# Counts per income category, taken from the `hhincome` factor defined in the
# data section (the same object is used for the bars and for the tick marks).
income_counts <- table(hhincome)
barplot(income_counts,
  space = 0, # set space between bars to zero
  ylab = "Frequency", main = "Netto houshold income",
  border = "black", col = "grey", las = 2
)
# With space = 0 every bar is one unit wide, so the category borders sit at
# 0, 1, ..., number of categories; draw unlabelled ticks there.
axis(1, at = seq(0, length(income_counts)), labels = FALSE)
box()
Edit: I just found an alternative way:
# as.numeric() on a factor returns the integer codes of its levels (1, 2, ...).
income_codes <- as.numeric(hhincome)
income_levels <- levels(hhincome)
h <- hist(income_codes,
  # One unit-wide bin centred on each level code. Default breaks typically
  # fall on the integer codes themselves, which pools the two lowest levels
  # into the first bin.
  breaks = seq(0.5, length(income_levels) + 0.5, by = 1),
  xlab = "", ylab = "Frequency",
  main = "Netto houshold income \n(with normal disttribution curve)",
  border = "black", col = "grey", las = 2,
  xaxt = "n" # suppress the default x-axis, which would show codes, not labels
)
# One tick per level, labelled with the level name.
axis(1, at = seq_along(income_levels), labels = income_levels, las = 2)
box()
This way it's possible to add a normal distribution curve as well:
# Integer level codes of the income factor, converted once and reused.
income_codes <- as.numeric(hhincome)
# Grid of 1100 x-values spanning the observed codes.
xfit <- seq(min(income_codes), max(income_codes), length.out = 1100)
yfit <- dnorm(xfit, mean = mean(income_codes), sd = sd(income_codes))
# Rescale the density to the count scale of the histogram `h`:
# bin width times number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(income_codes)
lines(xfit, yfit, col = "black", lwd = 2)
I know you usually should use a barplot for categorical variables but in my case somebody split the continuous variable into groups and it would be nice to have a histogram anyway.
This is what I want to get (except as a histogram):
# Two lines of outer margin at the bottom so the rotated labels are not cut off.
par(oma = c(2, 0, 0, 0))
# Number of observations in each income category.
income_counts <- table(hhincome)
barplot(
  income_counts,
  main = "Netto houshold income",
  ylab = "Frequency",
  col = "grey",
  border = "black",
  las = 2
)
(Note: A histogram has no space between the bars and has an x-axis)
Data:
hhincome <- structure(c(4L, 4L, 1L, 6L, 8L, 1L, 4L, 5L, 2L, 3L, 1L, 5L, 1L, 7L, 6L, 7L, 3L, 2L, 6L, 7L, 8L, 4L, 7L, 8L, 7L, 4L, 5L, 5L, 5L, 9L, 7L, 5L, 8L, 8L, 6L, 5L, 5L, 3L, 5L, 4L, 3L, 5L, 3L, 5L, 4L, 4L, 5L, 7L, 6L, 7L, 2L, 6L, 1L, 7L, 4L, 4L, 5L, 2L, 4L, 6L, 6L, 8L, 6L, 7L, 4L, 7L, 9L, 1L, 4L, 6L, 2L, 6L, 8L, 6L, 5L, 8L, 7L, 9L, 7L, 9L, 8L, 5L, 5L, 7L, 6L, 2L, 7L, 6L, 6L, 1L, 7L, 7L, 2L, 6L, 6L, 6L, 7L, 5L, 2L, 2L, 9L, 6L, 7L, 7L, 5L, 6L, 6L, 5L, 5L, 7L, 8L, 6L, 6L, 3L, 7L, 6L, 4L, 5L, 5L, 4L, 8L, 3L, 4L, 6L, 5L, 7L, 3L, 4L, 7L, 5L, 3L, 6L, 2L, 2L, 5L, 2L, 4L, 8L, 4L, 3L, 2L, 7L, 2L, 5L, 2L, 1L, 8L, 7L, 3L, 6L, 6L, 7L, 2L, 9L, 3L, 3L, 5L, 7L, 7L, 5L, 6L, 8L, 5L, 6L, 5L, 5L, 7L, 6L, 5L, 5L, 6L, 10L, 3L, 6L, 6L, 3L, 2L, 4L, 9L, 2L, 6L, 7L, 1L, 5L, 6L, 5L, 4L, 7L, 5L, 2L, 6L, 3L, 3L, 2L, 7L, 6L, 6L, 5L, 7L, 6L, 1L, 7L, 3L, 2L, 5L, 5L, 3L, 3L, 3L, 4L, 1L, 7L, 5L, 3L, 3L, 3L, 8L, 6L, 3L, 2L, 5L, 5L, 4L, 1L, 4L, 1L, 2L, 6L, 4L, 5L, 5L, 8L, 3L, 7L, 7L, 3L, 4L, 4L, 4L, 3L, 4L, 6L, 3L, 3L, 4L, 7L, 2L, 6L, 8L, 5L, 3L, 3L, 6L, 2L, 3L, 4L, 3L, 5L, 5L, 7L, 8L, 6L, 6L, 8L, 4L, 7L, 9L, 1L, 5L, 3L, 2L, 3L, 6L, 3L, 4L, 6L, 3L, 7L, 3L, 1L, 6L, 8L, 4L, 4L, 5L, 6L, 8L, 4L, 4L, 2L, 8L, 6L, 5L, 1L, 4L, 6L, 3L, 5L, 6L, 6L, 4L, 4L, 7L, 8L, 3L, 3L, 4L, 6L, 1L, 6L, 7L, 7L, 1L, 3L, 5L, 6L, 7L, 2L, 3L, 6L, 3L, 2L, 7L, 9L, 3L, 10L, 6L, 9L, 3L, 5L, 11L, 10L, 7L, 8L, 8L, 5L, 5L, 3L, 5L, 8L, 9L, 3L, 2L, 6L, 7L, 5L, 5L, 7L, 5L, 8L, 7L, 11L, 7L, 3L, 3L, 5L, 6L, 8L, 2L, 5L, 6L, 6L, 9L, 4L, 5L, 6L, 7L, 6L, 3L, 8L, 7L, 6L, 9L, 7L, 7L, 4L, 7L, 9L, 3L, 9L, 6L, 11L, 6L, 9L, 4L, 7L, 2L, 7L, 8L, 6L, 8L, 6L, 6L, 6L, 5L, 5L, 2L, 4L, 9L, 7L, 6L, 9L, 5L, 3L, 8L, 2L, 5L, 4L, 7L, 4L, 8L, 6L, 1L, 6L, 5L, 9L, 6L, 7L, 1L, 1L, 4L, 3L, 11L, 3L, 6L, 5L, 2L, 7L, 5L, 6L, 8L, 8L, 3L, 4L, 9L, 6L, 5L, 7L, 8L, 8L, 6L, 8L, 1L, 3L, 5L, 8L, 1L, 6L, 7L, 9L, 8L, 4L, 4L, 6L, 5L, 7L, 6L, 7L, 7L, 3L, 9L, 5L, 8L, 11L, 3L, 7L, 6L, 7L, 8L, 8L, 2L, 2L, 3L, 2L, 5L, 6L, 5L, 7L, 4L, 7L, 2L, 7L, 2L, 2L, 4L, 7L, 6L, 9L, 8L, 5L, 1L, 6L, 
3L, 10L, 1L, 7L, 4L, 7L, 5L, 6L, 8L, 4L, 8L, 4L, 5L, 8L, 6L, 7L, 7L, 8L, 7L, 7L, 6L, 7L, 5L, 7L, 9L, 5L, 7L, 4L, 2L, 7L, 3L, 6L, 3L, 8L, 5L, 2L, 6L, 7L, 7L), .Label = c("Less than 500 €", "500-900 €", "900-1300 €", "1300-1500 €", "1500-2000 €", "2000-2600 €", "2600-3500 €", "3500-4500 €", "4500-6000 €", "6000-8000 €", "8000€ or more"), class = "factor")
Thanks to Zheyuan Li I already have my answer. Instead of forcing a histogram, I can simply create a barplot that looks exactly like a histogram:
par(oma = c(2, 0, 0, 0)) # so labels are not cut off
# Counts per income category, taken from the `hhincome` factor defined in the
# data section (the same object is used for the bars and for the tick marks).
income_counts <- table(hhincome)
barplot(income_counts,
  space = 0, # set space between bars to zero
  ylab = "Frequency", main = "Netto houshold income",
  border = "black", col = "grey", las = 2
)
# With space = 0 every bar is one unit wide, so the category borders sit at
# 0, 1, ..., number of categories; draw unlabelled ticks there.
axis(1, at = seq(0, length(income_counts)), labels = FALSE)
box()
Edit: I just found an alternative way:
# as.numeric() on a factor returns the integer codes of its levels (1, 2, ...).
income_codes <- as.numeric(hhincome)
income_levels <- levels(hhincome)
h <- hist(income_codes,
  # One unit-wide bin centred on each level code. Default breaks typically
  # fall on the integer codes themselves, which pools the two lowest levels
  # into the first bin.
  breaks = seq(0.5, length(income_levels) + 0.5, by = 1),
  xlab = "", ylab = "Frequency",
  main = "Netto houshold income \n(with normal disttribution curve)",
  border = "black", col = "grey", las = 2,
  xaxt = "n" # suppress the default x-axis, which would show codes, not labels
)
# One tick per level, labelled with the level name.
axis(1, at = seq_along(income_levels), labels = income_levels, las = 2)
box()
This way it's possible to add a normal distribution curve as well:
# Integer level codes of the income factor, converted once and reused.
income_codes <- as.numeric(hhincome)
# Grid of 1100 x-values spanning the observed codes.
xfit <- seq(min(income_codes), max(income_codes), length.out = 1100)
yfit <- dnorm(xfit, mean = mean(income_codes), sd = sd(income_codes))
# Rescale the density to the count scale of the histogram `h`:
# bin width times number of observations.
yfit <- yfit * diff(h$mids[1:2]) * length(income_codes)
lines(xfit, yfit, col = "black", lwd = 2)