Problems in ggplot in R - r

Data Sets
> dput(head(spdistuc,50))
structure(list(Lane = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L), Vehicle.class = c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
speedmph = c(0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2,
30.6, 34, 37.4, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2,
30.6, 34, 37.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2, 30.6,
34, 0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2, 30.6,
34, 37.4, 0, 3.4, 6.8, 10.2, 13.6, 17), cprob = c(0, 0.01,
0.04, 0.08, 0.14, 0.22, 0.32, 0.5, 0.73, 0.95, 0.99, 1, 0,
0, 0.03, 0.07, 0.16, 0.3, 0.51, 0.81, 0.99, 1, 1, 0, 0.03,
0.05, 0.1, 0.21, 0.49, 0.84, 1, 1, 0, 0, 0.01, 0.01, 0.06,
0.1, 0.17, 0.4, 0.76, 0.95, 1, 1, 0, 0, 0.01, 0.01, 0.02,
0.04)), .Names = c("Lane", "Vehicle.class", "speedmph", "cprob"
), row.names = c(6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 25L, 26L, 27L, 28L, 29L, 30L, 31L, 32L, 33L, 34L, 35L,
40L, 41L, 42L, 43L, 44L, 45L, 46L, 47L, 48L, 64L, 65L, 66L, 67L,
68L, 69L, 70L, 71L, 72L, 73L, 74L, 75L, 88L, 89L, 90L, 91L, 92L,
93L), class = "data.frame")
> dput(head(cspdistuv,50))
structure(list(lanem = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), cars.speedmph = c(18,
20, 22, 24, 26, 28, 30, 32, 34, 36, 10, 15, 20, 25, 30, 35, 5,
10, 15, 20, 25, 30, 35, 0, 5, 10, 15, 20, 25, 30, 35, 5, 10,
15, 20, 25, 30, 35), cars.prob = c(0, 0.13, 0.17, 0.2, 0.2, 0.27,
0.37, 0.8, 0.97, 1, 0, 0.03, 0.13, 0.4, 0.77, 1, 0, 0.03, 0.17,
0.27, 0.5, 0.8, 1, 0, 0.03, 0.1, 0.27, 0.53, 0.6, 0.83, 1, 0,
0.07, 0.17, 0.33, 0.53, 0.8, 1)), .Names = c("lanem", "cars.speedmph",
"cars.prob"), row.names = c(NA, 38L), class = "data.frame")
Problem
I plotted the spdistuc:
cu1 <- ggplot() + geom_point(data = spdistuc, mapping = aes(x=speedmph, y = cprob, color = 'observed')) + facet_wrap(~Lane) + theme_bw() + my.theme()
This gave me following:
But when I added another plot on the existing one,
cu2 <- cu1 + geom_point(data = cspdistuv, mapping = aes(x = cars.speedmph, y = cars.prob, color = 'simulated-default')) + facet_wrap(~lanem)
I got the following:
Question
Why the existing plot ("observed") changed? You can see more than 1 point for a single value on x-axis. What am I doing wrong?

Expanding my comment into an answer:
The problem is you use "Lane" in the first dataset and "lanem" in the second.
This can be fixed by making the column names the same.
names(cspdistuv)[names(cspdistuv) == "lanem"] <- "Lane"
When this change is made, you should not need to include facet_wrap in your cu2 definition. It will still be remembered from cu1's definition.

Related

ggstattsplot : Why ggwithinstats not working with categorical data

I'm trying to run the ggwithinstats function from ggstatplot but I get the following error.
This is I'm running
ggwithinstats( # independent samples
data = dat,
x = FAB,
y = BM_percentage,
plot.type = "box", # for boxplot
type = "nonparametric", # for wilcoxon
centrality.plotting = FALSE # remove median
)
########################
ggwithinstats(
df2,
x = FAB,
y = BM_percentage
)
I get the error for the first code
Error in validObject(.Object) : invalid class “dsparseModelMatrix”
object: superclass "Mnumeric" not defined in the environment of the
object's class Error in validObject(.Object) : invalid class
“dsparseModelMatrix” object: superclass "Mnumeric" not defined in the
environment of the object's class Error in complete.cases(x, y) : not
all arguments have the same length
And for the second code I get this
Error in p.adjust(pp[lower.tri(pp, TRUE)], p.adjust.method) :
(list) object cannot be coerced to type 'double'
My data which I'm trying to run
b <- dput(dat)
structure(list(FAB = structure(c(5L, 1L, 5L, 3L, 2L, 4L, 6L,
2L, 1L, 6L, 5L, 1L, 5L, 1L, 5L, 6L, 3L, 5L, 2L, 5L, 3L, 3L, 3L,
1L, 3L, 1L, 1L, 1L, 6L, 4L, 2L, 5L, 4L, 3L, 2L, 3L, 2L, 3L, 3L,
1L, 5L, 2L, 2L, 3L, 2L, 5L, 4L, 6L, 5L, 3L, 1L, 3L, 5L, 5L, 3L,
5L, 2L, 2L, 1L, 5L, 2L, 2L, 3L, 2L, 5L, 4L, 4L, 6L, 3L, 3L, 5L,
2L, 2L, 2L, 3L, 5L, 2L, 3L, 6L, 1L, 2L, 3L, 2L, 3L, 3L, 6L, 2L,
2L, 5L, 3L, 3L, 3L, 2L, 5L, 2L, 4L, 1L, 6L, 6L, 3L, 3L, 5L, 6L,
2L, 1L, 5L, 2L, 4L, 5L, 2L, 6L, 6L, 3L, 4L, 5L, 3L, 2L, 4L, 6L,
2L, 5L, 4L, 2L, 4L, 5L, 3L, 3L, 3L, 3L, 5L, 2L, 2L, 3L, 1L, 3L,
3L, 2L, 2L, 6L, 4L, 2L, 4L, 4L, 3L, 2L, 5L, 5L), .Label = c("M0",
"M1", "M2", "M3", "M4", "M5"), class = "factor"), WBC = c(76.7,
5, 5, 27.7, 10.7, 2.1, 78.5, 8.2, 47.2, 72.1, 67.5, 2.9, 22.2,
1, 15.2, 7.3, 12.6, 27.6, 46.4, 27.1, 34.2, 33.5, 2.5, 2.3, 8.3,
61.6, 47.6, 5.6, 137.2, 3.4, 48, 13.6, 0.9, 5.5, 1.5, 4.1, 90.4,
34.8, 3.1, 2.3, 37, 34, 2.7, 17, 29.4, 50.3, 0.5, 4, 12.4, 12,
111, 77.3, 2.9, 37.9, 8.3, 57.1, 11.5, 37.5, 6.1, 90.6, 80.5,
10.1, 1.5, 30.8, 2.3, 0.4, 86.4, 34.6, 15.1, 35.9, 17.9, 0.7,
33.2, 45.6, 98.8, 26.1, 134.4, 98.2, 8.4, 2.1, 67.9, 51.8, 5.1,
2.6, 43.1, 6.7, 30.5, 171.9, 29.7, 75.2, 45, 11.5, 22.9, 131.5,
63.7, 1.6, 5.4, 116.2, 14.9, 202.7, 18.7, 52.9, 99.2, 13.5, 14.5,
2.7, 1.2, 8.2, 30.9, 103.6, 93, 5.1, 3.4, 0.9, 4.9, 42.1, 6.4,
1.5, 59.3, 88.1, 25.9, 31.5, 223.8, 29, 9.9, 1.7, 0.6, 14.3,
61.6, 2.2, 1.2, 16, 11, 92, 29.4, 32.4, 42.8, 2.9, 6.7, 1.2,
13.1, 1, 3.6, 4.3, 39.8, 19.6, 101.3), TMB = c(0, 0.733333333333,
0.3, 0.266666666667, 0.466666666667, 0.333333333333, 0.233333333333,
0.2, 0.5, 0.133333333333, 0.333333333333, 0.566666666667, 0.3,
0.766666666667, 0.166666666667, 0.233333333333, 0.4, 0.266666666667,
0.533333333333, 1.13333333333, 0.233333333333, 0.1, 0.4, 0.4,
0.333333333333, 0.4, 0.5, 0.4, 0.1, 0.2, 0.566666666667, 0.466666666667,
0.2, 0.733333333333, 0.5, 0.333333333333, 0.2, 0.333333333333,
0.4, 0.266666666667, 0.0666666666667, 0.266666666667, 0.2, 0.433333333333,
0.566666666667, 0.0666666666667, 0.166666666667, 0.533333333333,
0.3, 0.433333333333, 0, 0.4, 0.466666666667, 0.0666666666667,
0.333333333333, 0, 0.7, 0.4, 0.233333333333, 0.3, 0.0333333333333,
0.4, 0.566666666667, 0.0333333333333, 0.0333333333333, 0.266666666667,
0.0333333333333, 0.4, 0.466666666667, 0.166666666667, 0.633333333333,
0.366666666667, 0.233333333333, 0.466666666667, 0.1, 0.0666666666667,
0.4, 0.366666666667, 0.1, 0.166666666667, 0.266666666667, 0.466666666667,
0.266666666667, 0.333333333333, 0.0333333333333, 0.1, 0.5, 0.333333333333,
0.333333333333, 0.266666666667, 0, 0.466666666667, 0.233333333333,
0.166666666667, 0.266666666667, 0.333333333333, 0.433333333333,
0.1, 0.0666666666667, 0.4, 0.2, 0.133333333333, 0.533333333333,
0.2, 0.4, 0.433333333333, 0.1, 0.2, 0.0666666666667, 0.233333333333,
0.1, 0, 0.3, 0.266666666667, 0.233333333333, 0.6, 0.533333333333,
0.2, 0.2, 0.5, 0.0333333333333, 0.0333333333333, 0.0666666666667,
0.166666666667, 0.5, 0.5, 0.166666666667, 0.3, 0.4, 0.3, 0.4,
0.466666666667, 0.433333333333, 0.4, 0.266666666667, 0.3, 0.4,
0.6, 0.0333333333333, 0.0666666666667, 0.333333333333, 0.3, 0.1,
0.333333333333, 0.333333333333, 0.2, 0.0333333333333), BM_percentage = c(82L,
83L, 91L, 72L, 68L, 88L, 32L, 91L, 59L, 87L, 89L, 99L, 35L, 90L,
75L, 41L, 63L, 69L, 81L, 51L, 53L, 34L, 63L, 75L, 47L, 95L, 42L,
55L, 83L, 90L, 84L, 61L, 100L, 64L, 62L, 67L, 77L, 39L, 43L,
75L, 69L, 73L, 72L, 80L, 90L, 39L, 74L, 78L, 51L, 37L, 92L, 57L,
85L, 40L, 48L, 81L, 86L, 88L, 60L, 98L, 77L, 42L, 46L, 89L, 95L,
90L, 73L, 71L, 32L, 70L, 62L, 32L, 75L, 76L, 56L, 30L, 92L, 76L,
89L, 50L, 79L, 55L, 94L, 80L, 47L, 81L, 90L, 87L, 75L, 46L, 67L,
70L, 86L, 72L, 85L, 40L, 97L, 83L, 57L, 60L, 52L, 90L, 52L, 86L,
74L, 37L, 71L, 91L, 52L, 85L, 90L, 95L, 70L, 82L, 40L, 64L, 40L,
90L, 85L, 86L, 71L, 51L, 77L, 85L, 40L, 37L, 35L, 57L, 48L, 81L,
60L, 62L, 72L, 67L, 56L, 59L, 81L, 33L, 94L, 85L, 72L, 42L, 93L,
40L, 86L, 71L, 79L), Risk_Cyto = structure(c(2L, 4L, 2L, 2L,
4L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 4L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L,
4L, 4L, 2L, 4L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 2L, 4L, 4L, 4L,
2L, 1L, 1L, 1L, 2L, 4L, 2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 4L,
4L, 2L, 2L, 4L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L, 1L, 2L, 2L,
3L, 4L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 4L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 2L, 4L, 2L, 1L, 1L, 2L,
2L, 1L, 2L, 2L, 4L, 1L, 2L, 1L, 4L, 4L, 2L, 1L, 3L, 4L, 2L, 4L,
1L, 2L, 4L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("Good",
"Intermediate", "N.D.", "Poor"), class = "factor"), Risk_Molecular = structure(c(4L,
4L, 2L, 4L, 4L, 1L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L,
2L, 4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 4L, 4L, 2L, 1L, 2L, 1L, 1L,
2L, 2L, 1L, 4L, 4L, 2L, 4L, 1L, 1L, 4L, 4L, 1L, 4L, 1L, 4L, 2L,
4L, 4L, 4L, 2L, 1L, 1L, 1L, 2L, 4L, 2L, 2L, 4L, 2L, 4L, 2L, 4L,
1L, 1L, 4L, 4L, 2L, 2L, 4L, 1L, 2L, 2L, 2L, 3L, 2L, 2L, 2L, 2L,
1L, 2L, 2L, 3L, 2L, 2L, 4L, 2L, 2L, 2L, 4L, 2L, 2L, 2L, 1L, 4L,
2L, 2L, 2L, 2L, 4L, 2L, 2L, 4L, 2L, 2L, 1L, 1L, 2L, 2L, 4L, 2L,
1L, 1L, 2L, 2L, 1L, 2L, 2L, 4L, 1L, 2L, 1L, 4L, 4L, 2L, 1L, 3L,
4L, 2L, 4L, 1L, 2L, 4L, 2L, 1L, 2L, 4L, 1L, 2L, 1L, 1L, 1L, 2L,
2L, 2L), .Label = c("Good", "Intermediate", "N.D.", "Poor"), class = "factor")), class = "data.frame", row.names = c("TCGA-AB-2856",
"TCGA-AB-2849", "TCGA-AB-2971", "TCGA-AB-2930", "TCGA-AB-2891",
"TCGA-AB-2872", "TCGA-AB-2851", "TCGA-AB-3011", "TCGA-AB-2949",
"TCGA-AB-2981", "TCGA-AB-2965", "TCGA-AB-2822", "TCGA-AB-2828",
"TCGA-AB-2959", "TCGA-AB-2973", "TCGA-AB-2987", "TCGA-AB-2986",
"TCGA-AB-2921", "TCGA-AB-2863", "TCGA-AB-3009", "TCGA-AB-2812",
"TCGA-AB-2940", "TCGA-AB-2996", "TCGA-AB-2983", "TCGA-AB-2859",
"TCGA-AB-2913", "TCGA-AB-2917", "TCGA-AB-2929", "TCGA-AB-2825",
"TCGA-AB-2897", "TCGA-AB-2900", "TCGA-AB-2846", "TCGA-AB-2862",
"TCGA-AB-3002", "TCGA-AB-2871", "TCGA-AB-2819", "TCGA-AB-2901",
"TCGA-AB-2920", "TCGA-AB-2966", "TCGA-AB-2814", "TCGA-AB-2942",
"TCGA-AB-2870", "TCGA-AB-2847", "TCGA-AB-2844", "TCGA-AB-2806",
"TCGA-AB-2911", "TCGA-AB-2980", "TCGA-AB-2861", "TCGA-AB-2916",
"TCGA-AB-2878", "TCGA-AB-2944", "TCGA-AB-2817", "TCGA-AB-2830",
"TCGA-AB-2892", "TCGA-AB-2858", "TCGA-AB-2815", "TCGA-AB-2877",
"TCGA-AB-2939", "TCGA-AB-2890", "TCGA-AB-2811", "TCGA-AB-2918",
"TCGA-AB-2898", "TCGA-AB-2908", "TCGA-AB-2866", "TCGA-AB-2842",
"TCGA-AB-2991", "TCGA-AB-2823", "TCGA-AB-2910", "TCGA-AB-2915",
"TCGA-AB-2977", "TCGA-AB-2912", "TCGA-AB-2943", "TCGA-AB-2881",
"TCGA-AB-2988", "TCGA-AB-3000", "TCGA-AB-2948", "TCGA-AB-2895",
"TCGA-AB-2931", "TCGA-AB-2956", "TCGA-AB-2936", "TCGA-AB-2934",
"TCGA-AB-2914", "TCGA-AB-2992", "TCGA-AB-2869", "TCGA-AB-2946",
"TCGA-AB-2894", "TCGA-AB-2963", "TCGA-AB-2928", "TCGA-AB-2924",
"TCGA-AB-2818", "TCGA-AB-2975", "TCGA-AB-2874", "TCGA-AB-2979",
"TCGA-AB-2826", "TCGA-AB-2990", "TCGA-AB-3001", "TCGA-AB-2885",
"TCGA-AB-2835", "TCGA-AB-2873", "TCGA-AB-2955", "TCGA-AB-2845",
"TCGA-AB-2836", "TCGA-AB-2925", "TCGA-AB-2884", "TCGA-AB-2820",
"TCGA-AB-2899", "TCGA-AB-3008", "TCGA-AB-2994", "TCGA-AB-2889",
"TCGA-AB-2853", "TCGA-AB-2896", "TCGA-AB-2893", "TCGA-AB-2867",
"TCGA-AB-2999", "TCGA-AB-2888", "TCGA-AB-2839", "TCGA-AB-2865",
"TCGA-AB-3007", "TCGA-AB-2932", "TCGA-AB-2976", "TCGA-AB-2834",
"TCGA-AB-2840", "TCGA-AB-2880", "TCGA-AB-2998", "TCGA-AB-2813",
"TCGA-AB-2882", "TCGA-AB-2995", "TCGA-AB-2950", "TCGA-AB-2810",
"TCGA-AB-2935", "TCGA-AB-2821", "TCGA-AB-2952", "TCGA-AB-2886",
"TCGA-AB-2805", "TCGA-AB-2876", "TCGA-AB-2808", "TCGA-AB-2937",
"TCGA-AB-2927", "TCGA-AB-2883", "TCGA-AB-2982", "TCGA-AB-2919",
"TCGA-AB-3012", "TCGA-AB-2841", "TCGA-AB-2875", "TCGA-AB-2984",
"TCGA-AB-2970", "TCGA-AB-2933"))
I'm not sure what is going wrong here given that in my X there are groups and in y there is the dependent variable.
Any suggestion or help would be really appreciated.
ggwithinstats is for plotting repeated measures in individual subjects, but the fact that you have different sized groups for FAB and no ID column suggests that this is not what you have. If the groups are different sizes, you will always get an error because you cannot have a repeated measures structure.
I suspect you are looking for ggbetweenstats rather than ggwithinstats, since all you appear to be doing is comparing different groups.
ggbetweenstats( # independent samples
data = dat,
x = FAB,
y = BM_percentage,
type = "nonparametric", # for wilcoxon
plot.type = "box", # for boxplot
centrality.plotting = FALSE # remove median
)

R: Compute multiple correlations by group (and save output to csv file)

Is there a way to make a file with the correlation statistic between the raw number of fish observed ("num") and each environmental data column ("temp", "do", etc.) by species ("group")?
*As well as correlations between the means and medians of num vs. env. factors?
I'd also like to be able to choose which correlation method to use (Pearson correlation, Kendall rank correlation, Spearman correlation, etc.)
My data:
zeros <- structure(list(year = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L), .Label = c("2019", "2020"), class = "factor"), season = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("dry", "wet"), class = "factor"),
site = structure(c(1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L,
1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L,
3L, 4L, 4L, 5L, 5L, 1L, 1L, 2L, 2L, 3L, 3L, 4L, 4L, 5L, 5L
), .Label = c("1", "2", "3", "4", "5"), class = "factor"),
group = structure(c(1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L,
2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L, 1L, 2L
), .Label = c("Hardhead silverside", "Sailfin molly"), class = "factor"),
num = c(0, 8, 0, 9, 0, 13, 0, 9, 0, 10, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 2,
0, 3, 0, 13, 0), temp = c(23L, 36L, 35L, 34L, 30L, 28L, 18L,
19L, 33L, 33L, 25L, 20L, 33L, 23L, 36L, 32L, 28L, 17L, 34L,
31L, 26L, 34L, 26L, 35L, 15L, 25L, 26L, 20L, 18L, 14L, 23L,
17L, 26L, 17L, 17L, 19L, 29L, 31L, 18L, 15L), sal = c(12.5,
25.5, 8.5, 15.5, 17.5, 27.5, 9.5, 31.5, 1.5, 34.5, 25.5,
21.5, 10.5, 8.5, 32.5, 19.5, 6.5, 5.5, 15.5, 28.5, 6.5, 3.5,
29.5, 13.5, 7.5, 16.5, 3.5, 28.5, 22.5, 5.5, 9.5, 12.5, 29.5,
24.5, 8.5, 32.5, 37.5, 3.5, 12.5, 19.5), do = c(9.66, 7.66,
1.66, 14.66, 15.66, 1.66, 14.66, 15.66, 0.66, 5.66, 10.66,
11.66, 4.66, 0.66, 13.66, 1.66, 13.66, 6.66, 6.66, 10.66,
9.66, 15.66, 9.66, 15.66, 4.66, 13.66, 1.66, 11.66, 6.66,
8.66, 12.66, 0.66, 6.66, 0.66, 9.66, 16.66, 1.66, 10.66,
15.66, 10.66), depth = c(120L, 161L, 52L, 52L, 43L, 105L,
165L, 23L, 79L, 136L, 41L, 59L, 65L, 118L, 122L, 69L, 137L,
88L, 152L, 105L, 108L, 79L, 96L, 80L, 22L, 110L, 157L, 118L,
126L, 93L, 156L, 64L, 74L, 24L, 111L, 113L, 157L, 78L, 121L,
130L)), class = "data.frame", row.names = c(NA, -40L))
The first part of your question is straightforward:
zeros.spl <- split(zeros, zeros$group)
zeros.cors <- sapply(zeros.spl, function(x) cor(x[, "num"], x[, 6:9]))
dimnames(zeros.cors)[[1]] <- colnames(zeros)[6:9]
zeros.cors
# Hardhead silverside Sailfin molly
# temp -0.3080334 0.36174046
# sal 0.1393580 0.47095129
# do 0.2544695 -0.06646818
# depth 0.1296208 0.08777425
t(zeros.cors)
# temp sal do depth
# Hardhead silverside -0.3080334 0.1393580 0.25446948 0.12962078
# Sailfin molly 0.3617405 0.4709513 -0.06646818 0.08777425
Use write.csv(zeros.cors, file="results.csv") or write.csv(t(zeros.cors), file="results.csv") depending on what you want the rows/cols to be.
The second question is not clear. The means/medians of a group will be a single value so you cannot correlate it with the environmental variables. You could compute the means by group with aggregate:
aggregate(zeros[, 5:9], by=list(zeros$group), "mean")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 1.45 25.95 15.35 8.51 105.20
# 2 Sailfin molly 2.45 25.00 18.90 9.06 90.25
aggregate(zeros[, 5:9], by=list(zeros$group), "median")
# Group.1 num temp sal do depth
# 1 Hardhead silverside 0 26 11.5 9.66 115.5
# 2 Sailfin molly 0 24 19.5 10.66 90.5

Changing the style of symbols in the legend so they are identical to those shown on the plot

I am trying change the legend so it is more representative of what is shown on the plot and is up to a publishing standard.
Here is an example I am trying to follow....
As you can see, the symbols in the legend are identical to those shown in the plot.
Here is my graph....
I am not happy with the legend that ggplot produces and I cant find a way to alter it so it matches the published example above.
If I understood your question correctly, you could just modify the the answer to your previous question by #dc37 Moving error bars in a line graph with three factors. I only added two lines for linetype.
library(Rmisc)
library(ggplot2)
tglf3 <- summarySE(df, measurevar="form",
groupvars=c("P","cultivar","Waterlogging"),na.rm=TRUE)
pd <- position_dodge(0.5)
ggplot(tglf3,aes(x=P, y=form,
shape = interaction(cultivar, Waterlogging),
color = interaction(cultivar, Waterlogging),
linetype = interaction(cultivar, Waterlogging)))+
geom_errorbar(aes(ymin=form-se, ymax=form+se), width=.6,position=pd) +
geom_point(size=3.5,position=pd) +
geom_line(position=pd) +
theme_bw() +
theme(legend.position = 'top') +
guides(color=guide_legend(ncol=2, title = "Legend"),
shape = guide_legend(ncol =2, title = "Legend"),
linetype = guide_legend(ncol =2, title = "Legend"))
Edit
Initially, I thought it cannot be done in ggplot2, but I have managed to trick ggplot2 to get what you want(?). I have learned some new tricks through this.
p <-ggplot(tglf3, aes(x=P, y=form,
color = interaction(cultivar, Waterlogging),
shape = interaction(cultivar, Waterlogging),
linetype = interaction(cultivar, Waterlogging)))+
geom_errorbar(aes(ymin=form-se, ymax=form+se), width=.6,position=pd) +
geom_point(size=3.5,position=pd) +
geom_line(position=pd) +
theme_bw() +
theme(legend.position = 'top') +
guides(linetype = guide_legend(ncol=2, title = "Riverina \nYarloop"),
shape = guide_legend(ncol =2, title = "Riverina \nYarloop"),
color = guide_legend(ncol =2, title = "Riverina \nYarloop"))
df <- droplevels(df)
brks <- levels(interaction(df$cultivar, df$Waterlogging))
lbs <- c("Non-waterlogged", "Non-waterlogged", "Waterlogged", "Waterlogged")
p + scale_shape_discrete(breaks=brks, labels=lbs) +
scale_color_discrete(breaks=brks, labels=lbs) +
scale_linetype_discrete(breaks=brks, labels=lbs)
Data
> dput(df)
structure(list(pot = c(41L, 42L, 43L, 44L, 61L, 62L, 63L, 64L,
45L, 46L, 47L, 48L, 65L, 66L, 67L, 68L, 49L, 50L, 51L, 52L, 69L,
70L, 71L, 72L, 53L, 54L, 55L, 56L, 73L, 74L, 75L, 76L, 57L, 58L,
59L, 60L, 77L, 78L, 79L, 80L, 81L, 82L, 83L, 84L, 101L, 102L,
103L, 104L, 85L, 86L, 87L, 88L, 105L, 106L, 107L, 108L, 89L,
90L, 91L, 92L, 109L, 110L, 111L, 112L, 93L, 94L, 95L, 96L, 113L,
114L, 115L, 116L, 97L, 98L, 99L, 100L, 117L, 118L, 119L, 120L
), rep = c(1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L, 2L, 3L, 4L, 1L,
2L, 3L, 4L), cultivar = structure(c(4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("Dinninup", "Riverina",
"Seaton Park", "Yarloop"), class = "factor"), Waterlogging = structure(c(2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L,
2L, 2L, 2L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L), .Label = c("Non-waterlogged",
"Waterlogged"), class = "factor"), P = c(12.1, 12.1, 12.1, 12.1,
12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17, 15.17, 15.17, 15.17,
15.17, 15.17, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24, 18.24,
18.24, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39, 24.39,
48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35, 12.1,
12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 12.1, 15.17, 15.17, 15.17,
15.17, 15.17, 15.17, 15.17, 15.17, 18.24, 18.24, 18.24, 18.24,
18.24, 18.24, 18.24, 18.24, 24.39, 24.39, 24.39, 24.39, 24.39,
24.39, 24.39, 24.39, 48.35, 48.35, 48.35, 48.35, 48.35, 48.35,
48.35, 48.35), form = c(2.81, 2.64, 2.59, 3.28, 3.18, 2.57, 2.9,
3, 2.38, 2.72, 2.58, 2.73, 3.06, 3.01, 3.01, 2.77, 2.95, 2.36,
2.91, 2.38, 3.33, 3.19, 3.17, 3.16, 3.16, 3.2, 2.58, 3.71, 3.11,
2.7, 2.92, 1.93, 2.95, 2.57, 2.68, 2.48, 3.34, 2.75, 2.52, 1.88,
1.19, 0.57, 0.64, 0.66, 1.13, 1.28, 0.85, 0.96, 1.34, 2.14, 0.63,
1.27, 1.13, 0.64, 1.21, 1.95, 1.11, 0.91, 0.75, 0.63, 1.06, 1.07,
1.05, 0.8, 1.41, 1.13, 0.75, 0.89, 1.98, 1.27, 1.01, 1, 1.16,
0.64, 0.64, 1.02, 1.03, 1.13, 0.79, 0.6)), row.names = 41:120, class = "data.frame")

ggplot2 Error in eval(expr, envir, enclos) : object not found

Data Sets
> dput(head(spdistbc,50))
structure(list(Lane = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), Vehicle.class = c(2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L),
speedmph = c(0, 3.4, 6.8, 10.2, 13.6, 17, 20.4, 23.8, 27.2,
30.6, 34, 37.4, 40.8, 0, 3.4, 6.8, 10.2, 13.6, 17, 20.4,
23.8, 27.2, 30.6, 34, 37.4, 40.8, 3.4, 6.8, 10.2, 13.6, 17,
20.4, 23.8, 27.2, 30.6, 34, 37.4, 40.8, 0, 3.4, 6.8, 10.2,
13.6, 17, 20.4, 23.8, 27.2, 30.6, 34, 37.4), cprob = c(0,
0, 0.03, 0.06, 0.11, 0.2, 0.28, 0.43, 0.56, 0.75, 0.91, 0.97,
1, 0, 0, 0.01, 0.01, 0.02, 0.05, 0.17, 0.36, 0.57, 0.76,
0.93, 0.99, 1, 0, 0.01, 0.01, 0.04, 0.07, 0.16, 0.32, 0.55,
0.76, 0.94, 0.99, 1, 0, 0, 0, 0.01, 0.03, 0.06, 0.11, 0.25,
0.47, 0.74, 0.92, 0.98)), .Names = c("Lane", "Vehicle.class",
"speedmph", "cprob"), row.names = c(7L, 8L, 9L, 10L, 11L, 12L,
13L, 14L, 15L, 16L, 17L, 18L, 19L, 26L, 27L, 28L, 29L, 30L, 31L,
32L, 33L, 34L, 35L, 36L, 37L, 38L, 42L, 43L, 44L, 45L, 46L, 47L,
48L, 49L, 50L, 51L, 52L, 53L, 66L, 67L, 68L, 69L, 70L, 71L, 72L,
73L, 74L, 75L, 76L, 77L), class = "data.frame")
> dput(head(cspdistbv,50))
structure(list(lanem = c(6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L,
7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L,
9L, 9L, 9L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 11L, 11L,
11L, 11L), cars = structure(c(34, 35, 36, 37, 38, 39, 40, 24,
26, 28, 30, 32, 34, 36, 38, 40, 20, 25, 30, 35, 40, 10, 15, 20,
25, 30, 35, 40, 10, 15, 20, 25, 30, 35, 40, 35, 40, 45, 50, 55,
0, 0.03, 0.07, 0.17, 0.67, 0.93, 1, 0, 0.03, 0.1, 0.1, 0.2, 0.27,
0.33, 0.8, 1, 0, 0.1, 0.31, 0.52, 1, 0, 0.07, 0.27, 0.37, 0.5,
0.77, 1, 0, 0.03, 0.07, 0.23, 0.4, 0.77, 1, 0, 0.13, 0.47, 0.77,
1), .Dim = c(40L, 2L), .Dimnames = list(NULL, c("speedmph", "prob"
)))), .Names = c("lanem", "cars"), row.names = c(NA, 40L), class = "data.frame")
Problem
I created the plot using spdistbc:
cb1 <- ggplot() + geom_point(data = spdistbc, mapping = aes(x=speedmph, y = cprob, color = 'observed')) + facet_wrap(~Lane) + theme_bw() + my.theme()
Which gave me this:
But when I combine another plot from the second data frame using following code:
cb2 <- cb1 + geom_point(data = cspdistbv, mapping = aes(x = cars.speedmph, y = cars.prob, color = 'simulated-default')) + facet_wrap(~lanem)
I get the error:
Error in eval(expr, envir, enclos) : object 'cars.speedmph' not found
Question
You can see in the cspdistbv data frame, there is a column named cars.speedmph, then why R can't find it? Please help.
Somehow you've created an invalid data.frame. You've stored a matrix in the second column of cspdistbv; dim(cspdistbv) thinks it only has two columns and this interferes with proper naming and such. I'm not sure how you created it, but you can fix it with
cspdistbv <- cbind.data.frame(lanem=cspdistbv[,1], cspdistbv[,2])
And then
cb1 <- ggplot() + geom_point(data = spdistbc, mapping = aes(x=speedmph,
y = cprob, color = 'observed')) + facet_wrap(~Lane) + theme_bw()
cb2 <- cb1 + geom_point(data = cspdistbv, mapping = aes(x = speedmph,
y = prob, color = 'simulated-default')) + facet_wrap(~lanem)
should work

Show x-axis and y-axis for every plot in facet_grid

As similar question has been asked before (here) but I can't adjust the solution provided there to my specific problem. For every plot shown below, there should be a x as well as y-axis.
The data:
dput(df_nSubj)
structure(list(nSubj = c(10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L,
10L, 10L, 10L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L,
20L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L,
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L,
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 50L, 50L, 50L,
50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L,
50L, 50L, 50L, 50L, 50L, 50L, 50L, 50L), family = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 1L,
1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L,
3L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("AOV", "MLM1", "MLM2",
"MLM3"), class = "factor"), Spher = structure(c(1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L,
1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L,
2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 2L, 2L, 2L, 1L, 1L,
1L, 2L, 2L, 2L), .Label = c("met", "vio"), class = "factor"),
effSize = c(0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8, 0.2, 0.5, 0.8,
0.2, 0.5, 0.8), pow = c(0.12, 0.53, 0.84, 0.1, 0.4, 0.74,
0.13, 0.55, 0.84, 0.11, 0.44, 0.74, 0.12, 0.5, 0.82, 0.1,
0.43, 0.76, 0.12, 0.49, 0.81, 0.1, 0.43, 0.76, 0.2, 0.84,
0.99, 0.15, 0.72, 0.97, 0.21, 0.81, 0.98, 0.17, 0.69, 0.95,
0.2, 0.83, 0.99, 0.17, 0.75, 0.97, 0.19, 0.82, 0.99, 0.17,
0.75, 0.97, 0.32, 0.95, 1, 0.23, 0.87, 1, 0.32, 0.92, 1,
0.25, 0.83, 0.99, 0.3, 0.94, 1, 0.24, 0.89, 1, 0.3, 0.94,
1, 0.24, 0.89, 1, 0.41, 0.99, 1, 0.29, 0.96, 1, 0.4, 0.97,
1, 0.3, 0.92, 1, 0.38, 0.99, 1, 0.32, 0.97, 1, 0.37, 0.98,
1, 0.32, 0.97, 1, 0.5, 1, 1, 0.36, 0.98, 1, 0.47, 0.99, 1,
0.36, 0.96, 1, 0.47, 1, 1, 0.4, 0.99, 1, 0.46, 1, 1, 0.4,
0.99, 1)), class = "data.frame", .Names = c("nSubj", "family",
"Spher", "effSize", "pow"), row.names = c(NA, -120L))
plot:
require(ggplot2)
require(grid)
pl1 <- ggplot(data=df_nSubj,aes(x=nSubj,y=pow,group=family))+
geom_point(aes(shape=family))+geom_line()+
labs(x="Number of subjects",y="Power",shape="")+
scale_y_continuous(limits=c(0.2,1),breaks=c(0.2,0.4,0.6,0.8,1))+
guides(shape = guide_legend(ncol = 4))+
facet_grid(Spher~effSize)+
theme_bw()+
theme(legend.position = "top",
panel.margin = unit(2, "lines"),
legend.key = element_blank(),
strip.text.x = element_blank(),
strip.text.y = element_blank(),
strip.background = element_blank(),
panel.border=element_blank(),
axis.line=element_line(),
axis.title.x = element_text(vjust=-0.5))
Thanks in advance
Change facet_grid(Spher~effSize) to facet_wrap(Spher~effSize, scales = "free")
I'm late to this party, but I have two workarounds currently.
The first option is to add horizontal and/or vertical line geoms with intercepts set to -Inf, which requires turning off plot clipping. To the original plot, add the following lines:
geom_hline(aes(yintercept=-Inf)) +
geom_vline(aes(xintercept=-Inf)) +
coord_cartesian(clip="off")
Fake Axis Option
Alternatively, use the lemon package and its facet command. This will reproduce the axes and their ticks without adding the labels.
library(lemon)
And change facet_grid(Spher~effSize) to facet_rep_grid(Spher~effSize)
Lemon Package Option
The ggh4x package also has this functionality via its facet_wrap2 function

Resources