Histogram and density plots with multiple groups - r

I have a dataset consisting of 4 variables: CR, EN, LC and VU.
Here are the first few values of my dataset:
CR = c(2, 9, 10, 14, 24, 27, 29, 30, 34, 43, 50, 74, 86, 105, 140, 155, 200, …)
EN = c(24, 52, 86, 110, 144, 154, 206, 242, 300, 302, 366, 403, 422, 427, 427, 434, 448, …)
LC = c(447, 476, 543, 580, 647, 685, 745, 763, 819, 821, 863, 904, 908, 926, 934, 951, 968, …)
VU = c(75, 96, 97, 217, 297, 498, 511, 551, 560, 564, 570, 575, 609, 673, 681, 700, 755,...)
I want to create, in R, a single plot containing a histogram for each of these variables, overlaid with the normal distribution and density curves, similar to the plot below...
Could you please help me?

Here are the distributions, a clear-cut use of geom_density.
But first, to address "grouping", we need to pivot/reshape the data so that ggplot2 can automatically handle grouping. This will result in a column with a character (or factor) for each of the "CR", "EN", "LC", or "VU", and another column with the particular value. When pivoting, there is typically one or more columns that are preserved (an id, an x-value, a time/date, or something similar), but we don't have any data that would suggest something to preserve.
# Reshape to long format: all columns of `dat` are stacked, giving one row per
# observation with the variable label in `name` and the measurement in `value`.
longdat <- tidyr::pivot_longer(
  dat,
  cols = everything(),
  names_to = "name",
  values_to = "value"
)
longdat
# # A tibble: 68 × 2
# name value
# <chr> <dbl>
# 1 CR 2
# 2 EN 24
# 3 LC 447
# 4 VU 75
# 5 CR 9
# 6 EN 52
# 7 LC 476
# 8 VU 96
# 9 CR 10
# 10 EN 86
# # … with 58 more rows
# # ℹ Use `print(n = ...)` to see more rows
# Overlaid kernel density curves, one per variable in `longdat$name`.
# ggplot2 is attached here so the snippet runs on its own (the reshaping step
# above is namespace-qualified and does not attach it).
library(ggplot2)

# `fill = name` maps a discrete variable, which already splits the data into
# one group per variable, so no separate `group` aesthetic is required.
# alpha = 0.2 keeps overlapping densities visible through each other.
ggplot(longdat, aes(x = value, fill = name)) +
  geom_density(alpha = 0.2)
tidyr::pivot_longer works, one can also use melt from either reshape2 or data.table:
# reshape2 alternative: with an empty set of id variables, every column of
# `dat` is melted into the long format.
# The resulting columns are 'variable' and 'value' (not 'name' and 'value').
longdat <- reshape2::melt(dat, id.vars = NULL)
Data
# Example data: 17 observations of the four variables CR, EN, LC and VU.
dat <- data.frame(
  CR = c(2, 9, 10, 14, 24, 27, 29, 30, 34, 43, 50, 74, 86, 105, 140, 155, 200),
  EN = c(24, 52, 86, 110, 144, 154, 206, 242, 300, 302, 366, 403, 422, 427,
         427, 434, 448),
  LC = c(447, 476, 543, 580, 647, 685, 745, 763, 819, 821, 863, 904, 908, 926,
         934, 951, 968),
  VU = c(75, 96, 97, 217, 297, 498, 511, 551, 560, 564, 570, 575, 609, 673,
         681, 700, 755)
)

Related

Calculate Latitudinal Range of species abundance

I'm trying to calculate the latitudinal ranges for species (ASVs) along a transect but cannot get my head around how to do this.
Basically I want the maximum latitude minus the minimum latitude where a species is present, i.e. where its abundance does not equal 0. For example, if Species 1 is present for the first time at -35 deg S and for the last time at -40 deg S, its latitudinal range would be 5 degrees. Thanks!
My data looks like this:
> dput(test[1:30, c(1:5)])
structure(list(Station_neat = c("001_DCM", "001_SA", "003_DCM",
"003_SA", "005_DCM", "005_SA", "007_DCM", "007_SA", "009_DCM",
"009_SA", "011_DCM", "011_SA", "013_DCM", "013_SA", "015_DCM",
"015_SA", "017_DCM", "017_SA", "019_DCM", "019_SA", "021_DCM",
"021_SA", "023_DCM", "023_SA", "025_DCM", "025_SA", "027_DCM",
"027_SA", "029_DCM", "029_SA"), Lat = c(-29.997, -29.997, -30.9975,
-30.9975, -31.9995, -31.9995, -32.99816667, -32.99816667, -34.00016667,
-34.00016667, -34.9995, -34.9995, -36.00083333, -36.00083333,
-36.9985, -36.9985, -38.00016667, -38.00016667, -38.99833333,
-38.99833333, -39.999, -39.999, -40.99783333, -40.99783333, -42.0005,
-42.0005, -42.99633333, -42.99633333, -43.9975, -43.9975), asv_3 = c(80,
0, 65, 0, 41, 0, 50, 0, 44, 0, 53, 0, 59, 0, 38, 0, 43, 0, 25,
0, 29, 51, 35, 22, 133, 35, 159, 83, 965, 414), asv_4 = c(766,
694, 286, 791, 421, 1202, 382, 431, 484, 684, 431, 529, 454,
722, 621, 370, 472, 439, 394, 243, 414, 518, 297, 300, 574, 396,
395, 1359, 1113, 541), asv_5 = c(1314, 2812, 729, 2874, 915,
3720, 1226, 2046, 940, 1783, 1220, 2627, 986, 3195, 1514, 566,
590, 1603, 325, 667, 748, 932, 616, 339, 1167, 1088, 988, 2333,
1563, 2146)), row.names = c(NA, 30L), class = "data.frame")
Edit: ASVs (e.g. asv_4) are my species. I have about 600 of these.
Edit 2: Scatterplot with mean latitudinal range and latitude (see comment):
Maximum latitude minus the minimum latitude where a species is present, i.e. abundance does not equal 0
A base solution:
# Latitudinal range per species: maximum minus minimum latitude over the
# stations where the species is present (abundance > 0). Returns a named list
# with one value per column of `test` whose name contains "asv".
lapply(test[grepl("asv", names(test))], \(x) {
  # Treat missing abundances as "not present" so they do not inject NA
  # latitudes into the range.
  present <- !is.na(x) & x > 0
  # A species that is absent at every station has no range: return NA instead
  # of letting range() warn on empty input and produce -Inf.
  if (!any(present)) {
    return(NA_real_)
  }
  diff(range(test$Lat[present], na.rm = TRUE))
})
# $asv_3
# [1] 14.0005
#
# $asv_4
# [1] 14.0005
#
# $asv_5
# [1] 14.0005
Its dplyr equivalent:
library(dplyr)
# For every column whose name starts with "asv", take the latitudes of the
# rows where that column is positive and return max - min as a one-row summary.
summarise(
  test,
  across(
    starts_with("asv"),
    function(abund) diff(range(Lat[abund > 0]))
  )
)
# asv_3 asv_4 asv_5
# 1 14.0005 14.0005 14.0005

Repositioning and increasing the weight of borders around dendrogram produced with R 'plot' function

I am trying to cut a dendrogram into three classes using the rect.hclust function, but when I export the graph, it cuts off the borders at the bottom of the graph. In addition, I would like to increase the weight of the borders, but I am not sure how to do this, as the lwd argument doesn't seem to exist for this function. What can I do to fix these parameters?
Data:
# Example data: 20 plots (ids 1-20, stored as doubles) with three measurements
# each.
cluster <- data.frame(
  plot = as.numeric(1:20),
  meas1 = c(443, 836, 903, 684, 94, 125, 733, 846, 625, 234,
            437, 775, 269, 774, 17, 502, 80, 51, 523, 229),
  meas2 = c(735, 574, 793, 261, 961, 136, 404, 138, 45, 935,
            698, 675, 594, 497, 152, 153, 30, 667, 547, 745),
  meas3 = c(23, 526, 36, 93, 708, 970, 399, 111, 456, 439,
            569, 503, 337, 213, 399, 850, 614, 491, 28, 452)
)
Code for hierarchical clustering and dendrogram generation:
# Pairwise Euclidean distances between the rows of `cluster`.
# NOTE(review): the `plot` id column is part of `cluster` and therefore enters
# the distance computation - confirm this is intended.
dist_mat <- dist(cluster, method = "euclidean")

# Average-linkage hierarchical clustering on those distances
hclust_avg <- hclust(dist_mat, method = "average")

# Cluster membership of each row when the tree is cut into 3 classes
cut_avg <- cutree(hclust_avg, k = 3)

# Draw the dendrogram with thicker tree lines and no x-axis label or subtitle
plot(
  hclust_avg,
  main = "Cluster Dendrogram",
  sub = "",
  xlab = "",
  ylab = "Euclidean Distance",
  lwd = 2
)

# Box the 3 clusters; border colours are taken from 2:6
rect.hclust(hclust_avg, k = 3, border = 2:6)
Exporting the graph looks as follows:
I need the red-green-blue borders to be thicker (higher border weights), and I also need them to not be cut off at the bottom of the graph.
I bypassed the problem of the missing lower edge of the rectangle by setting the figure margins to zero before drawing the rectangle.
The line width of the rectangles can be set by setting par(lwd), e.g. par(lwd=4), as in the example below:
# Example data: 20 plots (integer ids 1-20) with three measurements each.
cluster <- data.frame(
  plot = 1:20,
  meas1 = c(443, 836, 903, 684, 94, 125, 733, 846, 625, 234,
            437, 775, 269, 774, 17, 502, 80, 51, 523, 229),
  meas2 = c(735, 574, 793, 261, 961, 136, 404, 138, 45, 935,
            698, 675, 594, 497, 152, 153, 30, 667, 547, 745),
  meas3 = c(23, 526, 36, 93, 708, 970, 399, 111, 456, 439,
            569, 503, 337, 213, 399, 850, 614, 491, 28, 452)
)
# Pairwise Euclidean distances between the rows of `cluster`
dist_mat <- dist(cluster, method = "euclidean")
# Average-linkage hierarchical clustering
hclust_avg <- hclust(dist_mat, method = "average")
# Cluster membership of each row when the tree is cut into 3 classes
cut_avg <- cutree(hclust_avg, k = 3)
# Plot dendrogram
plot(hclust_avg, xlab = "", ylab = "Euclidean Distance", sub = "",
     main = "Cluster Dendrogram", lwd = 2)
# rect.hclust() takes no line-width argument, so the border weight is set
# through par(lwd). xpd = TRUE turns off clipping to the plot region, so the
# bottom edge of each box (drawn at the lower limit of the plot region) is
# rendered at full width instead of being cut off.
# NOTE(review): the margins are deliberately left unchanged here; changing
# `mar` after plot() appears to re-map user coordinates, so the boxes might no
# longer line up with the tree - confirm on the target device.
# par() returns the previous values of the settings it changes.
old_par <- par(lwd = 4, xpd = TRUE)
# Generate borders around each group
rect.hclust(hclust_avg, k = 3, border = 2:6)
# Restore the previous lwd / xpd settings
par(old_par)
Created on 2020-06-30 by the reprex package (v0.3.0)

How can I check and add missing rows in a data frame based on an index vector?

I need to add missing rows from "count" based on the "numberclass" that is missing. "numberclass" is the column of the "count" dataset that should go from 1 to 652, but misses some numbers and ends at 645.
To achieve that, I made an index vector that goes from 1 to 652 called c1.
How can I use rbind to add the rows that are missing from "count"?
Each added row should contain the missing number in the "numberclass" column and a 0 in the "sum" column of the "count" data frame.
visual example
count
numberclass sum
1 1 3.45
2 2 32.45
3 3 23.11
4 5 21.33
5 6 1.54
c1
V1
1 1
2 2
3 3
4 4
5 5
6 6
finalcount
numberclass sum
1 1 3.45
2 2 32.45
3 3 23.11
4 4 0
5 5 21.33
6 6 1.54
dput(c1)
1:652
> dput(count)
structure(list(numberclass = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90,
91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104,
105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
157, 158, 159, 160, 161, 162, 163, 164, 166, 167, 168, 169, 170,
171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183,
184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196,
197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222,
223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235,
236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248,
249, 251, 252, 253, 255, 256, 257, 258, 259, 260, 261, 262, 263,
264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276,
277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289,
290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302,
303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315,
316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328,
329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341,
342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354,
355, 356, 357, 358, 360, 361, 362, 363, 364, 365, 366, 367, 368,
369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381,
382, 383, 384, 385, 386, 387, 388, 389, 391, 392, 393, 394, 395,
396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408,
409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421,
422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434,
435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447,
448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460,
461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473,
474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486,
487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499,
500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512,
513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525,
526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538,
539, 540, 541, 542, 543, 545, 546, 547, 548, 549, 550, 551, 552,
554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566,
567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579,
580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592,
593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605,
606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618,
619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631,
632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644,
645, 646, 647, 648, 649, 650, 651, 652), sum = c(237.750666386555,
189.540342857143, 351.867604761905, 195.005685714286, 308.574686424686,
18.2691666666667, 85.6063492063492, 330.872041913642, 12.5832666666667,
81.3559523809524, 940.085002447968, 38.9222222222222, 67.6095238095238,
52.4340924675325, 48.9761904761905, 190.221922510823, 67.2384948051948,
106.311044372294, 50.4888222222222, 40.4883365079365, 146.992341452991,
43.6190142857143, 133.421034293119, 234.662733903319, 41.3940476190476,
27.5869769119769, 4.77619047619048, 1.14404345238095, 33.7083333333333,
44.2833333333333, 22.9526315789474, 21.5833333333333, 10.65,
2.75, 73.0113858363858, 9.41666666666667, 10.9, 30.3830128205128,
58.9269230769231, 1.39285714285714, 267.691666666667, 58.0575757575758,
48.1547008547009, 82.8479908979909, 57.6404761904762, 0.333333333333333,
15.0952380952381, 62.5674603174603, 155.280158730159, 39.9, 82.6307359307359,
24.6282467532468, 301.294040989729, 336.528306349206, 19.0833333333333,
110.152380952381, 151.278584609835, 27.3151515151515, 326.42688974359,
148.124206349206, 250.934674989716, 791.586193783953, 284.357225111163,
26.3166666666667, 689.571152020736, 211.649312496276, 143.23373015873,
104.389479365079, 1977.09488512611, 278.063024283429, 635.353051458803,
255.639689709121, 182.388611918596, 121.218055555556, 53.5880285714286,
29.8071514529915, 289.396377133977, 261.427877777778, 13.0333333333333,
120.082323232323, 26.4499333333333, 118.030555555556, 3.16666666666667,
3.5, 1.27692307692308, 1327.43098544718, 359.099526103064, 886.03077133796,
77.9476163059163, 3.7, 204.405522222222, 42.3193805944056, 83.1319512987013,
32.0430735930736, 100.999933333333, 41.4505205838606, 359.286551817072,
134.815597663857, 120.851665339892, 68.6170634920635, 120.464757456432,
98.7313991341991, 138.937179487179, 18.4913941580642, 8.9984237984238,
238.521621356421, 123.083044733045, 363.372644000444, 39.2380952380952,
3.16666666666667, 19.6226551226551, 53.5838383838384, 34.581746031746,
4.95, 131.300949206349, 445.728384935065, 109.100990092656, 364.408721825397,
61.5416666666667, 222.299498645799, 16.0214285714286, 13.5833333333333,
35.9928238095238, 522.570385291901, 92.072619047619, 451.015331590632,
276.63253968254, 61.6666666666667, 56.875, 246.15873015873, 52.5833,
73.5964119047619, 28.1214646464646, 30.1333333333333, 53.9054945054945,
206.796237085137, 111.121428571429, 182.169199264787, 59.3175971087736,
64.3332722832723, 16.9333333333333, 13.9166666666667, 23.3833333333333,
33.8173992673993, 1.50952380952381, 1.1, 47.9876584126984, 33.6666666666667,
31.7166666666667, 42.5094738594739, 193.209163059163, 36.8706349206349,
56.4786214285714, 125.781411481369, 1326.37051628773, 128.802066528312,
176.118690340834, 124.811656943091, 221.328297720058, 92.4357277483439,
5.54453781512605, 11.934710550887, 34.1893281555046, 297.559209282097,
10.45, 15.9714285714286, 0.333333333333333, 404.635647619048,
1.33333333333333, 423.917088383838, 31.725, 22.2334666666667,
126.991549902454, 46.2095071428571, 19.9333333333333, 9.41666666666667,
36.1666666666667, 101.691628950685, 88.0833333333333, 1.08333333333333,
60.5678571428571, 44.5857142857143, 10.3333333333333, 27.9333333333333,
59.6450530463688, 33.0823773448773, 15.2018740031898, 139.796428571429,
302.865200865801, 58.4464285714286, 7.50238095238095, 253.278364368964,
98.456746031746, 275.551738539239, 224.303773488182, 43.4340004939634,
14.475, 252.068551587302, 193.944014285714, 97.1103202020202,
522.762237662338, 152.027922077922, 495.599785289496, 15.45,
44.4584599224305, 2.63932178932179, 76.913480952381, 18.5944333333333,
80.5424963924964, 52.8404761904762, 19.602380952381, 21.7789854538307,
2.09285714285714, 15.6, 57.8281523809524, 114.880233333333, 2.5,
582.268982688364, 22.8928571428571, 43.5, 71.0449134199134, 13.45,
71.4832666666667, 382.793654822955, 57.6023587301587, 17.8666666666667,
134.694036507937, 8.65833333333333, 6.48333333333333, 167.456313131313,
108.970238095238, 38.0944444444444, 41.4536075036075, 644.437984476377,
64.2714285714286, 1630.6914617297, 81.8621387218045, 977.944218315018,
825.631676469739, 76.9720238095238, 161.353968253968, 70.9142857142857,
122.307142857143, 49.1575757575758, 38.9833333333333, 119.23980017316,
9.5, 7, 9.03333333333333, 0.285714285714286, 2.81558441558442,
34.3352130179203, 423.489491888615, 26.7138582972583, 20.2610666666667,
70.2504356560596, 84.3197993439266, 133.202467136288, 452.717995233655,
320.773420116725, 209.525511634406, 641.329055345934, 9.29166666666667,
20.0666666666667, 23.4825757575758, 42.336926961927, 21.5083333333333,
48.472619047619, 5.68452380952381, 3.61666666666667, 2.66666666666667,
22.6410952702853, 2596.19741576659, 3701.15679179432, 458.475674942574,
0.177777777777778, 236.511739558926, 178.846204916721, 554.69148345371,
109.069139904866, 27.9428571428571, 865.353323873349, 1315.57171181985,
4.94494734487734, 367.766031285642, 519.099162156913, 703.569199879477,
570.161782712288, 55.7592247797747, 424.061781409081, 4.14444444444444,
7.85, 1.5, 203.543559424236, 417.414520853467, 118.026934176934,
13.8930333333333, 5.3, 195.214038429218, 2, 125.901590928837,
20.183510972172, 174.23474402697, 115.783354224877, 20.9589971153889,
64.2541744390332, 30.1928142135642, 653.283386817422, 45.4998949579832,
2.28333333333333, 35.7234848484849, 13.4766233766234, 1, 1, 151.923361772117,
466.496416114588, 241.639269088134, 208.697684171547, 37.1753432142857,
32.7720180265813, 28.2666666666667, 32.9353202020202, 29.3107466063348,
52.1338661616162, 92.2408604474645, 143.825094880675, 146.094892496393,
185.56378660516, 229.435060026582, 35.8161587301587, 358.75152088854,
9.54144989396568, 100.579542891096, 48.5654928571429, 182.120363315018,
92.411123015873, 213.978268831169, 30.4477001960784, 133.023283627484,
1.48156826833297, 8.58333333333333, 4.44443333333333, 38.2468253968254,
56.047481038406, 67.3214285714286, 123.833316666667, 72.7440476190476,
4.04166666666667, 15.0999833333333, 66.4499333333333, 200.083454172494,
6.04285714285714, 160.691602741703, 6.19924242424242, 1.33333333333333,
108.082979449584, 106.752280952381, 14.5075757575758, 17.3920634920635,
131.341230952381, 44.2768897435897, 313.758134920635, 2.16666666666667,
16.6477124183007, 4.75, 23.7767065934066, 114.554377815518, 67.8246376228347,
127.12717047619, 8.01590909090909, 62.9999458874459, 24.5385558774559,
25.4267800865801, 64.9809956302521, 26.8670829004329, 144.936510045837,
18.2714285714286, 181.673313930514, 6.37619047619048, 122.4944,
163.107067798868, 62.2391525974026, 100.821861471861, 66.6090659340659,
151.295802741703, 227.115548340548, 161.469246031746, 20.8428571428571,
98.9682406349206, 84.2357142857143, 63.5107142857143, 587.042635340803,
291.116304438862, 217.717193917194, 314.73560018413, 198.123701298701,
236.697900710401, 410.192568542569, 118.817857142857, 143.350727050727,
81.387055999556, 43.8719696969697, 203.429180541681, 517.788687667888,
61.2261904761905, 382.272785934066, 75.7309523809524, 112.349503174603,
22.7539682539683, 31.7878787878788, 71.6388888888889, 116.672591197691,
31.4399816686581, 139.147260092848, 38.9365079365079, 142.327696091318,
73.9474025974026, 353.130164019063, 49.7790027560675, 247.005519209059,
98.4489704073704, 22.8163324675325, 49.0166666666667, 398.237265694185,
20.0119047619048, 127.929437229437, 29.906746031746, 11.4833333333333,
29.5477994227994, 17.2627344877345, 1, 275.39396990232, 155.285052380952,
191.24167394958, 17.5547619047619, 32.6397907647908, 48.0516145404303,
20.0202991341991, 296.087292678082, 6.05553333333333, 6.30952380952381,
550.020158730159, 398.502413950429, 697.700455175612, 342.769086313686,
100.248412698413, 578.569767384318, 323.557284593185, 578.870478870574,
799.803117448702, 66.4497474747475, 52.7964285714286, 28.2440476190476,
1, 9.15, 0.333333333333333, 101.279396149946, 20.4504329004329,
2, 0.342857142857143, 11.0416666666667, 114.264102564103, 148.394093406593,
17.3285625923784, 10.2605680868839, 109.262121733822, 5.68568095238095,
4.91666666666667, 27.8404512154512, 95.3755683538683, 134.882303769841,
61.262513966589, 16.5333333333333, 64.6593323051948, 37.6535103785104,
42.0317820956821, 17.3730092063492, 81.8735937673438, 44.7111111111111,
17.4607142857143, 70.0927904761905, 148.696792063492, 170.374507625708,
185.520274170274, 177.809072871573, 86.3721112221112, 176.200008488178,
15.1166666666667, 136.109471067821, 48.0101062250443, 166.262856565657,
148.329752057299, 151.820306375846, 4.18642884892885, 13.65,
17.5384920634921, 158.262582783883, 255.417342568543, 29.2134920634921,
197.809798534799, 29.85, 16.9095238095238, 20.8333333333333,
113.602744444444, 44.002380952381, 36.0333333333333, 318.15949047619,
116.7, 9.73333333333333, 459.457291197691, 200.920720879121,
314.905574729437, 468.928687626263, 127.85367965368, 34.46829004329,
127.564573784059, 168.830957864358, 276.134640779221, 201.892396392496,
1946.09400347577, 201.03562536075, 0.54047619047619, 782.099165160003,
425.714983516484, 89.7872682539683, 146.385452539683, 10.6666666666667,
1025.68925909923, 116.007914285714, 276.85727701204, 289.008666233766,
251.763574012009, 83.7539682539683, 348.782956092124, 241.232478499278,
35.9951548451548, 23.8844904761905, 16.75, 15.6583166666667,
23.4777777777778, 5.83333333333333, 262.787474045562, 285.537711241699,
63.2683473389356, 66.3647186147186, 2, 8.83323333333333, 751.311316139416,
20.0833333333333, 3.48333333333333, 313.547763557495, 24.6952380952381,
2.33333333333333, 60.6101524475524, 111.872585714286, 52.7153693528694,
181.421808730159, 86.6900043290043, 223.108003141303, 16.0825757575758,
304.663375396825, 48.2595238095238, 53.0539682539683, 117.610714285714,
3.1, 1.83333333333333, 305.834008148714, 197.169349200473, 0.5,
8, 33.7777777777778, 1.2, 5.58333333333333, 42.6051282051282,
144.887301587302, 65.7499666666667, 963.598530853141, 217.737908305747,
19.827380952381, 3.775, 229.018578571429, 7.19166666666667, 186.860334126984,
9.33333333333333, 0.75, 1, 43.8273809523809, 62.2753634920635,
301.048005944774, 89.4083452763611, 374.762004736842, 166.820046453546,
1058.5261360623, 872.182726540127, 54.4082666666667, 1227.53727689429,
321.227890629965, 148.721916971917, 277.273484848485, 897.280942113442,
226.137230929597, 72.7005952380952, 140.310317460317, 317.511606180094,
209.189406410256, 104.605501434676, 437.805596256685, 273.362576312576,
8.47222222222222, 227.129921804748, 0.943686868686869, 67.7638777888778,
20.4856893106893, 99.1611000111, 166.165773015873, 82.3694444444444,
227.211077777778, 72.4857142857143, 461.993158401598, 78.8, 210.535976984127,
428.665560794761, 35.797619047619, 133.786890638528, 20.4904761904762,
577.348705757576, 404.170196392496, 1101.04344335286, 270.924821327561,
196.366666666667, 5.83333333333333, 81.6839466089466, 516.43132186441,
2.33333333333333, 10.9095238095238, 54.1369047619048, 48.2956349206349,
676.496237656507, 137.799728238428, 14.4768149941046, 355.509695218997,
422.28376567026, 213.912283405959, 177.353159198024, 14.0459013125763
)), row.names = c(NA, -645L), class = c("tbl_df", "tbl", "data.frame"
))
I've found a solution with the tidyr package:
library(tidyr)
# Convert the tibble to a plain data frame, then add one row for every value
# of `numberclass` missing from the step-1 sequence spanning the observed
# values; `sum` is set to 0 in the rows that are added.
# NOTE(review): full_seq() is built from the values present in the column, so
# a gap beyond the largest observed numberclass would not be filled - verify
# against the expected upper bound.
count <- complete(
  as.data.frame(count),
  numberclass = full_seq(numberclass, period = 1),
  fill = list(sum = 0)
)
No need for an extra index vector.

Plot Cumulative Distribution Function of Kernel in R

My task is to plot the cumulative distribution function (CDF) of asymptotic kernels. For this purpose, I prepared the following R code for the CDF of the log-normal kernel; my problem is that, after I applied the transformation provided by the author of the kernel (Lognormal kernel), the CDF curve slopes downward.
Kindly provide suggestions or corrections for this problem.
R code:
# Smoothed CDF estimate based on the log-normal kernel, evaluated on a grid.
#   k     number of grid points
#   y     observed sample
#   x     evaluation grid spanning (min(y), max(y)]
#   Fhat  estimated CDF at each grid point
k <- 200
y <- c(306, 455, 210, 883, 310, 361, 218, 166, 170, 654, 728, 71, 567, 144, 613,
       707, 61, 88, 301, 81, 624, 371, 394, 520, 574, 118, 390, 12, 473, 26, 533,
       107, 53, 122, 814, 93, 731, 460, 153, 433, 145, 583, 95, 303, 519, 643, 765,
       735, 189, 53, 246, 689, 65, 5, 132, 687, 345, 444, 223, 175, 60, 163, 65,
       208, 428, 230, 305, 11, 132, 226, 426, 705, 363, 11, 176, 791, 95, 167, 284,
       641, 147, 163, 655, 239, 88, 245, 30, 179, 310, 477, 166, 450, 364, 107, 177,
       156, 11, 429, 351, 15, 181, 283, 201, 524, 13, 212, 524, 288, 363, 442, 199,
       550, 54, 558, 207, 92, 60, 293, 202, 353, 267, 371, 387, 457, 337, 201, 222,
       62, 353, 163, 31, 340, 229, 182, 156, 329, 291, 179, 268, 142, 194, 320, 181,
       285, 348, 197, 180, 186, 145, 350, 285, 110, 286, 270, 81, 131, 269, 135, 79,
       59, 105, 239, 13, 183, 116)
n <- length(y)
# Bandwidth, unchanged from the original formula
h <- 0.79 * IQR(y) * n^(-1 / 5)
# The grid starts slightly above min(y); all values stay positive for log()
x <- seq(min(y) + 0.05, max(y), length.out = k)

# Scale of the kernel on the log axis
sigma <- sqrt(4 * log(1 + h))

# PhiLN[i, j] is the contribution of observation y[i] at grid point x[j].
# The argument is (log x - log y): it grows with x, so each term, and hence
# the estimate, is non-decreasing as a CDF must be. The previous
# (log y - log x) form decreased in x.
# The matrix is built once for all (i, j); previously it was re-created inside
# the inner loop, which wiped every contribution except the last observation.
PhiLN <- pnorm(
  outer(log(y), log(x), function(log_y, log_x) (log_x - log_y) / sigma)
)

# Average the contributions over the n observations for every grid point
Fhat <- colMeans(PhiLN)

plot(x, Fhat, type = "l")

How to change distance between breaks for continuous x-axis on ggplot?

I have a dataset with y-axis = diversity indices and x-axis = depth. I am looking at how diversity changes with depth (increases/decreases). It is informative to visualize these changes over depth (so transforming isn't helpful); however, this is difficult because of the disparity in the number of samples between depths (more samples at shallower than at deeper depths). With the following code:
# Depth values (x-axis) at which tick marks are requested
breaks_depth <- c(0, 50, 100, 150, 250, 350, 450, 500, 1200)

# Diversity against depth on linear axes: x limited to 0-1200 with the custom
# breaks, y limited to 0-1400 with a tick every 200.
ggplot(df, aes(x = Depth, y = Diversity)) +
  geom_line() +
  scale_x_continuous(limits = c(0, 1200), breaks = breaks_depth) +
  scale_y_continuous(limits = c(0, 1400), breaks = seq(0, 1400, by = 200))
I get the following plot:
I would like to get a plot such that the distance between 500m and 1200m depth is smaller and the distance between the shallower depths (0-150m) is greater. Is this possible? I have tried expand and different break and limit variations. The dput() of this dataset can be found here. The rownames are the sample IDs and the columns I am using for the plot are: y-axis=invsimpson_rd, and x-axis=Depth_rd. TIA.
****EDIT*****
Winner code and plot modified from Calum's answer below.
# Inverse Simpson diversity against depth, with a log10-transformed x-axis so
# shallow depths get more horizontal space than deep ones.
# Note: log10(0) is -Inf, so the requested break at 0 has no finite position
# on this axis.
ggplot(a_div, aes(x = Depth_rd, y = invsimpson_rd)) +
  geom_line() +
  scale_y_continuous(limits = c(0, 1400), breaks = seq(0, 1400, by = 200)) +
  scale_x_continuous(
    trans = "log10",
    breaks = c(0, 15, 25, seq(50, 450, by = 50), seq(600, 1200, by = 200))
  )
Here's an example with the built in economics dataset. You can see that you can specify the breaks however you want as per usual, but the "sqrt" transformation shifts the actual plotted values to have more space near the beginning of the series. You can use other built in transformations or define your own as well.
EDIT: updated with example data and some comparison of common different trans options.
library(tidyverse)
tbl <- structure(list(yval = c(742, 494, 919, 625, 124, 788, 583, 213, 715, 363, 15, 313, 472, 559, 314, 494, 388, 735, 242, 153, 884, 504, 267, 454, 325, 305, 746, 628, 549, 345, 327, 230, 271, 486, 971, 979, 857, 779, 394, 903, 585, 238, 702, 850, 611, 710, 694, 674, 1133, 468, 784, 634, 234, 61, 325, 505, 693, 1019, 766, 435, 407, 772, 925, 877, 187, 290, 782, 674, 1263, 1156, 935, 499, 791, 797, 537, 308, 761, 744, 674, 764, 560, 805, 540, 427, 711), xval = c(80, 350, 750, 100, 20, 200, 350, 50, 110, 20, 200, 350, 60, 100, 20, 40, 60, 100, 20, 40, 350, 50, 20, 40, 50, 30, 40, 260, 1000, 200, 200, 200, 500, 50, 350, 360, 380, 250, 60, 190, 40, 70, 70, 40, 40, 70, 180, 180, 440, 370, 130, 1200, 20, 20, 30, 80, 120, 200, 220, 120, 40, 80, 350, 750, 20, 80, 200, 320, 500, 220, 160, 80, 140, 350, 100, 40, 350, 100, 200, 340, 60, 40, 100, 60, 40)), .Names = c("yval", "xval"), row.names = c(NA, -85L), class = c("tbl_df", "tbl", "data.frame"))
# Same line plot drawn twice, differing only in the x-axis transformation.
# Square-root axis: spreads out the low end of the x range.
ggplot(tbl, aes(x = xval, y = yval)) +
  geom_line() +
  scale_x_continuous(
    trans = "sqrt",
    breaks = c(0, 50, 100, 150, 250, 350, 450, 500, 1200)
  )

# log10 axis: same breaks requested (log10(0) is -Inf, so the 0 break has no
# finite position on this axis).
ggplot(tbl, aes(x = xval, y = yval)) +
  geom_line() +
  scale_x_continuous(
    trans = "log10",
    breaks = c(0, 50, 100, 150, 250, 350, 450, 500, 1200)
  )
Created on 2018-04-27 by the reprex package (v0.2.0).

Resources