Sorry if this is a duplicate question, but I couldn't find an answer to it.
I have a data frame which is currently:
Depth Year Counts Depth.1 Year.1 Counts.1 Depth.2 Year.2 Counts.2
etc
I want to create a data frame that is instead just Depth Year and Count in 3 separate columns.
I thought that the following would work, but it doesn't. Does anyone know where I am going wrong?
library(data.table)
# NOTE(review): as written, only "Depth" is bound to id.vars; "Year" and
# "Counts" are passed as two further positional arguments rather than as part
# of id.vars.
A <- melt(df, id.vars="Depth","Year","Counts")
structure(list(Depth = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5), Year = c(2001.539328,
2001.57432, 2001.609313, 2001.644306, 2001.679298, 2001.714291
), Counts = c(2.87e-06, 3.56e-06, 4.38e-06, 5.36e-06, 6.52e-06,
7.94e-06), Depth.1 = c(1.5, 1.5, 1.5, 1.5, 1.5, 1.5), Year.1 = c(1984.293532,
1984.364339, 1984.435146, 1984.505954, 1984.576761, 1984.647568
), Counts.1 = c(1.46e-06, 1.81e-06, 2.22e-06, 2.73e-06, 3.34e-06,
4.07e-06), Depth.2 = c(2.5, 2.5, 2.5, 2.5, 2.5, 2.5), Year.2 = c(1981.470822,
1981.544366, 1981.61791, 1981.691454, 1981.764998, 1981.838542
), Counts.2 = c(1.59e-06, 1.98e-06, 2.45e-06, 3.01e-06, 3.7e-06,
4.53e-06), Depth.3 = c(3.5, 3.5, 3.5, 3.5, 3.5, 3.5), Year.3 = c(1977.871611,
1977.945227, 1978.018842, 1978.092458, 1978.166074, 1978.239689
), Counts.3 = c(1.52e-06, 1.9e-06, 2.34e-06, 2.89e-06, 3.56e-06,
4.35e-06), Depth.4 = c(4.5, 4.5, 4.5, 4.5, 4.5, 4.5), Year.4 = c(1973.91751,
1973.996106, 1974.074703, 1974.1533, 1974.231896, 1974.310493
), Counts.4 = c(2.68e-06, 3.35e-06, 4.17e-06, 5.2e-06, 6.44e-06,
7.93e-06), Depth.5 = c(5.5, 5.5, 5.5, 5.5, 5.5, 5.5), Year.5 = c(1968.953544,
1969.036898, 1969.120252, 1969.203607, 1969.286961, 1969.370316
), Counts.5 = c(1.17e-06, 1.47e-06, 1.84e-06, 2.28e-06, 2.81e-06,
3.47e-06), Depth.6 = c(6.5, 6.5, 6.5, 6.5, 6.5, 6.5), Year.6 = c(1962.314375,
1962.408079, 1962.501784, 1962.595488, 1962.689193, 1962.782897
), Counts.6 = c(1.5e-06, 1.95e-06, 2.5e-06, 3.18e-06, 4.02e-06,
5.09e-06), Depth.7 = c(7.5, 7.5, 7.5, 7.5, 7.5, 7.5), Year.7 = c(1958.713624,
1958.805065, 1958.896505, 1958.987945, 1959.079385, 1959.170826
), Counts.7 = c(1.12e-06, 1.43e-06, 1.8e-06, 2.25e-06, 2.8e-06,
3.49e-06), Depth.8 = c(8.5, 8.5, 8.5, 8.5, 8.5, 8.5), Year.8 = c(1951.664415,
1951.763029, 1951.861644, 1951.960258, 1952.058872, 1952.157487
), Counts.8 = c(1.03e-06, 1.31e-06, 1.66e-06, 2.09e-06, 2.61e-06,
3.23e-06), Depth.9 = c(9.5, 9.5, 9.5, 9.5, 9.5, 9.5), Year.9 = c(1945.090332,
1945.195825, 1945.301319, 1945.406813, 1945.512306, 1945.6178
), Counts.9 = c(9.86e-07, 1.26e-06, 1.6e-06, 2.01e-06, 2.51e-06,
3.13e-06), Depth.10 = c(10.5, 10.5, 10.5, 10.5, 10.5, 10.5),
Year.10 = c(1935.501068, 1935.619252, 1935.737435, 1935.855618,
1935.973801, 1936.091984), Counts.10 = c(1.65e-06, 2.1e-06,
2.65e-06, 3.32e-06, 4.15e-06, 5.17e-06), Depth.11 = c(11.5,
11.5, 11.5, 11.5, 11.5, 11.5), Year.11 = c(1925.293378, 1925.407495,
1925.521611, 1925.635728, 1925.749844, 1925.863961), Counts.11 = c(9.04e-07,
1.13e-06, 1.4e-06, 1.72e-06, 2.1e-06, 2.58e-06), Depth.12 = c(12.5,
12.5, 12.5, 12.5, 12.5, 12.5), Year.12 = c(1915.470281, 1915.590233,
1915.710185, 1915.830138, 1915.95009, 1916.070042), Counts.12 = c(1.18e-06,
1.45e-06, 1.78e-06, 2.17e-06, 2.65e-06, 3.23e-06), Depth.13 = c(13.5,
13.5, 13.5, 13.5, 13.5, 13.5), Year.13 = c(1907.029774, 1907.144991,
1907.260209, 1907.375426, 1907.490644, 1907.605861), Counts.13 = c(1.33e-06,
1.68e-06, 2.11e-06, 2.62e-06, 3.24e-06, 4.02e-06), Depth.14 = c(14.5,
14.5, 14.5, 14.5, 14.5, 14.5), Year.14 = c(1896.291234, 1896.410534,
1896.529835, 1896.649135, 1896.768436, 1896.887736), Counts.14 = c(8.64e-07,
1.1e-06, 1.39e-06, 1.75e-06, 2.2e-06, 2.74e-06), Depth.15 = c(15.5,
15.5, 15.5, 15.5, 15.5, 15.5), Year.15 = c(1889.864627, 1889.969479,
1890.074332, 1890.179184, 1890.284037, 1890.388889), Counts.15 = c(1.05e-06,
1.36e-06, 1.75e-06, 2.22e-06, 2.81e-06, 3.55e-06), Depth.16 = c(16.5,
16.5, 16.5, 16.5, 16.5, 16.5), Year.16 = c(1886.325239, 1886.425704,
1886.526169, 1886.626634, 1886.727099, 1886.827564), Counts.16 = c(1.27e-06,
1.68e-06, 2.21e-06, 2.89e-06, 3.73e-06, 4.77e-06), Depth.17 = c(17.5,
17.5, 17.5, 17.5, 17.5, 17.5), Year.17 = c(1882.108412, 1882.108412,
1882.108412, 1882.108412, 1882.108412, 1882.108412), Counts.17 = c(1.38e-06,
1.86e-06, 2.47e-06, 3.24e-06, 4.22e-06, 5.45e-06), Depth.18 = c(18.5,
18.5, 18.5, 18.5, 18.5, 18.5), Year.18 = c(1864.178957, 1864.300768,
1864.422579, 1864.544389, 1864.6662, 1864.788011), Counts.18 = c(1.1e-06,
1.52e-06, 2.08e-06, 2.81e-06, 3.73e-06, 4.9e-06), Depth.19 = c(19.5,
19.5, 19.5, 19.5, 19.5, 19.5), Year.19 = c(1844.224683, 1844.373854,
1844.523025, 1844.672196, 1844.821367, 1844.970538), Counts.19 = c(1.05e-06,
1.52e-06, 2.18e-06, 3.07e-06, 4.25e-06, 5.78e-06), Depth.20 = c(20.5,
20.5, 20.5, 20.5, 20.5, 20.5), Year.20 = c(1826.063901, 1826.239023,
1826.414145, 1826.589268, 1826.76439, 1826.939512), Counts.20 = c(7.96e-07,
1.1e-06, 1.5e-06, 2.02e-06, 2.68e-06, 3.53e-06), Depth.21 = c(21.5,
21.5, 21.5, 21.5, 21.5, 21.5), Year.21 = c(1794.869238, 1795.097941,
1795.326644, 1795.555348, 1795.784051, 1796.012754), Counts.21 = c(5.72e-07,
7.74e-07, 1.03e-06, 1.37e-06, 1.81e-06, 2.36e-06), Depth.22 = c(22.5,
22.5, 22.5, 22.5, 22.5, 22.5), Year.22 = c(1776.381101, 1776.645157,
1776.909213, 1777.173268, 1777.437324, 1777.70138), Counts.22 = c(4.57e-07,
6.08e-07, 7.98e-07, 1.04e-06, 1.34e-06, 1.72e-06), Depth.23 = c(23.5,
23.5, 23.5, 23.5, 23.5, 23.5), Year.23 = c(1766.787508, 1767.066229,
1767.344949, 1767.62367, 1767.90239, 1768.181111), Counts.23 = c(4.28e-07,
5.65e-07, 7.39e-07, 9.57e-07, 1.23e-06, 1.57e-06), Depth.24 = c(24.5,
24.5, 24.5, 24.5, 24.5, 24.5), Year.24 = c(1724.904818, 1725.249971,
1725.595125, 1725.940278, 1726.285432, 1726.630586), Counts.24 = c(3.85e-07,
5.28e-07, 7.13e-07, 9.51e-07, 1.25e-06, 1.63e-06), Depth.25 = c(25.5,
25.5, 25.5, 25.5, 25.5, 25.5), Year.25 = c(1666.304304, 1666.747587,
1667.19087, 1667.634153, 1668.077436, 1668.520719), Counts.25 = c(3.14e-07,
4.35e-07, 5.93e-07, 7.99e-07, 1.07e-06, 1.42e-06), Depth.26 = c(26.5,
26.5, 26.5, 26.5, 26.5, 26.5), Year.26 = c(1646.315863, 1646.752634,
1647.189405, 1647.626176, 1648.062946, 1648.499717), Counts.26 = c(2.97e-07,
4.01e-07, 5.43e-07, 7.26e-07, 9.58e-07, 1.25e-06), Depth.27 = c(27.5,
27.5, 27.5, 27.5, 27.5, 27.5), Year.27 = c(1631.425358, 1631.862129,
1632.298899, 1632.73567, 1633.172441, 1633.609212), Counts.27 = c(3.01e-07,
4.02e-07, 5.39e-07, 7.13e-07, 9.33e-07, 1.21e-06), Depth.28 = c(28.5,
28.5, 28.5, 28.5, 28.5, 28.5), Year.28 = c(1623.821174, 1624.214018,
1624.606862, 1624.999706, 1625.39255, 1625.785393), Counts.28 = c(3.08e-07,
4.09e-07, 5.38e-07, 7.01e-07, 9.11e-07, 1.17e-06), Depth.29 = c(29.5,
29.5, 29.5, 29.5, 29.5, 29.5), Year.29 = c(1612.475829, 1612.864893,
1613.253957, 1613.643021, 1614.032085, 1614.421149), Counts.29 = c(3.4e-07,
4.66e-07, 6.3e-07, 8.4e-07, 1.11e-06, 1.44e-06), Depth.30 = c(30.5,
30.5, 30.5, 30.5, 30.5, 30.5), Year.30 = c(1600.26273, 1600.609876,
1600.957023, 1601.304169, 1601.651316, 1601.998462), Counts.30 = c(4.18e-07,
5.85e-07, 8.07e-07, 1.1e-06, 1.49e-06, 1.99e-06), Depth.31 = c(31.5,
31.5, 31.5, 31.5, 31.5, 31.5), Year.31 = c(1549.137398, 1549.553381,
1549.969364, 1550.385346, 1550.801329, 1551.217311), Counts.31 = c(3.27e-07,
4.48e-07, 6.06e-07, 8.1e-07, 1.07e-06, 1.41e-06), Depth.32 = c(32.5,
32.5, 32.5, 32.5, 32.5, 32.5), Year.32 = c(1379.9456, 1380.656236,
1381.366871, 1382.077507, 1382.788142, 1383.498778), Counts.32 = c(3.71e-07,
5.02e-07, 6.71e-07, 8.88e-07, 1.16e-06, 1.5e-06), Depth.33 = c(33.5,
33.5, 33.5, 33.5, 33.5, 33.5), Year.33 = c(1176.400716, 1177.495517,
1178.590318, 1179.685119, 1180.77992, 1181.874721), Counts.33 = c(1.21e-07,
1.66e-07, 2.24e-07, 2.99e-07, 3.94e-07, 5.13e-07), Depth.34 = c(34.5,
34.5, 34.5, 34.5, 34.5, 34.5), Year.34 = c(984.8733315, 986.2808571,
987.6883826, 989.0959082, 990.5034338, 991.9109593), Counts.34 = c(9.87e-08,
1.37e-07, 1.86e-07, 2.51e-07, 3.34e-07, 4.43e-07), Depth.35 = c(35.5,
35.5, 35.5, 35.5, 35.5, 35.5), Year.35 = c(931.6673674, 933.0776679,
934.4879684, 935.8982688, 937.3085693, 938.7188698), Counts.35 = c(8.67e-08,
1.15e-07, 1.53e-07, 2.01e-07, 2.61e-07, 3.34e-07), Depth.36 = c(36.5,
36.5, 36.5, 36.5, 36.5, 36.5), Year.36 = c(894.7139257, 896.1463371,
897.5787485, 899.01116, 900.4435714, 901.8759828), Counts.36 = c(7.95e-08,
1.04e-07, 1.34e-07, 1.71e-07, 2.19e-07, 2.78e-07), Depth.37 = c(37.5,
37.5, 37.5, 37.5, 37.5, 37.5), Year.37 = c(867.2347826, 868.6591119,
870.0834411, 871.5077704, 872.9320996, 874.3564289), Counts.37 = c(7.45e-08,
9.58e-08, 1.22e-07, 1.54e-07, 1.95e-07, 2.44e-07), Depth.38 = c(38.5,
38.5, 38.5, 38.5, 38.5, 38.5), Year.38 = c(822.8193907, 824.2840456,
825.7487006, 827.2133555, 828.6780105, 830.1426654), Counts.38 = c(7.25e-08,
9.32e-08, 1.19e-07, 1.5e-07, 1.89e-07, 2.37e-07), Depth.39 = c(39.5,
39.5, 39.5, 39.5, 39.5, 39.5), Year.39 = c(780.7261404, 782.1666312,
783.6071219, 785.0476127, 786.4881034, 787.9285942), Counts.39 = c(7.24e-08,
9.24e-08, 1.17e-07, 1.48e-07, 1.86e-07, 2.31e-07), Depth.40 = c(40.5,
40.5, 40.5, 40.5, 40.5, 40.5), Year.40 = c(743.4256597, 744.8411919,
746.2567241, 747.6722563, 749.0877885, 750.5033208), Counts.40 = c(1.09e-07,
1.41e-07, 1.8e-07, 2.29e-07, 2.89e-07, 3.64e-07), Depth.41 = c(41.5,
41.5, 41.5, 41.5, 41.5, 41.5), Year.41 = c(673.4489487, 674.8279538,
676.2069588, 677.5859639, 678.964969, 680.3439741), Counts.41 = c(1.06e-07,
1.36e-07, 1.73e-07, 2.19e-07, 2.77e-07, 3.48e-07), Depth.42 = c(42.5,
42.5, 42.5, 42.5, 42.5, 42.5), Year.42 = c(624.182451, 625.532222,
626.881993, 628.231764, 629.581535, 630.931306), Counts.42 = c(8e-08,
1.03e-07, 1.32e-07, 1.67e-07, 2.1e-07, 2.63e-07), Depth.43 = c(43.5,
43.5, 43.5, 43.5, 43.5, 43.5), Year.43 = c(566.5185196, 567.8721804,
569.2258412, 570.579502, 571.9331628, 573.2868236), Counts.43 = c(8.43e-08,
1.1e-07, 1.42e-07, 1.83e-07, 2.34e-07, 2.97e-07), Depth.44 = c(44.5,
44.5, 44.5, 44.5, 44.5, 44.5), Year.44 = c(518.6933347, 520.014935,
521.3365354, 522.6581358, 523.9797362, 525.3013366), Counts.44 = c(9.23e-08,
1.24e-07, 1.65e-07, 2.16e-07, 2.8e-07, 3.6e-07), Depth.45 = c(45.5,
45.5, 45.5, 45.5, 45.5, 45.5), Year.45 = c(443.0346844, 444.2413453,
445.4480063, 446.6546672, 447.8613282, 449.0679891), Counts.45 = c(1.1e-07,
1.51e-07, 2.04e-07, 2.72e-07, 3.59e-07, 4.67e-07), Depth.46 = c(46.5,
46.5, 46.5, 46.5, 46.5, 46.5), Year.46 = c(368.5762277, 369.8150017,
371.0537756, 372.2925496, 373.5313236, 374.7700976), Counts.46 = c(1.54e-07,
2.24e-07, 3.19e-07, 4.49e-07, 6.21e-07, 8.47e-07)), class = c("data.table",
"data.frame"), row.names = c(NA, -6L))
You can try the base R code (don't need data.table) below, using stack + unstack
# Stack every column of df into long (values, ind) form, strip everything from
# the first "." onwards in the column labels, then unstack so that each base
# name becomes one column.
res <- unstack(
  within(stack(df), ind <- sub("\\..*", "", ind))
)
which gives
> head(res, 20)
Counts Depth Year
1 2.87e-06 0.5 2001.539
2 3.56e-06 0.5 2001.574
3 4.38e-06 0.5 2001.609
4 5.36e-06 0.5 2001.644
5 6.52e-06 0.5 2001.679
6 7.94e-06 0.5 2001.714
7 1.46e-06 1.5 1984.294
8 1.81e-06 1.5 1984.364
9 2.22e-06 1.5 1984.435
10 2.73e-06 1.5 1984.506
11 3.34e-06 1.5 1984.577
12 4.07e-06 1.5 1984.648
13 1.59e-06 2.5 1981.471
14 1.98e-06 2.5 1981.544
15 2.45e-06 2.5 1981.618
16 3.01e-06 2.5 1981.691
17 3.70e-06 2.5 1981.765
18 4.53e-06 2.5 1981.839
19 1.52e-06 3.5 1977.872
20 1.90e-06 3.5 1977.945
This could be done by:
# Convert df to a data.table by reference, then melt the three column families
# (matched by regular expression) into one value column per family.
setDT(df)
melt(df, measure.vars = patterns("Counts", "Depth", "Year"))
If you want to be more sophisticated, just do:
# The same vector serves both as the patterns that pick each column family and
# as the names of the resulting value columns.
nms <- c("Count", "Depth", "Year")
melt(
  df,
  measure.vars = patterns(nms),
  value.name = nms
)
variable Count Depth Year
1: 1 2.87e-06 0.5 2001.539
2: 1 3.56e-06 0.5 2001.574
3: 1 4.38e-06 0.5 2001.609
4: 1 5.36e-06 0.5 2001.644
5: 1 6.52e-06 0.5 2001.679
---
206: 35 NA 34.5 NA
207: 35 NA 34.0 NA
208: 35 NA 34.5 NA
209: 35 NA 34.5 NA
210: 35 NA 34.0 NA
Using pivot_longer from tidyr :
# Reshape every column to long form. The leading run of word characters in each
# column name (the part before ".<n>") is captured and, through the special
# ".value" name, becomes the name of the output column.
tidyr::pivot_longer(
  df,
  cols = everything(),
  names_to = ".value",
  names_pattern = "(\\w+)"
)
# A tibble: 210 x 3
# Depth Year Counts
# <dbl> <dbl> <dbl>
# 1 0.5 2002. 0.00000287
# 2 1.5 1984. 0.00000146
# 3 2.5 1981. 0.00000159
# 4 3.5 1978. 0.00000152
# 5 4.5 1974. 0.00000268
# 6 5.5 1969. 0.00000117
# 7 6.5 1962. 0.0000015
# 8 7.5 1959. 0.00000112
# 9 8.5 1952. 0.00000103
#10 9.5 1945. 0.000000986
# … with 200 more rows
I would like to get rid of the whole NA block (highlighted here).
I tried na.omit and na.rm = TRUE, without success.
Here is the code I used :
# Asker's original script, kept as posted: read the spreadsheet, reshape it to
# long form and plot each element's percentage per sample (Ech).
library(readxl)
data <- read_excel("Documents/TFB/xlsx_geochimie/solfatara_maj.xlsx")
View(data)
# Gathers every column from FeO through `Fe2O3(T)` into Element/Pourcentage
# pairs. NOTE(review): gather() is not provided by readxl or ggplot2 —
# presumably tidyr is attached elsewhere.
data <- gather(data,FeO:`Fe2O3(T)`,key = "Element",value="Pourcentage")
library(ggplot2)
# Only these nine elements are declared as levels; any other value of
# data$Element becomes NA in level_order.
level_order <- factor(data$Element,levels = c("SiO2","TiO2","Al2O3","Fe2O3","FeO","MgO","CaO","Na2O","K2O"))
# NOTE(review): the ggplot( call below is never closed — aes(...) is closed,
# but a ")" is missing before "+geom_point()".
ggplot(data=data,mapping=aes(x=level_order,y=data$Pourcentage,colour=data$Ech)+geom_point()+geom_line(group=data$Ech) +scale_y_log10()
And here is my original file
https://drive.google.com/file/d/1bZi7fPWebbpodD1LFScoEcWt5Bs-cqhb/view?usp=sharing
If I run your code and look at data that goes into ggplot:
table(data$Element)
Al2O3 CaO Fe2O3 Fe2O3(T) FeO K2O LOI LOI2 MgO MnO
12 12 12 12 12 12 12 12 12 12
Na2O P2O5 SiO2 SO4 TiO2 Total Total 2 Total N Total S
12 12 12 12 12 12 12 12 12
You have included the "Total ..." columns (among others) in the melted data frame, which I guess is not intended. Because these values are not among the levels you pass to factor(), they become NA.
So we can do it from scratch:
# Re-read the spreadsheet from scratch; the path is relative, so the file is
# looked up in the current working directory.
data <- read_excel("solfatara_maj.xlsx")
The data:
structure(list(Ech = c("AGN 1A", "AGN 2A", "AGN 3B", "SOL 4B",
"SOL 8Ag", "SOL 8Ab", "SOL 16A", "SOL 16B", "SOL 16C", "SOL 22 A",
"SOL 22D", "SOL 25B"), FeO = c(0.2, 0.8, 1.7, 0.3, 1.7, NA, 0.2,
NA, 0.1, 0.7, 1.3, 2), `Total S` = c(5.96, 45.3, 0.22, 17.3,
NA, NA, NA, NA, NA, NA, 2.37, 0.36), SO4 = c(NA, 6.72, NA, 4.08,
0.06, 0.16, 42.2, 35.2, 37.8, 0.32, 6.57, NA), `Total N` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, 15.2, NA, NA), SiO2 = c(50.2,
31.05, 56.47, 62.14, 61.36, 75.66, 8.41, 21.74, 17.44, 13.52,
19.62, 56.35), Al2O3 = c(15.53, 7.7, 17.56, 4.44, 17.75, 10.92,
31.92, 26.38, 27.66, 0.64, 3.85, 17.28), Fe2O3 = c(0.49, 0.63,
2.06, NA, 1.76, 0.11, 0.64, 0.88, 1.71, NA, 1.32, 2.67), MnO = c(0.01,
0.01, 0.13, 0.01, 0.09, 0.01, 0.01, 0.01, 0.01, 0.005, 0.04,
0.12), MgO = c(0.06, 0.07, 0.88, 0.03, 0.97, 0.05, 0.04, 0.07,
0.03, 0.02, 1.85, 1.63), CaO = c(0.2, 0.09, 3.34, 0.09, 2.58,
0.57, 0.2, 0.26, 0.15, 0.06, 35.66, 4.79), Na2O = c(0.15, 0.14,
3.23, 0.13, 3.18, 2.04, 0.68, 0.68, 0.55, 0.05, 0.45, 3.11),
K2O = c(4.39, 1.98, 8, 1.26, 8.59, 5.94, 8.2, 6.97, 8.04,
0.2, 0.89, 7.65), TiO2 = c(0.42, 0.27, 0.46, 0.79, 0.55,
0.16, 0.09, 0.22, 0.16, 0.222, 0.34, 0.53), P2O5 = c(0.11,
0.09, 0.18, 0.08, 0.07, 0.07, 0.85, 0.68, 0.62, NA, 0.14,
0.28), LOI = c(27.77, 57.06, 6.13, 29.03, 1.38, 4.92, 42.58,
37.58, 38.76, NA, 26.99, 3.92), LOI2 = c(27.79, 57.15, 6.32,
29.06, 1.57, 4.93, 42.6, 37.59, 38.77, 0.08, 27.13, 4.15),
Total = c(99.52, 99.88, 100.2, 98.25, 99.99, 100.5, 93.81,
95.57, 95.23, 15.25, 92.45, 100.3), `Total 2` = c(99.54,
99.96, 100.3, 98.28, 100.2, 100.6, 93.83, 95.58, 95.24, 15.33,
92.59, 100.6), `Fe2O3(T)` = c(0.71, 1.52, 3.95, 0.27, 3.65,
0.22, 0.87, 0.99, 1.82, 0.61, 2.76, 4.9)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
First we set the plotting level like you did:
# Elements to plot, in the order they should appear on the x axis.
plotlvls <- c(
  "SiO2", "TiO2", "Al2O3", "Fe2O3", "FeO",
  "MgO", "CaO", "Na2O", "K2O"
)
Then we select only these columns, plus Ech. Note that I use pivot_longer() because gather() has been superseded by it. We also convert Element to a factor in the same pipeline:
# Keep only the element columns listed in plotlvls plus the sample id (Ech),
# reshape them to long form, and turn Element into a factor ordered as in
# plotlvls. Because only these columns are selected, every Element value is one
# of the factor levels, so the factor conversion introduces no NA.
plotdf <- data %>%
  # all_of() marks plotlvls as an external character vector of column names.
  select(all_of(plotlvls), Ech) %>%
  pivot_longer(-Ech, names_to = "Element", values_to = "Pourcentage") %>%
  # The levels come from plotlvls, the vector defined for the plotting order.
  mutate(Element = factor(Element, levels = plotlvls))
Finally we plot, and there are no NAs:
# One point per element and sample, joined by a line per sample (Ech), on a
# log10 y axis.
ggplot(plotdf, aes(x = Element, y = Pourcentage, colour = Ech)) +
  geom_point() +
  geom_line(aes(group = Ech)) +
  scale_y_log10()
1.Create reproducible minimal data
# Minimal example: nine known elements plus one NA, percentages 1 to 10, and
# two sample names alternating over the ten rows.
data <- data.frame(
  Element = c(
    "SiO2", "TiO2", "Al2O3", "Fe2O3", "FeO",
    "MgO", "CaO", "Na2O", "K2O", NA
  ),
  Pourcentage = seq_len(10),
  Ech = rep(c("AGN 1A", "SOL 16"), times = 5)
)
2.Set factor levels for variable 'Element'
# Turn Element into a factor whose levels give the plotting order.
data$Element <- factor(
  data$Element,
  levels = c(
    "SiO2", "TiO2", "Al2O3", "Fe2O3", "FeO",
    "MgO", "CaO", "Na2O", "K2O"
  )
)
3.Remove rows containing NA in the variable 'Element'
# Keep only the rows whose Element is not NA.
data <- data[!is.na(data[["Element"]]), , drop = FALSE]
4.Plot data using ggplot2 (ggplot2 syntax uses NSE (non-standard evaluation), which means you don't have to pass the variable names as strings or use the $ notation):
# Points for every element/sample pair, joined by one line per sample (Ech),
# on a log10 y axis.
ggplot(
  data,
  mapping = aes(x = Element, y = Pourcentage, colour = Ech)
) +
  geom_point() +
  geom_line(mapping = aes(group = Ech)) +
  scale_y_log10()
I'm creating a custom chart to visualize a variable's distribution using geom_density. I added 3 vertical lines for a custom value, the 5th percentile and the 95th percentile.
How do I add labels for those lines?
I tried using geom_text, but I don't know how to set the x and y values.
library(ggplot2)
# Asker's original plot, kept as posted: density of the body-fat column with
# three vertical reference lines and one text label.
ggplot(dataset, aes(x = dataset$`Estimated percent body fat`)) +
geom_density() +
# Red line at the value stored in row 12 of the column (the "custom" value).
geom_vline(aes(xintercept = dataset$`Estimated percent body fat`[12]),
color = "red", size = 1) +
# Grey line at the 5th percentile, ignoring NA.
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.05, na.rm = TRUE)),
color = "grey", size = 0.5) +
# Grey line at the 95th percentile, ignoring NA.
geom_vline(aes(xintercept = quantile(dataset$`Estimated percent body fat`,
0.95, na.rm = TRUE)),
color="grey", size=0.5) +
# Label for the custom line, placed at the line's x position and y = 0.
geom_text(aes(x = dataset$`Estimated percent body fat`[12],
label = "Custom", y = 0),
colour = "red", angle = 0)
I'd like to obtain the following:
for the custom value, I'd like to add the label at the top of the chart, just to the right of the line
for the percentiles label, I'd like to add them in the middle of the chart; at the left of the line for the 5th percentile and right of the line for 95th percentile
Here is what I was able to obtain https://i.imgur.com/thSQwyg.png
And these are the first 50 lines of my dataset:
structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")
First I recommend clean column names.
# Work on a copy and normalise its column names: lower-case, with every
# whitespace character replaced by a dot.
dat <- dataset
names(dat) <- gsub("\\s", ".", tolower(names(dat)))
With base R plots you could do the following. The trick is that you can store the quantiles and the custom position and use them as coordinates later, which gives you dynamic positioning. I'm not sure if/how this is possible with ggplot.
# Positions of the three reference lines, stored so that the labels below can
# be placed relative to them.
c1 <- dat$estimated.percent.body.fat[12]
q1 <- quantile(dat$estimated.percent.body.fat, .05, na.rm = TRUE)
q2 <- quantile(dat$estimated.percent.body.fat, .95, na.rm = TRUE)

# Density curve with the custom value in red and both percentiles in grey.
plot(
  density(dat$estimated.percent.body.fat, na.rm = TRUE),
  ylim = c(0, .05),
  main = "Density curve"
)
abline(v = c1, col = "red")
abline(v = q1, col = "grey")
abline(v = q2, col = "grey")

# Labels offset horizontally from their lines; the plotmath arrows point back
# at the line each label belongs to.
text(c1 + 4, .05, c(expression("" %<-% "custom")), cex = .8)
text(q1 - 5.5, .025, c(expression("5% percentile" %->% "")), cex = .8)
text(q2 + 5.5, .025, c(expression("" %<-% "95% percentile")), cex = .8)
Note: In case you don't like the arrows, just use e.g. "5% percentile" instead of c(expression("5% percentile" %->% "")).
Or in ggplot you could use annotate.
library(ggplot2)
# Density of the estimated body-fat percentage with three reference lines: the
# value in row 12 (red) and the 5th / 95th percentiles (grey), each labelled
# with annotate().
# The column is referred to by its bare name inside aes() instead of
# dataset$..., and the constant line positions are passed as plain xintercept
# values rather than being mapped through aes().
body_fat <- dataset$`Estimated percent body fat`

ggplot(dataset, aes(x = `Estimated percent body fat`)) +
  geom_density() +
  geom_vline(xintercept = body_fat[12], color = "red", size = 1) +
  geom_vline(
    xintercept = quantile(body_fat, 0.05, na.rm = TRUE),
    color = "grey", size = 0.5
  ) +
  geom_vline(
    xintercept = quantile(body_fat, 0.95, na.rm = TRUE),
    color = "grey", size = 0.5
  ) +
  # Label coordinates are fixed values chosen by hand for this data set.
  annotate("text", x = 16, y = .05, label = "custom") +
  annotate("text", x = 9.5, y = .025, label = "5% percentile") +
  annotate("text", x = 38, y = .025, label = "95% percentile")
Note that in either solution the result (i.e. the exact label positions) depends on your export size. To learn how to control this, take a look at e.g. "How to save a plot as image on the disk?".
Data
dataset <- structure(list(`Respondent sequence number` = c(21029L, 21034L,
21043L, 21056L, 21067L, 21085L, 21087L, 21105L, 21107L, 21109L,
21110L, 21125L, 21129L, 21138L, 21141L, 21154L, 21193L, 21195L,
21206L, 21215L, 21219L, 21221L, 21232L, 21239L, 21242L, 21247L,
21256L, 21258L, 21287L, 21310L, 21325L, 21367L, 21380L, 21385L,
21413L, 21418L, 21420L, 21423L, 21427L, 21432L, 21437L, 21441L,
21444L, 21453L, 21466L, 21467L, 21477L, 21491L, 21494L, 21495L
), `Estimated percent body fat` = c(NA, 7.2, NA, NA, 24.1, 25.1,
30.2, 23.6, 24.3, 31.4, NA, 14.1, 20.5, NA, 23.1, 30.6, 21, 20.9,
NA, 24, 26.7, 16.6, NA, 26.9, 16.9, 21.3, 15.9, 27.4, 13.9, NA,
20, NA, 12.8, NA, 33.8, 18.1, NA, NA, 28.4, 10.9, 38.1, 33, 39.3,
15.9, 32.7, NA, 20.4, 16.8, NA, 29)), row.names = c(NA, 50L), class =
"data.frame")