Removing NAs from ggplot x-axis in ggplot2 - r

I would like to get rid of the whole NA block (highlighted here).
I tried na.omit and na.rm = TRUE unsuccessfully.
Here is the code I used :
library(readxl)
data <- read_excel("Documents/TFB/xlsx_geochimie/solfatara_maj.xlsx")
View(data)
data <- gather(data,FeO:`Fe2O3(T)`,key = "Element",value="Pourcentage")
library(ggplot2)
level_order <- factor(data$Element,levels = c("SiO2","TiO2","Al2O3","Fe2O3","FeO","MgO","CaO","Na2O","K2O"))
ggplot(data=data,mapping=aes(x=level_order,y=data$Pourcentage,colour=data$Ech)+geom_point()+geom_line(group=data$Ech) +scale_y_log10()
And here is my original file
https://drive.google.com/file/d/1bZi7fPWebbpodD1LFScoEcWt5Bs-cqhb/view?usp=sharing

If I run your code and look at data that goes into ggplot:
table(data$Element)
Al2O3 CaO Fe2O3 Fe2O3(T) FeO K2O LOI LOI2 MgO MnO
12 12 12 12 12 12 12 12 12 12
Na2O P2O5 SiO2 SO4 TiO2 Total Total 2 Total N Total S
12 12 12 12 12 12 12 12 12
You have included the "Total" columns (and other columns that you do not want to plot) in the melted data frame, which I guess is not intended. When you then call factor() on the element names, every name that is not listed in the levels, such as the "Total ..." ones, becomes NA.
So we can do it from scratch:
data <- read_excel("solfatara_maj.xlsx")
The data:
structure(list(Ech = c("AGN 1A", "AGN 2A", "AGN 3B", "SOL 4B",
"SOL 8Ag", "SOL 8Ab", "SOL 16A", "SOL 16B", "SOL 16C", "SOL 22 A",
"SOL 22D", "SOL 25B"), FeO = c(0.2, 0.8, 1.7, 0.3, 1.7, NA, 0.2,
NA, 0.1, 0.7, 1.3, 2), `Total S` = c(5.96, 45.3, 0.22, 17.3,
NA, NA, NA, NA, NA, NA, 2.37, 0.36), SO4 = c(NA, 6.72, NA, 4.08,
0.06, 0.16, 42.2, 35.2, 37.8, 0.32, 6.57, NA), `Total N` = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, 15.2, NA, NA), SiO2 = c(50.2,
31.05, 56.47, 62.14, 61.36, 75.66, 8.41, 21.74, 17.44, 13.52,
19.62, 56.35), Al2O3 = c(15.53, 7.7, 17.56, 4.44, 17.75, 10.92,
31.92, 26.38, 27.66, 0.64, 3.85, 17.28), Fe2O3 = c(0.49, 0.63,
2.06, NA, 1.76, 0.11, 0.64, 0.88, 1.71, NA, 1.32, 2.67), MnO = c(0.01,
0.01, 0.13, 0.01, 0.09, 0.01, 0.01, 0.01, 0.01, 0.005, 0.04,
0.12), MgO = c(0.06, 0.07, 0.88, 0.03, 0.97, 0.05, 0.04, 0.07,
0.03, 0.02, 1.85, 1.63), CaO = c(0.2, 0.09, 3.34, 0.09, 2.58,
0.57, 0.2, 0.26, 0.15, 0.06, 35.66, 4.79), Na2O = c(0.15, 0.14,
3.23, 0.13, 3.18, 2.04, 0.68, 0.68, 0.55, 0.05, 0.45, 3.11),
K2O = c(4.39, 1.98, 8, 1.26, 8.59, 5.94, 8.2, 6.97, 8.04,
0.2, 0.89, 7.65), TiO2 = c(0.42, 0.27, 0.46, 0.79, 0.55,
0.16, 0.09, 0.22, 0.16, 0.222, 0.34, 0.53), P2O5 = c(0.11,
0.09, 0.18, 0.08, 0.07, 0.07, 0.85, 0.68, 0.62, NA, 0.14,
0.28), LOI = c(27.77, 57.06, 6.13, 29.03, 1.38, 4.92, 42.58,
37.58, 38.76, NA, 26.99, 3.92), LOI2 = c(27.79, 57.15, 6.32,
29.06, 1.57, 4.93, 42.6, 37.59, 38.77, 0.08, 27.13, 4.15),
Total = c(99.52, 99.88, 100.2, 98.25, 99.99, 100.5, 93.81,
95.57, 95.23, 15.25, 92.45, 100.3), `Total 2` = c(99.54,
99.96, 100.3, 98.28, 100.2, 100.6, 93.83, 95.58, 95.24, 15.33,
92.59, 100.6), `Fe2O3(T)` = c(0.71, 1.52, 3.95, 0.27, 3.65,
0.22, 0.87, 0.99, 1.82, 0.61, 2.76, 4.9)), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
First we set the plotting level like you did:
plotlvls = c("SiO2","TiO2","Al2O3","Fe2O3","FeO","MgO","CaO","Na2O","K2O")
Then we select only these columns plus Ech. Note that I use pivot_longer() because gather() has been superseded by it in tidyr. In the same pipeline we also convert Element to a factor with the desired level order:
library(dplyr)
library(tidyr)

# Build the long-format plotting table: keep only the sample id (Ech) and the
# oxides listed in plotlvls, reshape to one row per sample/element pair, and
# turn Element into a factor whose level order fixes the x-axis order.
plotdf <- data %>%
  # all_of() states explicitly that plotlvls is an external character vector
  # of column names, and errors if one of them is missing from the data
  select(all_of(c(plotlvls, "Ech"))) %>%
  pivot_longer(-Ech, names_to = "Element", values_to = "Pourcentage") %>%
  # The levels must come from plotlvls; the previous code referenced an
  # object named `toplot` that is never defined
  mutate(Element = factor(Element, levels = plotlvls))
Finally we plot, and there are no NAs:
# Points for every sample/element pair, with the points of each sample (Ech)
# joined by a line; the y-axis is drawn on a log10 scale.
ggplot(plotdf, aes(Element, Pourcentage, colour = Ech)) +
  geom_point() +
  geom_line(aes(group = Ech)) +
  scale_y_log10()

1.Create reproducible minimal data
data <- data.frame(Element = c("SiO2","TiO2","Al2O3","Fe2O3","FeO","MgO","CaO","Na2O","K2O",NA),
Pourcentage = 1:10,
Ech = c("AGN 1A", "SOL 16"))
2.Set factor levels for variable 'Element'
data$Element <- factor(data$Element,levels = c("SiO2","TiO2","Al2O3","Fe2O3","FeO","MgO","CaO","Na2O","K2O"))
3.Remove rows containing NA in the variable 'Element'
data <- data[!is.na(data$Element), ]
4.Plot data using ggplot2 (ggplot2 uses non-standard evaluation (NSE), which means you don't have to pass the variable names as strings or use the $ notation):
ggplot(data=data,aes(x=Element,y=Pourcentage,colour=Ech)) +
geom_point() +
geom_line(aes(group=Ech)) +
scale_y_log10()

Related

Is there a way to filter out the row that has the highest of three different columns simultaneously?

Is there a way to filter out the row that has the highest of three different columns simultaneously? I am trying to filter out the row that has the best accuracy, specificity, and sensitivity in a data frame.
Pic of first few rows of data
in the data provided the highest for all 3 should be (aka the desired output)
"thresh_info.59 0.60 83.39 83.27684 83.557047"
data<- structure(list(threshold = c(0.01, 0.02, 0.03, 0.04, 0.05, 0.06,
0.07, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15, 0.16, 0.17,
0.18, 0.19, 0.2, 0.21, 0.22, 0.23, 0.24, 0.25, 0.26, 0.27, 0.28,
0.29, 0.3, 0.31, 0.32, 0.33, 0.34, 0.35, 0.36, 0.37, 0.38, 0.39,
0.4, 0.41, 0.42, 0.43, 0.44, 0.45, 0.46, 0.47, 0.48, 0.49, 0.5,
0.51, 0.52, 0.53, 0.54, 0.55, 0.56, 0.57, 0.58, 0.59, 0.6, 0.61,
0.62, 0.63, 0.64, 0.65, 0.66, 0.67, 0.68, 0.69, 0.7, 0.71, 0.72,
0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83,
0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.9, 0.91, 0.92, 0.93, 0.94,
0.95, 0.96, 0.97, 0.98, 0.99), accuracy = c(61.72, 63.67, 65.29,
66.58, 67.86, 69.01, 69.75, 70.83, 71.51, 72.79, 73.87, 74.54,
75.02, 75.29, 75.83, 76.3, 76.7, 77.25, 77.65, 77.92, 78.33,
79, 79.14, 79.07, 79.41, 79.61, 79.68, 80.28, 80.69, 80.82, 80.89,
81.16, 81.3, 81.77, 81.9, 81.97, 82.17, 82.31, 82.44, 82.65,
82.58, 82.92, 82.98, 83.59, 83.52, 83.59, 83.25, 83.46, 83.39,
83.46, 83.66, 83.73, 83.52, 83.66, 83.93, 83.46, 83.25, 83.32,
83.32, 83.39, 83.39, 82.92, 82.24, 82.04, 81.77, 81.3, 81.5,
81.23, 81.03, 80.89, 80.49, 80.35, 80.01, 80.01, 79.2, 79.14,
78.87, 78.93, 78.6, 77.92, 77.25, 76.91, 76.37, 75.56, 74.81,
73.94, 73.13, 72.79, 71.84, 71.51, 69.89, 68.4, 66.44, 64.82,
63.13, 61.44, 59.08, 55.77, 52.8), sensitivity = c(100, 100,
100, 99.8870056497175, 99.8870056497175, 99.6610169491526, 99.6610169491526,
99.5480225988701, 99.3220338983051, 99.2090395480226, 99.0960451977401,
98.7570621468927, 98.6440677966102, 98.5310734463277, 98.1920903954802,
97.9661016949153, 97.7401129943503, 97.6271186440678, 96.8361581920904,
96.3841807909604, 96.045197740113, 95.9322033898305, 95.5932203389831,
95.2542372881356, 94.9152542372881, 94.6892655367232, 94.2372881355932,
94.1242937853107, 94.1242937853107, 93.6723163841808, 93.1073446327684,
92.8813559322034, 92.6553672316384, 92.4293785310735, 92.316384180791,
91.9774011299435, 91.864406779661, 91.5254237288136, 91.2994350282486,
90.8474576271186, 90.0564971751412, 89.9435028248588, 89.7175141242938,
89.2655367231638, 89.0395480225989, 88.8135593220339, 88.135593220339,
87.909604519774, 87.3446327683616, 87.0056497175141, 86.5536723163842,
86.3276836158192, 85.6497175141243, 85.5367231638418, 85.5367231638418,
84.7457627118644, 84.180790960452, 83.8418079096045, 83.6158192090395,
83.2768361581921, 83.0508474576271, 82.0338983050847, 80.6779661016949,
80.225988700565, 79.3220338983051, 78.1920903954802, 77.9661016949153,
76.9491525423729, 76.1581920903955, 75.4802259887006, 74.3502824858757,
73.7853107344633, 72.8813559322034, 72.4293785310735, 70.8474576271186,
70.1694915254237, 69.1525423728814, 68.8135593220339, 68.135593220339,
66.8926553672316, 65.6497175141243, 64.4067796610169, 63.3898305084746,
61.9209039548023, 60.4519774011299, 58.6440677966102, 57.2881355932203,
56.1581920903955, 54.3502824858757, 53.3333333333333, 50.6214689265537,
48.135593220339, 44.8587570621469, 41.9209039548023, 38.6440677966102,
35.7062146892655, 31.638418079096, 26.1016949152542, 21.1299435028249
), specificity = c(4.86577181208054, 9.73154362416107, 13.758389261745,
17.1140939597315, 20.3020134228188, 23.489932885906, 25.3355704697987,
28.1879194630872, 30.2013422818792, 33.5570469798658, 36.4093959731544,
38.5906040268456, 39.9328859060403, 40.7718120805369, 42.6174496644295,
44.1275167785235, 45.4697986577181, 46.9798657718121, 49.1610738255034,
50.503355704698, 52.0134228187919, 53.8590604026846, 54.6979865771812,
55.0335570469799, 56.3758389261745, 57.2147651006711, 58.0536912751678,
59.7315436241611, 60.738255033557, 61.744966442953, 62.751677852349,
63.758389261745, 64.4295302013423, 65.9395973154362, 66.4429530201342,
67.1140939597315, 67.7852348993289, 68.6241610738255, 69.2953020134228,
70.4697986577181, 71.4765100671141, 72.4832214765101, 72.9865771812081,
75.1677852348993, 75.3355704697987, 75.8389261744966, 76.006711409396,
76.8456375838926, 77.5167785234899, 78.1879194630873, 79.3624161073825,
79.8657718120805, 80.3691275167785, 80.8724832214765, 81.5436241610738,
81.5436241610738, 81.8791946308725, 82.5503355704698, 82.8859060402685,
83.5570469798658, 83.8926174496644, 84.2281879194631, 84.5637583892617,
84.7315436241611, 85.4026845637584, 85.9060402684564, 86.744966442953,
87.5838926174497, 88.255033557047, 88.9261744966443, 89.5973154362416,
90.1006711409396, 90.6040268456376, 91.2751677852349, 91.6107382550336,
92.4496644295302, 93.2885906040269, 93.9597315436242, 94.1275167785235,
94.2953020134228, 94.4630872483222, 95.4697986577181, 95.6375838926175,
95.8053691275168, 96.1409395973154, 96.6442953020134, 96.6442953020134,
97.4832214765101, 97.8187919463087, 98.489932885906, 98.489932885906,
98.489932885906, 98.489932885906, 98.8255033557047, 99.496644295302,
99.6644295302013, 99.8322147651007, 99.8322147651007, 99.8322147651007
)), row.names = c("thresh_info", "thresh_info.1", "thresh_info.2",
"thresh_info.3", "thresh_info.4", "thresh_info.5", "thresh_info.6",
"thresh_info.7", "thresh_info.8", "thresh_info.9", "thresh_info.10",
"thresh_info.11", "thresh_info.12", "thresh_info.13", "thresh_info.14",
"thresh_info.15", "thresh_info.16", "thresh_info.17", "thresh_info.18",
"thresh_info.19", "thresh_info.20", "thresh_info.21", "thresh_info.22",
"thresh_info.23", "thresh_info.24", "thresh_info.25", "thresh_info.26",
"thresh_info.27", "thresh_info.28", "thresh_info.29", "thresh_info.30",
"thresh_info.31", "thresh_info.32", "thresh_info.33", "thresh_info.34",
"thresh_info.35", "thresh_info.36", "thresh_info.37", "thresh_info.38",
"thresh_info.39", "thresh_info.40", "thresh_info.41", "thresh_info.42",
"thresh_info.43", "thresh_info.44", "thresh_info.45", "thresh_info.46",
"thresh_info.47", "thresh_info.48", "thresh_info.49", "thresh_info.50",
"thresh_info.51", "thresh_info.52", "thresh_info.53", "thresh_info.54",
"thresh_info.55", "thresh_info.56", "thresh_info.57", "thresh_info.58",
"thresh_info.59", "thresh_info.60", "thresh_info.61", "thresh_info.62",
"thresh_info.63", "thresh_info.64", "thresh_info.65", "thresh_info.66",
"thresh_info.67", "thresh_info.68", "thresh_info.69", "thresh_info.70",
"thresh_info.71", "thresh_info.72", "thresh_info.73", "thresh_info.74",
"thresh_info.75", "thresh_info.76", "thresh_info.77", "thresh_info.78",
"thresh_info.79", "thresh_info.80", "thresh_info.81", "thresh_info.82",
"thresh_info.83", "thresh_info.84", "thresh_info.85", "thresh_info.86",
"thresh_info.87", "thresh_info.88", "thresh_info.89", "thresh_info.90",
"thresh_info.91", "thresh_info.92", "thresh_info.93", "thresh_info.94",
"thresh_info.95", "thresh_info.96", "thresh_info.97", "thresh_info.98"
), class = "data.frame")
You can filter by the minimum variance across the 3 columns:
library(dplyr)
# Pick the row in which accuracy, sensitivity and specificity are closest to
# one another, measured by the variance of the three values within each row.
# Note: this selects the most balanced row, which is not necessarily the row
# where each metric is individually largest.
data |>
# Move the row names (thresh_info, thresh_info.1, ...) into a `rowname` column
tibble::rownames_to_column() |>
# Make the following mutate() operate on one row at a time
rowwise() |>
# With `rowname` in position 1, columns 3:5 are accuracy, sensitivity, specificity
mutate(var = var(c_across(3:5))) |>
# Drop the row-wise grouping so that min() below sees the whole column
ungroup() |>
# Keep the row(s) whose variance equals the overall minimum
filter(var == min(var))
# A tibble: 1 × 6
rowname threshold accuracy sensitivity specificity var
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 thresh_info.59 0.6 83.4 83.3 83.6 0.0199

Melt/ reshape dataframe to combine columns and fill rows with NAs

Apologies that there is a wealth of information on this site about melting and reshaping data, however, I cannot find the answer to my question on any of the pages I've visited. I have a data set which looks something like:
A Year | A Mean Temp | A Max Temp | A Min Temp | B Year | B Mean Temp | B Max Temp | B Min Temp |
and I want to end up with
Year | A Mean Temp | A Max Temp | A Min Temp |B Mean Temp | B Max Temp | B Min Temp
and fill columns which don't have data for that specific year with 'NA'.
The desired output would be something like:
[Table][1]
I believe the answer lies somewhere in something like:
library(dplyr)
library(tidyr)
library(stringr)
Data %>%
pivot_longer(cols = contains("Year"), names_to = c("Country", ".value"),
names_sep="_", values_drop_na = TRUE) %>%
rename_with(~ str_c('Country_', .), Rating:Year)
But as of yet no luck.
Any help would be appreciated.
Thank you
Data
structure(list(Antarctica.Year.CE = 167:172, Antarctica.Temp..C. = c(0.33,
0.31, 0.18, 0.08, -0.01, -0.11), Antarctica.Min..C. = c(-1.24,
-1.26, -1.39, -1.48, -1.57, -1.67), Antarctica.Max..C. = c(1.89,
1.87, 1.74, 1.64, 1.55, 1.45), Arctic.Year.CE = 1:6, Arctic.Temp..C. = c(-1.15,
-0.96, -0.32, 0.1, -0.18, -0.61), Arctic.Min..C. = c(-1.92, -1.76,
-1.38, -0.74, -1.08, -1.17), Arctic.Max..C. = c(-0.31, -0.11,
0.48, 0.83, 0.73, 0.16), Asia.Year.CE = 800:805, Asia.Temp..C. = c(-0.31,
-0.14, -0.36, -0.67, -0.78, -0.26), Asia.Min..C. = c(-1.4, -1.23,
-1.45, -1.76, -1.87, -1.35), Asia.Max..C. = c(0.79, 0.96, 0.74,
0.43, 0.31, 0.83), Australasia.Year.CE = 1001:1006, Australasia.Temp..C. = c(-0.24,
-0.38, -0.29, -0.33, -0.34, -0.11), Australasia.Min..C. = c(-0.62,
-0.79, -0.71, -0.73, -0.73, -0.56), Australasia.Max..C. = c(0.15,
0.03, 0.13, 0.07, 0.05, 0.34), Europe.Year.CE = 1:6, Europe.Temp..C. = c(0.09,
-0.26, -0.24, 0.22, 0.32, 0.67), Europe.Min..C. = c(-0.69, -1.14,
-1.18, -0.66, -0.48, -0.11), Europe.Max..C. = c(0.88, 0.56, 0.61,
1.07, 1.14, 1.5), North.America...Pollen.Year.CE = c(480L, 510L,
540L, 570L, 600L, 630L), North.America...Pollen.Temp..C. = c(-0.25,
-0.29, -0.33, -0.34, -0.34, -0.34), North.America...Pollen.Min..C. = c(-0.74,
-0.7, -0.66, -0.65, -0.64, -0.64), North.America...Pollen.Max..C. = c(0.24,
0.11, 0, -0.04, -0.04, -0.04), North.America...Trees.Year.CE = c(1204L,
1214L, 1224L, 1234L, 1244L, 1254L), North.America...Trees.Temp..C. = c(-0.22,
-0.45, -0.38, -0.87, -0.81, -0.06), North.America...Trees.Min..C. = c(-0.53,
-0.72, -0.67, -1.12, -1.09, -0.35), North.America...Trees.Max..C. = c(0.04,
-0.2, -0.11, -0.57, -0.52, 0.18), South.America.Year.CE = 857:862,
South.America.Temp..C. = c(-0.3, -0.21, -0.07, -0.38, -0.41,
-0.19), South.America.Min..C. = c(-1.12, -1, -0.88, -1.19,
-1.22, -0.98), South.America.Max..C. = c(0.53, 0.58, 0.74,
0.43, 0.39, 0.61)), row.names = c(NA, 6L), class = "data.frame")
[1]: https://i.stack.imgur.com/0sV7a.png
For something as small as this, I'd often just go with a more manual approach.
Given your df above, I specify the lists of countries in the columns and then grepl() on the df columns to select those columns. Then, we rename the columns, return the new dataframe. We can then apply the function to the list of countries and then rbind with do.call.
country_list = c('Antarctica', 'Arctic', 'Asia', 'Australasia', 'Europe', 'North.America...Pollen', 'North.America...Trees', 'South.America')
# Pull one region's block of columns out of the wide data frame `df` (looked
# up in the enclosing environment) and give it generic column names, so that
# the per-region pieces can be stacked with rbind().
#
# country: region name as it appears inside the column names of `df`,
#          e.g. "Asia" or "North.America...Pollen".
# Returns a data frame with the columns Year, Temp, Min_Temp, Max_Temp and
# Country, with one row per row of `df`.
# Stops with an error when `country` does not match exactly four columns.
get_cols <- function(country) {
  # fixed = TRUE matches the name literally; read as a regular expression,
  # every "." in e.g. "North.America...Pollen" would match any character
  is_region <- grepl(country, colnames(df), fixed = TRUE)
  if (sum(is_region) != 4L) {
    stop(
      "expected 4 columns matching '", country, "', found ", sum(is_region),
      call. = FALSE
    )
  }
  # drop = FALSE guarantees a data frame regardless of how many columns match
  df_new <- df[, is_region, drop = FALSE]
  # Relies on each region's columns being ordered Year, Temp, Min, Max
  colnames(df_new) <- c("Year", "Temp", "Min_Temp", "Max_Temp")
  df_new$Country <- rep(country, nrow(df_new))
  df_new
}
df_final = do.call(rbind, lapply(country_list, get_cols))
Hope that returns what you're looking for?

Not getting the correct output with this R script

I have once again thrown myself into learning R. However, I'm not sure if my data is formatted wrong or if I'm missing a key point.
The vision is to compare all samples against each other over time. However, nailing the code has proved difficult. I can't seem to get time on the x-axis and the samples to match and overlap. I have looked at what feels like 100 videos and web pages. Still can't work this in.
Script:
Data2 <- Data3 %>%
gather( key = "test", value = "value", c(-Name))
Data2 %>%
ggplot() +
geom_point(aes(x=value, y=test)) +
ylab("Film type") +
theme(legend.position="none") +
xlab("Time")
Name = c("2% No wash No cure 20gm", "3 % no wash no cure 20 gm", "4 % no wash no cure 20 gm", "2 % no cure just wash 20 gm", "3 % no cure just wash 20gm", "4 % no cure just wash 20 gm", "3 % cure + wash 20 gm", "4%cure+wash 20gm")
Data:
structure(list(Name = c(0, 15, 30, 45, 60, 75, 90, 105, 120,
135, 150, 165, 180), `2% No wash No cure 20gm` = c(0.0499999999999998,
0.0800000000000001, 0.13, 0.23, 0.56, 0.61, 0.54, 0.54, NA, NA,
NA, NA, NA), `3 % no wash no cure 20 gm` = c(0.0200000000000005,
0.04, 0.0700000000000003, 0.350000000000001, 0.42, 0.36, 0.36,
0.350000000000001, NA, NA, NA, NA, NA), `4 % no wash no cure 20 gm` = c(0.0499999999999998,
0.0899999999999999, 0.12, 0.18, 0.655, 0.649999999999999, 0.62,
0.62, NA, NA, NA, NA, NA), `2 % no cure just wash 20 gm` = c(0.04,
0.0699999999999994, 0.0899999999999999, 0.13, 0.44, 0.64, 0.62,
0.739999999999999, NA, NA, NA, NA, NA), `3 % no cure just wash 20gm` = c(0.04,
0.0999999999999996, 0.0800000000000001, 0.0999999999999996, 0.23,
0.6, 0.919999999999999, 1.42, 1.51, 1.64, NA, NA, NA), `4 % no cure just wash 20 gm` = c(0.0499999999999998,
0.0899999999999999, 0.0999999999999996, 0.12, 0.13, 0.13, 0.2,
0.37, 0.62, 0.86, 1.05, 1.23, 0.899999999999999), `3 % cure + wash 20 gm` = c(0.11,
0.16, 0.17, 0.18, 0.19, 0.2, 0.37, 0.819999999999999, 1.34, 1.62,
1.62, 2.02, 1.53), `4%cure+wash 20gm` = c(0.0600000000000005,
0.11, 0.14, 0.16, 0.17, 0.19, 0.26, 0.680000000000001, 0.87,
1.02, 1.12, 1.29, 1.12)), row.names = c(NA, -13L), class = c("tbl_df",
"tbl", "data.frame"))
I'm not sure about the meaning of your features, but did you think about something like this?
Data2 %>%
ggplot(aes(x = Name, y = value)) +
geom_point(aes(col = test), alpha = 0.5, position = "jitter")

ggplot counting observations in each quadrant

I have some data as below:
# A tibble: 158 x 2
X Y
<dbl> <dbl>
1 -0.71 -2.39
2 0.92 0.573
3 -2.52 -1.61
4 3.88 5.43
5 0.15 0.106
6 3.49 6.66
7 -0.54 0.613
8 1.4 4.21
9 1.16 0.107
10 -3.37 -3.62
# ... with 148 more rows
I plot the observations and draw a line horizontally and vertically at 0 using:
df %>%
ggplot(aes(x = X,
y = Y)) +
geom_point() +
#geom_smooth(method = "lm") +
geom_hline(aes(yintercept = 0)) +
geom_vline(aes(xintercept = 0))
What I would like to do now is count the number of points which are in each quadrant of the plot and just display the number on each of the quadrant.
Data:
df <- structure(list(X = c(-0.71, 0.92, -2.52, 3.88, 0.15, 3.49, -0.54,
1.4, 1.16, -3.37, -0.55, -0.74, 2.13, 1.33, 3.27, 1.74, 0.65,
1.23, -1.75, 0.9, 3.86, 3.69, -1.74, -3.43, 0.67, 3.83, 2.32,
-5.46, -0.55, -6.39, -2.23, -1.3, 4.72, 2.42, -7.9, -1.54, 0.99,
-9.97, -18.41, -7.73, 1.5, -7.5, -9.88, 8.82, 10.48, 6.7, -0.23,
8.15, 3.02, 4.54, -2.76, 5.77, 3.03, -3.63, 3.71, 6.27, 1.92,
-7.86, -5.5, -4.44, 9.47, 3.89, 0.81, 6.83, 1.98, 4.01, 0.43,
2.79, -1.48, -1.87, -5.93, -8.58, 11.56, -0.46, 0.33, 5.27, 4.32,
2.4, -0.64, -6.7, 3.74, 1.01, 2.76, 2.8, -1.63, 0.65, 1.3, 5.33,
0.96, 3.71, 1.27, 2.53, -1.52, 5.69, -2.53, 3.82, 4.09, 2.79,
2.64, -3.42, 4.72, 0.62, 0.25, 1.98, 2.82, -2.06, 4.06, -2.45,
2.03, 2.22, -0.2, -3.47, 6.15, -1.2, 1.11, 1, -1.71, 1.05, -5.93,
-3.35, 7.53, 0.45, -2.45, -5.73, 0.26, 7, 1.12, 1.39, -0.11,
0.43, 0.34, -2.05, 4.54, 1.76, 2.15, 3.26, 0.2, 0.84, 0.93, 0.98,
1.97, 0.07, 2.48, 1.98, 2.88, 1.18, 5.23, -3.95, -2.17, 0.35,
2.51, 0.39, 3.11, 3.09, 0.06, -7.81, 1.62, -9.53), Y = c(-2.38916419707325,
0.572675136581781, -1.61130358515631, 5.42706994951004, 0.105533424368025,
6.65697289481407, 0.613486039256266, 4.21013704773222, 0.106990463992386,
-3.62352710962904, -0.203607589793183, -4.24563967581072, 2.97070300267885,
2.92544516479698, 5.02538739147422, 2.25461465260415, 1.66492554339803,
3.5690423154001, 0.108411247307002, 0.961008630173696, 3.79172784045593,
1.94108347244724, -2.12992072359958, -5.87473482253699, -1.45100684091412,
1.47842234462587, 1.43196010231586, -7.74290369146724, -2.79056547363334,
-5.03532133668577, -1.99400739381075, -2.92320856826413, 3.93394610595585,
3.29451174347621, -10.0410470556235, 3.34517672842812, 2.41625183369762,
-10.3476519710384, -21.791966984666, -11.1142687331988, 3.32761656369176,
-3.96223311815655, -11.093184503697, 11.6694167237026, 22.2461574652919,
9.28255170483023, 4.63817899423635, 11.8553670456421, 8.27889381692159,
8.19911670446593, -6.470817611772, 3.09218109975165, 7.5825172514382,
0.0284717847140023, 4.90864483240255, 10.0311544305095, 8.55401150272708,
-8.84107625063785, -8.04105369987643, -6.65872061590883, 10.8577722872979,
4.03706922467202, 3.04148092466194, 8.90634921641063, 1.56555573277521,
4.42535372370123, 0.841035482771217, 1.75578768128183, -2.67241757153407,
-2.25418139889371, -8.7723458397205, -11.2420616969584, 11.4836809985778,
-1.8649021388476, 0.832085873992507, 11.6062841497052, 2.59039949751966,
2.28509371230735, -1.97715071813135, -7.3280081242774, 3.97121830333205,
-0.569284938256821, 2.31082313266322, 3.02490478503254, -1.38512132143018,
-0.866847983058995, 2.97552563660034, 5.95976111047322, -0.102502393594657,
4.58003409048615, 0.842834319309465, 3.06786040532266, 0.250639945095402,
6.78696057469418, -1.62606880448011, 5.46367912370997, 2.53357559730344,
4.73895950607308, 2.50934817572881, -0.312149263565189, 4.82621271905962,
-0.79009628184665, -3.12115495501355, -0.461711220579862, 4.27359516836912,
-4.60871127364226, 3.84488020178729, -5.26245849925393, 3.54222359765326,
1.04191534953213, 1.4982293818719, -3.56618092951384, 4.95478586278666,
-0.270584959088251, -0.900452947549406, 0.901254072925249, -0.254483190258712,
-2.63217404877559, -4.71624328721887, -7.1747474980974, 4.86036342835152,
3.24549729559669, -4.19219918146311, -10.128570960197, 0.803895306904637,
9.33865112323734, 2.85517888612945, 0.316844258915139, -0.151669189522978,
1.00839469793829, 1.57398998124214, -5.0607247073979, 8.91704977465508,
2.59984205825244, 1.31737969318745, 2.70804837397023, 1.80193676584248,
1.48362026996833, -2.11380109244311, 3.54300752215851, 1.6501194298151,
-1.01504840432201, 6.74326962933175, 0.1866931051541, 2.9825290286452,
1.42593783576641, 2.71110274944611, -4.09572797775837, 1.50144422897237,
-0.552818435076999, 5.23843746771127, 1.33321908169899, 1.28745947800351,
2.60490918566195, -1.54038908822145, -9.6363012621261, -0.190177144865133,
-13.0653210889016)), row.names = c(NA, -158L), class = c("tbl_df",
"tbl", "data.frame"))
library(dplyr)
# Tally the points per quadrant. A point counts as "right"/"top" only when its
# coordinate is strictly positive, so points lying exactly on an axis are
# counted with the negative side.
quad_count <- df %>%
  count(right = X > 0, top = Y > 0) %>%
  # Label position for each quadrant: +1 on the positive side, -1 otherwise
  mutate(
    X = if_else(right, 1, -1),
    Y = if_else(top, 1, -1)
  )

# Scatter plot with both axes drawn through the origin. The text layer takes
# its data from quad_count but still uses the X and Y columns for placement.
ggplot(df, aes(x = X, y = Y)) +
  geom_point() +
  geom_hline(aes(yintercept = 0)) +
  geom_vline(aes(xintercept = 0)) +
  geom_text(data = quad_count, aes(label = n), size = 10)
df %>%
ggplot(aes(x = X,
y = Y)) +
geom_point() +
#geom_smooth(method = "lm") +
geom_hline(aes(yintercept = 0)) +
geom_vline(aes(xintercept = 0)) +
geom_text(data = df %>%
mutate(X = X >= 0, Y = Y >= 0) %>%
count(X, Y) %>%
mutate(X = if_else(X, 10, -10),
Y = if_else(Y, 10, -10)),
mapping = aes(X, Y, label = n), size = 10)

R - d3heatmap - implement breaks

I am trying to plot a heatmap using the d3heatmap package.
Unfortunately, I have not been successful yet in implementing certain breaks using the option breaks=... as in heatmap or heatmap.2.
This yields just funny results, I am not even sure whether I am doing something wrong or whether the function just ignores breaks.
For example, I tried:
breaks = c(seq(-10, -2), seq(-2, -1.65), seq(-1.65, 1.65), seq(1.65, 2), seq(2, 10)
and
breaks = c(-10, -2, -1.65, 1.65, 2, 10)
with
colors = c("red", "yellow", "green", "yellow", "red")
but nothing seems to work properly.
Any suggestions?
Here's the dput of my data:
> dput(mat)
structure(c(-0.04, NA, 0.59, NA, 0.675, 0.96, 1.09, 0.445, NA,
0.545, NA, NA, 0.09, -1.11, NA, 0.99, 0.13, 0.215, 1.425, 0,
NA, 0.69, 0.805, NA, 0.69, 1.22, NA, 0.3, NA, 0.025, NA, 0.075,
0.36, -0.94, NA, -0.31, 0.26, 1.02, -1.19, NA, NA, -0.77, NA,
-1.48, 1.05, 0.48, NA, NA, NA, 1.49, -1.285, NA, 0.76, 1.14,
-0.62, NA, NA, NA, 0.95, NA, NA, -0.12, 0.49, NA, 2.31, NA, -0.33,
0.85, NA, -1.7, -1.63, NA, -1.12, 0.135, -0.18, NA, -0.245, NA,
-0.2, -0.2, 0.23, -0.11, NA, 0.3, -0.81, 0.04, 0.18, -0.7, 0.53,
0.44, -0.49, 0.28, 0.26, 0.06, 0.265, 0.21, 0.06, -0.175, 0.365,
0.255, 1.25, -0.35, 0.16, 0.125, 0.825, 0.08, 0.02, -0.02, 0.99,
0.79, -0.23, 0.06, NA, 0.36, -0.64, -0.195, 1.19, -0.29, 0.915,
NA, NA, NA, NA, 0.2, 0.1, NA, 0.04, 0.33, NA, 1.46, 2.36, NA,
-0.92, 1.295, NA, NA, 0.8, NA, 1.09, 1.45, 5.42, NA, NA, NA,
1.69, 3.43, NA, 0.55), .Dim = c(37L, 4L), .Dimnames = list(c("AT",
"BE", "BG", "CEE", "CH", "CN", "CZ", "DE", "DK", "EA", "EE",
"EMU", "ES", "EU", "FI", "FR", "GB", "GR", "HR", "HU", "IE",
"IT", "JP", "LU", "NL", "PL", "PT", "RO", "RS", "RU", "SE", "SI",
"SK", "TR", "UA", "UK", "US"), c("Credit Risk", "Funding and liquidity Risk",
"Macro Risk", "Market Risk")))
And the code I am running:
d3heatmap(abs(mat),
dendrogram = "none",
breaks = c(0,1.65,2,10),
col = c("green", "yellow", "red"),
na.rm = TRUE)
The same function using heatmap.2 works perfectly, though.
The function d3heatmap simply does not have a 'breaks' argument. If it gets passed in as an argument it is silently ignored. (See ?d3heatmap.)
The heatmap.2 function in the gplots package on the other hand does have a "breaks" argument. That explains the difference in behaviour.
Luckily, it is still possible to get the desired behaviour by passing an appropriate 'colors' function to d3heatmap. It works as follows.
First the example data:
mat <- structure(c(-0.04, NA, 0.59, NA, 0.675, 0.96, 1.09, 0.445, NA,
0.545, NA, NA, 0.09, -1.11, NA, 0.99, 0.13, 0.215, 1.425, 0,
NA, 0.69, 0.805, NA, 0.69, 1.22, NA, 0.3, NA, 0.025, NA, 0.075,
0.36, -0.94, NA, -0.31, 0.26, 1.02, -1.19, NA, NA, -0.77, NA,
-1.48, 1.05, 0.48, NA, NA, NA, 1.49, -1.285, NA, 0.76, 1.14,
-0.62, NA, NA, NA, 0.95, NA, NA, -0.12, 0.49, NA, 2.31, NA, -0.33,
0.85, NA, -1.7, -1.63, NA, -1.12, 0.135, -0.18, NA, -0.245, NA,
-0.2, -0.2, 0.23, -0.11, NA, 0.3, -0.81, 0.04, 0.18, -0.7, 0.53,
0.44, -0.49, 0.28, 0.26, 0.06, 0.265, 0.21, 0.06, -0.175, 0.365,
0.255, 1.25, -0.35, 0.16, 0.125, 0.825, 0.08, 0.02, -0.02, 0.99,
0.79, -0.23, 0.06, NA, 0.36, -0.64, -0.195, 1.19, -0.29, 0.915,
NA, NA, NA, NA, 0.2, 0.1, NA, 0.04, 0.33, NA, 1.46, 2.36, NA,
-0.92, 1.295, NA, NA, 0.8, NA, 1.09, 1.45, 5.42, NA, NA, NA,
1.69, 3.43, NA, 0.55), .Dim = c(37L, 4L),
.Dimnames = list(c("AT", "BE", "BG", "CEE", "CH", "CN", "CZ", "DE", "DK", "EA", "EE", "EMU", "ES", "EU", "FI", "FR", "GB", "GR", "HR", "HU", "IE", "IT", "JP", "LU", "NL", "PL", "PT", "RO", "RS", "RU", "SE", "SI", "SK", "TR", "UA", "UK", "US"), c("Credit Risk", "Funding and liquidity Risk", "Macro Risk", "Market Risk")))
Suppose we want the following three color bins: blue for values < 0, green for values >= 0 but < 2, and red for values >= 2. We then define the corresponding ordered list of colors.
palette <- c("blue", "green", "red")
We also define the boundary values of the color bins. These values must include the domain boundaries.
mi <- min(mat, na.rm = TRUE)
ma <- max(mat, na.rm = TRUE)
breaks <- c(mi, 0, 2, ma)
We can now define a color interpolation function which maps a value in [0,1] onto a color, respecting our color bins. The 'scales' package comes to help here.
# Install 'scales' only when it is not available yet. The installer function
# is install.packages(); install.package() does not exist.
if (!requireNamespace("scales", quietly = TRUE)) {
  install.packages("scales")
}
library(scales)
# rescale() maps the data-domain breaks onto [0, 1]; col_bin() then returns a
# function that assigns one palette colour to each of the resulting bins.
colorFunc <- col_bin(palette, bins = rescale(breaks))
The breaks originally defined in the domain of our data needed to be rescaled to [0,1]. The 'rescale' function in the 'scales' package handled that.
Small detail: the low boundary of a bin is included in the bin, but the high boundary is excluded. So the value 0 will be green, anything between 0 and 2 will be green too, but 2 will be red.
We can now plot the heat map.
d3heatmap(mat, dendrogram = "none", colors = colorFunc, na.rm = TRUE)
The result looks like this:

Resources