Place elements from vector on histogram bins (R ggplot) - r

I have a ggplot histogram, showing two histograms of a continuous variable, one for each level of a group.
Through use of ggplot_build, I now also have vectors where each element is the proportional count of one group (1) versus the other (0), per bin.
So for the following histogram built with
# Two overlaid, semi-transparent histograms of `nonfordist`, one per level of
# `presence`, with the x-axis limited to values up to 1750.
ggplot(data, aes(x = nonfordist)) +
  geom_histogram(
    aes(fill = presence),
    position = "identity",
    alpha = 0.5,
    bins = 30
  ) +
  coord_cartesian(xlim = c(NA, 1750))
I have the following list, showing sequential proportions of group1/group0 per bin
# Ratio of group-1 to group-0 counts for each of the 30 histogram bins, in bin
# order. NOTE(review): NaN/Inf presumably come from bins where group 0 has no
# observations (0/0 and n/0) - confirm against the ggplot_build() output.
list(0.398927744608261, 0.35358629130967, 0.275296034083078,
0.247361252979231, 0.260224274406332, 0.22107969151671, 0.252847380410023,
0.230055658627087, 0.212244897959184, 0.242105263157895,
0.235294117647059, 0.115384615384615, 0.2, 0.421052631578947,
0.4375, 0.230769230769231, 0.222222222222222, 0.5, 0, 0,
0, NaN, 1, 1, 0, 0, NaN, NaN, NaN, Inf)
What I want now is to plot the elements of this list on the corresponding bins, preferably above the bars showing the counts for group1.
I do not want to include the proportions for bins that fall outside of the histogram due to my xlim command.

You could use stat_bin with a text geom, using the same breaks as you do for your histogram. We don't have your actual data, so I've tried to approximate it here (see footnote for reproducible data). You haven't told us what your list of proportions is called, so I have named it props in this example.
# Overlaid histograms per `presence` level, with the proportions from `props`
# printed above the bars of the presence == 1 group.
# Both layers must use the same bin edges so the labels line up with the bars,
# so the edges are defined once and shared.
bin_breaks <- seq(-82.5, by = 165, length.out = 11)  # 11 edges -> 10 bins

ggplot(data, aes(x = nonfordist)) +
  geom_histogram(
    aes(fill = presence),
    breaks = bin_breaks,  # explicit breaks override `bins`, so `bins` is omitted
    position = "identity",
    alpha = 0.5
  ) +
  stat_bin(
    # Bin only the presence == 1 rows so the text sits on that group's bars.
    data = data[data$presence == 1, ],
    geom = "text",
    breaks = bin_breaks,
    # One label per bin: the first 10 proportions match the 10 bins.
    label = round(unlist(props)[1:10], 2),
    vjust = -0.5  # nudge the text above the top of each bar
  ) +
  coord_cartesian(xlim = c(NA, 1750))
Approximation of data
# Synthetic stand-in for the asker's data: values on a 165-unit grid (0..1650),
# repeated according to hand-picked per-value counts for each presence level.
data <- local({
  grid_values <- 165 * 0:10
  counts_absent <- c(24800, 20200, 16000, 6000, 2800, 1300, 700, 450, 100,
                     50, 30)
  counts_present <- c(9950, 7400, 4500, 600, 300, 150, 80, 50, 30, 20, 10)

  data.frame(
    # All presence == 0 rows come first, followed by all presence == 1 rows.
    nonfordist = c(
      rep(grid_values, counts_absent),
      rep(grid_values, counts_present)
    ),
    presence = factor(
      rep(c(0, 1), c(sum(counts_absent), sum(counts_present)))
    )
  )
})

Related

The ggplotly() deactivates distance between axis title and axis and hides secondary axis title

I have the dataframe below and I'm trying to increase the distance between the axis title and the y-axis. This is my approach, but it does not work when using ggplotly(). Also, the secondary axis title gets lost. How can I fix these issues?
# Sample data (dput output): 14 rows, one per 5-year step of `year2`, holding
# the PTA count plus mean, sd, se and lower/upper interval bounds
# (`_cil` / `_ciu`) of `scope_ntis`.
dt2<-structure(list(year2 = c(1950, 1955, 1960, 1965, 1970, 1975,
1980, 1985, 1990, 1995, 2000, 2005, 2010, 2015), pta_count = c(2,
4, 10, 14, 24, 18, 13, 19, 84, 100, 105, 96, 47, 15), scope_ntis_mean = c(3.5,
9.5, 5, 9.57142857142857, 4.54166666666667, 11.7222222222222,
6.23076923076923, 7.05263157894737, 17.1071428571429, 15.16,
15.2761904761905, 17.6354166666667, 22.9574468085106, 26.8666666666667
), scope_ntis_sd = c(0.707106781186548, 11.7046999107196, 6.25388767976457,
8.72409824049971, 4.56812364359683, 9.2278705436976, 5.11784209333462,
10.7779284971676, 13.2864799994027, 12.9643801053175, 12.1295056958191,
12.7964796077233, 12.4375963125981, 14.5791762782532), scope_ntis_se = c(0.822426813475736,
9.62625905026287, 3.25294959458435, 3.83516264302846, 1.53376734188638,
3.57760589505535, 2.33476117415722, 4.06710846230115, 2.38450123589789,
2.13245076374089, 1.94704374916827, 2.14823678655809, 2.98410970181292,
6.19176713030084), scope_ntis_cil = c(2.67757318652426, -0.12625905026287,
1.74705040541565, 5.73626592840011, 3.00789932478029, 8.14461632716687,
3.89600805661201, 2.98552311664622, 14.722641621245, 13.0275492362591,
13.3291467270222, 15.4871798801086, 19.9733371066977, 20.6748995363658
), scope_ntis_ciu = c(4.32242681347574, 19.1262590502629, 8.25294959458435,
13.406591214457, 6.07543400855305, 15.2998281172776, 8.56553040492645,
11.1197400412485, 19.4916440930407, 17.2924507637409, 17.2232342253587,
19.7836534532248, 25.9415565103236, 33.0584337969675)), row.names = c(NA,
-14L), class = c("tbl_df", "tbl", "data.frame"))
library(ggplot2)
library(plotly)
# Dual-axis chart: green columns show the PTA counts rescaled onto the
# NTI-scope axis; points and double-headed arrows show the mean scope and its
# interval bounds. The plot is then converted with ggplotly().
p <- ggplot(dt2, aes(x = year2)) +
  # Divide the counts by (max count / max upper bound) so the columns fit the
  # range of the primary axis.
  geom_col(
    aes(y = pta_count / (max(dt2$pta_count) / max(dt2$scope_ntis_ciu))),
    fill = "darkolivegreen",
    alpha = 0.3,
    width = 3
  ) +
  geom_point(aes(y = scope_ntis_mean)) +
  geom_segment(
    aes(x = year2, y = scope_ntis_cil, xend = year2, yend = scope_ntis_ciu),
    arrow = arrow(length = unit(0.1, "cm"), ends = "both"),
    lineend = "square",
    size = 0.3
  ) +
  scale_x_continuous(n.breaks = 14) +
  scale_y_continuous(
    # Title of the primary axis.
    name = "NTI Scope\n(scope measures the sum of all NTIs mentioned in a PTA,\ndot indicated mean scope per 5-year interval,\n arrows signal confidence intervals)",
    # Secondary axis: multiply back by the factor the columns were divided by.
    sec.axis = sec_axis(
      ~ . * max(dt2$pta_count) / max(dt2$scope_ntis_ciu),
      name = "PTA Count\n(green columns indicate number of PTAs\n signed in given 5-year intervall)"
    )
  ) +
  labs(x = "") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 8),
    # Right-hand margin intended to move the y-axis title away from the axis.
    axis.title.y = element_text(margin = margin(t = 0, r = 20, b = 0, l = 0))
  )
ggplotly(p)
ggplotly is an amazing tool, but it doesn't cover everything.
If you want a secondary axis on your plot, you have to add it. (At least as far as I know.)
I used plotly_json(ggplotly(p)) to capture content like ticklen and tickcolor so the two axes would match.
I started by adding a trace and making it transparent; then I added a new margin call and yaxis2.
# Rebuild the secondary y-axis by hand on top of ggplotly(p): add an invisible
# bar trace bound to a second y-axis ("y2"), then style that axis so it
# matches the existing ones.
ggplotly(p) %>%
  add_trace(
    inherit = FALSE,  # spelled out: `F` is an ordinary variable and can be reassigned
    x = ~year2,
    # Rescaled count multiplied back by the same factor.
    y = ~(pta_count / (max(pta_count) / max(scope_ntis_ciu))) *
      (max(dt2$pta_count) / max(dt2$scope_ntis_ciu)),
    data = dt2,
    yaxis = "y2",
    alpha = 0,  # make it invisible
    type = "bar"
  ) %>%
  layout(
    # Left and right margins of 85 for the two y-axis titles.
    margin = list(l = 85, r = 85),
    yaxis2 = list(
      ticklen = 3.7,                        # to match other axes
      tickcolor = "rgba(51, 51, 51, 1)",    # to match other axes
      tickfont = list(
        size = 11.7,                        # to match other axes
        color = "rgba(77, 77, 77, 1)"       # to match the others
      ),
      titlefont = list(size = 11.7),        # to match other axes
      side = "right",
      overlaying = "y",
      showgrid = FALSE,                     # to match ggplot version
      dtick = 25,                           # between ticks
      title = "PTA Count\n(green columns indicate number of PTAs\n signed in given 5-year interval)"
    )
  )
The ggplot is on the left; the ggplotly is on the right.
The font is bigger in the plotly version, but I didn't change what it determined on the left (you could, though). Also, the 2nd y title is flipped. I don't know if you can mirror it.

How to find an intersection of two rasters with different ranges of continuous variables in r

I'm trying to use R to find intersections between ranges of rasters that represent different climate variables. The closest answer I can find is this: overlay rasters at a given value
For example, if I'm using the raster package to analyse two bioclim variables (e.g. average temperature and precipitation), how can I find and plot places with a temperature range of 15-20 and a precipitation of 500-750?
Here is a minimal, self-contained, reproducible, example
library(raster)
# Two rasters created with raster()'s defaults, filled with synthetic values.
prec <- raster()
temp <- prec
# Temperature: the values 1..30, each repeated for an equal share of the cells.
values(temp) <- rep(1:30, each = ncell(temp) / 30)
# Precipitation: a sequence starting at 0 in steps of 10, assigned over all
# columns.
prec[, seq_len(ncol(prec))] <- seq(from = 0, to = nrow(prec) * 10, by = 10)
Solution
# Recode each layer to 1 inside the target range and NA outside it.
# The rule vector is read in triples: from, to, new value.
tr <- reclassify(
  temp,
  c(-Inf, 15, NA,
    15, 20, 1,
    20, Inf, NA)
)
pr <- reclassify(
  prec,
  c(-Inf, 500, NA,
    500, 750, 1,
    750, Inf, NA)
)
# Multiply the recoded layers cell by cell; a cell is 1 only where both layers
# are 1 and NA elsewhere.
zone <- overlay(tr, pr, fun = function(x, y) x * y)
# zone <- tr * pr # is equivalent
or using 0 instead of NA
# Same recoding, but cells outside the target range become 0 instead of NA.
tr <- reclassify(
  temp,
  c(-Inf, 15, 0,
    15, 20, 1,
    20, Inf, 0)
)
pr <- reclassify(
  prec,
  c(-Inf, 500, 0,
    500, 750, 1,
    750, Inf, 0)
)
# Cell-wise logical AND of the two 0/1 layers.
zoneTF <- tr & pr
And now you may want to do
# Stack both input layers and mask the stack with `zone`.
s <- mask(stack(prec, temp), zone)

Create a Range Bar Chart in R

I would like to create a range bar chart in R.
I found a similar question that was posted about 4 years ago, but the answer was a bit awkward and cumbersome. It did the job, but I was hoping that a cleaner approach might be possible now. Using ggplot2 would be ideal.
Floating bar chart with dates as the plotted values in r
If possible, I would also like to include some sort of point data, or some other way to indicate a summary statistic, like the mean or median for that particular range.
Compared to the attached picture, the Item IDs would take the place of the months given along the y-axis and the Start and End values would be represented horizontally along the x-axis.
Although the data doesn't have to be structured this way, here is a sample dataframe to make things easier:
# Sample data (dput output): 20 items, each with a Start/End range and a
# Median that lies inside that range. Stored as `df`, the name the plotting
# code refers to. The `spec` attribute is the column specification carried
# over from the original import and is kept unchanged.
df <- structure(list(Item = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20), Start = c(500, 550, 500,
450, 400, 400, 500, 400, 300, 300, 350, 250, 300, 200, 200, 100,
100, 50, 0, 0), End = c(550, 600, 550, 550, 700, 600, 600, 700,
850, 600, 650, 650, 750, 900, 800, 900, 1000, 950, 900, 1000),
Median = c(525, 575, 525, 500, 550, 500, 550, 550, 575, 450,
500, 450, 525, 550, 500, 500, 550, 500, 450, 500)), class = c("spec_tbl_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -20L), spec = structure(list(
cols = list(Item = structure(list(), class = c("collector_double",
"collector")), Start = structure(list(), class = c("collector_double",
"collector")), End = structure(list(), class = c("collector_double",
"collector")), Median = structure(list(), class = c("collector_double",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector")), skip = 1), class = "col_spec"))
The example picture was taken from the range bar chart entry at the "AnyChart" website.
As suggested by RAB, you can try to use geom_segment with your dataframe df:
library(ggplot2)
# Range bar chart built from segments: one thick translucent bar per item from
# Start to End, plus a short black bar around the median as a marker.
ggplot(data = df) +
  geom_segment(
    aes(x = Item, xend = Item, y = Start, yend = End),
    size = 5, colour = "red", alpha = 0.6
  ) +
  geom_segment(
    # Median - 1 to Median + 1 gives the marker a small visible extent.
    aes(x = Item, xend = Item, y = Median - 1, yend = Median + 1),
    size = 5, colour = "black"
  ) +
  coord_flip() +
  # One axis entry per item, taken from the same data frame the plot uses.
  scale_x_discrete(limits = as.character(df$Item)) +
  ylab("Value")
Alternatively, you can also use geom_crossbar:
# Same chart with geom_crossbar: the box spans Start..End and the middle line
# marks the median. Uses `df`, the data frame named in the text above.
ggplot(data = df, aes(x = as.factor(Item), y = Median)) +
  geom_crossbar(
    aes(ymin = Start, ymax = End),
    width = 0.5,
    fill = "grey"
  ) +
  coord_flip() +
  xlab("Item") +
  ylab("Value")

Plot correlation matrix with R in specific data range

I have used corrplot package to plot my data-pairs. But all the relationships in my data are positive.
# Read the data, compute its correlation matrix and plot it with a reversed
# 8-colour "RdYlBu" palette.
Mydata <- read.csv("./xxxx.csv")
M <- cor(Mydata)
corrplot(M, col = rev(brewer.pal(n = 8, name = "RdYlBu")))
Using ggcorr, I also can't find any solution to deal with the issue.
How to generate a user-defined colormap with the corresponding range from 0 to 1?
If you are trying to map the entire range of the colormap to only the positive correlations, you could use col = rep(rev(brewer.pal(n=8, name="RdYlBu")), 2). This repeats the color sequence, and then cl.lim = c(0,1) forces corrplot to use only the 2nd half of the sequence, mapped to the range 0 to 1.
# Set the graphics parameter `xpd` (controls clipping) with an explicit TRUE
# rather than the reassignable shorthand `T`.
par(xpd = TRUE)
# Repeat the palette twice and restrict the colour legend to 0..1, so that the
# second copy of the palette is spread over the positive correlations only.
corrplot(
  M,
  type = "upper",
  col = rep(rev(brewer.pal(n = 8, name = "RdYlBu")), 2),
  # NOTE(review): newer corrplot releases appear to name this argument
  # `col.lim` - confirm against the installed version.
  cl.lim = c(0, 1),
  mar = c(1, 0, 1, 0)
)
Some reproducible data
# Reproducible example data: nine columns a..i, each the curve x^p for a
# random exponent p drawn from [0, 50].
set.seed(12)
x <- (1:100) / 100
# A single runif(9, ...) call consumes the random stream in the same order as
# nine consecutive runif(1, ...) calls, so the columns are unchanged.
Mydata <- do.call(
  data.frame,
  setNames(lapply(runif(9, 0, 50), function(p) x^p), letters[1:9])
)
M <- cor(Mydata)

R: How Plot an Excel Table(Matrix) with R

I have a problem that I still haven't been able to solve. I want to plot all the values of MW1, MW2 and MW3 as a function of "DHT + Procymidone". How can I plot all these values in one graph so that I get 3 different curves (in different colors and with different labels, such as curve 1, 2, ...)? I also want the labels of the x-values ("DHT + Procymidone") to be -10, -9, ..., -4 instead of 1,00E-10, ...
DHT + Procymidone MW 1 MW 2 MW 3
1,00E-10 114,259526780335 111,022461066274 213,212408408682
1,00E-09 115,024187788314 111,083316791613 114,529425136628
1,00E-08 110,517449986348 107,867941606743 125,10230718665
1,00E-07 100,961311263444 98,4219995773135 116,045168653416
1,00E-06 71,2383604211297 73,539659636842 50,3213799775309
1,00E-05 20,3553333652104 36,1345771905088 15,42260866106
1,00E-04 4,06189509055904 18,1246447874679 10,1988107887318
I have shortened your data frame for convenience reasons, so here's an example:
# Shortened version of the question's table: the exponent of the concentration
# (-10 .. -4) and the three rounded measurement series.
mydat <- data.frame(
  DHT_Procymidone = seq(-10, -4, by = 1),
  MW1 = c(114, 115, 110, 100, 72, 20, 4),
  MW2 = c(111, 111, 107, 98, 73, 36, 18),
  MW3 = c(213, 114, 123, 116, 50, 15, 10)
)
library(tidyr)
library(ggplot2)
# Reshape to long format: columns 2-4 are stacked into `MW`, and the name of
# the column each value came from is stored in `grp`.
mydf <- gather(mydat, key = "grp", value = "MW", 2:4)
# Draw one coloured line per `grp` value.
ggplot(mydf, aes(x = DHT_Procymidone, y = MW, colour = grp)) +
  geom_line()
which gives following plot:
To use ggplot, your data needs to be in long-format. gather does this for you, appending columns MW1-MW3 into one column, while the column names are added as new column values in the grp-column. This group-column allows to identify different groups, i.e. different colored lines in the plot.
Depending on the type of DHT + Procymidone, you can, e.g., use format(..., scientific = FALSE) to display a numeric value without scientific notation; however, this will result in 0.0000000001 (and not -10).
However, if this data column is a character vector (you can coerce with as.character), this may work:
# Remove the literal prefix "1,00E" so that only the exponent is left.
a <- "1,00E-10"
sub(pattern = "1,00E", replacement = "", x = a, fixed = TRUE)
> [1] "-10"
As an alternative to @Daniel's answer, here is a version that doesn't rely on ggplot (thanks, Daniel, for providing the reproducible data).
# Base-graphics version: plot MW1 first to set up the axes, add MW2 and MW3 as
# further lines, then draw a legend.
mydat <- data.frame(
  DHT_Procymidone = c(-10, -9, -8, -7, -6, -5, -4),
  MW1 = c(114, 115, 110, 100, 72, 20, 4),
  MW2 = c(111, 111, 107, 98, 73, 36, 18),
  MW3 = c(213, 114, 123, 116, 50, 15, 10)
)
# `type` is spelled out in full instead of relying on partial matching of
# `typ`. The x-range runs to -2, past the last data point at -4, where the
# legend is anchored.
plot(
  mydat[, 2] ~ mydat[, 1],
  type = "l",
  ylim = c(0, 220),
  xlim = c(-10, -2),
  xlab = "DHT Procymidone",
  ylab = "MW"
)
lines(mydat[, 3] ~ mydat[, 1], col = "blue")
lines(mydat[, 4] ~ mydat[, 1], col = "red")
legend(
  x = -4, y = 200,
  legend = c("MW1", "MW2", "MW3"),
  lty = 1,
  bty = "n",  # no box around the legend
  col = c("black", "blue", "red")
)
To change axis labels see the text in xlab and ylab. To change axis limits see xlim and ylim.

Resources