ggparcoord y axis scale to (1, 10, 100, 1000) - r

Hi I have a problem changing the scale using ggparcoord
please tell me how to change y axis value to (1, 10, 100, 1000)
p<- ggparcoord(dt1, columns = c(108,111), groupColumn = 155, order = "anyClass", scale = "globalminmax", showPoints = TRUE,
alphaLines = 0.3)

Obviously, we don't have your data, so let's make a dummy example that is similar to yours:
library(GGally)
set.seed(8)
dt1 <- data.frame(BNP = rexp(500, 0.001),
BNP_Max3 = rexp(500, 0.001),
minusDeltaBNP = rep(c("A", "B"), each = 250))
p <- ggparcoord(dt1, columns = 1:2, groupColumn = 3,
scale = "globalminmax", showPoints = TRUE, alphaLines = 0.3) +
theme_minimal(base_size = 16)
To change the y axis to a log scale, we can simply do:
p + scale_y_log10()
Created on 2022-09-04 with reprex v2.0.2

Related

plot contours in ggplot

How do I plot contours ?
I have x, y, z. I wish to plot contour lines using V values.
# data
tbl <- tibble(x = runif(n = 1000, min = 0, max = 1),
y = runif(n = 1000, min = 0, max = 1),
V = x^2.5 + y^2)
# plots
ggplot(data = tbl,
aes(x = x,
y = y
z = V)) +
geom_contour_filled(alpha = 0.8, breaks = seq(0, 2, 0.2)) +
theme_bw()
Here is a way, solving the problem with a shameless copy&paste of the franke example in the documentation of geom_contour_filled.
The trick is to use package interp to prepare the data for plotting. In the code below the only change in the instruction to create grid is the data set being binned.
suppressPackageStartupMessages({
library(tidyverse)
library(interp)
})
set.seed(2022)
tbl <- tibble(x = runif(n = 1000, min = 0, max = 1),
y = runif(n = 1000, min = 0, max = 1),
V = x^2.5 + y^2)
grid <- with(tbl, interp::interp(x, y, V))
griddf <- subset(data.frame(x = rep(grid$x, nrow(grid$z)),
y = rep(grid$y, each = ncol(grid$z)),
z = as.numeric(grid$z)),
!is.na(z))
# plots
ggplot(data = griddf,
aes(x = x,
y = y,
z = z)) +
stat_contour_filled(alpha = 0.8, breaks = seq(0, 2, 0.2)) +
theme_bw()
Created on 2022-05-18 by the reprex package (v2.0.1)
Edit
To better control the bins, use either argument bins or argument binwidth instead of breaks. The following code has a bin width of 0.1, doubling the number of bins and now uses geom_contour_filled, like in the question.
ggplot(data = griddf,
aes(x = x,
y = y,
z = z)) +
geom_contour_filled(alpha = 0.8, binwidth = 0.1, show.legend = FALSE) +
theme_bw()
Created on 2022-05-18 by the reprex package (v2.0.1)
geom_contour_filled require binned data.
So your data should be
# data
tbl <- tibble(x = rep(seq(0,1,length.out=100),100),
y = rep(seq(0,1,length.out=100),each=100),
V = x^2.5 + y^2)

Combined scatter and line ggplot with proper legend

I try to find a clear approach for combined scatter and line plots with ggplot2 that have an appropriate legend. The following works, in principle, but with warnings:
library("ggplot2")
library("dplyr")
## 2 data sets, one for the lines, one for the points
tbl <- tibble(
f = rep(letters[1:2], each = 10),
x = rep(1:10, 2),
y = c(1e-4 * exp(1:10), log(1:10))
)
obs <- tibble(
f = rep("c", 5),
x = seq(2, 10, 2),
y = log(seq(2, 10, 2)) + rnorm(5, sd = 0.1)
)
rbind(tbl, obs) %>%
ggplot(aes(x, y, color = f, linetype = f)) +
geom_line(show.legend = TRUE) +
geom_point(show.legend = TRUE, aes(shape = f), size = 3) +
scale_linetype_manual(values=c("solid", "solid", "blank")) +
scale_shape_manual(values=c(NA, NA, 16))
but I would like to get rid of warnings and to write something like:
scale_shape_manual(values=c("none", "none", "circle"))
Is there already a "none" or "empty" shape code? Several past answers have been suggested on SO, but I wonder if there is a recent canonical way.

Remove points with 0 density (no data) in stat_density_2d(geom = 'point')

I have two dataframes, one which I want to make a stat_density_2d plot using a 'raster' geom and one in which I want to use a 'point' geom. For the point geom I want to remove any point where there is no data though, as measured by a point size of 0.
The following is my code:
library(tidyverse)
set.seed(1)
#tibble for raster density plot
df <- tibble(x = runif(1000000, min = -7, max = 5),
y = runif(1000000, min = 0, max = 1000))
#tibble for point density plot
df2 <- tibble(x = runif(20000, min = -2, max = 2),
y = runif(20000, min = 0, max = 500))
#create the density plot
p1 <- ggplot(NULL, aes(x=x, y=y) ) +
stat_density_2d(data = df, aes(fill = stat(density)), geom = "raster", contour = FALSE) +
scale_fill_gradient(low="transparent", high="red") +
stat_density_2d(data = df2, geom = "point", aes(size = ..density..), n = 40, contour = FALSE) +
theme_bw() +
theme(text=element_text(size=18)) +
ylim(0, 1000) + xlim(-7, 5)
p1
which returns:
But where the points are smallest (outside the bounds specified in the df2 tibble) I don't want any density points to be shown. Is there anyway to remove these?
Here's a hack, though I don't know how robust it is to differences in data.
BLUF: add scale_radius(range=c(-1,6)).
I reduced your data a lot so that it doesn't take 5 minutes to render.
set.seed(1)
df <- tibble(x = runif(1000, min = -7, max = 5),
y = runif(1000, min = 0, max = 1000))
df2 <- tibble(x = runif(20, min = -2, max = 2),
y = runif(20, min = 0, max = 500))
Four plots:
Your code (my data), no other change;
scale_radius();
scale_radius(range = c(-0.332088004, 6)); and
scale_radius(range = c(-1, 6)).
This is surely a hack, and I don't know how to find a more precise way of filtering out specific levels.
The modified code:
p1 <- ggplot(NULL, aes(x=x, y=y) ) +
stat_density_2d(data = df, aes(fill = stat(density)), geom = "raster", contour = FALSE) +
scale_fill_gradient(low="transparent", high="red") +
stat_density_2d(data = df2, geom = "point", aes(size = ..density..), n = 40, contour = FALSE) +
theme_bw() +
# scale_radius() +
# scale_radius(range = c(-0.332088004, 6)) +
scale_radius(range = c(-1, 6)) +
theme(text=element_text(size=18)) +
ylim(0, 1000) + xlim(-7, 5)

Use position_jitterdodge without mapping aesthetic

I would like to produce a plot like the one obtained with the code below. However, I would like to dodge by "replicate", but without actually mapping an aesthetic (because I would like to assign fill and colors to other aesthetics).
dataset <- data_frame(sample = rep(c("Sample1","Sample2","Sample3", "Sample4"), each = 25),
replicate = sample(x = c("A", "B"), size = 100, replace = TRUE),
value = rnorm(n = 100, mean = 0, sd = 10))
ggplot(data = dataset, aes(x = sample, y = value, fill = replicate)) +
geom_point(position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
show.legend = F)
I had hope using group = replicate instead of fill = replicate but this doesn't work. I can imagine a workaround using for example alpha = replicate as an aesthetic and setting scale_alpha_manual(values = c(1, 1)) in case of duplicates, but I don't find this solution ideal and would like to keep all aesthetics available (other than x and y available for further use)
ggplot(data = dataset, aes(x = sample, y = value, alpha = replicate)) +
geom_point(position = position_jitterdodge(jitter.width = 0.15, dodge.width = 0.75),
show.legend = F) +
scale_alpha_manual(values = c(1, 1))
The plot that I expect to get is:
I hope my question makes sense, any hint ?
Best,
Yvan
You could unite the sample and replicate columns and use that as the x-axis, injecting a 'Placeholder' value for spacing between samples.
library(tidyverse)
set.seed(20181101)
dataset <- data_frame(sample = rep(c("Sample1","Sample2","Sample3", "Sample4"), each = 25),
replicate = sample(x = c("A", "B"), size = 100, replace = TRUE),
value = rnorm(n = 100, mean = 0, sd = 10))
dataset %>%
bind_rows({
#create a dummy placeholder to allow for spacing between samples
data.frame(sample = unique(dataset$sample),
replicate = rep("Placeholder", length(unique(dataset$sample))),
stringsAsFactors = FALSE)
}) %>%
#unite the sample & replicate columns, and use it as the new x-axis
unite(sample_replicate, sample, replicate, remove = FALSE) %>%
ggplot(aes(x = sample_replicate, y = value, color = replicate)) +
geom_jitter() +
#only have x-axis labels for each sample
scale_x_discrete(breaks = paste0("Sample", 1:length(unique(dataset$sample)), "_B"),
labels = paste0("Sample ", 1:length(unique(dataset$sample)))) +
labs(x = "Sample") +
#don't show the Placeholder value in the legend
scale_color_discrete(breaks = c("A", "B"))

Adding line plot with boxplot

Sample data
set.seed(123)
par(mfrow = c(1,2))
dat <- data.frame(years = rep(1980:2014, each = 8), x = sample(1000:2000, 35*8 ,replace = T))
boxplot(dat$x ~ dat$year, ylim = c(500, 4000))
I have another dataset that has a single value for some selected years
ref.dat <- data.frame(years = c(1991:1995, 2001:2008), x = sample(1000:2000, 13, replace = T))
plot(ref.dat$years, ref.dat$x, type = "b")
How can I add the line plot on top of the boxplot
With ggplot2 you could do this:
ggplot(dat, aes(x = years, y = x)) +
geom_boxplot(data = dat, aes(group = years)) +
geom_line(data = ref.dat, colour = "red") +
geom_point(data = ref.dat, colour = "red", shape = 1) +
coord_cartesian(ylim = c(500, 4000)) +
theme_bw()
The trick here is to figure out the x-axis on the boxplot. You have 35 boxes and they are plotted at the x-coordinates 1, 2, 3, ..., 35 - i.e. year - 1979. With that, you can add the line with lines as usual.
set.seed(123)
dat <- data.frame(years = rep(1980:2014, each = 8),
x = sample(1000:2000, 35*8 ,replace = T))
boxplot(dat$x ~ dat$year, ylim = c(500, 2500))
ref.dat <- data.frame(years = c(1991:1995, 2001:2008),
x = sample(1000:2000, 13, replace = T))
lines(ref.dat$years-1979, ref.dat$x, type = "b", pch=20)
The points were a bit hard to see, so I changed the point style 20. Also, I used a smaller range on the y-axis to leave less blank space.

Resources