I'm new to R and I need your help. I need to remove the point number 8, x = "180" from multiple lines geom_line, but remaining at geom_point. What should you do?
Data is in an excel spreadsheet
data<-melt(CB_fechado, id.vars = 'a');
#Ângulo de incidência de vento
#print(data)
Grafico_CB_fechado <- ggplot(data,aes(x =`Ângulo de incidência de vento [°]`, y=`value`, color=`variable`))+
geom_line() + geom_point()+
scale_x_continuous(limits = c(0,180), breaks = c(0,15,30,45,60,75,90,105,120,135,150,165,180))+
scale_y_continuous(limits = c(-1.5,1.5))+
ylab("b")+theme(legend.position = "bottom")+
theme(legend.title = element_blank())
For exemplo
This is about subsetting the data that you use for the geom_line(). Not that this would be a bit more complex if it were not the last point. Here is an example with similar dummy data since I did not want to type in from the image.
dummy data
data <- data.frame(angle = rep(c(0:6*15, 180), 4),
cat = rep(LETTERS[1:4], each = 8),
value = rep(1:4/-4, each = 8))
subset data
Drop points with angle = 180.
data_lines <- data[data$angle != 180,]
graph it
Use data_lines instead of data in geom_line().
library(ggplot2)
ggplot(data, aes(x = angle, y = value, color= cat)) +
geom_line(data = data_lines) +
geom_point()+
scale_x_continuous(limits = c(0,180), breaks = c(0,15,30,45,60,75,90,105,120,135,150,165,180))+
scale_y_continuous(limits = c(-1.5,1.5))+
ylab("b")+theme(legend.position = "bottom")+
theme(legend.title = element_blank())
The trick is to use a subset of the data in the ggplot call. In this case I use subset to remove the point with a = 180.
Note that I redefine the color argument to "red".
library(ggplot2)
ggplot(data, aes(x = a, y = b, color = "red")) +
geom_point()+
geom_line(data = subset(data, a != 180)) +
scale_x_continuous(limits = c(0,180), breaks = c(0,15,30,45,60,75,90,105,120,135,150,165,180))+
scale_y_continuous(limits = c(-1.5,1.5))+
ylab("b") +
theme(legend.position = "bottom",
legend.title = element_blank())
Data.
data <- read.table(text = "
a b
1 0 0.57395085
2 15 0.47593420
3 30 0.30175686
4 45 0.13363012
5 60 -0.02727459
6 75 -0.17971621
7 90 -0.44955122
8 180 -0.30247414
", header = TRUE)
Related
I want to plot customized Horizontal dots using my data and the code given here
data:
df <- data.frame (origin = c("A","B","C","D","E","F","G","H","I","J"),
Percentage = c(23,16,32,71,3,60,15,21,44,60),
rate = c(10,12,20,200,-25,12,13,90,-105,23),
change = c(10,12,-5,12,6,8,0.5,-2,5,-2))
.
origin Percentage rate change
1 A 23 10 10.0
2 B 16 12 12.0
3 C 32 20 -5.0
4 D 71 200 12.0
5 E 3 -25 6.0
6 F 60 12 8.0
7 G 15 13 0.5
8 H 21 90 -2.0
9 I 44 -105 5.0
10 J 60 23 -2.0
obs from 'origin' column need be put on y-axis. corresponding values in 'change' and 'rate' column must be presented/differentiated through in box instead of circles, for example values from 'change' column in lightblue and values from 'rate' column in blue. In addition I want to add second vertical axis on right and put circles on it which size will be defined based on corresponding value in 'Percentage' column.
Output of code from the link:
Expected outcome (smth. like this:
Try this.
First, reshaping so that both rate and change are in one column better supports ggplot's general preference towards "long" data.
df2 <- reshape2::melt(df, id.vars = c("origin", "Percentage"))
(That can also be done using pivot_wider.)
The plot:
ggplot(df2, aes(value, origin)) +
geom_label(aes(label = value, fill = variable, color = variable)) +
geom_point(aes(size = Percentage), x = max(df2$value) +
20, shape = 21) +
scale_x_continuous(expand = expansion(add = c(15, 25))) +
scale_fill_manual(values = c(change="lightblue", rate="blue")) +
scale_color_manual(values = c(change="black", rate="white")) +
theme_bw() +
theme(panel.border = element_blank(), panel.grid.major.x = element_blank(), panel.grid.minor.x = element_blank()) +
labs(x = NULL, y = NULL)
The legend and labels can be adjusted in the usual ggplot methods. Overlapping of labels is an issue with which you will need to contend.
Update on OP request: See comments:
gg_dot +
geom_text(aes(x = rate, y = origin,
label = paste0(round(rate, 1), "%")),
col = "black") +
geom_text(aes(x = change, y = origin,
label = paste0(round(change, 1), "%")),
col = "white") +
geom_text(aes(x = x, y = y, label = label, col = label),
data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
label = c("change", "rate")), size = 6) +
scale_color_manual(values = c("#9DBEBB", "#468189"), guide = "none") +
scale_y_discrete(expand = c(0.2, 0))
First answer:
Something like this?
library(tidyverse)
library(dslabs)
gg_dot <- df %>%
arrange(rate) %>%
mutate(origin = fct_inorder(origin)) %>%
ggplot() +
# remove axes and superfluous grids
theme_classic() +
theme(axis.title = element_blank(),
axis.ticks.y = element_blank(),
axis.line = element_blank()) +
# add a dummy point for scaling purposes
geom_point(aes(x = 12, y = origin),
size = 0, col = "white") +
# add the horizontal discipline lines
geom_hline(yintercept = 1:10, col = "grey80") +
# add a point for each male success rate
geom_point(aes(x = rate, y = origin),
size = 11, col = "#9DBEBB") +
# add a point for each female success rate
geom_point(aes(x = change, y = origin),
size = 11, col = "#468189")
gg_dot +
geom_text(aes(x = rate, y = origin,
label = paste0(round(rate, 1))),
col = "black") +
geom_text(aes(x = change, y = origin,
label = paste0(round(change, 1))),
col = "white") +
geom_text(aes(x = x, y = y, label = label, col = label),
data.frame(x = c(40 - 1.1, 180 + 0.6), y = 11,
label = c("change", "rate")), size = 6) +
scale_color_manual(values = c("#9DBEBB", "#468189"), guide = "none") +
scale_y_discrete(expand = c(0.2, 0))
I would like to plot stacked barplot with added line plot that presents the overall set sizes. I'm plotting stacked barplot in ggplot2 without problems however additional line with different y axis is the difficulty. I'm using long-formated table as input, so there is no 'overall size' column.
Code to reproduce sample table:
df <- data.frame(Sample=c("S1","S2","S3","S4","S5","S6"), A=c(30,52,50,81,23,48), B=c(12,20,15,22,30,14), C=c(rep(15,6)))
df.melt <- melt(setDT(df), id.vars = "Sample", variable.name = "Group")
Head of the table:
Sample Group value
1: S1 A 30
2: S2 A 52
3: S3 A 50
4: S4 A 81
5: S5 A 23
6: S6 A 48
Code to draw stacked barplot:
ggplot(df.melt, aes(x = Sample, y = value, fill = Group)) +
geom_col(position = position_fill(reverse = TRUE)) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
ylab("% of Total") +
scale_y_continuous(labels = percent) +
scale_x_discrete(limits = unique(df.melt$Sample))
Therefore the line would run through six stacked bars pointing the size of each set i.e. for sample S1 it would be 57 (A + B + C), and y axis labels to the right of the plot would show set size range.
You can put the data set directly in the geom. This allows you to use different data sets for each geom. Secondary axis are a bit tricky. They need to be a function of the primary axis and the data adjusted accordingly. I've used 120 as the adjustment factor.
percent <- c("0%", "25%", "50%", "75%", "100%")
set_sizes <- df %>%
rowwise %>%
mutate(Size = sum(A, B, C))
ggplot() +
geom_col(df.melt, mapping = aes(x = Sample, y = value, fill = Group),position = position_fill(reverse = TRUE)) +
geom_line(set_sizes, mapping = aes(x = Sample, y = Size / 120, group = 1)) +
scale_y_continuous(name = "% of Total", labels = percent, sec.axis = sec_axis(~ .*120, name = "Sample Size")) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
scale_x_discrete(limits = unique(df.melt$Sample))
Alternatively, you can use cowplot to arrange two independent plots on top of each other, e.g.:
suppressMessages(invisible(lapply(c("data.table", "ggplot2", "cowplot"),
require, character.only=TRUE)))
df <- data.table(Sample=c("S1","S2","S3","S4","S5","S6"),
A=c(30,52,50,81,23,48), B=c(12,20,15,22,30,14), C=c(rep(15,6)))
df.melt <- melt(df, id.vars = "Sample", variable.name = "Group")
percent <- paste0(sprintf("%s", seq(0, 100, 25)), "%")
p1 <- ggplot(df.melt, aes(x = Sample, y = value, fill = Group)) +
geom_col(position = position_fill(reverse = TRUE)) +
theme(axis.text.x=element_text(angle=45, hjust=1), legend.title=element_blank()) +
scale_fill_brewer(palette="Set3") +
ylab("% of Total") +
scale_y_continuous(labels = percent) +
scale_x_discrete(limits = unique(df.melt$Sample))
p2 <- ggplot(df.melt[, .(value=sum(value)), by="Sample"],
aes(x = Sample, y = value, group=1)) +
geom_line() +
scale_x_discrete(labels = NULL, breaks = NULL) +
labs(x = NULL)
plot_grid(p2, NULL, p1, align="hv", nrow=3, axis='tlbr', rel_heights=c(1, -.28, 4), greedy=FALSE)
Created on 2022-02-20 by the reprex package (v2.0.1)
I am using the windrose function posted here: Wind rose with ggplot (R)?
I need to have the percents on the figure showing on the individual lines (rather than on the left side), but so far I have not been able to figure out how. (see figure below for depiction of goal)
Here is the code that makes the figure:
p.windrose <- ggplot(data = data,
aes(x = dir.binned,y = (..count..)/sum(..count..),
fill = spd.binned)) +
geom_bar()+
scale_y_continuous(breaks = ybreaks.prct,labels=percent)+
ylab("")+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica")
I marked up the figure I have so far with what I am trying to do! It'd be neat if the labels either auto-picked the location with the least wind in that direction, or if it had a tag for the placement so that it could be changed.
I tried using geom_text, but I get an error saying that "aesthetics must be valid data columns".
Thanks for your help!
One of the things you could do is to make an extra data.frame that you use for the labels. Since the data isn't available from your question, I'll illustrate with mock data below:
library(ggplot2)
# Mock data
df <- data.frame(
x = 1:360,
y = runif(360, 0, 0.20)
)
labels <- data.frame(
x = 90,
y = scales::extended_breaks()(range(df$y))
)
ggplot(data = df,
aes(x = as.factor(x), y = y)) +
geom_point() +
geom_text(data = labels,
aes(label = scales::percent(y, 1))) +
scale_x_discrete(breaks = seq(0, 1, length.out = 9) * 360) +
coord_polar() +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
#teunbrand answer got me very close! I wanted to add the code I used to get everything just right in case anyone in the future has a similar problem.
# Create the labels:
x_location <- pi # x location of the labels
# Get the percentage
T_data <- data %>%
dplyr::group_by(dir.binned) %>%
dplyr::summarise(count= n()) %>%
dplyr::mutate(y = count/sum(count))
labels <- data.frame(x = x_location,
y = scales::extended_breaks()(range(T_data$y)))
# Create figure
p.windrose <- ggplot() +
geom_bar(data = data,
aes(x = dir.binned, y = (..count..)/sum(..count..),
fill = spd.binned))+
geom_text(data = labels,
aes(x=x, y=y, label = scales::percent(y, 1))) +
scale_y_continuous(breaks = waiver(),labels=NULL)+
scale_x_discrete(drop = FALSE,
labels = waiver()) +
ylab("")+xlab("")+
coord_polar(start = -((dirres/2)/360) * 2*pi) +
scale_fill_manual(name = "Wind Speed (m/s)",
values = spd.colors,
drop = FALSE)+
theme_bw(base_size = 12, base_family = "Helvetica") +
theme(axis.ticks.y = element_blank(), # Disables default y-axis
axis.text.y = element_blank())
I would like to create a dot plot with percentiles, which looks something like this-
Here is the ggplot2 code I used to create the dot plot. There are two things I'd like to change:
I can plot the percentile values on the y-axis but I want these
values on the x-axis (as shown in the graph above). Note that
the coordinates are flipped.
The axes don't display label for the
minimum value (for example the percentile axis labels start at 25
when they should start at 0 instead.)
# loading needed libraries
library(tidyverse)
library(ggstatsplot)
# creating dataframe with mean mileage per manufacturer
cty_mpg <- ggplot2::mpg %>%
dplyr::group_by(.data = ., manufacturer) %>%
dplyr::summarise(.data = ., mileage = mean(cty, na.rm = TRUE)) %>%
dplyr::rename(.data = ., make = manufacturer) %>%
dplyr::arrange(.data = ., mileage) %>%
dplyr::mutate(.data = ., make = factor(x = make, levels = .$make)) %>%
dplyr::mutate(
.data = .,
percent_rank = (trunc(rank(mileage)) / length(mileage)) * 100
) %>%
tibble::as_data_frame(x = .)
# plot
ggplot2::ggplot(data = cty_mpg, mapping = ggplot2::aes(x = make, y = mileage)) +
ggplot2::geom_point(col = "tomato2", size = 3) + # Draw points
ggplot2::geom_segment(
mapping = ggplot2::aes(
x = make,
xend = make,
y = min(mileage),
yend = max(mileage)
),
linetype = "dashed",
size = 0.1
) + # Draw dashed lines
ggplot2::scale_y_continuous(sec.axis = ggplot2::sec_axis(trans = ~(trunc(rank(.)) / length(.)) * 100, name = "percentile")) +
ggplot2::coord_flip() +
ggplot2::labs(
title = "City mileage by car manufacturer",
subtitle = "Dot plot",
caption = "source: mpg dataset in ggplot2"
) +
ggstatsplot::theme_ggstatsplot()
Created on 2018-08-17 by the reprex package (v0.2.0.9000).
I am not 100% sure to have understood what you really want, but below is my attempt to reproduce the first picture with mpg data:
require(ggplot2)
data <- aggregate(cty~manufacturer, mpg, FUN = mean)
data <- data.frame(data[order(data$cty), ], rank=1:nrow(data))
g <- ggplot(data, aes(y = rank, x = cty))
g <- g + geom_point(size = 2)
g <- g + scale_y_continuous(name = "Manufacturer", labels = data$manufacturer, breaks = data$rank,
sec.axis = dup_axis(name = element_blank(),
breaks = seq(1, nrow(data), (nrow(data)-1)/4),
labels = 25 * 0:4))
g <- g + scale_x_continuous(name = "Mileage", limits = c(10, 25),
sec.axis = dup_axis(name = element_blank()))
g <- g + theme_classic()
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted"))
print(g)
That produces:
data <- aggregate(cty~manufacturer, mpg, FUN = mean)
data <- data.frame(data[order(data$cty), ], rank=1:nrow(data))
These two lines generate the data for the graph. Basically we need the manufacturers, the mileage (average of cty by manufacturer) and the rank.
g <- g + scale_y_continuous(name = "Manufacturer", labels = data$manufacturer, breaks = data$rank,
sec.axis = dup_axis(name = element_blank(),
breaks = seq(1, nrow(data), (nrow(data)-1)/4),
labels = 25 * 0:4))
Note that here the scale is using rank and not the column manufacturer. To display the name of the manufacturers, you must use the labels property and you must force the breaks to be for every values (see property breaks).
The second y-axis is generated using the sec.axis property. This is very straight-forward using dup_axis that easily duplicate the axis. By replacing the labels and the breaks, you can display the %-value.
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted"))
The horizontal lines are just the major grid. This is much easier to manipulate than geom_segments in my opinion.
Regarding your question 1, you can flip the coordinates easily using coord_flip, with minor adjustments. Replace the following line:
g <- g + theme(panel.grid.major.y = element_line(color = "black", linetype = "dotted")
By the following two lines:
g <- g + coord_flip()
g <- g + theme(panel.grid.major.x = element_line(color = "black", linetype = "dotted"),
axis.text.x = element_text(angle = 90, hjust = 1))
Which produces:
Regarding your question 2, the problem is that the value 0% is outside the limits. You can solve this issue by changing the way you calculate the percentage (starting from zero and not from one), or you can extend the limit of your plot to include the value zero, but then no point will be associated to 0%.
Say I created a heatmap using the function geom_raster() (from ggplot2).
What's a smart way to add a row at the bottom of the table showing (in my case) the 'Mean return' for each month on the period considered ?
It would be nice there is some space left between the 1985-2013 period and the row for the average, and maybe police color and 'cases' could be customized.
The core of my code is as follows (the object molten contains the my data, originally a matrix passed through the melt() function of reshape2.
hm <- ggplot(data = molten, aes(x = factor(Var2, levels = month.abb), y=Var1, fillll=value)) + geom_raster()
hm <- hm + scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100))
hm <- hm + labs(fill='% Return')
hm <- hm + geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4)
hm <- hm + scale_y_continuous(breaks = 1985:2013)
hm <- hm + xlab(label = NULL) + ylab(label = NULL)
hm <- hm + theme_bw()
hm <- hm + theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
It's not very concise, but I think this should do what you need.
You didn't provide a data set, so I just made some up. Also, the LtoM and MtoH functions are not included in any R package I could find, so I did a quick Google search and found them here
The following code produces a plot hm2 with facets to make the "Mean Return" row at the bottom:
require(reshape2)
require(ggplot2)
# Random data
set.seed(100)
casted = data.frame(Var1 = rep(1985:2013, times=12), Var2 = rep(month.abb, each=29), return = rnorm(12*29, 0, 9))
molten = melt(casted, id.vars = c("Var1", "Var2"))
LtoM <-colorRampPalette(c('red', 'yellow' ))
Mid <- "snow3"
MtoH <-colorRampPalette(c('lightgreen', 'darkgreen'))
# Averages
monthly.avg = cbind(Var1 = rep("Mean", 12), dcast(molten, Var2 ~ ., mean))
colnames(monthly.avg)[3] = "Mean"
molten2 = merge(molten, melt(monthly.avg), all.x = TRUE, all.y = TRUE)
# New plot
hm2 =
ggplot(data = molten2, aes(x = factor(Var2, levels = month.abb), y=Var1, fill=value)) +
geom_raster() +
scale_fill_gradient2(low=LtoM(100), mid=Mid, high=MtoH(100)) +
labs(fill='% Return') +
geom_text(aes(label=paste(sprintf("%.1f %%", value))), size = 4) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90)) +
facet_grid(variable ~ ., scales = "free_y", space = "free_y") + # grid layout
theme(strip.background = element_rect(colour = 'NA', fill = 'NA'), strip.text.y = element_text(colour = 'white')) # remove facet labels
which gives the following plot:
How about this:
I created a grid to mock up your data
Main changes, are to precalculate the aggregate and "spacer" data rows, and add to molten,
then add scale_y_discrete so you can label the rows,
then make sure the format works for the grey spacer bar with no % label (comments in code)
Easier in future if you include the data (or a sample) in the question
require(ggplot2)
molten<-expand.grid(c("Jan","Feb","Mar","Apr","May","Jun","Jul","Aug","Sep","Oct","Nov","Dec"),1985:2013,0)
colnames(molten)<-c("Var2","Var1","value")
molten$value=(runif(nrow(molten))*60)-30
#create means
means<-aggregate(molten[,c(1,3)], by=list(molten$Var2),FUN=mean, na.rm=TRUE)
colnames(means)<-c("Var2","Var1","value")
means$Var1<-"MEANS"
#create spacer bar
spacer<-means
spacer$Var1<-" "
spacer$value<-NA
#append them to the data
molten<-rbind(molten,spacer,means)
hm <- ggplot(data = molten, aes(x = Var2, y=Var1, fill=value)) +
geom_raster() +
# replaced your functions for ease of use
scale_fill_gradient2(low="red", mid="yellow", high="green",na.value="grey") +
labs(fill='% Return') +
# don't format the NA vals with %, return blank
geom_text(aes(label=ifelse((is.na(value)),"",paste(sprintf("%.1f %%", value)))), size = 4) +
# make the scale discrete to add labels and enforce order (use a blank space for the spacer)
scale_y_discrete(limits = c("MEANS"," ",1985:2013)) +
xlab(label = NULL) + ylab(label = NULL) +
theme_bw() +
theme(axis.text.x = element_text(size = 10, hjust = 0, vjust = 0.4, angle=90))
hm