adjust x scale values - r

So I'm making a histogram of the months, but the x-axis goes from 0.5 to 12.5. Does anyone know how I can fix this to 1 - 12 (as they represent the months)?
x<-c(1,2,3,4,5,6,6,7,8,9,10,11,12)
qplot(x,geom='histogram',fill=I("red"), col=I("darkred"),xlab="Maand",ylab="Hoeveelheid",bins=12)

You can convert x with as.factor().
library(ggplot2)
x <- c(1,2,3,4,5,6,6,7,8,9,10,11,12)
x <- as.data.frame(x)
ggplot(x, aes(as.factor(x))) +
geom_bar(fill = "red", color = "darkred") +
xlab("Maand") +
ylab("Hoeveelheid")

You can try
library(tidyverse)
tibble(x = c(1,2,3,4,5,6,6,7,8,9,10,11,12)) %>%
ggplot(aes(x)) +
geom_histogram(binwidth = 1, color="white") +
scale_x_continuous(breaks = 1:12)
In base R you can try
hist(c(1,2,3,4,5,6,6,7,8,9,10,11,12))

Related

Equivalent of loglog(MATLAB) to R

I am trying to plot a log-log graph but use the non-log labels. So instead of showing 1 (i.e. log10(10)) on the x axis, it should show 10. I found the following solution, but that's too much work in my opinion. Is there anything equivalent to the MATLAB function "loglog"?
# Plot both variables on a log10 scale and suppress the default x axis so it
# can be relabelled in the original (untransformed) units.
plot(log10(x), log10(y), xaxt = "n")
# Tick positions must use the same base (log10) as the plotted data; mixing in
# the natural log() would put the "100" label at the wrong position.
ticks <- c(0.5, 10, 45, 100)
axis(1, at = log10(ticks), labels = ticks)
Could be done easily with ggplot:
library(ggplot2)
library(scales)
library(tibble)
tibble(
x = seq(1, 10, .01),
y = sin(log(x)*pi) + 2
) %>%
ggplot(aes(x,y)) +
geom_line() +
scale_x_continuous(trans = log_trans()) +
scale_y_continuous(trans = log_trans())
# Or for log10 it can be done without trans:
tibble(
x = seq(1, 10, .01),
y = sin(log(x)*pi) + 2
) %>%
ggplot(aes(x,y)) +
geom_line() +
scale_x_log10() +
scale_y_log10()
Created on 2022-03-09 by the reprex package (v2.0.1)

How do I add data labels to a ggplot histogram with a log(x) axis?

I am wondering how to add data labels to a ggplot showing the true value of the data points when the x-axis is in log scale.
I have this data:
date <- c("4/3/2021", "4/7/2021","4/10/2021","4/12/2021","4/13/2021","4/13/2021")
amount <- c(105.00, 96.32, 89.00, 80.84, 121.82, 159.38)
address <- c("A","B","C","D","E","F")
df <- data.frame(date, amount, address)
And I plot it in ggplot2:
plot <- ggplot(df, aes(x = log(amount))) +
geom_histogram(binwidth = 1)
plot + theme_minimal() + geom_text(label = amount)
... but I get the error
"Error: geom_text requires the following missing aesthetics: y"
I have 2 questions as a result:
Why am I getting this error with geom_histogram? Shouldn't it assume to use count as the y value?
Will this successfully show the true values of the data points from the 'amount' column despite the plot's log scale x-axis?
Perhaps like this?
# Draw the histogram bars and a matching text layer. Both layers bin the data
# with the same binwidth; after_stat(count) refers to the per-bin count that
# each layer's stat computes (it replaces the deprecated ..count.. notation).
ggplot(df, aes(x = log(amount), y = after_stat(count), label = after_stat(count))) +
  geom_histogram(binwidth = 1) +
  stat_bin(geom = "text", binwidth = 1, vjust = -0.5) +
  theme_minimal()
ggplot2 layers do not (at least in any situations I can think of) take the summary calculations of other layers, so I think the simplest thing would be to replicate the calculation using stat_bin(geom = "text"...
Or perhaps simpler, you could pre-calculate the numbers:
library(dplyr)
df %>%
count(log_amt = round(log(amount))) %>%
ggplot(aes(log_amt, n, label = n)) +
geom_col(width = 1) +
geom_text(vjust = -0.5)
EDIT -- to show buckets without the log transform we could use:
# Count rows per bucket of the rounded natural log of amount, then label the
# x axis in original units. log() is the natural logarithm, so the inverse
# used for the labels must be exp(), not 10^.
df %>%
  count(log_amt = round(log(amount))) %>%
  ggplot(aes(log_amt, n, label = n)) +
  geom_col(width = 0.5) +
  geom_text(vjust = -0.5) +
  scale_x_continuous(labels = ~scales::comma(exp(.)),
                     minor_breaks = NULL)

Grouping box plot based on time and coloring based on categories

I am making a box plot for the data I have. Here is the data frame. I wrote the code and got a nice box plot, as in picture 1. But I expected a box plot for each Land type at every time step (2, 3, 4). For example, at time step 2 in the first panel (Extremely Saline) there should be box plots for all types of land, and so on. I may be failing to group them by time; please see picture 2. I have also tried to group them but couldn't get the graph I intended. Any help will be appreciated. Thanks
library(dplyr)  # provides %>%, mutate() and case_when() used below

# Fix the RNG state so rnorm()/runif() give the same values on every run.
set.seed(123)
ID <- 1:5
Time <- rep(c(1, 2, 3, 4, 5), each = 20)
Type <- 1:25
# NOTE(review): rep(Type, 40) has 1000 elements while the other columns have
# 100, so data.frame() recycles those columns 10 times -- confirm whether
# rep(Type, 4) was intended.
data <- data.frame(IDn = rep(ID, 20), Time, Land = rep(Type, 40), y = rnorm(100, 0, 1), x = runif(100, 0, 1))
# Recode the numeric Land id into three farm-size categories.
data$Land <- ifelse(data$Land > 15, "large farmers", ifelse(data$Land <= 5, "small farmers", "medium-farmers"))
# Bin x into salinity classes; the conditions are evaluated top to bottom.
data <- data %>% mutate(xtype = case_when(x > 0.8 ~ 'Extremely Saline',
                                          x > 0.6 & x <= 0.8 ~ 'Severely Saline',
                                          x > 0.5 & x <= 0.6 ~ 'Highly Saline',
                                          x > 0.3 & x <= 0.5 ~ 'Moderatley Saline',
                                          x > 0.2 & x <= 0.3 ~ 'Slightly Saline',
                                          x <= 0.2 ~ 'Non saline'))
## Box Plot
ggplot(data, aes(x=Time, y =x)) +
geom_boxplot(aes(color = Land), size = 0.5, alpha = 0.6) +
facet_wrap(~xtype, nrow = 1) + theme_bw()
#box plot grouping
ggplot(data, aes(x=Time, y =x, group=Time)) +
geom_boxplot(aes(color = Land), size = 0.5, alpha = 0.6) +
facet_wrap(~xtype, nrow = 1) + theme_bw()
Picture 2
Edit: I tried the suggested solution on the data set I used to build the reproducible example here. The data is somewhat large, and in the resulting graph the time values overlap. I am not sure what's happening.
Is this what you are looking for?
I changed the aes and made Time a factor.
## Box Plot
ggplot(data, aes(x=factor(Time), y =x, color = Land)) +
geom_boxplot(size = 0.5, alpha = 0.6) +
facet_wrap(~xtype, nrow = 1) + theme_bw()
#box plot grouping
ggplot(data, aes(x=factor(Time), y =x, group=Time, color = Land)) +
geom_boxplot(size = 0.5, alpha = 0.6) +
facet_wrap(~xtype, nrow = 1) + theme_bw()

How to highlight a column in ggplot2

I have the following graph and I want to highlight the columns (both) for watermelons, as it has the highest juice_content and weight. I know how to change the color of the columns, but I would like the WHOLE columns to be highlighted. Any idea on how to achieve this? There doesn't seem to be anything similar online.
fruits <- c("apple","orange","watermelons")
juice_content <- c(10,1,1000)
weight <- c(5,2,2000)
df <- data.frame(fruits,juice_content,weight)
df <- gather(df,compare,measure,juice_content:weight, factor_key=TRUE)
plot <- ggplot(df, aes(fruits,measure, fill=compare)) + geom_bar(stat="identity", position=position_dodge()) + scale_y_log10()
An option is to use gghighlight
library(gghighlight)
ggplot(df, aes(fruits,measure, fill = compare)) +
geom_col(position = position_dodge()) +
scale_y_log10() +
gghighlight(fruits == "watermelons")
In response to your comment, how about working with different alpha values
ggplot(df, aes(fruits,measure)) +
geom_col(data = . %>% filter(fruits == "watermelons"),
mapping = aes(fill = compare),
position = position_dodge()) +
geom_col(data = . %>% filter(fruits != "watermelons"),
mapping = aes(fill = compare),
alpha = 0.2,
position = position_dodge()) +
scale_y_log10()
Or you can achieve the same with one geom_col and a conditional alpha (thanks @Tjebo)
ggplot(df, aes(fruits, measure)) +
geom_col(
mapping = aes(fill = compare, alpha = fruits == 'watermelons'),
position = position_dodge()) +
scale_alpha_manual(values = c(0.2, 1)) +
scale_y_log10()
You could use geom_area to highlight behind the bars. You have to force the x scale to discrete first which is why I've used geom_blank (see this answer geom_ribbon overlay when x-axis is discrete) noting that geom_ribbon and geom_area are effectively the same except geom_area always has 0 as ymin
#minor edit so that the level isn't hard coded
watermelon_level <- which(levels(df$fruits) == "watermelons")
AreaDF <- data.frame(fruits = c(watermelon_level-0.5,watermelon_level+0.5))
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes( y = max(df$measure)), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
Edit to address comment
If you want to highlight multiple fruits then you could do something like this. You need a data.frame specifying the x and y values where you want the geom_area, including dropping it to 0 in between. I'm sure there are slightly tidier methods of getting the data.frame, but this one works.
highlight_level <- which(levels(df$fruits) %in% c("apple", "watermelons"))
AreaDF <- data.frame(fruits = unlist(lapply(highlight_level, function(x) c(x -0.51,x -0.5,x+0.5,x+0.51))),
yval = rep(c(1,max(df$measure),max(df$measure),1), length(highlight_level)))
AreaDF <- AreaDF %>% mutate(
yval = ifelse(floor(fruits) %in% highlight_level & ceiling(fruits) %in% highlight_level, max(df$measure), yval)) %>%
arrange(fruits) %>% distinct()
plot <- ggplot(df, aes(fruits)) +
geom_blank(aes(y=measure, fill=compare))+
geom_area(data = AreaDF, aes(y = yval ), fill= "yellow")+
geom_bar(aes(y=measure, fill=compare),stat="identity", position=position_dodge()) + scale_y_log10()
plot

ggplot Highlight a point where x axis equals a value

If you run the code below you will get a line graph. How can I change the color of the point at x = 2 to RED and increase its size?
In this case, the point at (.6) on the graph, where x = 2, would be highlighted red and made bigger.
Here is my code:
library("ggplot2")
data<-data.frame(time= c(1,2,3), value = c(.4,.6,.7))
ggplot(data, aes( x = time, y=value) ) + geom_line() + geom_point(shape = 7,size = 1)
Thank you!
If your dataset is small you could do this:
> library("ggplot2")
> data<-data.frame(time= c(1,2,3), value = c(.4,.6,.7),point_size=c(1,10,1),cols=c('black','red','black'))
> ggplot(data, aes( x = time, y=value) ) + geom_line() + geom_point(shape = 7,size = data$point_size, colour=data$cols)
Makes:
Also I would not advise calling your data frame data
In addition to @Harpal's solution, you can add two more columns to your data frame where point size and colour are specified according to particular conditions:
df <- data.frame(time= c(1,2,3), value = c(.4,.6,.7))
# specify condition and pointsize here
df$pointsize <- ifelse(df$value==0.6, 5, 1)
# specify condition and pointcolour here
df$pointcol <- ifelse(df$value==0.6, "red", "black")
ggplot(df, aes(x=time, y=value)) + geom_line() + geom_point(shape=7, size=df$pointsize, colour=df$pointcol)
You may change ifelse(df$value==0.6, 5, 1) to meet any criteria you like, or you can use a more complex approach to specify more conditions to be met:
df <- data.frame(time= c(1,2,3), value = c(.4,.6,.7))
df$pointsize[which(df$value<0.6)] <- 1
df$pointsize[which(df$value>0.6)] <- 8
df$pointsize[which(df$value==0.6)] <- 5
df$pointcol[which(df$value<0.6)] <- "black"
df$pointcol[which(df$value>0.6)] <- "green"
df$pointcol[which(df$value==0.6)] <- "red"
ggplot(df, aes(x=time, y=value)) + geom_line() + geom_point(shape=7, size=df$pointsize, colour=df$pointcol)

Resources