Difference in hour between group according ID - r

df dataset contain Direction value and mesure of Timecode. I Want to generate :
Difference_begin_end : is the difference between the last timecode and the first of each sample_ID by Direction
Difference_begin_end_all : is the difference between first et last line of each sample_ID by Directionin second.
This is df dataset :
df=structure(list(Sample_ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Direction = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), Timecode = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 5L, 5L,
6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L), .Label = c("",
"17:02:10", "17:02:11", "17:02:12", "17:02:13", "17:02:15", "17:02:26",
"17:02:47", "17:02:48", "17:02:49", "17:02:50", "17:02:59", "17:03:02",
"17:03:03", "17:03:07", "17:03:10", "17:03:11"), class = "factor")), .Names = c("Sample_ID",
"Direction", "Timecode"), row.names = c(NA, 50L), class = "data.frame")
EDIT 2:

If your time variable is of character or factor class, you need to convert it by as.POSIXct(), strptime(), or other similar functions in lubridate package. The following is my solution:
library(dplyr)
df %>% group_by(Sample_ID) %>%
mutate(Timecode = as.POSIXct(Timecode, format = "%H:%M:%S"),
yellow = last(Timecode) - first(Timecode)) %>%
group_by(Sample_ID, Direction) %>%
mutate(red_purple = last(Timecode) - first(Timecode))
# # A tibble: 99 x 5
# # Groups: Sample_ID, Direction [4]
# Sample_ID Direction Timecode yellow red_purple
# <int> <int> <dttm> <time> <time>
# 1 1 0 2018-12-24 17:02:10 61 secs 5 secs
# 2 1 0 2018-12-24 17:02:10 61 secs 5 secs
# 3 1 0 2018-12-24 17:02:10 61 secs 5 secs
# 4 1 0 2018-12-24 17:02:10 61 secs 5 secs
# 5 1 0 2018-12-24 17:02:10 61 secs 5 secs
The variables yellow and red_purple correspond to the colors of the second photo (EDIT 2) in your question.

Related

How can I fix the runtime error in ecdf function in R?

When I run this code-
a<- read.delim(file.choose("data.txt"))
d<-sort(a$d)
plot(d, sort(ecdf(d)(d)),type="s", lty=2,col="red", ylab= "P(X<=x)",ylim= 0:1)
it makes me make this mistake-
Error in ecdf(d) : 'x' must have 1 or more non-missing values
help?
I ran your code and it seems to be alright. I've just changed the second line of your code, because the only column provided in your data was named as x, instead of d.
Check it out:
# load data
a = structure(list(x = c(4L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 4L, 1L, 2L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 3L, 0L, 5L, 2L, 2L, 1L, 0L, 0L, 2L, 0L, 0L,
0L, 1L, 3L, 3L, 0L, 0L, 0L, 2L, 0L, 2L, 1L, 1L, 4L, 4L,
0L, 1L, 3L, 1L, 0L, 2L, 1L, 2L, 0L, 0L, 0L, 1L, 0L, 1L,
6L, 0L, 2L, 2L, 0L, 1L, 1L, 2L, 1L, 0L, 1L, 0L, 3L, 0L,
3L, 0L, 4L, 3L, 2L, 2L, 2L, 1L, 3L, 0L, 3L, 2L, 0L, 1L,
2L, 1L)), class = "data.frame", row.names = c(NA, -100L))
# sort x column (the only column)
d = sort(a$x)
# plot
plot(d, sort(ecdf(d)(d)), type = "s", lty = 2, col = "red",
ylab = "P(X<=x)", ylim = 0:1)
Output:

How to color outline differently from fill in histogram using ggplot / R? [duplicate]

This question already has an answer here:
Manually colouring plots with `scale_fill_manual` in ggplot2 not working
(1 answer)
Closed 3 years ago.
Please find My data q below
I have produced the following plot:
By using the script:
library(tidyverse)
w %>%
as_tibble() %>%
mutate(Studie=as.character(Studie),
best.resp =as.factor(best.resp)) %>%
bind_rows(., mutate(., Studie="all")) %>%
count(Studie, best.resp) %>%
ggplot(aes(Studie, n, fill= best.resp)) +
scale_fill_manual(values = c("green", "purple", "yellow")) +
scale_colour_manual(values = c("blue", "red","orange")) +
geom_col(position = position_dodge2(preserve = "single", padding = 0))
I want the outline around each bar to have one set of colors while the fill to have another set of colors. As you can see, I tried using scale_fill_manual and scale_colour_manual, however, that does not solve my problem.
I have attached a picture illustrating what I mean by outlines having one color and the fill another color:
My data
q <- structure(list(Studie = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L), best.resp = c(0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L,
1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 2L)), .Names = c("Studie",
"best.resp"), class = "data.frame", row.names = c(NA, -106L))
You need to map a variable to the colour aesthetic (in aes):
ggplot(aes(Studie, n, fill= best.resp, colour = best.resp)
I think in your code you have a w %>% is it supposed to be q?
You need to specify color in your aes command. Right now, you just have the fill-- so the scale color manual later, doesn't apply to anything.
q %>%
as_tibble() %>%
mutate(Studie=as.character(Studie),
best.resp =as.factor(best.resp)) %>%
bind_rows(., mutate(., Studie="all")) %>%
count(Studie, best.resp) %>%
ggplot(aes(Studie, n, color = best.resp, fill= best.resp)) +
scale_fill_manual(values = c("green", "purple", "yellow")) +
scale_colour_manual(values = c("blue", "red","orange")) +
geom_col(position = position_dodge2(preserve = "single", padding = 0))

Bootstrapping eigenvalues for nonlinear PCA in r

I am running nonlinear PCA in r, using the homals package. Here is a chunk of the code I am using as an example:
res1 <- homals(data = mydata, rank = 1, ndim = 9, level = "nominal")
res1 <- rescale(res1)
I want to generate 1000 bootstrap estimates of the eigenvalues in this analysis (with replacement), but I can't figure out the code. Does anyone have any suggestions?
Sample data:
dput(head(mydata, 30))
structure(list(`W age` = c(45L, 43L, 42L, 36L, 19L, 38L, 21L,
27L, 45L, 38L, 42L, 44L, 42L, 38L, 26L, 48L, 39L, 37L, 39L, 26L,
24L, 46L, 39L, 48L, 40L, 38L, 29L, 24L, 43L, 31L), `W education` = c(1L,
2L, 3L, 3L, 4L, 2L, 3L, 2L, 1L, 1L, 1L, 4L, 2L, 3L, 2L, 1L, 2L,
2L, 2L, 3L, 3L, 4L, 4L, 4L, 2L, 4L, 4L, 4L, 1L, 3L), `H education` = c(3L,
3L, 2L, 3L, 4L, 3L, 3L, 3L, 1L, 3L, 4L, 4L, 4L, 4L, 4L, 1L, 2L,
2L, 1L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 2L, 4L), `N children` = c(10L,
7L, 9L, 8L, 0L, 6L, 1L, 3L, 8L, 2L, 4L, 1L, 1L, 2L, 0L, 7L, 6L,
8L, 5L, 1L, 0L, 1L, 1L, 5L, 8L, 1L, 0L, 0L, 8L, 2L), `W religion` = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), `W employment` = c(1L,
1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L), `H occupation` = c(3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 3L, 1L, 1L, 3L, 2L, 4L, 2L, 2L,
2L, 2L, 4L, 3L, 1L, 1L, 1L, 3L, 1L, 1L, 2L, 2L, 1L), `Standard of living` =
c(4L,
4L, 3L, 2L, 3L, 2L, 2L, 4L, 2L, 3L, 3L, 4L, 3L, 3L, 1L, 4L, 4L,
3L, 1L, 1L, 1L, 4L, 4L, 4L, 3L, 4L, 4L, 2L, 4L, 4L), Media = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), Contraceptive = c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L)), .Names = c("W age",
"W education", "H education", "N children", "W religion", "W employment",
"H occupation", "Standard of living", "Media", "Contraceptive"
), row.names = c(NA, 30L), class = "data.frame")
>
I was given the rescale function to use with the homals package, to do optimal scaling. Here is the function:
rescale <- function(res) {
# Rescale homals results to proper scaling
n <- nrow(res$objscores)
m <- length(res$catscores)
res$objscores <- (n * m)^0.5 * res$objscores
res$scoremat <- (n * m)^0.5 * res$scoremat
res$catscores <- lapply(res$catscores, FUN = function(x) (n * m)^0.5 * x)
res$cat.centroids <- lapply(res$cat.centroids, FUN = function(x) (n * m)^0.5 * x)
res$low.rank <- lapply(res$low.rank, FUN = function(x) n^0.5 * x)
res$loadings <- lapply(res$loadings, FUN = function(x) m^0.5 * x)
res$discrim <- lapply(res$discrim, FUN = function(x) (n * m)^0.5 * x)
res$eigenvalues <- n * res$eigenvalues
return(res)
}
The standard way to bootstrap in R is to use base package boot.
I am not very satistied with the code that follows because it is throwing lots of warnings. But maybe this is due to the dataset I have tested it with. I have used the dataset and 3rd example in help("homals").
I have run 10 bootstrap replicates only.
library(homals)
library(boot)
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(d, active = c(rep(TRUE, 4), FALSE), sets = list(c(1,3,4),2,5))
res$eigenvalues
}
data(galo)
set.seed(7578) # Make the results reproducible
eig <- boot(galo, boot_eigen, R = 10)
eig
#
#ORDINARY NONPARAMETRIC BOOTSTRAP
#
#
#Call:
#boot(data = galo, statistic = boot_eigen, R = 10)
#
#
#Bootstrap Statistics :
# original bias std. error
#t1* 0.1874958 0.03547116 0.005511776
#t2* 0.2210821 -0.02478596 0.005741331
colMeans(eig$t)
#[1] 0.2229669 0.1962961
If this also doesn't run properly in your case, please say so and I will delete the answer.
EDIT.
In order to answer to the discussion in the comments, I have changed the function boot_eigen, the call to homals now follows the question code and rescale is called before returning.
boot_eigen <- function(data, indices){
d <- data[indices, ]
res <- homals(data = d, rank = 1, ndim = 9, level = "nominal")
res <- rescale(res)
res$eigenvalues
}
set.seed(7578) # Make the results reproducible
eig <- boot(mydata, boot_eigen, R = 10)

identifying rows in data frame that exhibit patterns

Below I have code with 3 columns: a group field, a open/close field for the store, and the rolling sum of 3 month opens for the store. I also have the desired solution output.
My dataset can be thought of as an employees availability. You can assume each row to be a different time period (hour, day,month, year, whatever). In the open/closed column I have whether or not the employee was present. The 3month rolling column is a sum of the previous rows.
What I want to identify is the non-zero values in this rolling sum column following a gap of at least 3 zero rows for that particular group. While not present in this dataset, you can assume that there might be more than one 'gap' of zeros present.
structure(list(Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L), .Label = c("A", "B"), class = "factor"), X0_closed_1_open = c(0L,
1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L), X3month_roll_open = c(0L,
0L, 1L, 2L, 2L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 2L, 0L, 1L, 1L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L), desired_solution = structure(c(1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("no", "yes"), class ="factor")), .Names = c("Group", "X0_closed_1_open", "X3month_roll_open", "desired_solution"), class = "data.frame", row.names = c(NA,
-26L))
One option is:
res <- unsplit(
lapply(split(df1, df1$Group), function(x) {
rl <- with(x,rle(X3month_roll_open==0))
indx <- cumsum(c(0,diff(inverse.rle(within.list(rl,
values[values] <- lengths[values]>=3)))<0))
x$Flag <- indx!=0 & x[,3]!=0
x}),
df1$Group)
NOTE: Instead of 'yes/no', it may be better to have 'TRUE/FALSE' for easing subsetting.
identical(c('no', 'yes')[res$Flag+1L], as.character(res$desired_solution))
#[1] TRUE

Incorrect frequency using ggplot2 histogram

This is something I noticed just as I was about to put the histograms in my thesis. I noticed that the frequency did not reflect the correct count as displayed in the graph. To double check I tried this in excel and it was proved that the frequency being displayed in R using the ggplot2 was indeed incorrect. I noticed that in my syntax I had applied the xlim function. I removed that out of curiosity to see the result and then magically ggplot2 produced the correct histogram!
This is the code that I'm using:
ggplot(data, aes(x = variable) )+
geom_histogram(binwidth = 1) +
xlim(0, 40)
The one that is producing the correct histogram graph is this:
hist(data$variable, breaks = seq(0, 40, 1), ylim = c(0,700))
Can anybody please help me here? I've spent a lot of time trying to get this to work but to no avail. Any help would be greatly appreciated.
# example data
variable <- c(1L, 1L, 1L, 3L, 4L, 1L, 2L, 1L, 2L, 0L, 1L, 2L, 1L, 1L, 0L,
3L, 1L, 2L, 2L, 3L, 2L, 3L, 2L, 2L, 1L, 0L, 5L, 0L, 0L, 2L, 1L,
1L, 2L, 1L, 3L, 2L, 5L, 4L, 3L, 2L, 3L, 0L, 1L, 1L, 1L, 1L, 2L,
0L, 2L, 1L, 3L, 1L, 4L, 2L, 6L, 2L, 1L, 6L, 5L, 5L, 1L, 1L, 0L,
2L, 1L, 1L, 0L, 0L, 1L, 2L, 1L, 1L, 5L, 2L, 1L, 0L, 3L, 2L, 2L,
4L, 6L, 3L, 2L, 1L, 6L, 1L, 4L, 2L, 1L, 2L, 1L, 1L, 1L, 0L, 1L,
1L, 0L, 2L, 3L, 1L, 3L, 2L, 2L, 1L, 1L, 2L, 13L, 3L, 2L, 5L,
5L, 1L, 3L, 0L, 2L, 1L, 2L, 1L, 0L, 10L, 2L, 0L, 1L, 2L, 2L,
0L, 1L, 4L, 0L, 2L, 0L, 0L, 1L, 0L, 1L, 13L, 15L, 2L, 4L, 4L,
12L, 7L, 4L, 4L, 0L, 0L, 1L, 0L, 1L, 2L, 6L, 3L, 0L, 2L, 2L,
0L, 1L, 5L, 0L, 3L, 3L, 4L, 1L, 1L, 3L, 20L, 2L, 1L, 0L, 4L,
4L, 5L, 6L, 9L, 2L, 4L, 2L, 1L, 1L, 2L, 2L, 1L, 1L, 0L, 1L, 1L,
1L, 2L, 0L, 3L, 2L, 1L, 2L, 1L, 2L, 4L, 18L, 16L, 3L, 3L, 1L,
3L, 1L, 7L, 13L, 2L, 3L, 2L, 4L, 2L, 2L, 1L, 0L, 0L, 0L, 0L,
1L, 2L, 1L, 1L, 1L, 1L, 3L, 2L, 2L, 2L, 4L, 3L, 4L, 4L, 5L, 2L,
1L, 1L, 6L, 4L, 0L, 3L, 3L, 1L, 4L, 0L, 0L, 2L, 2L, 1L, 0L, 1L,
1L, 0L, 0L, 1L, 2L, 4L, 1L, 2L, 1L, 0L, 0L, 5L, 2L, 10L, 4L,
1L, 2L, 3L, 2L, 2L, 1L, 2L, 0L, 4L, 2L, 1L, 0L, 0L, 3L, 1L, 3L,
1L, 1L, 0L, 0L, 0L, 1L, 4L, 2L, 2L, 3L, 0L, 4L, 1L, 34L, 20L,
1L, 3L, 3L, 1L, 7L, 5L, 1L, 3L, 5L, 2L, 1L, 1L, 3L, 0L, 1L, 4L,
1L, 2L, 2L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 5L, 4L, 5L,
9L, 9L, 3L, 5L, 1L, 2L, 1L, 2L, 1L, 0L, 3L, 2L, 1L, 0L, 2L, 1L,
2L, 0L, 1L, 2L, 1L, 1L, 1L, 2L, 0L, 1L, 5L, 9L, 8L, 0L, 5L, 2L,
3L, 1L, 0L, 0L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
2L, 2L, 1L, 2L, 0L, 1L, 1L, 1L, 7L, 0L, 1L, 1L, 1L, 1L, 2L, 2L,
3L, 2L, 0L, 1L, 5L, 6L, 3L, 6L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 2L, 1L, 1L, 1L, 0L, 1L, 1L, 2L, 0L, 1L, 0L, 0L, 1L,
3L, 2L, 3L, 3L, 3L, 4L, 6L, 7L, 6L, 3L, 1L, 0L, 1L, 0L, 0L, 2L,
1L, 1L, 1L, 2L, 1L, 3L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 1L, 2L, 2L,
0L, 0L, 1L, 2L, 0L, 3L, 3L, 0L, 3L, 1L, 1L, 2L, 3L, 0L, 0L, 0L,
0L, 1L, 1L, 3L, 2L, 0L, 4L, 3L, 0L, 0L, 1L, 1L, 1L, 2L, 1L, 1L,
0L, 1L, 2L, 2L, 1L, 2L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 1L,
1L, 1L, 0L, 0L, 3L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 2L, 3L, 1L, 0L,
1L, 4L, 2L, 1L, 0L, 2L, 2L, 1L, 1L, 2L, 3L, 2L, 2L, 4L, 1L, 2L,
0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 2L, 1L, 1L,
1L, 1L, 3L, 1L, 1L, 0L, 3L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 2L, 1L,
1L, 1L, 0L, 0L, 5L, 8L, 6L, 4L, 2L, 1L, 1L, 0L, 1L, 0L, 2L, 1L,
1L, 1L, 1L, 0L, 1L, 0L, 2L, 0L, 1L, 0L, 3L, 3L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 0L, 1L, 2L, 3L, 3L, 2L, 3L, 2L, 1L,
1L, 0L, 0L, 1L, 0L, 0L, 2L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 2L, 0L,
2L, 0L, 1L, 2L, 2L, 0L, 0L, 0L, 1L, 0L, 0L, 4L, 0L, 1L, 0L, 0L,
2L, 1L, 0L, 4L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 1L,
1L, 2L, 1L, 0L, 3L, 5L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 2L, 1L, 0L, 0L, 3L,
2L, 0L, 1L, 0L, 2L, 2L, 3L, 2L, 1L, 0L, 0L, 2L, 0L, 2L, 1L, 1L,
0L, 0L, 0L, 1L, 2L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 4L,
0L, 1L, 0L, 0L, 2L, 2L, 0L, 2L, 0L, 4L, 3L, 3L, 4L, 1L, 2L, 1L,
1L, 1L, 1L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L,
2L, 1L, 1L, 0L, 1L, 3L, 3L, 2L, 1L, 1L, 1L, 4L, 2L, 2L, 3L, 2L,
1L, 3L, 1L, 4L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 2L, 0L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 4L, 3L, 3L, 1L, 3L, 3L, 3L,
2L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 5L, 5L, 2L, 4L, 3L, 7L, 5L, 3L,
0L, 1L, 2L, 2L, 1L, 3L, 2L, 0L, 0L, 0L, 1L, 0L, 2L, 1L, 0L, 1L,
1L, 1L, 0L, 1L, 0L, 0L, 1L, 2L, 7L, 11L, 5L, 8L, 15L, 6L, 6L,
0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 4L, 1L, 0L, 1L, 0L, 0L, 0L,
1L, 1L, 1L, 1L, 1L, 0L, 2L, 14L, 19L, 8L, 9L, 3L, 4L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 2L, 1L, 1L, 2L, 1L, 0L, 0L, 1L, 0L, 1L, 0L,
2L, 1L, 1L, 7L, 7L, 3L, 4L, 6L, 2L, 1L, 2L, 1L, 1L, 1L, 0L, 1L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 2L, 0L, 0L, 1L, 1L,
0L, 2L, 1L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 5L, 2L, 2L,
1L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L,
2L, 0L, 0L, 1L, 1L, 0L, 1L, 2L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 2L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 2L, 1L,
2L, 1L, 0L, 1L, 0L, 2L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
2L, 2L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L,
2L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 1L, 11L, 1L, 0L, 0L, 1L, 1L, 3L, 4L, 0L,
0L, 0L, 1L, 6L)
data <- data.frame(variable)
Ok I see, the difference is the specific definition of a bin, i.e. whether you use [0,1) or [0,1] for the first bin. Try
ggplot(data, aes(x = variable)) +
geom_histogram(breaks = seq(0,40,by = 1), right = TRUE)
or if you don't use explicit breaks, you have to specify origin additionaly, either by xlim as you did, or
ggplot(data, aes(x = variable)) +
geom_histogram(binwidth = 1, right = TRUE, origin = 0)

Resources