I have two data frames in R
# Example inputs. Both columns are supplied straight to data.frame() so the
# snippet runs in base R without needing dplyr for `%>%` / mutate().
df1 <- data.frame(
  Name = c("RIS_001", "RIS_002", "RIS_003", "RIS_004", "RIS_005"),
  Value = c(5, 3, 8, 6, 9)
)
df2 <- data.frame(
  Prod = c(
    "RIS_010", "RIS_011", "RIS_012", "RIS_013",
    "RIS_014", "RIS_015", "RIS_016", "RIS_017"
  ),
  Value = c(54, 87, 92, 48, 66, 35, 12, 18)
)
I want to create two new data frames from them as in below images. How to accomplish this in R?
new_df1, concatenation of Row & Column or Column & Row separated by a special character "|"
new_df2, mean of each intersecting row & column
Ex: Mean for RIS_001 & RIS_010 = mean(c(5, 54)) = 29.5
Appreciate any help. Thank you!
You can use outer for the first df:
# Every Name/Prod pairing rendered as "Name | Prod"; rows are labelled by
# Name and columns by Prod.
o <- outer(
  df1$Name, df2$Prod,
  FUN = function(nm, prod) paste(nm, prod, sep = " | ")
)
dimnames(o) <- list(df1$Name, df2$Prod)
And for the second df:
# Pairwise mean of every df1 value with every df2 value: take all pairwise
# sums, then halve them. Rows are labelled by Name and columns by Prod.
m <- outer(df1$Value, df2$Value, FUN = "+") / 2
dimnames(m) <- list(df1$Name, df2$Prod)
RIS_010 RIS_011 RIS_012 RIS_013 RIS_014 RIS_015 RIS_016 RIS_017
RIS_001 29.5 46.0 48.5 26.5 35.5 20.0 8.5 11.5
RIS_002 28.5 45.0 47.5 25.5 34.5 19.0 7.5 10.5
RIS_003 31.0 47.5 50.0 28.0 37.0 21.5 10.0 13.0
RIS_004 30.0 46.5 49.0 27.0 36.0 20.5 9.0 12.0
RIS_005 31.5 48.0 50.5 28.5 37.5 22.0 10.5 13.5
I might be mis-interpreting your question, but this seems to me to be what you are asking?
library(tidyverse)
# Values separated by a bar: each data frame is first spread into a single
# wide row (one column per Name / Prod), then outer() pastes every df1 value
# to every df2 value as "x|y".
outer(
pivot_wider(df1, names_from = Name, values_from = Value),
pivot_wider(df2, names_from = Prod, values_from = Value),
\(x, y) paste(x, y, sep = "|")
)
#> RIS_010 RIS_011 RIS_012 RIS_013 RIS_014 RIS_015 RIS_016 RIS_017
#> RIS_001 "5|54" "5|87" "5|92" "5|48" "5|66" "5|35" "5|12" "5|18"
#> RIS_002 "3|54" "3|87" "3|92" "3|48" "3|66" "3|35" "3|12" "3|18"
#> RIS_003 "8|54" "8|87" "8|92" "8|48" "8|66" "8|35" "8|12" "8|18"
#> RIS_004 "6|54" "6|87" "6|92" "6|48" "6|66" "6|35" "6|12" "6|18"
#> RIS_005 "9|54" "9|87" "9|92" "9|48" "9|66" "9|35" "9|12" "9|18"
# Pairwise means: same wide reshaping as for the pasted version, but the
# combining function converts both sides to numeric and averages them.
outer(
pivot_wider(df1, names_from = Name, values_from = Value),
pivot_wider(df2, names_from = Prod, values_from = Value),
\(x, y) (as.numeric(x)+as.numeric(y))/2
)
#> RIS_010 RIS_011 RIS_012 RIS_013 RIS_014 RIS_015 RIS_016 RIS_017
#> RIS_001 29.5 46.0 48.5 26.5 35.5 20.0 8.5 11.5
#> RIS_002 28.5 45.0 47.5 25.5 34.5 19.0 7.5 10.5
#> RIS_003 31.0 47.5 50.0 28.0 37.0 21.5 10.0 13.0
#> RIS_004 30.0 46.5 49.0 27.0 36.0 20.5 9.0 12.0
#> RIS_005 31.5 48.0 50.5 28.5 37.5 22.0 10.5 13.5
Related
What I'm having trouble with is this: I'd like the first row of this matrix (mat.a) to be the first row of matrix 1 in my array, the second row of mat.a to be the first row of matrix 2, etc. Then the first row of mat.b should be the second row of the first matrix in my array, the second row of mat.b should be the second row of the second matrix in the array, etc. This trend continues for mat.c. The fourth row of each matrix should be the averages of the values in each column. Also, I'm not allowed to use a for loop.
# Build one matrix per group (A, B, C) with three columns each: the two
# scores and the group average. byrow = FALSE fills column by column, so each
# source vector becomes one column.
mat.a <- matrix(c(scores$A1, scores$A2, scores$avgA), ncol = 3,
byrow = FALSE)
mat.b <- matrix(c(scores$B1, scores$B2, scores$avgB), ncol = 3,
byrow = FALSE)
mat.c <- matrix(c(scores$C1, scores$C2, scores$avgC), ncol = 3,
byrow = FALSE)
# Concatenate the three matrices and reshape to 3 x 3 x 21. array() also
# fills column-major, consuming mat.a's values first, then mat.b's, then
# mat.c's.
scores.array<- array(c(mat.a,mat.b, mat.c), dim = c(3,3,21))
> dim(mat.a)
[1] 21 3
> dim(scores)
[1] 21 10
> dim(mat.b)
[1] 21 3
> dim(mat.c)
[1] 21 3
scores
scores.updated
Here is a natural (I think) approach to this problem:
Use array to construct an array with A, B, and C lying along the third dimension.
Use aperm to transpose the array so that A, B, and C lie along the first dimension.
Use colMeans to compute means over the first dimension ("columnwise").
Use abind to attach the means to the transposed array.
# Columns in group order (A, B, C); within each group: score 1, score 2,
# average.
nms <- c("A1", "A2", "avgA", "B1", "B2", "avgB", "C1", "C2", "avgC")

# Students along dim 1, the three columns of a group along dim 2, and the
# groups A/B/C along dim 3. nrow(scores) replaces the hard-coded 21 so the
# snippet works for any number of students.
z <- array(unlist(scores[nms]), dim = c(nrow(scores), 3L, 3L))

# Reverse the dimension order so that groups lie along dim 1 and students
# along dim 3. The input must be `z`; `zz` does not exist until this line
# has run.
zz <- aperm(z, 3:1)

# colMeans(dims = 1L) averages over dim 1 (the groups); abind() attaches
# those means as an extra row of every slice.
zzz <- abind::abind(zz, colMeans(zz, dims = 1L), along = 1L)
zzz[, , 1:2]
, , 1
[,1] [,2] [,3]
[1,] 28.75775 69.28034 49.01905
[2,] 41.37243 27.43836 34.40540
[3,] 10.28646 89.03502 49.66074
[4,] 26.80555 61.91791 44.36173
, , 2
[,1] [,2] [,3]
[1,] 78.83051 64.05068 71.44060
[2,] 36.88455 81.46400 59.17427
[3,] 43.48927 91.44382 67.46655
[4,] 53.06811 78.98617 66.02714
I have used scores as (very helpfully!) defined by @langtang.
Try this:
library(tidyverse)
# Add per-student averages, computed row by row:
#   avg1 - mean of the columns whose names end in "1"
#   avg2 - mean of the columns whose names end in "2"
#   avg3 - mean of the columns whose names start with "avg"
scores <- scores %>%
rowwise() %>%
mutate(avg1 = mean(c_across(ends_with("1"))),
avg2 = mean(c_across(ends_with("2"))),
avg3 = mean(c_across(starts_with("avg")))) %>%
# Reorder the columns so that the 12 values after `ini` are laid out as three
# consecutive runs of four (A, B, C, mean), matching the 4 x 3 slices below.
relocate(ini, A1,B1,C1,avg1, A2,B2,C2,avg2, avgA,avgB,avgC, avg3)
# Create the scores array: pivot_longer() stacks the A1:avg3 columns into a
# single `value` column, which is then reshaped into 21 slices of 4 x 3
# (array() fills each slice column by column).
scores.array = array(scores %>% pivot_longer(cols = A1:avg3) %>% pull(value), dim=c(4,3,21))
# Label rows, columns and slices; slices are named by each student's `ini`.
dimnames(scores.array) = list(c("A","B","C","mean"), c("Midterm", "Final", "mean"), scores$ini)
Output (first two):
> scores.array[,,1:2]
, , ZO
Midterm Final mean
A 28.75775 69.28034 49.01905
B 41.37243 27.43836 34.40540
C 10.28646 89.03502 49.66074
mean 26.80555 61.91791 44.36173
, , UE
Midterm Final mean
A 78.83051 64.05068 71.44060
B 36.88455 81.46400 59.17427
C 43.48927 91.44382 67.46655
mean 53.06811 78.98617 66.02714
Input Data (fake data):
# Reproducible fake data: fixing the seed makes the runif()/sample() draws
# below repeatable, so the statement order must not change.
set.seed(123)
# Six score columns, each 21 uniform draws scaled by 100.
scores = data.frame(
A1 = runif(21)*100,
A2 = runif(21)*100,
B1 = runif(21)*100,
B2 = runif(21)*100,
C1 = runif(21)*100,
C2 = runif(21)*100
)
# Give every row a two-letter `ini` built from sampled capital letters and
# move it to the front. The result stays a rowwise data frame.
scores <- scores %>% rowwise() %>%
mutate(ini = paste0(sample(LETTERS,2), collapse="")) %>%
relocate(ini)
# Per-group averages of the two scores, computed row by row.
scores$avgA = apply(scores[,c("A1","A2")],1,mean)
scores$avgB = apply(scores[,c("B1","B2")],1,mean)
scores$avgC = apply(scores[,c("C1","C2")],1,mean)
ini A1 A2 B1 B2 C1 C2 avgA avgB avgC
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ZO 28.8 69.3 41.4 27.4 10.3 89.0 49.0 34.4 49.7
2 UE 78.8 64.1 36.9 81.5 43.5 91.4 71.4 59.2 67.5
3 HS 40.9 99.4 15.2 44.9 98.5 60.9 70.2 30.0 79.7
4 JR 88.3 65.6 13.9 81.0 89.3 41.1 76.9 47.4 65.2
5 JL 94.0 70.9 23.3 81.2 88.6 14.7 82.4 52.3 51.7
6 BJ 4.56 54.4 46.6 79.4 17.5 93.5 29.5 63.0 55.5
7 VL 52.8 59.4 26.6 44.0 13.1 30.1 56.1 35.3 21.6
8 TN 89.2 28.9 85.8 75.4 65.3 6.07 59.1 80.6 35.7
9 QN 55.1 14.7 4.58 62.9 34.4 94.8 34.9 33.8 64.6
10 VC 45.7 96.3 44.2 71.0 65.7 72.1 71.0 57.6 68.9
# ... with 11 more rows
I am using R programming language. Suppose I have the following data ("my_data"):
student first_run second_run third_run fourth_run fifth_run sixth_run seventh_run eight_run ninth_run tenth_run
1 student1 19.70847 21.79771 16.49083 19.51691 13.97987 14.60733 13.89703 15.24651 20.75679 18.44020
2 student2 11.22369 15.36253 16.90215 20.20724 15.90227 15.14539 13.74945 18.30090 19.55124 17.24132
3 student3 15.93649 17.03599 14.20214 13.17548 14.70327 15.49697 13.08945 19.94142 22.41674 17.37958
4 student4 16.18733 15.13197 14.79481 16.75177 14.51287 17.71816 13.45054 14.25553 19.89091 18.88981
5 student5 18.71084 18.85453 17.15864 19.38880 15.68862 18.39169 15.26428 16.04526 18.92532 16.62409
6 student6 19.75246 12.74605 18.52214 17.92626 14.48501 17.20780 13.10512 12.46502 20.68583 15.87711
7 student7 14.75144 23.82376 18.51366 20.77424 14.22155 16.08186 12.95981 12.67820 20.12166 15.66006
8 student8 17.06516 15.63075 13.72026 15.02068 14.21098 15.99414 14.64818 16.15603 21.74607 17.07382
9 student9 20.27611 12.44592 12.26502 15.13456 14.61552 18.72192 15.11129 17.60746 18.83831 17.55257
10 student10 17.70736 16.21620 14.10861 17.20014 16.59376 19.50027 13.05073 15.80002 18.09781 18.34313
I want to add 2 columns to this data:
my_mean : the mean of each row
my_median: the median of each row
I tried the following code in R:
# NOTE(review): apply() is given the whole data frame, so the `student`
# column is part of every row that median() sees.
my_data$median = apply(my_data, 1, median, na.rm=T)
# NOTE(review): by this point my_data also contains the `median` column added
# on the previous line, so it is included in each row passed to mean().
my_data$mean = apply(my_data, 1, mean, na.rm=T)
But I don't think this code is correct. For instance, when using this code, the median of the second row of data is returned as "16.90215"
But when I manually take the median of this row:
median(11.22369 , 15.36253 , 16.90215 , 20.20724, 15.90227 , 15.14539 , 13.74945 , 18.30090 , 19.55124 , 17.24132)
I get an answer of
11.22
Can someone please show me what I am doing wrong?
Thanks
The manual calculation is incorrect: the first argument of median is 'x', which can be a vector. The second argument is na.rm, followed by the variadic arguments .... So, when we write median(11.22369, 15.36253, ...), 'x' is taken as 11.22369 alone and that is the value returned. Instead, the values should be combined into a single vector with c:
median(c(11.22369 , 15.36253 , 16.90215 , 20.20724, 15.90227 , 15.14539 , 13.74945 , 18.30090 , 19.55124 , 17.24132))
[1] 16.40221
Also, based on the OP's data, the first column should be dropped which is character or factor
apply(my_data[-1], 1, median, na.rm=TRUE)
1 2 3 4 5 6 7 8 9 10
17.46551 16.40221 15.71673 15.65965 17.77517 16.54246 15.87096 15.81245 16.34356 16.89695
The second row is used in the manual calculation
library(dplyr)
# Row-wise median and mean over the numeric columns.
df %>%
  rowwise() %>%
  mutate(
    median = median(c_across(where(is.numeric))),
    # `median` has just been added and is itself numeric, so it is excluded
    # explicitly to keep it from being averaged in with the original values.
    mean = mean(c_across(where(is.numeric) & !median))
  ) %>%
  # rowwise() leaves a row-grouped data frame; drop that grouping so later
  # verbs operate on whole columns again.
  ungroup()
c_across and rowwise were created for this type of situation. Most verbs work column-wise. To change this behavior pipe to rowwise first.
c_across will then combine all values in a row that are numeric (hence where(is.numeric)) into a numeric vector, and then mean or median can be applied.
Note: You will likely want to pipe the output to ungroup since rowwise creates a rowwise grouped data frame.
Here is an alternative using pmap along with passing all the arguments simultaneously thus using ellipsis i.e. .... The output is needed to be unnested with unnest_wider from tidyr:
library(tidyr)
library(dplyr)
library(purrr)
# For each row, collect the numeric columns into one vector and compute its
# median and mean; the resulting list column is then spread into separate
# `median` and `avg` columns.
df %>%
  mutate(
    res = pmap(
      across(where(is.numeric)),
      function(...) {
        vals <- c(...)
        list(median = median(vals), avg = mean(vals))
      }
    )
  ) %>%
  unnest_wider(res)
output:
student first_run second_run third_run fourth_run fifth_run sixth_run seventh_run eight_run ninth_run tenth_run median avg
<chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 student1 19.7 21.8 16.5 19.5 14.0 14.6 13.9 15.2 20.8 18.4 17.5 17.4
2 student2 11.2 15.4 16.9 20.2 15.9 15.1 13.7 18.3 19.6 17.2 16.4 16.4
3 student3 15.9 17.0 14.2 13.2 14.7 15.5 13.1 19.9 22.4 17.4 15.7 16.3
4 student4 16.2 15.1 14.8 16.8 14.5 17.7 13.5 14.3 19.9 18.9 15.7 16.2
5 student5 18.7 18.9 17.2 19.4 15.7 18.4 15.3 16.0 18.9 16.6 17.8 17.5
6 student6 19.8 12.7 18.5 17.9 14.5 17.2 13.1 12.5 20.7 15.9 16.5 16.3
7 student7 14.8 23.8 18.5 20.8 14.2 16.1 13.0 12.7 20.1 15.7 15.9 17.0
8 student8 17.1 15.6 13.7 15.0 14.2 16.0 14.6 16.2 21.7 17.1 15.8 16.1
9 student9 20.3 12.4 12.3 15.1 14.6 18.7 15.1 17.6 18.8 17.6 16.3 16.3
10 student10 17.7 16.2 14.1 17.2 16.6 19.5 13.1 15.8 18.1 18.3 16.9 16.7
You could definitely benefit from the speed of matrixStats library.
matrixStats::rowMedians(as.matrix(d[-1]))
# [1] 17.46551 16.40221 15.71673 15.65965 17.77517 16.54246 15.87096 15.81245 16.34356 16.89695
matrixStats::rowMeans2(as.matrix(d[-1]))
# [1] 17.44417 16.35862 16.33775 16.15837 17.50521 16.27728 16.95862 16.12661 16.25687 16.66180
# Sanity checks: the matrixStats results must agree with the apply()-based
# equivalents. TRUE is spelled out because `T` is an ordinary variable that
# can be reassigned.
stopifnot(all.equal(
  matrixStats::rowMedians(as.matrix(d[-1])),
  as.numeric(apply(d[-1], 1, median, na.rm = TRUE))
))
stopifnot(all.equal(
  matrixStats::rowMeans2(as.matrix(d[-1])),
  as.numeric(apply(d[-1], 1, mean, na.rm = TRUE))
))
Data:
d <- structure(list(student = c("student1", "student2", "student3",
"student4", "student5", "student6", "student7", "student8", "student9",
"student10"), first_run = c(19.70847, 11.22369, 15.93649, 16.18733,
18.71084, 19.75246, 14.75144, 17.06516, 20.27611, 17.70736),
second_run = c(21.79771, 15.36253, 17.03599, 15.13197, 18.85453,
12.74605, 23.82376, 15.63075, 12.44592, 16.2162), third_run = c(16.49083,
16.90215, 14.20214, 14.79481, 17.15864, 18.52214, 18.51366,
13.72026, 12.26502, 14.10861), fourth_run = c(19.51691, 20.20724,
13.17548, 16.75177, 19.3888, 17.92626, 20.77424, 15.02068,
15.13456, 17.20014), fifth_run = c(13.97987, 15.90227, 14.70327,
14.51287, 15.68862, 14.48501, 14.22155, 14.21098, 14.61552,
16.59376), sixth_run = c(14.60733, 15.14539, 15.49697, 17.71816,
18.39169, 17.2078, 16.08186, 15.99414, 18.72192, 19.50027
), seventh_run = c(13.89703, 13.74945, 13.08945, 13.45054,
15.26428, 13.10512, 12.95981, 14.64818, 15.11129, 13.05073
), eight_run = c(15.24651, 18.3009, 19.94142, 14.25553, 16.04526,
12.46502, 12.6782, 16.15603, 17.60746, 15.80002), ninth_run = c(20.75679,
19.55124, 22.41674, 19.89091, 18.92532, 20.68583, 20.12166,
21.74607, 18.83831, 18.09781), tenth_run = c(18.4402, 17.24132,
17.37958, 18.88981, 16.62409, 15.87711, 15.66006, 17.07382,
17.55257, 18.34313)), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10"))
I am trying to replace values from two columns with values from another two columns. This is a rather basic question and has been asked by python users, however I am using R.
I have a df that looks like this (only on a much larger scale [>20,000]):
squirrel_id locx locy dist
6391 17.5 10.0 50.0
6391 17.5 10.0 20.0
6391 17.5 10.0 15.5
8443 20.5 1.0 800
6025 -5.0 -0.5 0.0
I need to, for 63 squirrels, replace their locx and locy values.
I normally replace values with the following code:
library(dplyr)
# Overwrite the coordinates of selected squirrels one id at a time.
# The replacement values are numeric literals: quoting them (e.g. "12.5")
# would make ifelse() return character vectors and silently turn locx / locy
# into character columns.
df <- df %>%
  mutate(
    locx = ifelse(squirrel_id == "6391", 12.5, locx),
    locy = ifelse(squirrel_id == "6391", 15.5, locy),
    locx = ifelse(squirrel_id == "8443", 2.5, locx),
    locy = ifelse(squirrel_id == "8443", 80, locy)
  ) # etc for 63 squirrels
Which would give me:
squirrel_id locx locy dist
6391 12.5 15.5 50.0
6391 12.5 15.5 20.0
6391 12.5 15.5 15.5
8443 2.5 80.0 800
6025 -5.0 -0.5 0.0
But this is creating an extra 126 lines of code and I suspect there is a simpler way to do this.
I do have all the new locx and locy values in a separate df, but I do not know how to join the two dataframes by squirrel_id without it messing up the data.
df with the values that need to replace the ones in the old df:
squirrel_id new_locx new_locy
6391 12.5 15.5
8443 2.5 80
6025 -55.0 0.0
How can I do this more efficiently?
You can left_join the two data frames and then use an if_else statement to get the right locx and locy. Try out:
library(dplyr)
# Bring the replacement coordinates alongside each row; ids missing from df2
# get NA in new_locx / new_locy.
df %>%
  left_join(df2, by = "squirrel_id") %>%
  mutate(
    # Use the joined value when there is one, otherwise keep the original.
    # As suggested by @echasnovski, coalesce(new_locx, locx) and
    # coalesce(new_locy, locy) are an equivalent spelling.
    locx = if_else(!is.na(new_locx), new_locx, locx),
    locy = if_else(!is.na(new_locy), new_locy, locy)
  ) %>%
  # Drop the helper columns added by the join.
  select(-c(new_locx, new_locy))
# output
squirrel_id locx locy dist
1 6391 12.5 15.5 50.0
2 6391 12.5 15.5 20.0
3 6391 12.5 15.5 15.5
4 8443 2.5 80.0 800.0
5 6025 -55.0 0.0 0.0
6 5000 18.5 12.5 10.0 # squirrel_id 5000 was created as an example of an id
# present in df but not in df2
Data
df <- structure(list(squirrel_id = c(6391L, 6391L, 6391L, 8443L, 6025L,
5000L), locx = c(17.5, 17.5, 17.5, 20.5, -5, 18.5), locy = c(10,
10, 10, 1, -0.5, 12.5), dist = c(50, 20, 15.5, 800, 0, 10)), class = "data.frame", row.names = c(NA,
-6L))
df2 <- structure(list(squirrel_id = c(6391L, 8443L, 6025L), new_locx = c(12.5,
2.5, -55), new_locy = c(15.5, 80, 0)), class = "data.frame", row.names = c(NA,
-3L))
Using @ANG's data, here's a data.table solution. It joins and updates the original df by reference.
library(data.table)
# Convert both tables to data.table in place.
setDT(df)
setDT(df2)
# Update join: for every df row whose squirrel_id appears in df2, overwrite
# locx / locy by reference with df2's values (the i. prefix marks columns
# taken from df2). Rows without a match are left untouched.
df[df2, on = "squirrel_id", c("locx", "locy") := .(i.new_locx, i.new_locy)]
df
squirrel_id locx locy dist
1: 6391 12.5 15.5 50.0
2: 6391 12.5 15.5 20.0
3: 6391 12.5 15.5 15.5
4: 8443 2.5 80.0 800.0
5: 6025 -55.0 0.0 0.0
6: 5000 18.5 12.5 10.0
See also:
how to use merge() to update a table in R
Replace a subset of a data frame with dplyr join operations
R: Updating a data frame with another data frame
I have a dataframe with 6 features like this:
X1 X2 X3 X4 X5 X6
Modern Dog 9.7 21.0 19.4 7.7 32.0 36.5
Golden Jackal 8.1 16.7 18.3 7.0 30.3 32.9
Chinese Wolf 13.5 27.3 26.8 10.6 41.9 48.1
Indian Wolf 11.5 24.3 24.5 9.3 40.0 44.6
Cuon 10.7 23.5 21.4 8.5 28.8 37.6
Dingo 9.6 22.6 21.1 8.3 34.4 43.1
I want to draw a line plot like this:
I'm trying this:
# Draw X1 as the first series (red, points joined by lines).
plot(df$X1, type = "o",col = "red", xlab = "Month", ylab = "Rain fall")
# NOTE(review): c() joins X2..X6 end to end into one long vector, so this
# adds a single blue series rather than one line per column.
lines(c(df$X2, df$X3, df$X4, df$X5, df$X6), type = "o", col = "blue")
But it's only plotting a single variable. I'm sorry if this question is annoying, i'm totally new to R and i just don't know how to get this done. I would really appreciate any help on this.
Thanks in advance
The easiest way would be to convert your dataset to a long format (e.g. by using the gather function in the tidyr package), and then plotting using the group aesthetic in ggplot.
I recreate your dataset, assuming your group variable is named "Group":
# Recreate the example data, one string per input line; the first line holds
# the column names and Group is kept as character.
df <- read.table(
  text = c(
    "Group X1 X2 X3 X4 X5 X6",
    "Modern_Dog 9.7 21.0 19.4 7.7 32.0 36.5",
    "Golden_Jackal 8.1 16.7 18.3 7.0 30.3 32.9",
    "Chinese_Wolf 13.5 27.3 26.8 10.6 41.9 48.1",
    "Indian_Wolf 11.5 24.3 24.5 9.3 40.0 44.6",
    "Cuon 10.7 23.5 21.4 8.5 28.8 37.6",
    "Dingo 9.6 22.6 21.1 8.3 34.4 43.1"
  ),
  header = TRUE,
  stringsAsFactors = FALSE
)
Then convert the dataset to long format and plot:
library(tidyr)
library(ggplot2)
# Stack columns X1..X6 into Month / Rainfall pairs, keeping Group alongside.
df_long <- gather(df, key = "Month", value = "Rainfall", X1:X6)

# One line with point markers per Group; the point shape distinguishes the
# groups and the legend sits below the plot.
ggplot(
  data = df_long,
  mapping = aes(x = Month, y = Rainfall, group = Group, shape = Group)
) +
  geom_line() +
  geom_point() +
  theme(legend.position = "bottom")
See also the answers here: Group data and plot multiple lines.
I am an R beginner user and I face the following problem. I have the following data frame:
distance speed
1 61.0 36.4
2 51.4 35.3
3 42.2 34.2
4 33.4 32.8
5 24.9 31.3
6 17.5 28.4
7 11.5 24.1
8 7.1 19.4
9 3.3 16.9
10 0.5 15.5
11 4.4 15.1
12 8.5 15.5
13 13.1 17.3
14 18.8 20.5
15 25.7 24.1
16 33.3 26.3
17 41.0 27.0
18 48.7 27.7
19 56.6 28.4
20 64.8 29.2
21 73.6 31.7
22 83.3 34.2
23 93.4 35.3
The column distance represents the distance of a moving object from a specific point, and the column speed is the object's speed. As you can see, the object first gets closer to the point and then moves away from it. I am trying to plot its speed profile. I tried the following code, but it didn't give me the plot I want (because I want to show how its speed changes as the moving object approaches and then passes the reference point):
# speedprofile is the data frame
ggplot(data = speedprofile, mapping = aes(x = distance, y = speed)) +
  geom_line(colour = "red") +
  geom_smooth() +
  # the vline is the reference line
  geom_vline(xintercept = 0)
The plot is the following:
Then, I tried to set the first 10 distances as negative manually which are prior to zero (0). So I get a plot closer to that I want:
But there is a problem. The distance can't be defined as negative.
To sum up, the expected plot is the following (and I am sorry for the quality).
Do you have any ideas on how to solve this?
Thank you in advance!
You can do something like this to auto-compute the change point (to know when the distance should be negative) and then set the axis labels to be positive.
Your data (in case anyone needs it to answer):
# The question's data, one string per input line (header first), assigned
# with a leading `<-` so the target name is visible up front.
speed_profile <- read.table(
  text = c(
    "distance speed",
    "61.0 36.4",
    "51.4 35.3",
    "42.2 34.2",
    "33.4 32.8",
    "24.9 31.3",
    "17.5 28.4",
    "11.5 24.1",
    "7.1 19.4",
    "3.3 16.9",
    "0.5 15.5",
    "4.4 15.1",
    "8.5 15.5",
    "13.1 17.3",
    "18.8 20.5",
    "25.7 24.1",
    "33.3 26.3",
    "41.0 27.0",
    "48.7 27.7",
    "56.6 28.4",
    "64.8 29.2",
    "73.6 31.7",
    "83.3 34.2",
    "93.4 35.3"
  ),
  stringsAsFactors = FALSE,
  header = TRUE
)
Now, compute the "real" distance (negative for approaching, positive for receding):
# Signed distance: each row takes the sign of the change from the previous
# row (negative while the distance is shrinking, positive once it grows);
# the first row has no predecessor and is fixed at -1.
speed_profile$real_distance <- with(
  speed_profile,
  c(-1, sign(diff(distance))) * distance
)
Now, compute the X axis breaks ahead of time:
# Axis breaks covering the full signed-distance range.
breaks <- scales::pretty_breaks(n = 10)(range(speed_profile$real_distance))

# Plot against the signed distance, but label the axis with absolute values
# so every label reads as a positive distance.
ggplot(data = speed_profile, mapping = aes(x = real_distance, y = speed)) +
  geom_smooth(linetype = "dashed") +
  geom_line(colour = "#cb181d", size = 1) +
  scale_x_continuous(name = "distance", breaks = breaks, labels = abs(breaks))
Provided fonts are working well on your system you could even do:
# Positive labels, with an arrow on a second line under every non-zero break.
labels <- abs(breaks)
labels[breaks != 0] <- sprintf("%s\n→", labels[breaks != 0])

ggplot(data = speed_profile, mapping = aes(x = real_distance, y = speed)) +
  geom_smooth(linetype = "dashed") +
  geom_line(colour = "#cb181d", size = 1) +
  scale_x_continuous(name = "distance", breaks = breaks, labels = labels)