I am not expert in R and trying my best. I appreciate to have some assistance.
I have data as follows:
POPs: num[1:3000] 3,4,5,6,7,....
PM1: num[1:3000] 3,4,5,6,7,....
PM2: num[1:3000] 3,4,5,6,7,....
PM3: num[1:3000] 3,4,5,6,7,....
PM4: num[1:3000] 3,4,5,6,7,....
.. etc
I want to do regression analysis for each PMs (PM1, PM2, PM3, ..) and put them into one figure (as in the picture) . Also, adding into them the R2 , RMSE, MAE and the regression abline and 1:1 line.
The x is POPs and the y is PM1 and PM2 and PM3 ... etc.
I can do for each PMs (y-axis) individually in the code (aes(x=POPs, y=PM1)). However, it takes lot of figures and better to combine them in one figure. How I can add all the PMs into a single (y) in the code. I think some advance in looping which I am not into this level unfortunately.
ggplot(data =Plot,aes(x=POPs, y=PM1)) +
stat_smooth(method = "lm", se=FALSE, color="black", formula = y ~ x) +
geom_point(size=0.3) +
stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
r.accuracy = 0.01,
label.x = 0, label.y = 375, size = 4) +
stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
label.x = 0, label.y = 400, size = 4)
Based on Behnam Hedayat Answer below with some coding modification from my side and from Allan Cameron .. I can say now it worked 100% perfectly
# change format of df to longer
Plot %>% pivot_longer(cols=starts_with("PEM"), names_to = "PEMs", values_to = "PEMs_value") -> df2
df2 %>% ggplot(aes(POPs, PEMs_value)) +
geom_point(color = "#fe4300", size=0.3) +
geom_abline()+
geom_smooth(method='lm', se=FALSE, formula = y ~ x, color = "#1b14fd")+
labs(y = expression(bold(PLF~PM["2.5"]~("u"*g/m^"3"))), x = expression(bold(POPS~PM["2.5"]~("u"*g/m^"3")))) +
stat_cor(aes(label = paste(..rr.label..)), # adds R^2 value
r.accuracy = 0.01,
label.x = 0, label.y = 110, size = 3) +
stat_regline_equation(aes(label = ..eq.label..), # adds equation to linear regression
label.x = 0, label.y = 100, size = 3) +
facet_wrap(~PEMs, ncol=5)
You can use facet_wrap function of ggplot2, but first you have to reshape your dataset to longer format by pivot_longer() function of tidyverse.
To add regression metrics on plots, you can create a separate data frame containing metrics of each group of PMs variable, then use this data frame in geom_text function with x and y column created for x and y position respectively.
Here I also used caret package functions (R2, RMSE, MAE) to calculate regression metrics.
# caret for calculating R2, MAE and RMSE
# tidyverse to reshape data to longer format
libs <- c("ggplot2", "tidyverse","caret")
suppressMessages(invisible(sapply(libs, library, character.only=T)))
# sample dataset
df <- data.frame(POPs = sample(1:100, 100),
PM1 = sample(1:100, 100),
PM2 = sample(1:100, 100),
PM3 = sample(1:100, 100),
PM4 = sample(1:100,100),
PM5 = sample(1:100,100),
PM6 = sample(1:100,100),
PM7 = sample(1:100,100),
PM8 = sample(1:100,100))
# change format of df to longer
df %>% pivot_longer(cols=starts_with("PM"),
names_to = "PMs", values_to = "PMs_value") -> df2
head(df2, 10)
#> # A tibble: 10 × 3
#> POPs PMs PMs_value
#> <int> <chr> <int>
#> 1 5 PM1 88
#> 2 5 PM2 21
#> 3 5 PM3 51
#> 4 5 PM4 40
#> 5 5 PM5 40
#> 6 5 PM6 2
#> 7 5 PM7 30
#> 8 5 PM8 70
#> 9 52 PM1 13
#> 10 52 PM2 90
# create a dataframe of summary of regression metrics
summary_df <- df2 %>%
group_by(PMs) %>%
summarise(R2 = R2(PMs_value, POPs),
RMSE=RMSE(PMs_value, POPs),
MAE=MAE(PMs_value, POPs)) %>%
mutate_if(is.numeric, round,digits=2) %>%
pivot_longer(cols = -PMs, names_to = "Metric", values_to = "Metric_value") %>%
# add x column for x position of text and y column for y position
mutate(x = rep(30, times =nrow(.)),
y = rep(c(90,80,70), times=nrow(.)/3)) %>%
unite("Metric", Metric:Metric_value, sep = " = ")
summary_df
#> # A tibble: 24 × 4
#> PMs Metric x y
#> <chr> <chr> <dbl> <dbl>
#> 1 PM1 R2 = 0.03 30 90
#> 2 PM1 RMSE = 43.95 30 80
#> 3 PM1 MAE = 36.72 30 70
#> 4 PM2 R2 = 0.02 30 90
#> 5 PM2 RMSE = 37.83 30 80
#> 6 PM2 MAE = 29.76 30 70
#> 7 PM3 R2 = 0.02 30 90
#> 8 PM3 RMSE = 43.69 30 80
#> 9 PM3 MAE = 36.88 30 70
#> 10 PM4 R2 = 0.01 30 90
#> # … with 14 more rows
df2 %>% ggplot(aes(POPs, PMs_value)) +
geom_point(size=0.3) +geom_abline()+
geom_smooth(method='lm', se=FALSE)+
facet_wrap(~PMs, ncol=4)+
geom_text(data = summary_df,
mapping = aes(x = x, y = y, label = Metric))
#> `geom_smooth()` using formula = 'y ~ x'
Created on 2023-02-12 with reprex v2.0.2
You first need to get your data into the correct format - that is, to pivot it into long format, such that the PM column names are in a single column, and the values are in their own column too. Then you can use the names column as a faceting variable in ggplot:
library(tidyverse)
Plot %>%
pivot_longer(-POPs) %>%
ggplot(aes(POPs, value)) +
geom_abline() +
geom_point(color = "#fe4300", alpha = 0.5) +
geom_smooth(method = "lm", se = FALSE, formula = y ~ x, color = "#fd1b14") +
coord_cartesian(xlim = c(0, 100), ylim = c(0, 100)) +
facet_wrap(.~name, nrow = 5, scales = "free") +
theme_classic() +
theme(strip.background = element_blank(),
panel.border = element_rect(fill = NA))
Data used
Obviously we don't have your data (unless we were to transcribe the picture of your data or include the output of dput(Plot) in your question, so I have constructed a dummy data set with the same names and structure as your own:
set.seed(1)
Plot <- setNames(as.data.frame(cbind(1:115,
replicate(17, sample(100, 115, TRUE)))),
c("POPs", paste0("PM", 1:17)))
str(Plot)
#> 'data.frame': 115 obs. of 18 variables:
#> $ POPs: int 1 2 3 4 5 6 7 8 9 10 ...
#> $ PM1 : int 68 39 1 34 87 43 14 82 59 51 ...
#> $ PM2 : int 1 29 78 22 70 28 37 61 46 67 ...
#> $ PM3 : int 99 77 57 71 25 31 37 92 28 62 ...
#> $ PM4 : int 60 65 64 53 5 44 35 23 29 35 ...
#> $ PM5 : int 48 7 27 43 9 8 86 45 6 27 ...
#> $ PM6 : int 65 2 9 49 69 91 93 66 31 78 ...
#> $ PM7 : int 50 89 8 54 31 69 12 30 9 66 ...
#> $ PM8 : int 21 7 99 42 33 94 5 5 4 11 ...
#> $ PM9 : int 22 56 58 55 99 96 5 52 47 55 ...
#> $ PM10: int 84 84 55 98 73 47 13 5 63 3 ...
#> $ PM11: int 41 83 91 7 78 32 49 14 92 84 ...
#> $ PM12: int 16 39 37 15 24 97 56 62 69 100 ...
#> $ PM13: int 94 69 53 37 70 57 50 51 18 29 ...
#> $ PM14: int 79 40 11 67 25 54 21 34 59 46 ...
#> $ PM15: int 5 89 74 34 47 85 29 24 46 98 ...
#> $ PM16: int 44 22 57 63 7 95 46 66 4 92 ...
#> $ PM17: int 38 57 48 75 8 28 21 2 84 95 ...
Created on 2023-02-11 with reprex v2.0.2
Related
This question already has an answer here:
Annotation on only the first facet of ggplot in R?
(1 answer)
Closed last month.
I want to add a few texts in one facet out of four facets in my ggplot.
I am using annotate function to add a text but it generates the text at a given location (x,y) in every facet. Because the data variables have different ranges of y in each facet, the texts are not coming at a desired location (x,y).
Please let me know what should be done. Thanks.
library(dplyr)
library(tidyr)
library(ggplot2)
df%>%
select(Date, Ca, Na, K, Mg)%>%
gather(var,value,-Date)%>%
ggplot(aes(as.Date(Date), value))+
geom_point()+
theme_bw()+
facet_wrap(~var,scales = 'free_y',ncol = 1)+
ylab(" (ppm) (ppm)
(ppm) (ppm)")+
facet_wrap(~var,scales = 'free_y',ncol = 1, strip.position = "right")+
geom_vline(aes(xintercept = as.Date("2021-04-28")), col = "red")+
geom_vline(aes(xintercept = as.Date("2021-04-28")), col = "red")+
geom_vline(aes(xintercept = as.Date("2021-04-29")), col = "red")+
theme(axis.title = element_text(face="bold"))+
theme(axis.text = element_text(face="bold"))+
xlab('Date')+
theme(axis.title.x = element_text(margin = margin(t = 10)))+
theme(axis.title.y = element_text(margin = margin(r = 10)))+
annotate("text", label = "E1", x = as.Date("2021-04-28"), y = 2.8)
This is the code I am using for the desired output. I want to name all the xintercept lines which is E1, E2, E3 (from left to right) on the top of xaxis i.e. above the first facet of variable Ca in the data. Any suggestions?
Here is a part of my data:
df <- read.table(text = "
Date Ca K Mg Na
2/18/2021 1 25 21 19
2/22/2021 2 26 22 20
2/26/2021 3 27 23 21
3/4/2021 4 28 5 22
3/6/2021 5 29 6 8
3/10/2021 6 30 7 9
3/13/2021 7 31 8 10
3/17/2021 8 32 9 11
3/20/2021 9 33 10 12
3/23/2021 10 34 11 13
3/27/2021 11 35 12 14
3/31/2021 12 36 13 15
4/3/2021 13 37 14 16
4/7/2021 14 38 15 17
4/10/2021 15 39 16 18
4/13/2021 16 40 17 19
4/16/2021 17 41 18 20
4/19/2021 8 42 19 21
4/22/2021 9 43 20 22
4/26/2021 0 44 21 23
4/28/2021 1 45 22 24
4/28/2021 2 46 23 25
4/28/2021 3 47 24 26
4/28/2021 5 48 25 27
4/29/2021 6 49 26 28
5/4/2021 7 50 27 29
5/7/2021 8 51 28 30
5/8/2021 9 1 29 31
5/10/2021 1 2 30 32
5/29/2021 3 17 43 45
5/31/2021 6 18 44 46
6/1/2021 4 19 45 47
6/2/2021 8 20 46 48
6/3/2021 2 21 47 49
6/7/2021 3 22 48 50
6/10/2021 5 23 49 51
6/14/2021 3 5 50 1
6/18/2021 1 6 51 2
", header = TRUE)
Prepare the data before plotting, make a separate data for text annotation:
dfplot <- df %>%
select(Date, Ca, Na, K, Mg) %>%
#convert to date class before plotting
mutate(Date = as.Date(Date, "%m/%d/%Y")) %>%
#using pivot instead of gather. gather is superseded.
#gather(var, value, -Date)
pivot_longer(cols = 2:5, names_to = "grp", values_to = "ppm")
dftext <- data.frame(grp = "Ca", # we want text to show up only on "Ca" facet.
ppm = max(dfplot[ dfplot$grp == "Ca", "ppm" ]),
Date = as.Date(c("2021-04-27", "2021-04-28", "2021-04-29")),
label = c("E1", "E2", "E3"))
After cleaning up your code, we can use geom_text with dftext:
ggplot(dfplot, aes(Date, ppm)) +
geom_point() +
facet_wrap(~grp, scales = 'free_y',ncol = 1, strip.position = "right") +
geom_vline(xintercept = dftext$Date, col = "red") +
geom_text(aes(x = Date, y = ppm, label = label), data = dftext, nudge_y = -2)
Try using ggrepel library to avoid label overlap, replace geom_text with one of these:
#geom_text_repel(aes(x = Date, y = ppm, label = label), data = dftext)
#geom_label_repel(aes(x = Date, y = ppm, label = label), data = dftext)
After cleaning up the code and seeing the plot, I think this post is a duplicate of Annotation on only the first facet of ggplot in R? .
I was trying to plot a time series composed of weekly averanges. Here is the plot that I have obtained:
[weekly averages A]
[1]: https://i.stack.imgur.com/XMGMs.png
As you can see the time serie do not cover all the years completely, so, when I have got no data ggplot links two subsequent years. I think I have to group the data in some ways, but I do not understand how. Here is the code:
df4 <- data.frame(df$Date, df$A)
colnames(df4)<- c("date","A")
df4$date <- as.Date(df4$date,"%Y/%m/%d")
df4$week_day <- as.numeric(format(df4$date, format='%w'))
df4$endofweek <- df4$date + (6 - df4$week_day)
week_aveA <- df4 %>%
group_by(endofweek) %>%
summarise_all(list(mean=mean), na.rm=TRUE) %>%
na.omit()
g1 = ggplot() +
geom_step(data=week_aveA, aes(group = 1, x = (endofweek), y = (A_mean)), colour="gray25") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2500)) +
scale_x_date(breaks="year", labels=date_format("%Y")) +
labs(y = expression(A~ ~index),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
Here an extraction (the former three years) of the dataset:
endofweek date_mean A_mean week_day_mean
1 20/03/2010 17/03/2010 939,2533437 3
2 27/03/2010 24/03/2010 867,3620121 3
3 03/04/2010 31/03/2010 1426,791222 3
4 10/04/2010 07/04/2010 358,5698314 3
5 17/04/2010 13/04/2010 301,1815352 2
6 24/04/2010 21/04/2010 273,4922895 3,333333333
7 01/05/2010 28/04/2010 128,5989633 3
8 08/05/2010 05/05/2010 447,8858881 3
9 15/05/2010 12/05/2010 387,9828891 3
10 22/05/2010 19/05/2010 138,0770986 3
11 29/05/2010 26/05/2010 370,2147933 3
12 05/06/2010 02/06/2010 139,0451791 3
13 12/06/2010 09/06/2010 217,1286356 3
14 19/06/2010 16/06/2010 72,36972411 3
15 26/06/2010 23/06/2010 282,2911902 3
16 03/07/2010 30/06/2010 324,3215936 3
17 10/07/2010 07/07/2010 210,568691 3
18 17/07/2010 14/07/2010 91,76930829 3
19 24/07/2010 21/07/2010 36,4211218 3,666666667
20 31/07/2010 28/07/2010 37,53981103 3
21 07/08/2010 04/08/2010 91,33282642 3
22 14/08/2010 11/08/2010 28,38587352 3
23 21/08/2010 18/08/2010 58,72836406 3
24 28/08/2010 24/08/2010 102,1050612 2,5
25 04/09/2010 02/09/2010 13,45357513 4,5
26 11/09/2010 08/09/2010 51,24017212 3
27 18/09/2010 15/09/2010 159,7395663 3
28 25/09/2010 21/09/2010 62,71136678 2
29 02/04/2011 31/03/2011 1484,661164 4
30 09/04/2011 06/04/2011 656,1827964 3
31 16/04/2011 13/04/2011 315,3097313 3
32 23/04/2011 20/04/2011 293,2904042 3
33 30/04/2011 26/04/2011 255,7517519 2,4
34 07/05/2011 04/05/2011 360,7035289 3
35 14/05/2011 11/05/2011 342,0902797 3
36 21/05/2011 18/05/2011 386,1380421 3
37 28/05/2011 24/05/2011 418,9624807 2,833333333
38 04/06/2011 01/06/2011 112,7568 3
39 11/06/2011 08/06/2011 85,17855619 3,2
40 18/06/2011 15/06/2011 351,8714638 3
41 25/06/2011 22/06/2011 139,7936898 3
42 02/07/2011 29/06/2011 68,57716191 3,6
43 09/07/2011 06/07/2011 62,31823822 3
44 16/07/2011 13/07/2011 80,7328917 3
45 23/07/2011 20/07/2011 114,9475331 3
46 30/07/2011 27/07/2011 90,13118758 3
47 06/08/2011 03/08/2011 43,29372258 3
48 13/08/2011 10/08/2011 49,39935204 3
49 20/08/2011 16/08/2011 133,746822 2
50 03/09/2011 31/08/2011 76,03928942 3
51 10/09/2011 05/09/2011 27,99834637 1
52 24/03/2012 23/03/2012 366,2625797 5,5
53 31/03/2012 28/03/2012 878,8535513 3
54 07/04/2012 04/04/2012 1029,909052 3
55 14/04/2012 11/04/2012 892,9163416 3
56 21/04/2012 18/04/2012 534,8278693 3
57 28/04/2012 25/04/2012 255,1177585 3
58 05/05/2012 02/05/2012 564,5280546 3
59 12/05/2012 09/05/2012 767,5018168 3
60 19/05/2012 16/05/2012 516,2680148 3
61 26/05/2012 23/05/2012 241,2113073 3
62 02/06/2012 30/05/2012 863,6123397 3
63 09/06/2012 06/06/2012 201,2019288 3
64 16/06/2012 13/06/2012 222,9955486 3
65 23/06/2012 20/06/2012 91,14166632 3
66 30/06/2012 27/06/2012 26,93145693 3
67 07/07/2012 04/07/2012 67,32183278 3
68 14/07/2012 11/07/2012 46,25297513 3
69 21/07/2012 18/07/2012 81,34359825 3,666666667
70 28/07/2012 25/07/2012 49,59130851 3
71 04/08/2012 01/08/2012 44,13438077 3
72 11/08/2012 08/08/2012 30,15773151 3
73 18/08/2012 15/08/2012 57,47256772 3
74 25/08/2012 22/08/2012 31,9109555 3
75 01/09/2012 29/08/2012 52,71058484 3
76 08/09/2012 04/09/2012 24,52495229 2
77 06/04/2013 01/04/2013 1344,388042 1,5
78 13/04/2013 10/04/2013 1304,838687 3
79 20/04/2013 17/04/2013 892,620141 3
80 27/04/2013 24/04/2013 400,1720434 3
81 04/05/2013 01/05/2013 424,8473083 3
82 11/05/2013 08/05/2013 269,2380208 3
83 18/05/2013 15/05/2013 238,9993749 3
84 25/05/2013 22/05/2013 128,4096151 3
85 01/06/2013 29/05/2013 158,5576121 3
86 08/06/2013 05/06/2013 175,2036942 3
87 15/06/2013 12/06/2013 79,20250839 3
88 22/06/2013 19/06/2013 126,9065428 3
89 29/06/2013 26/06/2013 133,7480108 3
90 06/07/2013 03/07/2013 218,0092943 3
91 13/07/2013 10/07/2013 54,08460936 3
92 20/07/2013 17/07/2013 91,54285041 3
93 27/07/2013 24/07/2013 44,64567928 3
94 03/08/2013 31/07/2013 229,5067999 3
95 10/08/2013 07/08/2013 49,70729373 3
96 17/08/2013 14/08/2013 53,38618335 3
97 24/08/2013 21/08/2013 217,2800997 3
98 31/08/2013 28/08/2013 49,43590136 3
99 07/09/2013 04/09/2013 64,88783029 3
100 14/09/2013 11/09/2013 11,04300773 3
So at the end I have one mainly question: how can I eliminated the connection between the years? ... and an aesthetic question: how can I add minor ticks on the x_axis? At least one every 6 months, just to make the plot easy to read.
Thanks in advance for any suggestion!
Edit
This is the code I tried with the suggestion, maybe I mistype some part of it.
library(tidyverse)
library(dplyr)
library(lubridate)
df4 <- data.frame(df$Date, df$A)
colnames(df4)<- c("date","A")
df4$date <- as.Date(df4$date,"%Y/%m/%d")
df4$week_day <- as.numeric(format(df4$date, format='%w'))
df4$endofweek <- df4$date + (6 - df4$week_day)
week_aveA <- df4 %>%
group_by(endofweek) %>%
summarise_all(list(mean=mean), na.rm=TRUE) %>%
na.omit()
week_aveA$endofweek <- as.Date(week_aveA$endofweek,"%d/%m/%Y")
week_aveA$A_mean <- as.numeric(gsub(",", ".", week_aveA$A_mean))
week_aveA$week_day_mean <- as.numeric(gsub(",", ".", week_aveA$week_day_mean))
week_aveA$year <- format(week_aveA$endofweek, "%Y")
library(ggplot2)
library(methods)
library(scales)
mylabel <- function(x) {
ifelse(grepl("-07-01$", x), "", format(x, "%Y"))
}
ggplot() +
geom_step(data=week_aveA, aes(x = endofweek, y = A_mean, group = year), colour="gray25") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2500)) +
scale_x_date(breaks="6 month", labels = mylabel) +
labs(y = expression(A~ ~index),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
You have to group by year:
Add a variable with the year to your dataset
Map the year variable on the group aesthetic
For the ticks. Increase the number of the breaks. If you want only ticks but not labels you can use a custom function to get rid of unwanted labels, e.g. my approach below set the breaks to "6 month" but replaces the mid-year labels with an empty string:
week_aveA$endofweek <- as.Date(week_aveA$endofweek,"%d/%m/%Y")
week_aveA$A_mean <- as.numeric(gsub(",", ".", week_aveA$A_mean))
week_aveA$week_day_mean <- as.numeric(gsub(",", ".", week_aveA$week_day_mean))
week_aveA$year <- format(week_aveA$endofweek, "%Y")
library(ggplot2)
mylabel <- function(x) {
ifelse(grepl("-07-01$", x), "", format(x, "%Y"))
}
ggplot() +
geom_step(data=week_aveA, aes(x = endofweek, y = A_mean, group = year), colour="gray25") +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2500)) +
scale_x_date(breaks="6 month", labels = mylabel) +
labs(y = expression(A~ ~index),
x = NULL) +
theme(axis.text.x = element_text(size=10),
axis.title = element_text(size=10))
I am having trouble extracting pixel intensity values from images of microtiter plates in R. I have used EBImage to threshold and segment an image, but when I do this I lose the actual intensity values from the original image.
Starting with a .png image like this:
I need to identify each individual well and calculate the average intensity within each (they are leaf discs in the well plate). Thus I would want to have 81 values from this image.
Next, I need to extract those values in a matrix that I can use to perform operations from separate images of the same plate. So the segmentation needs to be re-usable so I can just read in the other images of this same plate and extract the respective well values. The images are all the exact same size, and the location of wells does not change. There are hundreds of images of this plate taken over several hours.
So far I've segmented and thresholded, but this causes loss of the original image intensities.
Here are the attributes of the original image posted above:
print(fo)
Image
colorMode : Color
storage.mode : double
dim : 696 520 3
frames.total : 3
frames.render: 1
imageData(object)[1:5,1:6,1]
[,1] [,2] [,3] [,4] [,5] [,6]
[1,] 0 0 0 0 0 0
[2,] 0 0 0 0 0 0
[3,] 0 0 0 0 0 0
[4,] 0 0 0 0 0 0
[5,] 0 0 0 0 0 0
###progress so far
library(tidyverse)
library("EBImage")
#read in image
fo <- readImage("image.png")
#crop excess
fo <- fo[99:589,79:437,1:3]
#adaptive thresholding
threshold <- thresh(fo,w=25,h=25,offset=0.01)
#use bwlabel to segment thresholded image
fo_lab <- bwlabel(threshold[,,2])
nmask = watershed(distmap(threshold), 10 )
display(colorLabels(nmask), all=TRUE)
Which would leave me with this image:
The values I would get in fo_lab are based on the thresholded intensity for each region, so they don't effectively capture the true difference in intensity between wells. More importantly, I need to carry those values to use in mathematical operations on the exact same extracted areas from proceeding images.
Any thoughts on how to do this?
Thank you.
This is tricky. Let's start by reproducing your data by reading the image straight from this Stack Overflow page:
library(tidyverse)
library("EBImage")
fo <- readImage("https://i.stack.imgur.com/MFkmD.png")
#crop excess
fo <- fo[99:589,79:437,1:3]
#adaptive thresholding
threshold <- thresh(fo, w = 25, h = 25, offset = 0.01)
#use bwlabel to segment thresholded image
fo_lab <- bwlabel(threshold[,,2])
Now, the key to this is realising that fo_lab contains an array of pixels which are labelled according to the group (i.e. the well) they are in. There are also a few stray pixels which have been assigned to their own groups, so we remove anything with fewer than a hundred pixels by writing 0s into fo_lab at these locations:
fo_table <- table(fo_lab)
fo_lab[fo_lab %in% as.numeric(names(fo_table)[fo_table < 100])] <- 0
Now we have only the pixels that are on a well labelled with anything other than a zero, and we can ensure we have the correct number of wells:
fo_wells <- as.numeric(names(table(fo_lab)))[-1]
length(fo_wells)
#> [1] 81
So now we can create a data frame that records the position (the centroid) of each well:
df <- as.data.frame(computeFeatures.moment(fo_lab))
And we can add the average intensity of the pixels within each well on the original image to that data frame:
df$intensity <- sapply(fo_wells, function(x) mean(fo[fo_lab == x]))
So we have a data frame with the required results:
head(df)
#> m.cx m.cy m.majoraxis m.eccentricity m.theta intensity
#> 1 462.2866 17.76579 29.69468 0.3301601 -0.2989824 0.1229826
#> 2 372.9313 20.51608 29.70871 0.1563481 -1.0673974 0.2202901
#> 3 417.3410 19.64526 29.43567 0.2725219 0.4858422 0.1767944
#> 4 328.2435 21.87536 29.73790 0.1112710 -0.9316834 0.3010003
#> 5 283.9245 22.69954 29.17318 0.2366731 -1.4561670 0.5471162
#> 6 239.0390 24.15465 29.39590 0.1881874 0.6315008 0.3799093
So we have each plate recorded according to its x, y position and we have its average intensity. To prove this, let's plot the original image in ggplot and overlay the intensity values on each plate:
img_df <- reshape2::melt(as.matrix(as.raster(as.array(fo))))
ggplot(img_df, aes(Var1, Var2, fill = value)) +
geom_raster() +
scale_fill_identity() +
scale_y_reverse() +
geom_text(inherit.aes = FALSE, data = df, color = "red",
aes(x = m.cx, y = m.cy, label = round(intensity, 3))) +
coord_equal()
We can see that the whitest plates have the highest intensities and the darker plates have lower intensities.
In terms of making sure successive plates are comparable, note that the output of computeFeatures.moment(fo_lab) will always produce the labelling in the same order:
ggplot(img_df, aes(Var1, Var2, fill = value)) +
geom_raster() +
scale_fill_identity() +
scale_y_reverse() +
geom_text(inherit.aes = FALSE, data = df, color = "red",
aes(x = m.cx, y = m.cy, label = seq_along(m.cx))) +
coord_equal()
So you can use this to identify wells in subsequent plates.
Putting this all together, you can have a function that takes the image and spits out the intensities of each well, like this:
well_intensities <- function(img) {
fo <- readImage(img)[99:589,79:437,1:3]
fo_lab <- bwlabel(thresh(fo, w = 25, h = 25, offset = 0.01)[,,2])
fo_table <- table(fo_lab)
fo_lab[fo_lab %in% as.numeric(names(fo_table)[fo_table < 100])] <- 0
fo_wells <- as.numeric(names(table(fo_lab)))[-1]
data.frame(well = seq_along(fo_wells),
intensity = sapply(fo_wells, function(x) mean(fo[fo_lab == x])))
}
Which allows you to do:
well_intensities("https://i.stack.imgur.com/MFkmD.png")
#> well intensity
#> 1 1 0.1229826
#> 2 2 0.2202901
#> 3 3 0.1767944
#> 4 4 0.3010003
#> 5 5 0.5471162
#> 6 6 0.3799093
#> 7 7 0.2266809
#> 8 8 0.2691313
#> 9 9 0.1973300
#> 10 10 0.1219945
#> 11 11 0.1041047
#> 12 12 0.1858798
#> 13 13 0.1853668
#> 14 14 0.3065456
#> 15 15 0.4998599
#> 16 16 0.4173711
#> 17 17 0.3521405
#> 18 18 0.4614704
#> 19 19 0.2955793
#> 20 20 0.2511733
#> 21 21 0.1841083
#> 22 22 0.2669468
#> 23 23 0.3062121
#> 24 24 0.5471972
#> 25 25 0.7279144
#> 26 26 0.4425966
#> 27 27 0.4174344
#> 28 28 0.5155241
#> 29 29 0.5298436
#> 30 30 0.2440677
#> 31 31 0.2971507
#> 32 32 0.1490848
#> 33 33 0.2785301
#> 34 34 0.4392502
#> 35 35 0.4466012
#> 36 36 0.4020305
#> 37 37 0.4516624
#> 38 38 0.3949014
#> 39 39 0.4749804
#> 40 40 0.3820500
#> 41 41 0.2409199
#> 42 42 0.1769995
#> 43 43 0.4764645
#> 44 44 0.3035113
#> 45 45 0.3331184
#> 46 46 0.4859249
#> 47 47 0.8278420
#> 48 48 0.5102533
#> 49 49 0.5754179
#> 50 50 0.4044553
#> 51 51 0.2949486
#> 52 52 0.2020463
#> 53 53 0.3663714
#> 54 54 0.5853405
#> 55 55 0.4011272
#> 56 56 0.8564808
#> 57 57 0.5154415
#> 58 58 0.5178042
#> 59 59 0.5585773
#> 60 60 0.5070020
#> 61 61 0.2637470
#> 62 62 0.2379200
#> 63 63 0.2463080
#> 64 64 0.3840690
#> 65 65 0.3139230
#> 66 66 0.5157990
#> 67 67 0.3606038
#> 68 68 0.3066231
#> 69 69 0.4538155
#> 70 70 0.2935641
#> 71 71 0.1639805
#> 72 72 0.1892272
#> 73 73 0.2618652
#> 74 74 0.3513564
#> 75 75 0.4484937
#> 76 76 0.5032775
#> 77 77 0.3014721
#> 78 78 0.3475152
#> 79 79 0.2001712
#> 80 80 0.2873561
#> 81 81 0.1462936
I'm trying to plot a stacked barplot of the rate of computer used in different departments with details on what type of PC in each bar( so that for each department type1+type2+type3=tot_rate) . I've got a dataframe that looks like this :
dat=read.table(text = "Tot_rate Type1 Type2 Type3
DPT1 72 50 12 10
DPT2 80 30 20 30
DPT3 92 54 14 24", header = TRUE)
I tried to plot my barplot with raw data but now it's very important that i get the one with percentages and i can't seem to understand how i can do that.
This is how i thought i could that, but it just doesn't work
p<-ggplot(dat, aes(x=row.names(dat), y=dat$Tot_rate, fill=data[,2:ncol(dat)])) + geom_bar(stat="identity")+theme_minimal()+xlab("") + ylab("PC rate")+geom_abline(slope=0, intercept=90, col = "red",lty=2) + theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
When i try the code above i get :
Don't know how to automatically pick scale for object of type data.frame. Defaulting to continuous.
Error: Aesthetics must be either length 1 or the same as the data (9): fill
Can you please help ?
Thank you,
Liana
Here is one way to do it using a ggplot2 extension package called ggstatsplot-
set.seed(123)
library(tidyverse)
# creating dataframe in long format
(dat <- read.table(
text = "Tot_rate Type1 Type2 Type3
DPT1 72 50 12 10
DPT2 80 30 20 30
DPT3 92 54 14 24",
header = TRUE
) %>%
tibble::rownames_to_column(var = "id") %>%
tidyr::gather(., "key", "counts", Type1:Type3))
#> id Tot_rate key counts
#> 1 DPT1 72 Type1 50
#> 2 DPT2 80 Type1 30
#> 3 DPT3 92 Type1 54
#> 4 DPT1 72 Type2 12
#> 5 DPT2 80 Type2 20
#> 6 DPT3 92 Type2 14
#> 7 DPT1 72 Type3 10
#> 8 DPT2 80 Type3 30
#> 9 DPT3 92 Type3 24
# bar plot
ggstatsplot::ggbarstats(dat,
main = id,
condition = key,
counts = counts,
messages = FALSE)
Created on 2019-05-27 by the reprex package (v0.3.0)
library(reshape2)
dat=read.table(text = "Department Tot_rate Type1 Type2 Type3
DPT1 72 50 12 10
DPT2 80 30 20 30
DPT3 92 54 14 24", header = TRUE)
long_dat <- dat[-2] %>% gather(type,number,Type1:Type3,-c("Department"))
First I reshaped the data you had : I put the department in a column and reshaped your data from wide to long format (dropped tot_rate which isn't needed here).
p <- ggplot(data=long_dat,aes(x=Department,y=number,fill=type)) +
geom_bar(position = "fill",stat = "identity")
p
To scale the barplot in percantages, we use the position argument of geom_barset to position=fill.
My dataset is formed by 4 columns, as shown below:
The two columns on the left represent the coordinates XY of a geographical structure, and the two on the left represent the size of "each" geographical unit (diameters North-South and East-West)
I would like to graphically represent a scatterplot where to plot all the coordinates and draw over each point an ellipse including the diameters of each geographical unit.
Manually, and using only two points, the image should be like this one:
How can I do it using ggplot2?
You can download the data here
Use geom_ellipse() from ggforce:
library(ggplot2)
library(ggforce)
d <- data.frame(
x = c(10, 20),
y = c(10, 20),
ns = c(5, 8),
ew = c(4, 4)
)
ggplot(d, aes(x0 = x, y0 = y, a = ew/2, b = ns/2, angle = 0)) +
geom_ellipse() +
coord_fixed()
Created on 2019-06-01 by the reprex package (v0.2.1)
I'm not adding any new code to what Claus Wilke already posted above. All credit should go to Claus. I'm simply testing it with the actual data, and showing OP how to post data,
Loading packages needed
# install.packages(c("tidyverse"), dependencies = TRUE)
library(tidyverse)
Reading data,
tbl <- read.table(
text = "
X Y Diameter_N_S Diameter_E_W
-4275 1145 77 96
-4855 1330 30 25
-4850 1612 45 90
-4990 1410 15 15
-5055 1230 60 50
-5065 1503 43 45
-5135 1305 40 50
-5505 1190 55 70
-5705 1430 90 40
-5645 1535 52 60
", header = TRUE, stringsAsFactors = FALSE) %>% as_tibble()
showing data,
tbl
#> # A tibble: 10 x 4
#> X Y Diameter_N_S Diameter_E_W
#> <int> <int> <int> <int>
#> 1 -4275 1145 77 96
#> 2 -4855 1330 30 25
#> 3 -4850 1612 45 90
#> 4 -4990 1410 15 15
#> 5 -5055 1230 60 50
#> 6 -5065 1503 43 45
#> 7 -5135 1305 40 50
#> 8 -5505 1190 55 70
#> 9 -5705 1430 90 40
#> 10 -5645 1535 52 60
loading more packages needed
library(ggforce) # devtools::install_github("thomasp85/ggforce")
executing
ggplot(tbl, aes(x0 = X, y0 = Y, a = Diameter_E_W, b = Diameter_N_S, angle = 0)) +
geom_ellipsis() + geom_point(aes(X, Y), size = .5) + coord_fixed() + theme_bw()