Please help. I am trying to put all of the columns on the x-axis and then make side-by-side bars grouped by date.
This is my data. I really tried, but to no avail.
dateVisited hh_visited hh_ind_confirmed new_in_mig out_mig deaths HOH_death Preg_Obs Preg_Outcome child_forms
102 2020-07-21 292 1170 131 86 18 7 3 14 79
103 2020-07-22 400 1553 115 100 25 10 11 18 107
104 2020-07-23 381 1458 103 67 21 9 5 23 87
105 2020-07-24 345 1379 90 98 12 4 3 20 89
106 2020-07-25 436 1585 131 119 13 2 7 20 117
107 2020-07-26 0 0 0 0 0 0 0 0 0
I think you're looking for something like this:
library(tidyr)
library(ggplot2)
# Stack every column except the first (dateVisited) into `name`/`value`
# pairs, then draw one bar per date, side by side, within each variable
# on the x-axis.
ggplot(
  data = pivot_longer(df, cols = -1),
  mapping = aes(x = name, y = value)
) +
  geom_col(
    mapping = aes(fill = dateVisited),
    position = position_dodge(width = 0.8),
    width = 0.6
  ) +
  # Tilt the x-axis labels so the long variable names do not overlap.
  guides(x = guide_axis(angle = 45))
Reproducible Data from question
# The six daily summaries from the question, written column by column.
# dateVisited is a factor with one level per date; all counts are integers;
# row names keep the question's original row labels.
df <- data.frame(
  dateVisited = factor(c(
    "2020-07-21", "2020-07-22", "2020-07-23",
    "2020-07-24", "2020-07-25", "2020-07-26"
  )),
  hh_visited = c(292L, 400L, 381L, 345L, 436L, 0L),
  hh_ind_confirmed = c(1170L, 1553L, 1458L, 1379L, 1585L, 0L),
  new_in_mig = c(131L, 115L, 103L, 90L, 131L, 0L),
  out_mig = c(86L, 100L, 67L, 98L, 119L, 0L),
  deaths = c(18L, 25L, 21L, 12L, 13L, 0L),
  HOH_death = c(7L, 10L, 9L, 4L, 2L, 0L),
  Preg_Obs = c(3L, 11L, 5L, 3L, 7L, 0L),
  Preg_Outcome = c(14L, 18L, 23L, 20L, 20L, 0L),
  child_forms = c(79L, 107L, 87L, 89L, 117L, 0L),
  row.names = c("102", "103", "104", "105", "106", "107")
)
Your data cannot be used easily, since it takes time to format it into something that can be ingested by R. Here is something to get you started. I made up a hypothetical data frame of 4 columns that resembles your data, used the function melt from the reshape2 package to reshape the data into the long format that ggplot2 expects, and used ggplot2 to generate a bar plot.
# Build a small made-up data set shaped like the question's data, reshape it
# to long format, and draw dodged bars (one bar per variable for each date).
set.seed(42) # fix the RNG so the example draws the same plot on every run
visit_dates <- seq(as.Date("2019-01-01"), as.Date("2019-12-31"), by = 30)
# Derive the sample size from the dates instead of hard-coding 13, so the
# columns always have matching lengths if the date range is changed.
n_dates <- length(visit_dates)
df <- data.frame(
  dateVisited = visit_dates,
  hh_visited = runif(n_dates, 0, 436),
  hh_ind_confirmed = runif(n_dates, 0, 1585),
  new_in_mig = runif(n_dates, 0, 131)
)
# melt() stacks the three measure columns into `variable`/`value`, keeping
# dateVisited as the identifier column.
df <- reshape2::melt(df, id.vars = "dateVisited")
# Namespace-qualified so the snippet runs without a prior library(ggplot2).
ggplot2::ggplot(
  data = df,
  ggplot2::aes(x = dateVisited, y = value, fill = variable)
) +
  ggplot2::geom_col(position = "dodge")
Related
I have two dataframes in R
df1
chr start end strand bam1 bam2 bam3 bam4 bam5 bam6 bam7 bam8
1 chr1 3531569 3531966 - 2 2 1 4 8 36 21 1
2 chr1 3670538 3672624 - 251 50 170 165 294 259 665 86
3 chr1 4491645 4493854 - 220 46 179 167 275 332 414 77
4 chr1 4496542 4497750 - 115 41 100 67 114 69 42 63
5 chr1 4571267 4572265 - 64 32 77 44 76 130 179 27
6 chr1 4688213 4688719 - 39 10 20 20 14 23 25 17
7 chr1 4688800 4688919 - 20 30 10 20 14 55 17 20
8 chr1 4688800 4688919 - 2 4 6 8 10 12 14 16
9 chr1 4688800 4688919 - 1 2 3 4 5 6 7 8
and
df2
bam_file r1 r2
1 bam1 2 1
2 bam2 9 3
3 bam3 1 4
4 bam4 1 5
5 bam5 1 1
6 bam6 8 6
7 bam7 3 7
8 bam8 3 2
I want to apply the following formula (let's call it X) to every combination of a column of df2 and a row of df1. For example, for column r1 of df2 and row 1 of df1:
((df2[1,2]-df1[1,5])^2 + (df2[2,2]-df1[1,6])^2 + (df2[3,2]-df1[1,7])^2 + (df2[4,2]-df1[1,8])^2 + (df2[5,2]-df1[1,9])^2 + (df2[6,2]-df1[1,10])^2 + (df2[7,2]-df1[1,11])^2 +
(df2[8,2]-df1[1,12])^2)/(ncol(df1)-4)
So the desired output will be
output
r1 r2
1 152.375 144.75
2 89140.25 88467.875
3 57822.75 57413.125
4 6195.125 6148
5 8007.375 7858.75
6 395.75 372.625
7 508.75 543.125
8 60.75 47.125
9 15.5 6.875
I apologize if this appears to be a repetitive question, but I tried and was unable to resolve it (as I am beginner and learning). It would be great to find a solution. Thank you in advance and looking for a positive response.
We could create a sequence column ('rn'), reshape to 'long' format with pivot_longer on the first data, join with the second data ('df2') and do a group by calculation on the 'r1', 'r2' columns in reframe
library(dplyr) # version >= 1.1.0
library(tidyr)
# For every row of df1, compute the mean squared difference between its bam*
# counts and each reference column (r1, r2) of df2.
df1 %>%
  # Tag each original row so the long-format values can be regrouped later.
  mutate(rn = row_number()) %>%
  pivot_longer(cols = starts_with("bam"), names_to = "bam_file") %>%
  # Name the join key explicitly: this avoids the "Joining with `by = ...`"
  # message and guards against silently joining on any other shared column.
  left_join(df2, by = "bam_file") %>%
  # n() is the number of bam columns per row, i.e. the question's
  # ncol(df1) - 4 divisor.
  reframe(across(r1:r2, ~ sum((value - .x)^2) / n()), .by = "rn")
-output
# A tibble: 9 × 3
rn r1 r2
<int> <dbl> <dbl>
1 1 152. 145.
2 2 89140. 88468.
3 3 57823. 57413.
4 4 6195. 6148
5 5 8007. 7859.
6 6 396. 373.
7 7 509. 543.
8 8 60.8 47.1
9 9 15.5 6.88
data
# Nine genomic intervals on chr1 (minus strand) with integer read counts for
# eight bam files. Row names are the character labels "1".."9".
df1 <- data.frame(
  chr = rep("chr1", 9L),
  start = c(
    3531569L, 3670538L, 4491645L, 4496542L, 4571267L,
    4688213L, 4688800L, 4688800L, 4688800L
  ),
  end = c(
    3531966L, 3672624L, 4493854L, 4497750L, 4572265L,
    4688719L, 4688919L, 4688919L, 4688919L
  ),
  strand = rep("-", 9L),
  bam1 = c(2L, 251L, 220L, 115L, 64L, 39L, 20L, 2L, 1L),
  bam2 = c(2L, 50L, 46L, 41L, 32L, 10L, 30L, 4L, 2L),
  bam3 = c(1L, 170L, 179L, 100L, 77L, 20L, 10L, 6L, 3L),
  bam4 = c(4L, 165L, 167L, 67L, 44L, 20L, 20L, 8L, 4L),
  bam5 = c(8L, 294L, 275L, 114L, 76L, 14L, 14L, 10L, 5L),
  bam6 = c(36L, 259L, 332L, 69L, 130L, 23L, 55L, 12L, 6L),
  bam7 = c(21L, 665L, 414L, 42L, 179L, 25L, 17L, 14L, 7L),
  bam8 = c(1L, 86L, 77L, 63L, 27L, 17L, 20L, 16L, 8L),
  row.names = as.character(1:9),
  # Keep chr and strand as character vectors on every R version.
  stringsAsFactors = FALSE
)
# Reference values r1 and r2 for each of the eight bam files.
df2 <- data.frame(
  bam_file = paste0("bam", 1:8),
  r1 = c(2L, 9L, 1L, 1L, 1L, 8L, 3L, 3L),
  r2 = c(1L, 3L, 4L, 5L, 1L, 6L, 7L, 2L),
  row.names = as.character(1:8),
  # Keep bam_file as a character vector on every R version.
  stringsAsFactors = FALSE
)
I want to compare the means of the variables in a barplot.
This is a portion of my dataframe.
Group Gender Age Anxiety_score Depression_score IUS OBSC
1 Anxiety 0 25 32 29 12
2 Anxiety 1 48 34 28 11
3 Anxiety 0 32 48 32 12
4 Anxiety 1 24 43 26 12
5 Anxiety 1 18 44 26 15
6 Control 0 45 12 11 3
7 Control 0 44 11 11 5
8 Control 1 26 21 10 5
9 Control 1 38 12 NA 2
10 Control 0 18 13 10 1
I'd like to create a barplot where each variable (Gender, Age, Anxiety_score, depression_score, IUS, ...) represents a bar and I'd like to have this for each group (anxiety vs control next to each other, not stacked) on the same graph. The height of the bar would represent the mean. For gender, I'd like to have the gender ratio. I also want to map the variables on the y axis. How do I do this in R?
This type of problem generally has to do with reshaping the data. The data should be in long format, but it is in wide format. See this post on how to reshape the data from wide to long format.
Then, group by Group and name, compute the means and plot.
library(dplyr)
library(tidyr)
library(ggplot2)
# Average every measured variable within each Group and show the groups as
# side-by-side bars, one cluster per variable.
df1 %>%
  pivot_longer(-Group) %>%
  group_by(Group, name) %>%
  # na.rm = TRUE: the data contain a missing Depression_score. Without it
  # that group's mean is NA and its bar is dropped from the plot.
  summarise(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(name, value, fill = Group)) +
  geom_col(position = position_dodge()) +
  # Tilt and right-align the variable names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Data
# Ten participants (five per group) with integer scores. Depression_score has
# one missing value. Row names are the character labels "1".."10".
df1 <- data.frame(
  Group = rep(c("Anxiety", "Control"), each = 5L),
  Gender = c(0L, 1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 0L),
  Age = c(25L, 48L, 32L, 24L, 18L, 45L, 44L, 26L, 38L, 18L),
  Anxiety_score = c(32L, 34L, 48L, 43L, 44L, 12L, 11L, 21L, 12L, 13L),
  Depression_score = c(29L, 28L, 32L, 26L, 26L, 11L, 11L, 10L, NA, 10L),
  IUS = c(12L, 11L, 12L, 12L, 15L, 3L, 5L, 5L, 2L, 1L),
  row.names = as.character(1:10),
  # Keep Group as a character vector on every R version.
  stringsAsFactors = FALSE
)
Are you looking for something like this?
library(tidyverse)
# Mean of every variable per Group, drawn as dodged bars (one colour per
# variable) with the mean printed just above each bar.
df %>%
  pivot_longer(-Group) %>%
  group_by(Group, name) %>%
  # .groups = "drop" returns an ungrouped result and silences the
  # "summarise() has grouped output" message.
  summarise(Mean = mean(value, na.rm = TRUE), .groups = "drop") %>%
  ggplot(aes(x = factor(Group), y = Mean, fill = name)) +
  geom_col(aes(group = name), position = "dodge") +
  geom_text(
    # Round the label so long decimal expansions do not overlap neighbours.
    aes(label = round(Mean, 2), y = Mean + 0.05),
    position = position_dodge(0.9),
    vjust = 0
  )
I have multiple entries in my data from same patient ID, I wanted to make it as one entry. What are my possible options? Here is the data -
PtID WorryHighBGNow
40 5
40 1
40 2
70 3
101 4
263 2
263 5
263 3
143 4
245 4
137 3
219 2
219 3
219 4
3 3
264 3
264 3
98 1
200 3
105 3
111 4
149 3
I want to create a visualization like the one below from this data, where on the y-axis I want to see the columns of my table and on the x-axis I want to see the rankings 1, 2, 3, 4, 5.
If x is your data frame you can try this
# Collapse each patient's rows into one row whose WorryHighBGNow is the
# comma-separated list of that patient's answers.
# NOTE: setDT() converts x to a data.table by reference (x itself changes).
d <- setDT(x)[, .(WorryHighBGNow = toString(WorryHighBGNow)), by = "PtID"]
It will give result like
PtID WorryHighBGNow
40 5, 1, 2
70 3
101 4
263 2, 5, 3
And so on.
I'm not really sure this is what you need. I've just tried to mimic the visualization you linked in the question as closely as possible.
library(tidyverse)
# Horizontal bar chart of how often each WorryHighBGNow answer occurs, as a
# percentage of all rows, with bars ordered by frequency.
dat %>%
  # Only the answer column needs to be a factor. mutate_all() is superseded
  # and also converted PtID, which is never used below.
  mutate(WorryHighBGNow = factor(WorryHighBGNow)) %>%
  count(WorryHighBGNow) %>%
  mutate(
    percentage = round(n / sum(n) * 100, 1),
    # Reorder the factor levels by count so bars are sorted by frequency.
    WorryHighBGNow = reorder(WorryHighBGNow, n)
  ) %>%
  ggplot(aes(
    x = WorryHighBGNow, y = percentage,
    fill = WorryHighBGNow, label = paste(percentage, "%")
  )) +
  geom_col() +
  # Negative hjust nudges each label just past the end of its bar.
  geom_text(hjust = -.1, fontface = "bold") +
  scale_fill_brewer(type = "qual", breaks = 1:5) +
  coord_flip() +
  # Extend the percentage axis so the labels are not clipped.
  expand_limits(y = 50) +
  theme_void() +
  theme(legend.position = "bottom")
Data:
# 22 survey rows: patient id and the WorryHighBGNow answer (1-5), both
# integer. Some patients appear more than once.
dat <- data.frame(
  PtID = c(
    rep(40L, 3L), 70L, 101L, rep(263L, 3L), 143L, 245L, 137L,
    rep(219L, 3L), 3L, rep(264L, 2L), 98L, 200L, 105L, 111L, 149L
  ),
  WorryHighBGNow = c(
    5L, 1L, 2L, 3L, 4L, 2L, 5L, 3L, 4L, 4L, 3L,
    2L, 3L, 4L, 3L, 3L, 3L, 1L, 3L, 3L, 4L, 3L
  )
)
I am a novice in R. I have a tab-separated text file containing sales data for each day. The format is product-id, day0, day1, day2, day3, and so on. Part of the input file is given below.
productid 0 1 2 3 4 5 6
1 53 40 37 45 69 105 62
4 0 0 2 4 0 8 0
5 57 133 60 126 90 87 107
6 108 130 143 92 88 101 66
10 0 0 2 0 4 0 36
11 17 22 16 15 45 32 36
I used code below to read a file
# Read the tab-separated sales table; the first line supplies column names.
# read.delim() defaults to header = TRUE and sep = "\t".
pdInfo <- read.delim("products.txt")
This reads the entire file, and the variable pdInfo is a data frame. I would like to convert this data frame to a time series object for further processing. When I run a stationarity test, the augmented Dickey–Fuller (ADF) test, it shows an error. I tried the code below:
# Convert the whole data frame to a numeric matrix and wrap it as a time
# series with one observation per row.
# NOTE(review): every column of pdInfo is passed at once, so x is presumably a
# multi-column series -- consistent with the adf.test() error reported below.
x <- ts(data.matrix(pdInfo),frequency = 1)
adf <- adf.test(x)
error: Error in adf.test(x) : x is not a vector or univariate time series
Thanks in advance for the suggestions
In R, time series are usually in the form "one row per date", whereas your data is in the form "one column per date". You probably need to transpose the data before you convert it to a ts object.
First transpose it:
# Transpose so that each row is a day and each column is a product.
y <- t(pdInfo)
Then make the top row (being the product id's) into the row titles
# Use the first row (the product ids) as the column names, then drop that row.
colnames(y) <- y[1, ]
# drop = FALSE keeps y a matrix even when only one row remains; otherwise it
# would collapse to a plain vector and lose the one-column-per-product layout.
y <- y[-1, , drop = FALSE]
This should work:
# Wrap the transposed matrix as a time series (one observation per row).
x <- ts(data = y, frequency = 1)
library(purrr)
library(dplyr)
library(tidyr)
library(tseries)
# create the data
# Sales per product (rows) and day (columns "0".."6"), all integer.
# check.names = FALSE keeps the purely numeric column names as they are.
df <- data.frame(
  productid = c(1L, 4L, 5L, 6L, 10L, 11L),
  `0` = c(53L, 0L, 57L, 108L, 0L, 17L),
  `1` = c(40L, 0L, 133L, 130L, 0L, 22L),
  `2` = c(37L, 2L, 60L, 143L, 2L, 16L),
  `3` = c(45L, 4L, 126L, 92L, 0L, 15L),
  `4` = c(69L, 0L, 90L, 88L, 4L, 45L),
  `5` = c(105L, 8L, 87L, 101L, 0L, 32L),
  `6` = c(62L, 0L, 107L, 66L, 36L, 36L),
  check.names = FALSE
)
# apply adf.test to each productid and return p.value
# Run the augmented Dickey-Fuller test on each product's daily sales and
# return one row per product with the test's p-value.
adfTest <- df %>%
  # pivot_longer() replaces the superseded gather().
  pivot_longer(
    cols = -productid, names_to = "day", values_to = "sales",
    # Day labels come from column names, so they are text. Convert them to
    # integers so sorting is chronological; as text, "10" sorts before "2".
    # A leading "X" (as read.csv() prepends to numeric headers) is stripped.
    names_transform = list(day = function(d) as.integer(sub("^X", "", d)))
  ) %>%
  arrange(productid, day) %>%
  group_by(productid) %>%
  # One test per product; .groups = "drop" returns an ungrouped tibble.
  summarise(adf.p.value = adf.test(as.ts(sales))$p.value, .groups = "drop")
I have a dataset as follows: I have a data frame like this, called data_frame_test.
Value time group
3.96655960 0 184
-8.71308460 0 184
-11.11638947 0 184
-6.84213562 11 184
-1.25926609 11 184
-4.60649529 11 184
0.27577858 11 184
11.85394249 20 184
-0.27114563 20 184
1.73081284 20 184
1.78209915 20 184
11.34305840 20 184
13.49688263 20 184
-7.54752045 20 184
-13.63673286 25 184
-5.75711517 25 184
0.35823669 25 184
-2.45237694 25 184
0.49313087 0 66
-9.04148674 0 66
-15.50337906 0 66
-17.51445351 0 66
-10.66807098 0 66
-2.24337845 5 66
-13.79929533 5 66
1.33287125 5 66
2.22143402 5 66
11.46484833 10 66
23.26805916 10 66
9.07377968 10 66
4.28664665 10 66
# 31 measurements: a numeric Value, the integer time point it was taken at,
# and the integer group id (18 rows for group 184, then 13 rows for group 66).
data_frame_test <- data.frame(
  Value = c(
    3.9665596, -8.7130846, -11.11638947,
    -6.84213562, -1.25926609, -4.60649529, 0.27577858,
    11.85394249, -0.27114563, 1.73081284, 1.78209915, 11.3430584,
    13.49688263, -7.54752045,
    -13.63673286, -5.75711517, 0.35823669, -2.45237694,
    0.49313087, -9.04148674, -15.50337906, -17.51445351, -10.66807098,
    -2.24337845, -13.79929533, 1.33287125, 2.22143402,
    11.46484833, 23.26805916, 9.07377968, 4.28664665
  ),
  # Run lengths follow the grouping of the Value rows above.
  time = rep(
    c(0L, 11L, 20L, 25L, 0L, 5L, 10L),
    times = c(3L, 4L, 7L, 4L, 5L, 4L, 4L)
  ),
  group = rep(c(184L, 66L), times = c(18L, 13L))
)
I want to plot a boxplot of a value for each time point and group.
# Box plot of Value at each time point, with one outline colour per group.
ggplot(
  data = data_frame_test,
  mapping = aes(x = factor(time), y = Value, colour = factor(group))
) +
  geom_boxplot(
    position = "identity", fill = "white", alpha = 0.5, outlier.size = 0
  ) +
  # Ask the discrete axis for limits, breaks and labels at every integer
  # from -1 to 26.
  scale_x_discrete(limits = -1:26, breaks = -1:26, labels = -1:26)
This results in the following picture, which is almost right:
However, the x axis labels and ticks are shifted. How do I put it where it belongs?
You are trying to treat a factor like a numeric, which it isn't. Here is a better solution:
# Box plot with time as an ordered factor whose levels cover -1..26, so the
# axis positions come from the factor itself rather than from scale limits.
ggplot(
  data = data_frame_test,
  mapping = aes(
    x = factor(time, levels = seq(-1, 26), ordered = TRUE),
    y = Value,
    colour = factor(group)
  )
) +
  geom_boxplot(
    position = "identity", fill = "white", alpha = 0.5, outlier.size = 0
  ) +
  # drop = FALSE keeps factor levels that have no observations on the axis.
  scale_x_discrete(drop = FALSE)
I'm not quite sure why that is happening, but I would probably make the plot like this, since converting time to a factor is not intuitive to me:
# Keep time numeric on the x-axis and set the grouping explicitly, so one box
# is drawn for every time/group combination.
ggplot(
  data = data_frame_test,
  mapping = aes(
    x = time, y = Value,
    colour = factor(group),
    group = interaction(time, group)
  )
) +
  geom_boxplot(
    position = "identity", fill = "white", alpha = 0.5, outlier.size = 0
  )
Which gives:
You can use scale_x_continuous to change the breaks and such.