Can I present the result of TukeyHSD as a heatmap? And how would the code look like concerning the example below?
#Daten erstellen
set.seed (0)
data <- data.frame(group = rep(c("A", "B", "C"), each = 30),
values = c(runif(30, 0, 3),
runif(30, 0, 5),
runif(30, 1, 7)))
#Die ersten sechs Zeilen anzeigen
head(data)
#einfaktorielles ANOVA-Modell anpassen
model <- aov(values~group, data=data)
#Sehen Sie sich die Modellausgabe an
summary(model)
#Tukey Test durchführen
TukeyHSD(model, conf.level=.95)
#Konfidenzintervalle plotten
plot(TukeyHSD(model, conf.level=.95), las = 2)
Thank you so much!!
I actually get the right results, but can't display them as a heatmap.
Here a way to do it with a tidyverse approach
library(dplyr)
library(lubridate)
library(tidyr)
data <- data.frame(group = rep(c("A", "B", "C"), each = 30),
values = c(runif(30, 0, 3),
runif(30, 0, 5),
runif(30, 1, 7)))
#einfaktorielles ANOVA-Modell anpassen
model <- aov(values~group, data=data)
#Tukey Test durchführen
test_list <- TukeyHSD(model, conf.level=.95)
test_data <- test_list$group
test_data %>%
as_tibble() %>%
bind_cols(data.frame(rw = rownames(test_data))) %>%
separate(rw,into = c("var1","var2")) %>%
ggplot(aes(x = var1,y = var2, fill = `p adj`))+
geom_tile()+
geom_text(aes(label = p.adjust(`p adj`)), color = "white")
Related
I am trying to use plotly to scatter the results of applying kmeans to dataframe and I want to color the scattered points based on the results of the kmeans. I am following this this link to use plotly but plotly is not coloring the scattered plots as I would like. Here is my code.
library(plotly)
library(data.table)
library(tidyverse)
# generate the data
sample1_x <- rnorm(n = 1000, mean = 0, sd = 1)
sample1_y <- rnorm(n = 1000, mean = 1, sd = 1)
sample2_x <- rnorm(n = 1000, mean = 5, sd = 1)
sample2_y <- rnorm(n = 1000, mean = 3, sd = 1)
# store the data in dataframes
df1 <- data.frame(sample1_x, sample1_y)
df2 <- data.frame(sample2_x, sample2_y)
df <- rbind(as.data.table(df1), as.data.table(df2), use.names = FALSE)
clusts <- kmeans(df, 2, 5)
clusts <- array(unlist(clusts), dim = c(2000, 1, 1))
df <- df %>%
add_column(clusters = clusts)
# rename the columns
colnames(df)[1: 2] <- c("col1", "col2")
# plot
fig <- plot_ly(data = df,
x = ~col1, y = ~col2,
color = ~clusters,
colors = c("red", "blue"),
type = "scatter", mode = "markers")
fig
is what plotly is returning, but what I want is .
The problem is that your clusters column is an <array> instead of a <num>.
set.seed(1)
df <- data.frame(col1 = c(rnorm(1000, 0), rnorm(1000, 5)),
col2 = c(rnorm(1000, 1), rnorm(1000, 3)))
setDT(df)
clusts <- kmeans(df, 2, 5)
df[, clusters := clusts$cluster]
plot_ly(data = df,
x = ~col1, y = ~col2,
color = ~clusters,
colors = c("red", "blue"),
type = "scatter", mode = "markers")
When I run this
library(tidyverse)
df = data.frame(
stringsAsFactors = FALSE,
Type = c("a", "b", "c", "d"),
A = c(51, 5, 10, 155.5),
P1 = c(40.1, 50.5, 127.8, 216),
C = c(40, 45, 50, 255)
)
library(huxtable)
ht = as_hux(df)
ht %>% map_text_color( row = 2:nrow(df), col = 2:3,
by_cases(. < 50 ~ "red")) %>%
set_all_borders()
ht
I get table:
table
The problem is that 127.8 is bigger than 50 so it shouldn't be red. How to make it to be as I want?
The underlying issue is that adding cases has turned your numbers to character(). A workaround is to use by_cases(as.numeric(.) < 50 ~ "red"). Alternatively, write:
ht = as_hux(df, add_colnames = FALSE)
ht %>% map_text_color(col = 2:3,
by_cases(. < 50 ~ "red")) %>%
add_colnames() %>%
set_all_borders() %>%
which keeps the data as numeric until after you've done the colour mapping.
I have two data sets from which I would like to generate histograms showing how the data overlap by name (A, B, C). I have written a custom function so I can use ggplot with map2.
I would like the graphs to be titled according to the name of each data set, so "A", "B", "C." Does anyone know of a way to do this?
# load packages
library(ggplot2)
library(dplyr)
library(purrr)
## load and format data 1
df1_raw <- data.frame(name = c("A", "B", "C", "A", "C", "B"),
start = c(1, 3, 4, 5, 2, 1),
end = c(6, 5, 7, 8, 6, 7))
df1 <- split(x = df1_raw, f = df1_raw$name) # split data by name
df1 <- lapply(df1, function(x) Map(seq.int, x$start, x$end)) # generate sequence intervals
df1 <- map(df1, unlist) # unlist sequences
df1 <- lapply(df1, data.frame) # convert to df
## load and format data 2
df2_raw <- data.frame(name = c("C", "B", "C", "A", "A", "B"),
start = c(5, 4, 3, 4, 4, 5),
end = c(7, 8, 7, 6, 9, 6))
df2 <- split(x = df2_raw, f = df2_raw$name) # split data by name
df2 <- lapply(df2, function(x) Map(seq.int, x$start, x$end)) # generate sequence intervals
df2 <- map(df2, unlist) # unlist sequences
df2 <- lapply(df2, data.frame) # convert to df
## write custom ggplot function and generate graphs
gplot <- function(data1, data2) {
ggplot() +
geom_histogram(data = data1, aes(x = X..i..), binwidth = 1, color = "grey", fill = "grey") +
geom_histogram(data = data2, aes(x = X..i..), binwidth = 1, fill = "pink", alpha = 0.7) +
labs(
title = ls(data1))
}
hist <- map2(df1, df2, gplot)
I also tried the following in the title field in my function:
deparse(substitute(data1))
Another similar option to what #GregorThomas mentioned in the comments, you could add a name variable to your data.frames and pull from that in your gplot() function. I've also shown how you might combine a few of your data manipulation steps:
# load packages
library(ggplot2)
library(dplyr)
library(purrr)
## load and format data 1
df1_raw <- data.frame(name = c("A", "B", "C", "A", "C", "B"),
start = c(1, 3, 4, 5, 2, 1),
end = c(6, 5, 7, 8, 6, 7))
df1 <- df1_raw %>%
split(.$name) %>% # split data by name
imap(function(x, x_name) {
data.frame(value = Map(seq.int, x$start, x$end) %>% unlist,
name = x_name)
})
## load and format data 2
df2_raw <- data.frame(name = c("C", "B", "C", "A", "A", "B"),
start = c(5, 4, 3, 4, 4, 5),
end = c(7, 8, 7, 6, 9, 6))
df2 <- df2_raw %>%
split(.$name) %>% # split data by name
imap(function(x, x_name) {
data.frame(value = Map(seq.int, x$start, x$end) %>% unlist,
name = x_name)
})
## change the title component of your previous function
gplot <- function(data1, data2) {
ggplot() +
geom_histogram(data = data1, aes(x = value), binwidth = 1, color = "grey", fill = "grey") +
geom_histogram(data = data2, aes(x = value), binwidth = 1, fill = "pink", alpha = 0.7) +
ggtitle(data1$name[1])
}
## plot it
map2(df1, df2, gplot)
I'm trying to do a semi circle donut with highcharter library but I only know how to do a pie chart. I know that with JS you can do it by adding "startAngle" and "endAngle" but I want to know how to do it with R:
A <- c("a", "b", "c", "d")
B <- c(4, 6, 9, 2)
C <- c(23, 26, 13, 15)
df <- data.frame(A, B, C)
highchart() %>%
hc_chart(type = "pie") %>%
hc_add_series_labels_values(labels = df$A, values = df$B)%>%
hc_tooltip(crosshairs = TRUE, borderWidth = 5, sort = TRUE, shared = TRUE, table = TRUE,
pointFormat = paste('<b>{point.percentage:.1f}%</b>')
) %>%
hc_title(text = "ABC",
margin = 20,
style = list(color = "#144746", useHTML = TRUE))
Thank you!
You can do something like this though not using Highcharts library.
library(tidyverse)
library(ggforce)
library(scales)
library(ggplot2)
# -------------------------------------------------------------------------
A <- c("a", "b", "c", "d")
B <- c(4, 6, 9, 2)
C <- c(23, 26, 13, 15)
df <- data.frame(A, B, C)
# Ensure A is a factor (we'll be using it to fill the pie)
df$A <- factor(df$A)
# compute the individual proportion in this case using var C
df$prop <- df$C/sum(df$C)
# compute the cumulative proportion and use that to plot ymax
df$p_end <- cumsum(df$prop)
# generate a y-min between 0 and 1 less value than p_end (using p_end)
df$p_start <- c(0, head(df$p_end ,-1))
# -------------------------------------------------------------------------
# plot
df %>%
mutate_at(c("p_start", "p_end"), rescale, to=pi*c(-.5,.5), from=0:1) %>%
ggplot +
geom_arc_bar(aes(x0 = 0, y0 = 0, r0 = .5, r = 1, start = p_start, end = p_end, fill=A)) +
coord_fixed() +xlab("X_label") + ylab("Y_lablel") + guides(fill=guide_legend(title="Legend Title"))
Output
Hope that helps.
Try adding startAngle = -90, endAngle = 90 inside hc_add_series_labels_values.
Note as per the warning hc_add_series_labels_values is deprecated so suggest using hc_add_series.
highchart() %>%
hc_add_series(type = "pie", data = df, hcaes(x = A, y = B), startAngle = -90, endAngle = 90) %>%
hc_tooltip(pointFormat = '<b>{point.percentage:.1f}%</b>') %>%
hc_title(text = "ABC",
margin = 20,
style = list(color = "#144746", useHTML = TRUE))
I would like to get predicted values based on a model I fit to a training set of data. I have done this before, but now I have a grouping factor and it is throwing me off. I want to predict biomass based on population for each environment.
library(tidyverse)
fit_mods<-df %>%
group_by(environ) %>%
do(model = lm(biomass ~ poly(population, 2), data = .))
Ultimately, I will want to find at which population biomass is the greatest. Usually I would do this by creating a grid and running the model on my new values and finding the max value, but I'm blanking on how to do this with the grouping. Usual way:
min_pop <- min(df$population)
max_pop <- max(df$population)
grid_pop <- expand.grid(new = (seq(from = min_pop,
to = max_pop,
length.out = 1000)),
environ = c("A", "B"))
#This is what I did with ungrouped data, but doesn't work now.
pred_pop <- predict(object = fit_mods,
newdata = grid_pop,
interval = "predict")
Here is some dummy data:
df <- as.data.frame(list(environ = c("a", "a", "a", "a", "a", "b", "b", "b", "b", "b"),
population = c(2, 3, 4, 5, 6, 3, 4, 5, 6, 7),
biomass = c(1, 2.2, 3.5, 4.1, 3.8, 2.5, 3.6, 4.3, 5.2, 5.1)), class = "data.frame")
In a tidyverse many models approach you could do it the following way:
library(tidyverse)
fit_mods <- df %>%
nest(-environ) %>%
mutate(models = map(data, ~ lm(biomass ~ poly(population, 2), data = .x)),
min_pop = map_dbl(data, ~ pull(.x, population) %>% min),
max_pop = map_dbl(data, ~ pull(.x, population) %>% max),
new = map2(min_pop, max_pop, ~ tibble(population = seq(from = .x,
to = .y,
length.out = 1000))),
pred = map2(models,
new,
~ predict(object = .x,
newdata = select(.y,population),
interval = "predict")))