How to change colours in this heatmap? - r

I would be very grateful for any help. I have created a heatmap using Pheatmap. My measures are binary and I would like the annotation row colours (5 categories) to be the same as the data points. Currently I have one colour across the 5 categories. I have attached the chart produced by my code. I am not sure how to do this. Thanks in advance!
Here is my code and sample data:
library(pheatmap)
library(dplyr)
*Arrange cluster
spells2=spells%>%arrange(PAM_complete)
*Df for wheeze columns
whz=spells2%>%dplyr::select(2:6)
*Create separate df for cluster
c5=spells2$PAM_complete
c5=as.data.frame(c5)
*Wheeze and cluster need the same row names (id)
rownames(whz)=spells2$id
rownames(c5)=spells2$id
c5$c5=as.factor(c5$c5)
col=c("white", "darkblue")
pheatmap(whz,legend_breaks = 0:1, legend_labels = c("No wheeze", "Wheeze"), fontsize = 10,
show_rownames=FALSE, cluster_rows = FALSE, color=col,
cluster_cols=FALSE , annotation_row=c5, )
> dput(head(spells2, 50))
structure(list(id = c("10003A", "1001", "10012A", "10013A", "10016A",
"10019A", "1001A", "10023A", "1002A", "10037A", "1004", "10042A",
"10045A", "1005", "10051A", "10054A", "1006", "10064A", "10065A",
"10075A", "10076A", "10082A", "10087A", "10094A", "10095A", "10097A",
"10098A", "100A", "10103A", "10104A", "10106A", "10121A", "10124A",
"10126A", "10132A", "1013A", "10144A", "10146A", "1014A", "1015",
"10153A", "10156A", "10159A", "10161A", "1017", "10171A", "10175A",
"10178A", "1018", "10186A"), whz1 = c(0, 1, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0
), whz2 = c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0,
0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0), whz3 = c(0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0,
0, 0, 0), whz4 = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1,
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0), whz5 = c(0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,
0, 0, 0, 0, 0, 0), PAM_complete = c("ETW", "ETW", "NWZ", "NWZ",
"LOW", "NWZ", "NWZ", "INT", "NWZ", "ETW", "NWZ", "PEW", "ETW",
"INT", "NWZ", "INT", "ETW", "NWZ", "ETW", "ETW", "NWZ", "ETW",
"ETW", "NWZ", "NWZ", "NWZ", "NWZ", "NWZ", "NWZ", "PEW", "NWZ",
"ETW", "NWZ", "INT", "NWZ", "INT", "NWZ", "INT", "NWZ", "LOW",
"PEW", "NWZ", "NWZ", "INT", "ETW", "NWZ", "ETW", "NWZ", "ETW",
"NWZ")), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
>

If I understand you correctly, you have plot "B" below, but you want plot "A" (without the little gaps in between the plots). This is not a straightforward task using the pheatmap package. The approach I used to create plot "A" below might be suitable with some tweaking (basically, plot each group separately then paste them all together in a column). Otherwise, a simpler 'ggplot' method is included below.
library(tidyverse)
library(pheatmap)
library(cowplot)
spells2 <- as.data.frame(spells) %>%
arrange(PAM_complete)
#Df for wheeze columns
whz <- spells2 %>%
dplyr::select(2:6)
#Create separate df for cluster
c5 <- spells2$PAM_complete %>%
as.data.frame()
colnames(c5) <- "names"
#Wheeze and cluster need the same row names (id)
rownames(whz) <- spells2$id
rownames(c5) <- spells2$id
c5$names <- as.factor(c5$names)
combined <- cbind(c5, whz)
# To get the 'default' pheatmap colour scheme
gg_color_hue <- function(n) {
hues = seq(15, 375, length = n + 1)
hcl(h = hues, l = 75, c = 100)[1:n]
}
scales::show_col(gg_color_hue(5))
# Specify colours for each group
ann_colors = list(
names = c(ETW = "#FF9289", INT = "#FF8AFF",
LOW = "#00DB98", NWZ = "#00CBFF",
PEW = "#BEC100"))
# Generate the plots
col = c("grey95", "darkblue")
p <- pheatmap(whz, legend_breaks = 0:1,
legend_labels = c("No wheeze", "Wheeze"),
fontsize = 10, show_rownames = FALSE,
cluster_rows = FALSE, color = col,
cluster_cols = FALSE, annotation_row = c5)
col_1 <- c("grey95", "#FF9289")
p1 <- pheatmap(combined %>% filter(names == "ETW") %>% select(-c(names)),
show_rownames = FALSE, show_colnames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
legend = FALSE, annotation_legend = FALSE,
color = col_1, annotation_names_row = FALSE,
annotation_colors = ann_colors,
annotation_row = combined %>% filter(names == "ETW") %>% select(names))
col_2 <- c("grey95", "#FF8AFF")
p2 <- pheatmap(combined %>% filter(names == "INT") %>% select(-c(names)),
show_rownames = FALSE, show_colnames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
legend = FALSE, annotation_legend = FALSE,
color = col_2, annotation_names_row = FALSE,
annotation_colors = ann_colors, cellheight = 7,
annotation_row = combined %>% filter(names == "INT") %>% select(names))
col_3 <- c("grey95", "#00DB98")
p3 <- pheatmap(combined %>% filter(names == "LOW") %>% select(-c(names)),
show_rownames = FALSE, show_colnames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
legend = FALSE, annotation_legend = FALSE,
color = col_3, annotation_names_row = FALSE,
annotation_colors = ann_colors,
annotation_row = combined %>% filter(names == "LOW") %>% select(names))
# Because all whz values = 0 for NWZ,
# you need to change one value to '1'
# in order for pheatmap to generate a plot
combined[23,2] <- 1
col_4 <- c("grey95", "grey95")
p4 <- pheatmap(combined %>% filter(names == "NWZ") %>% select(-c(names)),
show_rownames = FALSE, show_colnames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
legend = FALSE, annotation_legend = FALSE,
color = col_4, annotation_names_row = FALSE,
annotation_colors = ann_colors,
annotation_row = combined %>% filter(names == "NWZ") %>% select(names))
col_5 <- c("grey95", "#BEC100")
p5 <- pheatmap(combined %>% filter(names == "PEW") %>% select(-c(names)),
show_rownames = FALSE,
cluster_rows = FALSE, cluster_cols = FALSE,
legend = FALSE, annotation_legend = FALSE,
color = col_5,
annotation_colors = ann_colors,
annotation_row = combined %>% filter(names == "PEW") %>% select(names))
heatmaps <- cowplot::plot_grid(p1[[4]], p2[[4]], p3[[4]],
p4[[4]], p5[[4]], ncol = 1,
rel_heights = c(1.3, 0.7, 0.3, 2.4, 0.8))
cowplot::plot_grid(heatmaps, p$gtable, ncol = 2, rel_widths = c(0.7, 1), labels = "AUTO")
EDIT
If you don't necessarily want to use pheatmap, ggplot2 geom_tile() would be a lot easier, e.g.
library(tidyverse)
my_levels <- rownames(combined)
my_colours <- c("#FF9289", "#FF8AFF", "#00DB98", "#00CBFF", "#BEC100")
combined %>%
rownames_to_column(var = "IDs") %>%
pivot_longer(cols = -c(IDs, names),
names_to = "Trial",
values_to = "Wheeze") %>%
rename(Group = names) %>%
mutate(IDs = factor(IDs, levels = my_levels)) %>%
ggplot() +
geom_tile(aes(y = rev(IDs),
x = Trial,
fill = Group,
alpha = Wheeze),
color = "black") +
scale_alpha_continuous(breaks = c(0, 1),
labels = c("No", "Yes")) +
scale_fill_manual(values = my_colours) +
theme_minimal() +
theme(panel.grid = element_blank())
EDIT 2
To include an 'annotation' bar before the plot, you can use this:
combined %>%
rownames_to_column(var = "IDs") %>%
pivot_longer(cols = -c(IDs, names),
names_to = "Trial",
values_to = "Wheeze") %>%
rename(Group = names) %>%
mutate(IDs = factor(IDs, levels = my_levels)) %>%
ggplot() +
geom_tile(aes(y = rev(IDs),
x = Trial,
fill = Group,
alpha = Wheeze),
color = "black") +
geom_tile(aes(x = -0.1, y = rev(IDs), fill = Group),
show.legend = FALSE) +
coord_cartesian(c(0.8, 5)) +
scale_fill_manual(values = my_colours) +
scale_alpha_continuous(breaks = c(0, 1),
labels = c("No", "Yes")) +
theme(plot.margin=unit(c(1,0,0,0), units="lines"))
I wasn't able to label it as "Groups", but I imagine it's possible if you tinker with it.

Related

Sorting barplot based on multi-categories in r

I am trying to get a bar plot for sentiment scores corrected as per the following order and put into two separate colors:
(NEGATIVE) anger, disgust, fear, sadness, negative --- (POSITIVE) anticipation, joy, surprise, trust, positive.
Below is the code which only gives a decreasing plot.
barplot(sort(colSums(s), decreasing = TRUE),
las = 2,
col = rainbow(2),
ylab = 'Count',
main = 'User Synergies')
> dput(head(s))
structure(list(anger = c(1, 0, 0, 0, 0, 0), anticipation = c(0,
0, 5, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0), fear = c(1, 0,
2, 1, 0, 0), joy = c(1, 0, 1, 0, 0, 0), sadness = c(1, 0, 2,
1, 0, 0), surprise = c(0, 0, 2, 1, 0, 0), trust = c(4, 2, 3,
1, 0, 1), negative = c(2, 0, 3, 2, 1, 1), positive = c(4, 4,
7, 1, 0, 2)), row.names = c(NA, 6L), class = "data.frame")
Another way:
positive <- c("anticipation", "joy", "surprise", "trust", "positive")
negative <- c("anger", "disgust", "fear", "sadness", "negative")
barplot(colSums(s[,c(negative, positive)]),
las = 2,
col = c(rep("red", length(negative)), rep("cyan", length(positive))),
ylab = 'Count', ylim = c(0, 20),
main = 'User Synergies')
The result:
Try this ,
df <- structure(list(anger = c(1, 0, 0, 0, 0, 0),
anticipation = c(0, 0, 5, 0, 0, 0),
disgust = c(0, 0, 0, 0, 0, 0),
fear = c(1, 0,2, 1, 0, 0),
joy = c(1, 0, 1, 0, 0, 0),
sadness = c(1, 0, 2, 1, 0, 0),
surprise = c(0, 0, 2, 1, 0, 0),
trust = c(4, 2, 3, 1, 0, 1),
negative = c(2, 0, 3, 2, 1, 1),
positive = c(4, 4,7, 1, 0, 2)),
row.names = c(NA, 6L), class = "data.frame")
pn <- rainbow(2) # "#FF0000" "#00FFFF" one for positive and the other for negative
s <- sort(colSums(df) , decreasing = TRUE)
names(s)
#> [1] "positive" "trust" "negative" "anticipation" "fear"
#> [6] "sadness" "surprise" "joy" "anger" "disgust"
# arrange colors based on names of sorted columns
col <- c(pn[1] , pn[1] , pn[2] , pn[1] , pn[2] ,
pn[2] , pn[1] , pn[1] , pn[2] , pn[2])
barplot(s ,
las = 2,
col = col,
ylab = 'Count',
main = 'User Synergies')
Created on 2022-05-31 by the reprex package (v2.0.1)
You may try
library(dplyr)
library(reshape2)
df <- data.frame(
anger = 200,
disgust = 100,
fear = 900,
sadness = 400,
negative = 1500,
anticipation = 2000,
joy = 1200,
surprise = 300,
trust = 2500,
positive = 5000
)
pall <- c("red", "blue")
colSums(df) %>%
melt %>%
tibble::rownames_to_column(., "sentiments") %>%
mutate(sentiments = factor(sentiments, levels = c("anger", "disgust", "fear", "sadness", "negative", "anticipation", "joy", "surprise", "trust", "positive"))) %>%
mutate(colo = ifelse(sentiments %in% c("anger", "disgust", "fear", "sadness", "negative"), 0, 1) %>% as.factor) %>%
barplot(data = ., value ~ sentiments, col = pall[.$colo], las = 2, xlab = "")
Another approach :
df <- structure(list(anger = c(1, 0, 0, 0, 0, 0),
anticipation = c(0, 0, 5, 0, 0, 0),
disgust = c(0, 0, 0, 0, 0, 0),
fear = c(1, 0,2, 1, 0, 0),
joy = c(1, 0, 1, 0, 0, 0),
sadness = c(1, 0, 2, 1, 0, 0),
surprise = c(0, 0, 2, 1, 0, 0),
trust = c(4, 2, 3, 1, 0, 1),
negative = c(2, 0, 3, 2, 1, 1),
positive = c(4, 4,7, 1, 0, 2)),
row.names = c(NA, 6L), class = "data.frame")
s <- sort(colSums(df) , decreasing = TRUE)
pos <- c("positive" , "trust" , "anticipation" ,
"surprise" , "joy")
col <- names(s)
col <- ifelse(col %in% pos , "cyan" , "red")
barplot(s ,
las = 2,
col = col,
ylab = 'Count',
main = 'User Synergies')
Created on 2022-05-31 by the reprex package (v2.0.1)

Plotting points in map (long and lat) ggplot2

I'm trying to plot points in a map (shapefile) but I can't do it. I've transformed the shapefile with lat and long st_transform('+init=epsg:4326') because my dat has lat and long information.
Then I plot but doesn't work.
map = st_read('nxprovincias.shp') %>%
sf::st_transform('+init=epsg:4326')
# I've tried using `geom_sf`
dat1 = dat %>%
st_as_sf(coords = c("long", "lat"), crs=4326)
ggplot() +
geom_sf(data = map) +
geom_sf(data = dat1, aes(geometry = geometry))
# and `geom_point`
ggplot() +
geom_sf(data = map) +
geom_point(data = dat, aes(x = long, y = lat))
Data (long, lat, x)
dat = structure(list(lat = structure(c(-2.87660479545593, -2.87720417976379,
-2.87735748291016, -2.87753105163574, -2.87765717506409, -2.87785005569458,
-2.87821888923645, -2.87918782234192, -2.87929964065552, -2.87667083740234,
-2.87697672843933, -2.87707996368408, -2.87767362594604, -2.87771010398865,
-2.87813591957092, -2.8781750202179, -2.8784019947052, -2.87957549095154,
-2.87958645820618, -2.87968635559082, -2.87970232963562, -2.87977194786072,
-2.87977933883667, -2.87978482246399, -2.87985110282898, -2.87985396385193,
-2.87991166114807, -2.87996673583984, -2.87998247146606, -2.88041758537292,
-2.9928183555603, -99, -2.87677383422852, -2.87691879272461,
-2.87718558311462, -2.87721037864685, -2.87743043899536, -2.87768173217773,
-2.87944602966309, -2.87797331809998, -2.87819075584412, -2.87830853462219,
-2.87849140167236, -2.8785994052887, -2.87917923927307, -2.87923359870911,
-2.87934041023254, -2.87948775291443, -2.88050103187561, -2.88078212738037,
-2.88109421730042, -2.88113117218018, -2.88172602653503, -2.88214111328125,
-2.88219523429871, -2.87862133979797, -2.88026261329651, -2.88060832023621,
-2.88061451911926, -2.88077187538147, -2.88077616691589, -2.88100337982178,
-2.88157868385315, -2.8817310333252, -2.88299989700317, -2.89299464225769,
-2.88181924819946, -2.88214421272278, -2.88239336013794, -2.88244104385376,
-2.88291192054749, -2.88306641578674, -2.87702965736389, -2.87748551368713,
-2.87786865234375, -2.87825655937195, -2.87838006019592, -2.88284087181091,
-2.87875247001648, -2.88032579421997, -2.88060545921326, -2.87759780883789,
-2.87762522697449, -2.8776683807373, -2.87819457054138, -2.87915062904358,
-2.87936305999756, -2.87957811355591, -2.87959146499634, -2.87961769104004,
-2.88021159172058, -2.88076829910278, -2.88081574440002, -2.88141989707947,
-2.87116622924805, -2.87180852890015, -2.87283968925476, -2.87302923202515,
-2.87308740615845, -2.87557435035706), format.stata = "%9.0g"),
long = structure(c(-79.0676956176758, -79.0701141357422,
-79.0691986083984, -79.067756652832, -79.0691986083984, -79.0691986083984,
-79.0691528320312, -79.0684051513672, -79.0679779052734,
-79.0663223266602, -79.0669784545898, -79.0658645629883,
-79.0663909912109, -79.0657043457031, -79.0673751831055,
-79.0672378540039, -79.0664520263672, -79.0665512084961,
-79.0662689208984, -79.0657424926758, -79.0663528442383,
-79.0650329589844, -79.0666046142578, -79.0664138793945,
-79.0665588378906, -79.0667877197266, -79.0649337768555,
-79.0649490356445, -79.0650482177734, -79.064826965332, -79.0410537719727,
-99, -79.0623397827148, -79.0616836547852, -79.0619812011719,
-79.061897277832, -79.0632171630859, -79.0630722045898, -79.061653137207,
-79.0590896606445, -79.0603866577148, -79.0595092773438,
-79.0588912963867, -79.0578765869141, -79.0596008300781,
-79.0606155395508, -79.0592498779297, -79.0592041015625,
-79.0583572387695, -79.0598678588867, -79.0614395141602,
-79.0602798461914, -79.0587768554688, -79.0586318969727,
-79.0586547851562, -79.0604934692383, -79.0666580200195,
-79.0646667480469, -79.0649719238281, -79.0640106201172,
-79.0656890869141, -79.0631713867188, -79.059700012207, -79.0645904541016,
-79.0590209960938, -78.9783630371094, -79.0576248168945,
-79.0585327148438, -79.0580749511719, -79.0582504272461,
-79.0576858520508, -79.0575942993164, -79.0545349121094,
-79.0535278320312, -79.0556869506836, -79.0555191040039,
-79.0541076660156, -79.0554046630859, -79.0519485473633,
-79.052360534668, -79.052848815918, -79.0486145019531, -79.0485687255859,
-79.0481719970703, -79.0492935180664, -79.0472640991211,
-79.0477523803711, -79.0483016967773, -79.04833984375, -79.0483245849609,
-79.047981262207, -79.0500640869141, -79.0481643676758, -79.0477676391602,
-79.0512161254883, -79.050537109375, -79.0501861572266, -79.0501327514648,
-79.0500335693359, -79.0494155883789), format.stata = "%9.0g"),
x = structure(c(0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1,
0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 0, 1,
0, 1, 0, 1, 0, 0, 0, 0, 1, 2, 0, 0, 0, 1, 0, 1, 0, 1, 0,
2, 0, 0, 0, 2, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 0, 0), format.stata = "%8.0g")), row.names = c(NA,
-100L), class = c("tbl_df", "tbl", "data.frame"))
Can you elaborate a bit on what exactly did not work? There is one point with invalid coordinates in your data set:
subset(
dat
, lat < -90
)
# # A tibble: 1 × 3
# lat long x
# <dbl> <dbl> <dbl>
# -99 -99 2
Remove it before converting to sf and your plot call works just fine:
dat1 = dat |>
subset(
lat >= -90
) |>
st_as_sf(
coords = c("long", "lat")
, crs = 4326
)
ggplot() +
geom_sf(data = map) +
geom_sf(data = dat1, aes(geometry = geometry), color = "red") +
theme_minimal()
BTW, you might want to think about switching to something more interactive (like mapview) for exploratory analysis of spatial data:
library(mapview)
m_map = mapview(
map
, legend = FALSE
)
m_dat = mapview(
dat1
, layer.name = "x"
)
m_map +
m_dat

Adding 2 DF's of Different Sizes Together

I have two DF's:
passesComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 0, 2), P3_Good = c(0, 1, 0, 0, 0, 0, 1), P4_Good = c(0,
0, 1, 0, 0, 0, 1), P5_Good = c(0, 0, 0, 1, 0, 0, 1), P1_Bad = c(0,
0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0, 0, 0, 0, 0, 0), P3_Bad = c(0,
0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 1, 0, 0, 0, 1), P5_Bad = c(0,
0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0, 1, 0, 0, 1, 1), `Good Pass` = c(2,
2, 1, 1, 0, 3, 6), `Intercepted Pass` = c(0, 0, 0, 0, 0, 1, 0
), Turnover = c(0, 0, 0, 0, 0, 1, 0), totalEvents = c(2, 2, 2,
1, 0, 6, 7)), row.names = c("P1", "P2", "P3", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
of size 7x15, and
copyComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 2), P4_Good = c(0, 0, 0, 0, 0, 0), P5_Good = c(0,
0, 1, 0, 0, 1), P1_Bad = c(0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0,
0, 0, 0, 0), P3_Bad = c(0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 0,
0, 0, 0), P5_Bad = c(0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0,
0, 0, 1, 0), `Good Pass` = c(2, 1, 1, 0, 3, 4), `Intercepted Pass` = c(0,
0, 0, 0, 1, 0), Turnover = c(0, 0, 0, 0, 1, 0), totalEvents = c(2,
1, 1, 0, 6, 4)), row.names = c("P1", "P2", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
or simply,
copyComb <- passesComb
copyComb <- copyComb[-3,-3]
#Updating specific cells since [3,3] is removed
copyComb[2,11] <- 1
copyComb[2,14] <- 1
copyComb[6,8] <- 0
copyComb[6,3] <- 0
copyComb[6,10] <- 0
copyComb[6,11] <- 4
copyComb[6,14] <- 4
#This now equals the copyComb from dput() above
of size 6x14.
I am trying to combine/add these two df's together based on matching row/column names. I tried to achieve this using the code from the answer to this post
gamesComb <- data.frame(matrix(NA, nrow = ifelse(nrow(passesComb) >= nrow(copyComb), nrow(passesComb),nrow(copyComb)),
ncol = ifelse(ncol(passesComb) >= ncol(copyComb), ncol(passesComb),ncol(copyComb))))
gamesComb[row.names(ifelse(nrow(passesComb) >= nrow(copyComb), passesComb, copyComb)),
colnames(ifelse(ncol(passesComb) >= ncol(copyComb), passesComb, copyComb))] <- passesComb
Here, I create a df, gamesComb and set the dimensions of whichever passesComb or copyComb is bigger. It does create a 7x15 df, but doesn't add the row/col names.
I also am trying to then add the 2 df's together based on the cell value if they have the same row/col name (same as in the post link above), i.e. passesComb["P2","P1_Good"] = 1 and copyComb["P2","P1_Good"] = 1, so gamesComb["P2","P1_Good"] should = 2, and same for all similar row/col names.
So the final result look like:
expectedOutput <- structure(list(P1_Good = c(0, 2, 0, 0, 0, 0, 2), P2_Good = c(4,
0, 0, 0, 0, 0, 4), P3_Good = c(0, 1, 0, 0, 0, 0, 1), P4_Good = c(0,
0, 1, 0, 0, 0, 1), P5_Good = c(0, 0, 0, 2, 0, 0, 2), P1_Bad = c(0,
0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0, 0, 0, 0, 0, 0), P3_Bad = c(0,
0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 1, 0, 0, 0, 1), P5_Bad = c(0,
0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0, 1, 0, 0, 2, 1), `Good Pass` = c(4,
3, 1, 2, 0, 6, 10), `Intercepted Pass` = c(0, 0, 0, 0, 0, 2,
0), Turnover = c(0, 0, 0, 0, 0, 2, 0), totalEvents = c(4, 3,
2, 2, 0, 12, 11)), row.names = c("P1", "P2", "P3", "P4", "P5",
"Opponent", "VT"), class = "data.frame")
Here's a dplyr/tidyr approach where I reshape each table into a long format, then join them, sum, and pivot wider again.
library(dplyr); library(tidyr)
lengthen <- function(df) { df %>% rownames_to_column(var = "row") %>% pivot_longer(-row)}
full_join(lengthen(passesComb), lengthen(copyComb), by = c("row", "name")) %>%
mutate(new_val = coalesce(value.x, 0) + coalesce(value.y, 0)) %>%
select(-starts_with("value")) %>%
pivot_wider(names_from = name,values_from = new_val)
Another option is to stack them and then sum by rowname groups.
library(dplyr, warn.conflicts = FALSE)
library(tibble)
out <-
rownames_to_column(passesComb) %>%
bind_rows(rownames_to_column(copyComb)) %>%
# bind_rows(rownames_to_column(third_table)) %>% if you want to add another
select(rowname, names(passesComb)) %>%
group_by(rowname) %>%
summarise(across(everything(), sum, na.rm = T)) %>%
slice(match(rownames(passesComb), rowname)) %>%
column_to_rownames('rowname')
all.equal(out, expectedOutput)
#> [1] TRUE
Created on 2021-10-09 by the reprex package (v2.0.1)

Combining Two Data Fames with Different Row/Col Names Together

I have this data frame:
dtMatrix <- structure(list(category = c("Opponent", "Opponent", "Opponent",
"Opponent", "P1", "P2", "P3", "P4", "P2", "Opponent", "Opponent",
"P1"), Event = c("Good Pass", "Good Pass", "Good Pass", "Turnover",
"Good Pass", "Good Pass", "Good Pass", "Good Pass", "Good Pass",
"Intercepted Pass", "Bad Pass", "Good Pass"), Receiver = c(NA,
NA, NA, NA, "P2", "P3", "P4", "P5", "P1", NA, NA, "P2")), row.names = c(NA,
-12L), class = c("tbl_df", "tbl", "data.frame"))
With this, I created a matrix
goodMatrix <- dtMatrix %>%
filter(Event == 'Good Pass' & !is.na(Receiver)) %>%
dplyr::count(category, Receiver) %>%
tidyr::complete(category = dfList, Receiver = dfList, fill = list(n = 0)) %>%
pivot_wider(names_from = Receiver, values_from = n) %>%
column_to_rownames('category')
This goodMatrix stores the combinations of good passes between P1-P5. In the dtMatrix, it also has other values in the Event column such as turnover/intercepted pass, and also accounts for the opponent. I would like to create a similar matrix as goodMatrix but for the events and opponent previously mentioned.
countTypes <- dtMatrix %>% dplyr::count(category, Event) Grabs all the counts of the events based on the category column. With that, I then did:
secondMatrix <- data.frame(matrix(ncol = length(unique(countTypes$Event)), nrow = length(unique(countTypes$category))))
rownames(secondMatrix) <- unique(countTypes$category)
colnames(secondMatrix) <- unique(countTypes$Event)
secondMatrix
test <- merge(goodMatrix, secondMatrix, by = "row.names")
To try and combine the two separate matrices together.
anotherMatrix <- dtMatrix %>%
dplyr::count(category, Event) %>%
tidyr::complete(category = dfList, Event = dfList, fill = list(n = 0)) %>%
pivot_wider(names_from = Event, values_from = n) %>%
column_to_rownames('category')
This also adds them into one, but does not keep the values from dtMatrix and instead resets them to 0.
My expected result should look as such:
expectedOutput <- structure(list(P1 = c(0, 1, 0, 0, 0, 0), P2 = c(2, 0, 0, 0, 0,
0), P3 = c(0, 1, 0, 0, 0, 0), P4 = c(0, 0, 1, 0, 0, 0), P5 = c(0,
0, 0, 1, 0, 0), `Good Pass` = c(2, 2, 1, 1, 0, 3), `Bad Pass` = c(0,
0, 0, 0, 0, 1), `Intercepted Pass` = c(0, 0, 0, 0, 0, 1), Turnover = c(0,
0, 0, 0, 0, 1)), row.names = c("P1", "P2", "P3", "P4", "P5",
"Opponent"), class = "data.frame")
And anotherMatrix does half of this, while dtMatrix does the other half, but I am struggling on merging them into what I would like my result to be.
Edit
newTest <- test[,-1]
rownames(newTest) <- test[,1]
newTry <- merge(anotherMatrix, newTest, by = "row.names")
Just as an extra attempted method - this also gets close to my expected output, but does not include the opponent row, and also doubles every column.
dfList <- c("P1", "P2", "P3", "P4", "P5")
Edit 2
A quick follow up on combining 2 DF's with different row/col lengths, how would I go about combining passesComb + copyComb into gamesComb:
passesComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 0, 2), P3_Good = c(0, 1, 0, 0, 0, 0, 1), P4_Good = c(0,
0, 1, 0, 0, 0, 1), P5_Good = c(0, 0, 0, 1, 0, 0, 1), P1_Bad = c(0,
0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0, 0, 0, 0, 0, 0), P3_Bad = c(0,
0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 1, 0, 0, 0, 1), P5_Bad = c(0,
0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0, 1, 0, 0, 1, 1), `Good Pass` = c(2,
2, 1, 1, 0, 3, 6), `Intercepted Pass` = c(0, 0, 0, 0, 0, 1, 0
), Turnover = c(0, 0, 0, 0, 0, 1, 0), totalEvents = c(2, 2, 2,
1, 0, 6, 7)), row.names = c("P1", "P2", "P3", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
and
copyComb <- structure(list(P1_Good = c(0, 1, 0, 0, 0, 1), P2_Good = c(2,
0, 0, 0, 0, 2), P4_Good = c(0, 0, 0, 0, 0, 1), P5_Good = c(0,
0, 1, 0, 0, 1), P1_Bad = c(0, 0, 0, 0, 0, 0), P2_Bad = c(0, 0,
0, 0, 0, 0), P3_Bad = c(0, 0, 0, 0, 0, 0), P4_Bad = c(0, 0, 0,
0, 0, 1), P5_Bad = c(0, 0, 0, 0, 0, 0), `Bad Pass` = c(0, 0,
0, 0, 1, 1), `Good Pass` = c(2, 2, 1, 0, 3, 6), `Intercepted Pass` = c(0,
0, 0, 0, 1, 0), Turnover = c(0, 0, 0, 0, 1, 0), totalEvents = c(2,
2, 1, 0, 6, 7)), row.names = c("P1", "P2", "P4", "P5", "Opponent",
"VT"), class = "data.frame")
copyComb is the same as passesComb just with row/column 3 removed. I tried adapting from the code for the original answer.
gamesComb <- data.frame(matrix(NA, nrow = ifelse(nrow(passesComb) >= nrow(copyComb), nrow(passesComb),nrow(copyComb)),
ncol = ifelse(ncol(passesComb) >= ncol(copyComb), ncol(passesComb),ncol(copyComb))))
gamesComb[row.names(ifelse(nrow(passesComb) >= nrow(copyComb), passesComb, copyComb)),
colnames(ifelse(ncol(passesComb) >= ncol(copyComb), passesComb, copyComb))] <- passesComb
but this only creates a 7x15 df and doesn't add the row/column names for some reason, in addition to not adding the cell values.
If the intention is to update 'anotherMatrix' with goodMatrix, use the row.names and colnames from 'goodMatrix' to subset the 'anotherMatrix' and assign the 'goodMatrix' to 'anotherMatrix'
anotherMatrix[row.names(goodMatrix), colnames(goodMatrix)] <- goodMatrix
Then, we just replace the NA with 0
anotherMatrix[is.na(anotherMatrix)] <- 0
-checking with 'expectedOutput
> identical(expectedOutput, anotherMatrix[names(expectedOutput)])
[1] TRUE

How to create and export multiple plots to jpeg format in r?

I have been creating a bar plot for the result of a sentiment analysis model in R. The data is very confidential feedbacks from the customers. So, the feedbacks are then fed into a sentiment analysis model to generate outputs. My work is to generate a chart for each combination for example zone = delhi and delhi has sub zones like eastdelhi, westdelhi,northdelhi,southdelhi. I want to generate charts with combination like
zone = delhi and sub-zone = eastdelhi. And I want to save it to a jpeg file.I have written a for loop to do so. But for some reason it isn't working. This is the code
#Set locales
rm(list = ls())
Sys.setlocale(category = "LC_ALL",locale = "English")
#Load libraries
LoadLibraries <- c("openxlsx",
"dplyr",
"tidyr",
"plotly",
"RColorBrewer",
"shiny",
"officer",
"parallel",
"dplyr",
"tidyr",
"magrittr",
"knitr")
lapply(LoadLibraries, require, character.only = TRUE)
path = "C:/Users/R_Visual/Data/visual_data.xlsx"
input_data <- read.xlsx(path)
name <- names(input_data[,1:10])
#Filtering the zones and circles
for (i in 1:length(unique(Zone.Final))){
for (j in 1:length(unique(Circle.Final))){
fileName = 'C:/Users/R_Visual/'+ str(i) + str(j) + '.jpeg'
jpeg(fileName, width = 900, height = 450)
df <- input_data %>%
filter(input_data$Zone.Final[i])
df <- df %>%
filter(df$Circle.Final[j])
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
plot <- barplot(sort(colSums(input_data[, 1:10])),
main = paste("Sentiment Analysis for Zone",df$Zone.Final[i]," and Circle",df$Circle.Final[j], sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1, cex = 0.8, y.intersp = 0.8, x.intersp = 0.25, horiz = F, xpd = TRUE, inset = c(0,0)))
dev.off()
}
}
EDIT:
This is the sample of input_data
> dput(input_data)
structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
If anyone could help me with the code, it would be of great help.
You can try creating a list combining the zone and subzone:
#Data
input_data <- structure(list(anger = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), anticipation = c(1,
0, 0, 0, 0, 0, 1, 0, 0, 0), disgust = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), fear = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), joy = c(0,
0, 0, 0, 0, 0, 1, 0, 0, 0), sadness = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), surprise = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0), trust = c(0,
0, 1, 1, 1, 0, 2, 0, 0, 0), negative = c(0, 0, 0, 0, 0, 0, 0,
0, 0, 0), positive = c(1, 0, 0, 0, 1, 1, 2, 1, 0, 1), Zone.Final = c("Delhi",
"Lucknow", "Durgapur", "Lucknow", "Mumbai", "Bhopal", "Chandigarh",
"Chandigarh", "Gurugram", "Chandigarh"), Circle.Final = c("Noida",
"Gorakhpur", "Murshidabad", "Gorakhpur", "Mumbai City", "Bhopal",
"Chandigarh", "Panchkula", "Hisar", "Karnal")), row.names = c(NA,
10L), class = "data.frame")
#Code
#First create and global id to combine zone and subzone
df <- input_data
df$id <- paste(df$Zone.Final,df$Circle.Final,sep = '-')
#Split
List <- split(df,df$id)
#Plot
color <- c("#ca2f27","#f56d43","#f8c38a","#fde08b","#d9ef8b","#a7d86f","#67bd64","#1a984f","#D3D3D3","#A9A9A9")
#Plot names
vnames <- paste0(names(List),'.jpeg')
#Loop
for(i in 1:length(List))
{
name <- names(List[[i]][, 1:10])
#Plot
jpeg(filename = vnames[i], width = 900, height = 450)
barplot(sort(colSums(List[[i]][, 1:10])),
main = paste("Sentiment Analysis for Zone ",
unique(List[[i]]$Zone.Final),
" and Circle ",unique(List[[i]]$Circle.Final), sep = ""),
xlab = "Sentiments",
ylab = "Count",
horiz = FALSE,
names = name,
col = color,
border = FALSE,
legend = TRUE,
beside = TRUE,
legend.text = name,
args.legend = list(bty = "n", x = "topleft",ncol = 1,
cex = 0.8, y.intersp = 0.8, x.intersp = 0.25,
horiz = F, xpd = TRUE, inset = c(0,0)))
dev.off()
}
That will create the plots. Of course you can add a path to vnames like the dir you have to save the plots in that folder.

Resources