Conditional grouped barplot R - r

I am trying to make a barplot in R for two categorical variables, Dep_meds_at_time_of_rx_2 and phq9_cat. phq9_cat has two levels, 0 and 1, where 0 corresponds to PHQ-L and 1 corresponds to PHQ-H.
Here is my code:
# get counts of vars
counts <- table(data2$Dep_meds_at_time_of_rx_2, data2$phq9_cat)
# get percentages of vars
pcnts <- scale(counts, FALSE, colSums(counts))*100
# plot barplot
bp <- barplot(pcnts, beside=TRUE, col=c("azure3", "azure4"), ylab="Frequency (%)", border=NA)
legend("topright", legend=c("PHQ-L", "PHQ-H"), bty="n", fill=c("azure3", "azure4"), border=NA)
text(bp, 1, round(pcnts, 2), cex=1, pos=3, col=c("black"))
And the resulting plot:
Which is great! But I need to only plot the data2$Dep_meds_at_time_of_rx_2==1 category. So I would like a barplot with only the 3.03 bar and the 19.44 bar.
I've exhausted any clever tricks that I know of already such as making the data2$Dep_meds_at_time_of_rx_2==0 bars white and using space = c(-1, 0) to make the data2$Dep_meds_at_time_of_rx_2==1 bars next to one another but then the bars are super wide, like so:
I just need the data2$Dep_meds_at_time_of_rx_2==1 columns, but at a normal width.
Any ideas?
Here is my data:
> dput(data2)
structure(list(Dep_meds_at_time_of_rx_2 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L), phq9_cat = c(1L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), .Names = c("Dep_meds_at_time_of_rx_2", "phq9_cat"), row.names = c(NA,
-243L), class = "data.frame")

Here is a minor revision of what you have. I think it gets close to accomplishing what you want:
bp <- barplot(pcnts[2,], beside=TRUE, col=c("azure3", "azure4"), ylab="Frequency (%)",
border=NA)
legend("topleft", legend=c("PHQ-L", "PHQ-H"), bty="n", fill=c("azure3", "azure4"), border=NA)
text(bp[], 1, round(pcnts[2,], 2), cex=1, pos=3, col=c("black"))
Note that if it is desired to drop the "0" "1" labels on the x axis, you can accomplish this by replacing pcnts[2, ] with unname(pcnts) in the first line:
bp <- barplot(unname(pcnts[2, ]), beside=TRUE, col=c("azure3", "azure4"),
ylab="Frequency (%)", border=NA)

I thought I'd throw in a ggplot2 answer. This solution ensures that both of the labels on the x-axis are 1 - reflecting the status of Anti-depressant use:
library(ggplot2)
df1 <- data.frame(Frequency = pcnts[2,],
PHQ = c('PHQ-L','PHQ-H'))
ggplot(df1, aes(x = 1, y = Frequency))+
geom_bar(stat = 'identity', aes(fill = PHQ),
position = position_dodge(width = 1))+
scale_fill_manual(values = c('PHQ-L' = 'azure3',
'PHQ-H' = 'azure4'),
name = '')+
scale_x_continuous(breaks = c(.75, 1.25),
labels = c(1,1))+
xlab('Anti-Depressant use at time of treatment')+
ylab('Frequency (%)')+
geom_text(x = .75, y = 2.5, label = '19.44%')+
geom_text(x = 1.25, y = 2.5, label = '3.03%')+
theme_bw()

You just need to select the second row of your percentages table, e.g.
# get percentages of vars
pcnts <- scale(counts, FALSE, colSums(counts))*100
# Filter for the results you want
pcnts <- pcnts[2, ]
# Plot as before

If you want to achieve narrow bars then a combination of width and space arguments will do the trick.
barplot(pcnts, beside=TRUE, col=c("azure3", "azure4"), ylab="Frequency (%)", border=NA, width = c(0,.51, 0,0.51), space = c(1,2))
or you can change the colour to white
barplot(pcnts, beside=TRUE, col=c("white", "azure4"), ylab="Frequency (%)", border=NA, space = c(1,2))

Related

R two regressions from one table

I am trying to plot two different regression lines (with the formula: salary = beta0 + beta1D3 + beta2spending + beta3*(spending*D3) + w) into one scatter plot by deviding the data I have into two subsets as seen in the following code:
salary = data$salary
spending = data$spending
D1 = data$North
D2 = data$South
D3 = data$West
subsetWest = subset(data, D3 == 1)
subsetRest = subset(data, D3 == 0)
abab = lm(salary ~ 1 + spending + 1*spending, data=subsetWest) #red line
caca = lm(salary ~ 0 + spending + 0*spending, data=subsetRest) #blue line
plot(spending,salary)
points(subsetWest$spending, subsetWest$salary, pch=25, col = "red")
points(subsetRest$spending, subsetRest$salary, pch=10, col = "blue")
abline(abab, col = "red")
abline(caca, col = "blue")
This is a sample of my data table:
And this is the plot I get when running the code:
[enter image description here][2] [2]: https://i.stack.imgur.com/It8ai.png
My problem is that the intercept for my second regression is wrong, in fact I do not even get an intercept when looking at the summary, unlike with the first regression.
Does anybody see where my problem is or does anybody know an alternative way of plotting the two regression lines?
Help would be much appreciated. Thank you very much!
This is the whole table:
structure(list(salary = c(39166L, 40526L, 40650L, 53600L, 58940L,
53220L, 61356L, 54340L, 51706L, 49000L, 48548L, 54340L, 60336L,
53050L, 54720L, 43380L, 43948L, 41632L, 36190L, 41878L, 45288L,
49248L, 54372L, 67980L, 46764L, 41254L, 45590L, 43140L, 44160L,
44500L, 41880L, 43600L, 45868L, 36886L, 39076L, 40920L, 42838L,
50320L, 44964L, 41938L, 54448L, 51784L, 45288L, 49280L, 44682L,
51220L, 52030L, 51576L, 58264L, 51690L), spending = c(6692L,
6228L, 7108L, 9284L, 9338L, 9776L, 11420L, 11072L, 8336L, 7094L,
6318L, 7242L, 7564L, 8494L, 7964L, 7136L, 6310L, 6118L, 5934L,
6570L, 7828L, 9034L, 8698L, 10040L, 7188L, 5642L, 6732L, 5840L,
5960L, 7462L, 5706L, 5066L, 5458L, 4610L, 5284L, 6248L, 5504L,
6858L, 7894L, 5018L, 10880L, 8084L, 6804L, 5658L, 4594L, 5864L,
7410L, 8246L, 7216L, 7532L), North = c(1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), South = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L), West = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L)), class = "data.frame", row.names = c(NA,
-50L))
My problem is that the intercept for my second regression is wrong, in fact I do not even get an intercept when looking at the summary, unlike with the first regression.
That is because your second model specifies no intercept, since you use ... ~ 0 + ...
Also, your first model doesn't make sense because it includes spending twice. The second entry for spending will be ignored by lm

system is computationally singular : mlogit

When I am trying to add all variables in formula, I am getting this error. If I omit A then, the model runs fine.
dput(df1)
structure(list(ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Choice = c(1L, 0L, 0L,
0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L,
1L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L,
1L), A = c(0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L), B = c(0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 1L, 0L, 0L, -1L, 0L, 0L),
C = c(1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 1L,
0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L), D = c(0L, 1L, 0L, 0L,
-1L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L), E = c(0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 1L, 0L,
0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, -1L, 0L), F = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, -1L, 0L, 0L, 1L, 0L, 0L, -1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, -1L), Alternative = c(1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L, 2L, 3L, 1L,
2L, 3L)), row.names = c(NA, -36L), class = "data.frame")
> model = mlogit( Choice ~ A + B + C + D + E + F | 0, data = df1,
+
+ alt.var = 'Alternative',
+
+ shape = "long")
Error in solve.default(H, g[!fixed]) :
system is computationally singular: reciprocal condition number = 4.85723e-17
I have seen these two questions and the documentation however still not able to figure it out completely. Any help will be highly appreciated.
R mlogit model, computationally singular
Error in mlogit: Error in solve.default(H, g[!fixed]) : system is computationally singular: reciprocal condition number = 3.4767e-18

How to label entire groups in r Heatmap on the right side of the y-axis?

I am just getting started learning how to use RStudio and am trying to build a Heatmap. I understand the basics and how to set up the data in excel and convert it into a matrix. However, I would like to add labels on the side to show treatment groups rather than just having all the individual samples labeled.
Here is the code
vac_v1 <- read.csv('~/Desktop/Seaborn trials/Heatmap of oral treatment.csv')
rownames <- vac_v1[,1]
mat_vac <- data.matrix(vac_v1[,2:ncol(vac_v1)])
heatmap(mat_vac, Rowv = NA, Colv = NA, col = c("black", "green"), scale = 'none', cexRow = 0.6, cexCol = 0.8)
#From the advice of commenters I have included the output from dput(vac_v1)
dput(vac_v1)
structure(list(Group = c("O Oral", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "E Oral",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "Combo Oral",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "Control Oral", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""
), gene.1 = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L), trait.1 = c(0L, 0L, 0L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 1L, 1L,
1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 1L,
1L, 1L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 0L, 0L), trait.2 = c(0L,
0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L,
0L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
0L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L), trait.3 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L), trait.4 = c(0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 1L, 1L, 1L), trait.5 = c(0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 0L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 1L, 1L, 1L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L,
1L), trait.6 = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L,
1L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L,
0L, 1L, 1L, 1L, 1L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 0L, 0L)), class = "data.frame", row.names = c(NA,
-162L))
I have also attached an image of the Heatmap.
What I would like to do is instead of having numbers on the right side of the y-axis, I would like to have my treatment groups labeled. The treatment groups are labeled in a separate column in the same dataset. multiple samples are from the same treatment group and I would like to label multiple samples with the same group label. Is there anyway to do this? Any help is appreciated enter image description here
In the past I have just added the labels using powerpoint, it was very clunky. I have attached an example below
A potential solution (using publicly available data):
#install.packages("tidyverse")
library(tidyverse)
# install DESeq if necessary
#if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
#BiocManager::install("DESeq")
library(DESeq)
#install.packages("viridis")
library(viridis)
# load example dataset from the DESeq package
example_file <- system.file ("extra/TagSeqExample.tab", package="DESeq")
data <- read.delim(example_file, header=T, row.names="gene")
# subset the dataset to get a reasonable number of rows to plot (top 50 rows)
data_subset <- data %>% top_n(50)
# add a dataframe (tibble: https://tibble.tidyverse.org/) called "group" and fill it with labels and colors
group <- tibble("colours" = sample(size = 50, c("#31688EFF", "#35B779FF"), replace = TRUE),
"labels" = ifelse(colours == "#31688EFF", "group_1", "group_2"))
# create the heatmap
heatmap(as.matrix(data_subset), RowSideColors = group$colours,
labRow = group$labels)
EDIT
Thanks for editing your question to include the dput() output - here is another potential solution:
# Load the data using the dput() oputput
# data <- structure(...
# Fill in the blank cells in "Group"
while(length(ind <- which(data$Group == "")) > 0){
data$Group[ind] <- data$Group[ind -1]
}
# Specify colours for each group
rc <- ifelse(data$Group == "O Oral", "#440154FF",
ifelse(data$Group == "E Oral", "#31688EFF",
ifelse(data$Group == "Combo Oral",
"#35B779FF", "#FDE725FF")))
# Plot the heatmap
heatmap(as.matrix(data[-1]), labRow = data$Group,
RowSideColors = rc, Rowv = NA, revC = TRUE,
margins = c(6, 16), col = c("black", "green"))
# Add a legend for the colours
par(lend = 1)
legend("topright", lwd = 10,
legend = c("O Oral", "E Oral", "Combo Oral", "Control Oral"),
col = c("#440154FF", "#31688EFF", "#35B779FF", "#FDE725FF"))
Is this more along the lines of what you're trying to achieve?

Non-conformable arrays in neural network - neuralnet package

I am trying to create a neural network using this code:
countries=read.table('countries.txt',header =TRUE,sep='\t',quote="",dec=",")
#install.packages("neuralnet")
library(neuralnet)
trainset<-countries[1:85,]
testset<-countries[86:118,]
retea<-neuralnet(Tari.europene~Enrolement_P+Enrolement_S, trainset,
hidden=4, lifesign="minimal", linear.output=FALSE, threshold=0.1)
and I get this error:
hidden: 4 thresh: 0.1 rep: 1/1 steps: Error in x - y : non-conformable arrays
Also , the results for this line are:
dput(trainset[c("Tari.europene", "Enrolement_P", "Enrolement_S")])
structure(list(Tari.europene = c(1L, 1L, 0L, 0L, 0L, 1L, 0L,
1L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 1L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 1L, 1L, 0L, 1L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 1L, 0L), Enrolement_P = c(195720L,
4065L, 10172L, 4780105L, 142517L, 327246L, 103806L, 368783L,
773568L, 51978L, 2133330L, 101667L, 1346171L, 161023L, 16630217L,
258840L, 67023L, 2128685L, 4142775L, 2249164L, 1469099L, 4542644L,
120381L, 13534625L, 475731L, 3176874L, 160819L, 763364L, 53129L,
510613L, 467484L, 64319L, 7695L, 1267930L, 2093835L, 11128030L,
777043L, 77215L, 351766L, 4188552L, 285329L, 2862690L, 4117152L,
628753L, 13061L, 2417429L, 1150042L, 324171L, 393020L, 29892L,
29838440L, 7441078L, 536471L, 861699L, 2862666L, 266201L, 6714539L,
979792L, 1122282L, 8158000L, 2736224L, 114623L, 480923L, 366048L,
683977L, 1929L, 108115L, 35435L, 3178364L, 24072L, 592249L, 105447L,
14627368L, 138420L, 239289L, 5705343L, 5177276L, 1578L, 4401780L,
1222867L, 360206L, 30456129L, 108254L, 425917L, 19431565L), Enrolement_S = c(333291L,
4319L, 8077L, 4450741L, 244543L, 697388L, 90092L, 648541L, 1210112L,
37095L, 896763L, 74227L, 1131625L, 297460L, 24224945L, 518914L,
59823L, 2103459L, 2000076L, 2661089L, 1556372L, 4827962L, 70234L,
4388456L, 460235L, 1418361L, 370356L, 830375L, 58634L, 781392L,
553791L, 58553L, 5663L, 931068L, 1942230L, 8208329L, 625060L,
77873L, 536925L, 5947212L, 281739L, 7201072L, 2265692L, 667718L,
9736L, 1165624L, 619832L, 415971L, 857807L, 37530L, 22586955L,
5794537L, 348116L, 767729L, 4596916L, 223920L, 7227485L, 749134L,
1661586L, 7123778L, 3579411L, 121580L, 370359L, 130836L, 222857L,
3387L, 277349L, 46872L, 2846473L, 30230L, 178968L, 133001L, 12993322L,
245773L, 345223L, 1025975L, 3191268L, 1028L, 3163946L, 1573998L,
400562L, 26894959L, 170834L, 439250L, 11286628L)), row.names = c(NA,
85L), class = "data.frame")
I don't have Nan values and the columns have the same shape.
This is my data frame. Tari.europene means if the country is from Europe or not (1= European country; 0=non-European country.) The P and S from Repeaters, Enrolement and Teachers means primary and secondary cycles from system education.

barplot(): Frequency percentages per group

I'm trying to make a grouped barplot with frequency (%) on the y-axis and depression_meds (N/Y) on the x-axis, grouped by another variable score (LOW/HIGH).
My code so far:
meds <- table(data2$depression_meds,data2$score)/sum(table(data2$score)) * 100
bp <- barplot(meds, beside=TRUE, axes=FALSE, xlab="Anti-depression meds use", names=c("No", "Yes"), col=c("azure3", "azure"), ylab="Frequency (%)", ylim=c(0,100))
axis(2, at=seq(0,100,10))
legend("topright", legend=c("LOW", "HIGH"), bty="n", fill=c("azure3", "azure"))
text(bp, 0, round(medtimerx, 1), cex=1, pos=3)
Which is great and makes the following barplot:
But the percentages are using the total n of 243 (3rd column of the table below), not the n per score group (1st and 2nd columns in table below), which makes sense because that is what I do when I divide by the sum. But that's not what I want. I keep trying to get the frequencies per score group so that the four bars match the 1st and 2nd columns below, but I have run out of ideas. Does anyone have any suggestions?
Depression meds (0=N, 1=Y) LOW (N=99) HIGH (N=144) TOTAL (N=243)
0 96 (97.0%) 116 (80.6%) 212 (87.2%)
1 3 (3.0%) 28 (19.4%) 31 (12.8%)
Here is my data:
> dput(data2)
structure(list(depression_meds = c(0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L,
1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 1L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L,
0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L), score = c(1L,
1L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 1L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 0L, 0L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 1L, 0L, 0L, 0L, 0L, 0L,
0L, 0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L)), .Names = c("depression_meds", "score"), row.names = c(NA,
-243L), class = "data.frame")
Thanks for your help, my brain isn't working any longer.
Using prop.table in this case is very suitable, which provides a margin parameter to specify by row or by column probability calculation:
meds <- prop.table(table(data2), margin = 2) * 100
meds
# score
# depression_meds 0 1
# 0 96.969697 86.111111
# 1 3.030303 13.888889
Use this for your summary table:
meds <- table(data2)
# score
#depression_meds 0 1
# 0 96 124
# 1 3 20
meds <- scale(meds, FALSE, colSums(meds)) * 100
# score
#depression_meds 0 1
# 0 96.969697 86.111111
# 1 3.030303 13.888889
No need to change your the rest of your code:
bp <- barplot(meds, beside=TRUE, axes=FALSE, xlab="Anti-depression meds use", names=c("No", "Yes"), col=c("azure3", "azure"), ylab="Frequency (%)", ylim=c(0,100))
axis(2, at=seq(0,100,10))
legend("topright", legend=c("LOW", "HIGH"), bty="n", fill=c("azure3", "azure"))
text(bp, 0, round(meds, 1), cex=1, pos=3)

Resources