Pie Chart using R - r

I want to create a piechart (showing Forest.Area.ha. by GaPa_NaPa) based on following attribute table as below:-
The dataframe for the data is as shown:-
structure(list(GaPa_NaPa = c("Gaidahawa", "Kanchan", "Kotahimai",
"Marchawari", "Mayadevi", "Omsatiya", "Rohini", "Sammarimai",
"Siyari", "Sudhdhodhan", "Devdaha", "Lumbini Sanskritik", "Sainamaina",
"Siddharthanagar", "Tillotama", "Butwal"), Total.Area..ha. = c(9657L,
5835L, 5812L, 4844L, 7228L, 4844L, 6449L, 5066L, 6620L, 5743L,
13667L, 11194L, 16082L, 3595L, 12592L, 10139L), Forest.Area.ha. = c(114.91,
178.19, 31.37, 43.43, 152.87, 29.12, 63.16, 59.81, 36.4, 16.42,
113.13, 422.87, 186.13, 167.2, 60.27, 45.3), Forest.Percent = c(6.67,
10.35, 1.83, 2.52, 8.88, 1.69, 3.67, 3.47, 2.11, 0.95, 6.57,
24.57, 10.81, 9.71, 3.5, 2.63), Forest.Area..Fraction. = c(0.07,
0.1, 0.02, 0.03, 0.09, 0.02, 0.04, 0.03, 0.02, 0.01, 0.07, 0.25,
0.11, 0.1, 0.04, 0.03), Household.No = c(8612L, 9828L, 5939L,
5305L, 8003L, 6683L, 6349L, 5164L, 7889L, 7619L, 15624L, 10736L,
17572L, 12329L, 30452L, 36989L), Family.Size = c(10020L, 10483L,
7921L, 6972L, 10040L, 8218L, 8096L, 7303L, 9060L, 8717L, 17582L,
13854L, 19657L, 16011L, 36399L, 51099L), Total = c(56529L, 42528L,
46417L, 41058L, 57341L, 41080L, 43277L, 43300L, 45274L, 41472L,
71806L, 88090L, 78477L, 76307L, 149657L, 195054L)), row.names = c(NA,
16L), class = "data.frame")
The code I used is:-
setwd("C:/Users/lenovo/Desktop/AllAboutR/AssignmentDocs")
ForestArea2010<-read.csv("Forest2010.csv")
View(ForestArea2010)
pie(RupandehiLULC19$GaPa_NaPa, main="Piechart of Forest Area", las=3, col=hsv(12))
But I couldn't work further on how to show piechart showing Forest.Area.ha. by GaPa_NaPa working on code to plot piechart. Please help on it. How the code must be written?

Here is another option with ggplot2. If you don't want the labels then the geom_text_repel line can be removed.
library(ggplot2)
library(ggrepel)
ggplot(RupandehiLULC19, aes(x = "", y = `Forest.Area.ha.`, fill = GaPa_NaPa)) +
geom_bar(width = 1, stat = "identity") +
coord_polar("y", start = 0) +
xlab("") +
ylab("Piechart of Forest Area") +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
geom_text_repel(aes(label = `Forest.Area.ha.`),
position = position_stack(vjust = 0.5))
Output
Or another option using legend:
pie(RupandehiLULC19$Forest.Area.ha., labels = "", main="Piechart of Forest Area", las=3, col=palette(rainbow(16)))
legend(.85, 1.1, RupandehiLULC19$GaPa_NaPa, cex = 0.7, fill = palette(rainbow(16)), box.col = "white",bg = "white")
Or with the values:
pie(RupandehiLULC19$Forest.Area.ha., labels = RupandehiLULC19$Forest.Area.ha., main="Piechart of Forest Area", las=3, col=palette(rainbow(16)))
legend(.85, 1.1, RupandehiLULC19$GaPa_NaPa, cex = 0.7, fill = palette(rainbow(16)), box.col = "white",bg = "white")

Related

R package "forestplot": Error message re: identifying the upper/lower boundaries from the input matrix

I'd be grateful for your help. I'm using the forestplot R package to create a forest plot. It was going well.
library(forestplot)
test_data <- data.frame(
coef = c(1.53, 1.56, 1.11, 1.35, 1.00, 0.94, 1.03),
low = c(1.43, 1.38, 1.08, 1.05, 0.91, 0.87, 0.95),
high = c(1.64, 1.76, 1.15, 1.75, 1.10, 1.01, 1.11),
boxsize = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
)
row_names <- cbind(
c("Variable", "BMI", "WHR", "Diabetes", "Insulin resistance", "LDL", "HDL", "ApoB"),
c("SNPs", "807", "368", "386", "53", "213", "510", "241"),
c("OR (95% CI)", "1·53 (1·43−1·64)", "1·56 (1·38−1·76)", "1·11 (1·08−1·15)", "1·35 (1·05−1·75)", "1·00 (0·91−1·10)", "0·94 (0·87−1·01)", "1·03 (0·95−1·11)"),
c("p-value", "6·72E−35", "2·90E−12", "6·61E−10", "0·02", "0·97", "0·083", "0·53")
)
test_data <- rbind(rep(NA, 3), test_data)
forestplot(
labeltext = row_names,
xlab = "Odds ratio",
test_data[, c("coef", "low", "high")],
is.summary = FALSE,
boxsize = test_data$boxsize,
zero = 1,
cex = 0.1,
align = TRUE,
xlog = FALSE,
graphwidth = unit(1.5, "snpc"),
col = fpColors(lines = "royalblue", box = "darkblue")
)
I've been adding one variable at a time and plotting my graph after the addition of each to make sure there were no errors introduced at each step. It was all going well until I added my eighth variable, "Triglycerides", and its associated numbers:
library(forestplot)
test_data <- data.frame(
coef = c(1.53, 1.56, 1.11, 1.35, 1.00, 0.94, 1.03, 1.15),
low = c(1.43, 1.38, 1.08, 1.05, 0.91, 0.87, 0.95, 1.07),
high = c(1.64, 1.76, 1.15, 1.75, 1.10, 1.01, 1.11, 1.11),
boxsize = c(0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1)
)
row_names <- cbind(
c("Variable", "BMI", "WHR", "Diabetes", "Insulin resistance", "LDL", "HDL", "ApoB", "Triglycerides"),
c("SNPs", "807", "368", "386", "53", "213", "510", "241", "427"),
c("OR (95% CI)", "1·53 (1·43−1·64)", "1·56 (1·38−1·76)", "1·11 (1·08−1·15)", "1·35 (1·05−1·75)", "1·00 (0·91−1·10)", "0·94 (0·87−1·01)", "1·03 (0·95−1·11)", "1·15 (1·07−1·11)"),
c("p-value", "6·72E−35", "2·90E−12", "6·61E−10", "0·02", "0·97", "0·083", "0·53", "2·14E−4")
)
test_data <- rbind(rep(NA, 3), test_data)
forestplot(
labeltext = row_names,
xlab = "Odds ratio",
test_data[, c("coef", "low", "high")],
is.summary = FALSE,
boxsize = test_data$boxsize,
zero = 1,
cex = 0.1,
align = TRUE,
xlog = FALSE,
graphwidth = unit(1.5, "snpc"),
col = fpColors(lines = "royalblue", box = "darkblue")
)
On adding this eighth variable, I now receive the following error message:
Error in prFpConvertMultidimArray(mean) :
Sorry did not manage to correctly identify the upper/lower boundaries from the input matrix.
I can't spot a syntax error here, and I'm really at a loss as to why this has only just happened. Any advice would be greatly appreciated.
Thank you.

Add custom tick mark to Y axis in ggplot2

I'd like to show the average for my dataset and add a tick mark on the Y-axis corresponding to this mean value - highlighted in red in the below image:
Code
plt <- ggplot(dat, aes(x = time, y = value)) +
geom_point(aes(fill = value), size = 2, alpha = 0.8, shape = 21, stroke = 0.5, color = 'black') +
scale_color_gradientn(colors = RColorBrewer::brewer.pal(4,name = 'OrRd')[-1], aesthetics = 'fill') +
geom_hline(yintercept = dat[, mean(value, na.rm = T)], color = 'black', linetype = '11', size = 1.25) +
guides(fill = F)
I can use scale_y_continuous() to add a specific break point but it messes up the grid lines and I don't know how to customize that specific tick mark (if at all possible):
plt <- plt +
scale_y_continuous(breaks = round(c(seq(from = 0, to = dat[, max(value)], by = 10), dat[, mean(value)]), digits = 1) )
Data
Reduced dataset for reproducing the plot:
structure(list(time = structure(c(1607990400, 1607996400, 1608002400,
1608008400, 1608014400, 1608020400, 1608026400, 1608032400, 1608038400,
1608044400, 1608050400, 1608056400, 1608062400, 1608068400, 1608074400,
1608080400, 1608086400, 1608092400, 1608098400, 1608104400, 1608110400,
1608116400, 1608122400, 1608128400, 1608134400, 1608140400, 1608146400,
1608152400, 1608158400, 1608164400, 1608170400, 1608176400, 1608182400,
1608188400, 1608194400, 1608200400, 1608206400, 1608212400, 1608218400,
1608224400, 1608230400, 1608236400, 1608242400, 1608248400, 1608254400,
1608260400, 1608266400, 1608272400, 1608278400, 1608284400, 1608290400,
1608296400, 1608302400, 1608308400, 1608314400, 1608320400, 1608326400,
1608332400, 1608338400, 1608344400, 1608350400, 1608356400, 1608362400,
1608368400, 1608374400, 1608380400, 1608386400, 1608392400, 1608398400,
1608404400, 1608410400, 1608416400, 1608422400, 1608428400, 1608434400,
1608440400, 1608446400, 1608452400, 1608458400, 1608464400, 1608470400,
1608476400, 1608482400, 1608488400, 1608494400, 1608500400, 1608506400,
1608512400, 1608518400, 1608524400, 1608530400, 1608536400, 1608542400,
1608548400, 1608554400, 1608560400, 1608566400, 1608572400, 1608578400,
1608584400, 1608590400, 1608596400, 1608602400, 1608608400, 1608614400,
1608620400, 1608626400, 1608632400, 1608638400), class = c("POSIXct",
"POSIXt"), tzone = "UTC"), value = c(3.87, 3.57, 4.12, 2.68,
4.85447552447552, 0, 9.85, 2.9, 0.65010183299389, 2.55242704955998,
2.94610169491525, 3.2225, 3.44, 3.2, 3.64666666666667, 3.6, 4.2236312849162,
3.56285714285714, 2.99, 2.54, 2.34, 2.245, 2.05, 2.23666666666667,
4.82, 13.81, 18.08, 4.0375, 3.96, 12.9723756906077, 23.87, 16.2053333333333,
13.0836077705828, 10.91, 5.36238095238095, 2.62, 2.5375, 2.38,
2.72, 2.345, 2.32909090909091, 3.90333333333333, 3.02166666666667,
3.94833333333333, 3.83636363636364, 4.04117647058824, 4.22139146567718,
5.57, 4.82, 3.59666666666667, 3.73873949579832, 2, 2.04, 2.57,
3.00042016806723, 3.905, 5.65, 4.271589958159, 5.28, 7.15639534883721,
5.45, 5.24295336787565, 3.11224489795918, 4.79, 2.6106976744186,
2.25, 2.08264705882353, 2.25, 2.58666666666667, 3.18682008368201,
3.24, 3.10375, 3.35833333333333, 4.39333333333333, 3.765, 7.71,
5.16117647058824, 4.95588235294118, 2.44, 2.34666666666667, 2.345,
2.375, 2.4275, 3.0975, 3.21666666666667, 4.13, 4.44663366336634,
3.60877551020408, 3.83265033407572, 3.8625, 4.2675, 6.765, 2.688,
2.43101242521859, 2.43561435803037, 2.30166666666667, 2.69, 3.18,
5.04, 4.345, 4.86529411764706, 8.57, 6.2, 6.0032, 3.82, 5.03,
7.02, 3.69716216216216, 3.00468438538206)), row.names = c(NA,
-109L), class = c("data.table", "data.frame"))
Quick, dirty, and hacky:
plt + geom_text(aes(x = dat[, min(time, na.rm = T)], y = dat[, mean(value, na.rm = T)], label = round(dat[, mean(value, na.rm = T)],1)), color = 'red', hjust = 2) + coord_cartesian(clip = 'off')
Maybe it gets you somewhere.

qplot: Only graphing nodes below a threshold

I am trying to make a visual graph of a dissimilarity matrix. Using this site, I ran into the qgraph function from the package qgraph. Using the threshold flag, I am able to remove edges from my network above the supplied numerical value. This works beautifully, however, what if I only want to plot values below a certain threshold, not above?
For this, I came back to this site and read here: How to plot near-zero values with qgraph? to use the cut flag for this purpose. However, as the answer states, this flag will only "adjust the saturation so that everything above the cut point has the strongest color intensity, anything below the cut point, the saturation gets weaker."
What I would like to do is to plot only lines between the nodes that are below my cut value (or threshold), not anything else.
Here is some reproducible data:
Dist <- data.frame(Sample_1 = c(0.0, 0.245, 0.191, 0.78, 0.5),
Sample_2 = c(0.3, 0.0, 0.2, 0.99, 0.6),
Sample_3 = c(0.65, 0.45, 0.0, 0.05, 0.8),
Sample_4 = c(0.45, 0.06, 0.88, 0.0, 0.7),
Sample_5 = c(0.11, 0.79, 0.66, 0.37, 0.0),
row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"))
Plotting the graph:
qgraph(Dist, layout = "circle", vsize = 5, color = c("cyan", "yellow", "pink", "green3", "gray"), labels = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"), label.cex = 3, cut = 0.2)
As you can see, anything above the cut = 0.2 is also plotted and darker.
I would like only values below the 0.2 threshold to be plotted. Is there any way to do this?
Thanks.
qgraph does not seems to have the ability to cut below a threshold, so we have to manipulate the input data.
Replacing values above the threshold to 0 or NA should do it. Using NA result in the same output but with a warning.
Dist <- data.frame(
Sample_1 = c(0.0, 0.245, 0.191, 0.78, 0.5),
Sample_2 = c(0.3, 0.0, 0.2, 0.99, 0.6),
Sample_3 = c(0.65, 0.45, 0.0, 0.05, 0.8),
Sample_4 = c(0.45, 0.06, 0.88, 0.0, 0.7),
Sample_5 = c(0.11, 0.79, 0.66, 0.37, 0.0),
row.names = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5")
)
library(qgraph)
qgraph(
replace(Dist, Dist > 0.2, 0),
layout = "circle",
vsize = 5,
color = c("cyan", "yellow", "pink", "green3", "gray"),
labels = c("Sample_1", "Sample_2", "Sample_3", "Sample_4", "Sample_5"),
label.cex = 3
)
Created on 2020-04-06 by the reprex package (v0.3.0)

r - ggplot2 - secondary duplicate axis log transform is incorrect

I am using ggplot 2.2.0 to create a secondary duplicated axis using the log transform.
# install.packages("install.load") # install to use the load_package function
install.load::load_package("ggplot2", "data.table")
sand <- structure(list(`Sieve #` = c("3/8”", "4", "8", "16", "30", "50",
"Pan"), `Size (mm)` = c(9.525, 4.75, 2.36, 1.18, 0.6, 0.3, NA
), `Mass Sieve (kg)` = c(0.642, 0.508, 0.474, 0.408, 0.38, 0.348,
0.376), `Mass Retained + Sieve (kg)` = c(0.642, 0.524, 0.58,
0.526, 0.598, 0.899, 0.463), `Mass Retained (kg)` = c(0, 0.016,
0.106, 0.118, 0.218, 0.551, NA), `Cumulative Mass Retained (kg)` = c(0,
0.016, 0.122, 0.24, 0.458, 1.009, NA), `Cumulative % Retained` = c(0,
1, 11, 22, 42, 92, NA), `% Passing` = c(100, 99, 89, 78, 58,
8, NA)), .Names = c("Sieve #", "Size (mm)", "Mass Sieve (kg)",
"Mass Retained + Sieve (kg)", "Mass Retained (kg)", "Cumulative Mass Retained (kg)",
"Cumulative % Retained", "% Passing"), row.names = c(NA, -7L), class = c("data.table",
"data.frame"))
x1 <- c(0.075, 0.15, 0.3, 0.6, 1.18, 2.36, 4.75, 9.5, 12.5, 19, 25, 37.5, 50)
x1_label <- c("0.075", "0.150", "0.300", "0.600", "1.180", "2.36", "4.75", "9.5",
"12.5", "19.0", "25.0", "37.5", "50.0")
x2 <- c("No. 200", "No. 100", "No. 50", "No. 30", "No. 16", "No. 8", "No. 4",
"3/8 in.", "1/2 in.", "3/4 in.", "1 in.", "1 1/2 in.", "2 in.")
ggplot(sand, aes(`Size (mm)`, `% Passing`)) + geom_point() +
geom_line() + scale_x_continuous(name = "Sieve size (mm)",
limits = c(0.075,
50), expand = c(0.001, 0), breaks = x1, labels = x1_label, minor_breaks = NULL,
trans = "log", position = "bottom", sec.axis = dup_axis(name = "Sieve size",
breaks = x1, labels = x2)) +
labs(title = "Group 1 Sand Gradation Results (ASTM C136)") + scale_y_continuous(limits =
c(0,
100), expand = c(0.01, 0), breaks = seq(0, 100, by = 10), minor_breaks = seq(0,
100, by = 5), name = "% Passing") +
theme_bw() + theme(plot.margin = margin(0.5, 0.5, 0.5, 0.5, "pt"),axis.text.x = element_text(angle = 90, vjust = 0.1))
The bottom x-axis and the top x-axis do not match in the image shown below.
Is it possible to have a duplicated secondary axis using the log transform?
If so, then how should the existing code be modified to get the desired result?
If not, then do you have any suggestions.
Thank you.

Remove margin inside plot pf ggplot2

this is my script and the associated plot:
library(ggplot2)
library(reshape)
df <- structure(list(ID = structure(1:19, .Label = c("2818/22/0834",
"2818/22/0851", "2818/22/0853", "2818/22/0886", "B0F", "B12T",
"B1T", "B21T", "B22F", "B26T", "B33F", "B4F", "P1", "P21", "P24",
"P25", "P27", "P28", "P29"), class = "factor"), K = c(0.089,
0.094, 0.096, 0.274, 0.09, 0.312, 0.33, 0.178, 0.05, 0.154, 0.083,
0.098, 0.035, 0.084, 0.053, 0.061, 0.043, 0.094, 0.101), Na = c(2.606,
3.822, 4.977, 2.522, 15.835, 83.108, 52.041, 41.448, 11.849,
40.531, 5.854, 10.151, 3.52, 8.445, 5.273, 7.246, 6.177, 14.813,
15.569), Cl = c(3.546, 6.181, 8.422, 3.733, 14.685, 96.911, 65.518,
79.01, 10.349, 53.361, 6.12, 10.832, 2.313, 10.312, 5.641, 8.708,
6.138, 12.302, 20.078), Mg = c(1.487, 1.773, 1.992, 1.143, 2.991,
1.678, 2.23, 3.288, 1.148, 2.428, 3.428, 2.729, 0.777, 2.554,
2.374, 4.075, 1.993, 1.881, 3.034), Ca = c(5.529, 6.205, 6.59,
4.099, 10.631, 4.564, 6.652, 13.374, 4.332, 10.542, 11.194, 10.053,
2.969, 7.73, 8.163, 11.539, 6.166, 5.968, 9.299), SO4 = c(0.663,
0.831, 0.607, 0.882, 9.013, 0.896, 0.652, 0.021, 1.446, 0.012,
8.832, 6.665, 1.003, 2.575, 3.685, 7.121, 3.64, 5.648, 2.397),
HCO3 = c(7.522, 5.498, 6.15, 5.242, 8.582, 4.067, 5.65, 9.364,
5.435, 8.068, 9.054, 8.326, 4.805, 7.235, 7.488, 9.234, 6.352,
6.98, 8.34)), .Names = c("ID", "K", "Na", "Cl", "Mg", "Ca",
"SO4", "HCO3"), class = "data.frame", row.names = c(NA, -19L))
df_melted<-melt(df, na.rm=T)
ggplot(df_melted, aes(variable, value, group=ID, color=ID))+
geom_point(size=2) +
geom_line() +
theme(legend.position="none") +
scale_y_log10(breaks=seq(0, 100, 10))
Is there a way to remove the spaces at the beginning and at the end of the plot? I tried with xlim but the problem is that the x variable is not a numerical variable, so, something like xlim(c("K", "HCO3")) doesn't work.
This is a discrete scale, but you can still used the expand argument as follows. Whether the output looks acceptable or not is another matter. Play with the c(0,0) values until you find something that suits. Using 0.1 for the second value gives a slightly better plot, in my view...
ggplot(df_melted, aes(variable, value, group=ID, color=ID))+
geom_point(size=2) +
geom_line() +
theme(legend.position="none") +
scale_y_log10(breaks=seq(0, 100, 10)) +
scale_x_discrete(expand = c(0,0))

Resources