How to make a melted dataset greyscale in ggplot - r

This should be a pretty straightforward question. I have the following code, which forms the plot I want:
library(reshape)
library(ggplot2)
require(ggplot2)
split1_data<-structure(list(Loci = structure(1:8, .Label = c("Baez", "Blue",
"C147", "C204", "C21", "C278_PT", "C294", "C316"), class = "factor"),
All = c(0.3357, 0.4166, 0.0242, 0.9708, 0.4518, 0.0666, 0,
0.5925), X1_only = c(0.4758, 0.3188, 0.1465, 0.3209, 1, 0.0278,
0.2065, 0.6187), X78_only = c(0.3379, 0.4102, 0.2134, 0.6807,
0.8242, 1, 0.0046, 0.279), X8_removed = c(0.0967, 0.5831,
0.058, 0.9268, 0.3518, 0.0629, 0, 0.6229), X8_only = c(0.1169,
0.8327, 0.2169, 0.0907, 1, 1, 0.07, 0.486), X7_removed = c(0.2989,
0.7268, 0.0087, 0.8874, 0.5853, 0.0568, 0, 0.7622), X7_only = c(1,
0.5714, 0.2825, 0.8673, 0.5557, 0.6861, 0.0044, 0.1146),
X5_removed = c(1, 0.1453, 0.0176, 0.8428, 0.2277, 0.2563,
0, 0.5326), X5_only = c(0.0642, 0.631, 0.5193, 0.979, 0.5348,
0.1304, 0.02, 0.0217), X4_removed = c(0.4492, 0.3821, 0.0121,
0.9957, 0.5158, 0.0498, 0, 0.718), X4_only = c(0.6485, 0.0709,
0.1639, 0.6908, 1, 1, 0.4469, 0.639), X3_removed = c(0.3009,
0.3414, 0.02, 0.9935, 0.4216, 0.1273, 0, 0.6406), X3_only = c(1,
0.9325, 0.772, 0.5505, 1, 0.2068, 0.0829, 0.17), X2_removed = c(0.6335,
0.349, 0.2095, 0.9777, 0.8928, 0.0571, 0, 0.4285), X2_only = c(0.191,
0.4397, 0.0403, 0.3606, 0.0089, 1, 0.0033, 0.659), X1_removed = c(0.1653,
0.7658, 0.0718, 0.7705, 0.4193, 0.1894, 0, 0.5167)), .Names = c("Loci",
"All", "X1_only", "X78_only", "X8_removed", "X8_only", "X7_removed",
"X7_only", "X5_removed", "X5_only", "X4_removed", "X4_only",
"X3_removed", "X3_only", "X2_removed", "X2_only", "X1_removed"
), row.names = c(NA, 8L), class = "data.frame")
# Reshape the wide table to long format, keeping Loci as the id column.
split1_datam<-melt(split1_data,id="Loci")
# Dodged bar chart of value per locus, one bar per `variable`.
# NOTE(review): `variable` is mapped to `color` (the bar outline) and nothing
# is mapped to `fill`, so scale_fill_grey() below has no fill mapping to act on.
p1<- ggplot(split1_datam, aes(x =Loci, y = value, color = variable, width=.15)) +
scale_fill_grey() +
geom_bar(position="dodge")+
# Horizontal reference line at y = 0.05.
geom_hline(yintercept=0.05)+
# NOTE(review): opts()/theme_text() belong to old ggplot2 releases; later
# releases use theme()/element_text() -- confirm against the installed version.
opts(axis.text.x = theme_text(angle=90, size=8)) +
scale_y_discrete(breaks=seq(0,1)) +
ylab(NULL)
# Print the plot.
p1
However, I was hoping to make the plot greyscale, but can't seem to figure out how to accomplish this (note: the scale_fill_grey() call shown above is not working for me). Any suggestions? Thanks so much!

One thing that commonly trips people up at the start with ggplot2 is the difference between color and fill. For 2D objects like bars, rectangles, basically any filled area, color affects the border color and fill affects the interior color.
In your plot, you map color = variable, but there is no mapping for fill in aes. I wonder if you meant fill = variable inside aes() and then to use scale_fill_grey.
Otherwise, you'd use color = variable and scale_color_grey, but that would only "color" the borders of the bars, not the filled areas.
For instance, with fill = variable and scale_fill_grey() I get something like this:

Related

Using ggplot to label my data using months instead of days [duplicate]

This question already has answers here:
ggplot2 and date on x-axis
(1 answer)
Problems with changing the date scale on an axis - ggplot
(1 answer)
Closed 7 months ago.
I wish to plot my data by using the date on the x axis and the number of project launched on the y axis
I am currently using this code
# Two line layers drawn from data0 against `date`: the `launches` column in the
# default colour and the `US` column in blue. group=1 joins all rows of a layer
# into a single line.
plot1 <- ggplot()+
geom_line(data=data0,mapping = aes(x = date, y = launches, group=1) ) +
geom_line(data=data0,mapping = aes(x = date, y = US, group=1), colour="blue" )+
ggtitle("Kickstarter")
However, i realised that i have so many dates that the Y axis becomes difficult to read
Instead i would wish for the Y axis to just show the months
Is there any way to do this ?
structure(list(date = c("2021-01-01", "2021-01-02", "2021-01-03",
"2021-01-04", "2021-01-05", "2021-01-06"), launches = c(4, 0,
0, 0, 8, 4), pledged = c(50278.64, 0, 0, 0, 366279.590415302,
172073.0471292), backers = c(2880, 0, 0, 0, 6588, 3528), total_goal = c(24000,
0, 0, 0, 148000, 60000), mean_goal = c(6000, 0, 0, 0, 18500,
15000), US = c(4, 0, 0, 0, 4, 0), `number of success` = c(4,
0, 0, 0, 8, 4), duration_days = c(30, 0, 0, 0, 31, 30), Twitter = c(1324L,
1548L, 1297L, 1585L, 1636L, 1583L), replies = c(882L, 1252L,
910L, 1018L, 810L, 1000L), likes = c(22859L, 24375L, 17854L,
20341L, 19521L, 19401L), retweets = c(8621L, 8239L, 6141L, 6728L,
6938L, 6842L)), row.names = c(NA, 6L), class = "data.frame")
edit: my apologies, I swapped the x and y axes in my explanation
Assume you meant date on x axis
library(tidyverse)
library(lubridate)
# Parse the character dates (year-month-day) into Date objects.
data0$date <- lubridate::ymd(data0$date)
# Keep the month name of every date as an extra column.
data0$month <- months(data0$date)

# Same two lines as before (launches in the default colour, US in blue);
# the shared data, x and group mappings are declared once in ggplot().
plot1 <- ggplot(data0, aes(x = date, group = 1)) +
  geom_line(aes(y = launches)) +
  geom_line(aes(y = US), colour = "blue") +
  ggtitle("Kickstarter")
I changed your 'example' dataset to better illustrate a potential solution:
library(ggplot2)
data0 <- structure(list(date = c("2021-01-01", "2021-01-15", "2021-02-01",
"2021-02-15", "2021-03-01", "2021-03-15"), launches = c(4, 0,
0, 0, 8, 4), pledged = c(50278.64, 0, 0, 0, 366279.590415302,
172073.0471292), backers = c(2880, 0, 0, 0, 6588, 3528), total_goal = c(24000,
0, 0, 0, 148000, 60000), mean_goal = c(6000, 0, 0, 0, 18500,
15000), US = c(4, 0, 0, 0, 4, 0), `number of success` = c(4,
0, 0, 0, 8, 4), duration_days = c(30, 0, 0, 0, 31, 30), Twitter = c(1324L,
1548L, 1297L, 1585L, 1636L, 1583L), replies = c(882L, 1252L,
910L, 1018L, 810L, 1000L), likes = c(22859L, 24375L, 17854L,
20341L, 19521L, 19401L), retweets = c(8621L, 8239L, 6141L, 6728L,
6938L, 6842L)), row.names = c(NA, 6L), class = "data.frame")
# First version: `date` is still a character column at this point.
plot1 <- ggplot(data0, aes(x = date, group = 1)) +
  geom_line(aes(y = launches)) +
  geom_line(aes(y = US), colour = "blue") +
  ggtitle("Kickstarter")
plot1

# Convert `date` from character to Date so that a date scale can be used.
data0$date <- as.Date(data0$date)

# Second version: identical layers, with x-axis breaks placed one month apart.
plot2 <- ggplot(data0, aes(x = date, group = 1)) +
  geom_line(aes(y = launches)) +
  geom_line(aes(y = US), colour = "blue") +
  ggtitle("Kickstarter") +
  scale_x_date(date_breaks = "1 month")
plot2
Created on 2022-07-27 by the reprex package (v2.0.1)
Does this solve your problem?

Pie Chart using R

I want to create a piechart (showing Forest.Area.ha. by GaPa_NaPa) based on following attribute table as below:-
The dataframe for the data is as shown:-
structure(list(GaPa_NaPa = c("Gaidahawa", "Kanchan", "Kotahimai",
"Marchawari", "Mayadevi", "Omsatiya", "Rohini", "Sammarimai",
"Siyari", "Sudhdhodhan", "Devdaha", "Lumbini Sanskritik", "Sainamaina",
"Siddharthanagar", "Tillotama", "Butwal"), Total.Area..ha. = c(9657L,
5835L, 5812L, 4844L, 7228L, 4844L, 6449L, 5066L, 6620L, 5743L,
13667L, 11194L, 16082L, 3595L, 12592L, 10139L), Forest.Area.ha. = c(114.91,
178.19, 31.37, 43.43, 152.87, 29.12, 63.16, 59.81, 36.4, 16.42,
113.13, 422.87, 186.13, 167.2, 60.27, 45.3), Forest.Percent = c(6.67,
10.35, 1.83, 2.52, 8.88, 1.69, 3.67, 3.47, 2.11, 0.95, 6.57,
24.57, 10.81, 9.71, 3.5, 2.63), Forest.Area..Fraction. = c(0.07,
0.1, 0.02, 0.03, 0.09, 0.02, 0.04, 0.03, 0.02, 0.01, 0.07, 0.25,
0.11, 0.1, 0.04, 0.03), Household.No = c(8612L, 9828L, 5939L,
5305L, 8003L, 6683L, 6349L, 5164L, 7889L, 7619L, 15624L, 10736L,
17572L, 12329L, 30452L, 36989L), Family.Size = c(10020L, 10483L,
7921L, 6972L, 10040L, 8218L, 8096L, 7303L, 9060L, 8717L, 17582L,
13854L, 19657L, 16011L, 36399L, 51099L), Total = c(56529L, 42528L,
46417L, 41058L, 57341L, 41080L, 43277L, 43300L, 45274L, 41472L,
71806L, 88090L, 78477L, 76307L, 149657L, 195054L)), row.names = c(NA,
16L), class = "data.frame")
The code I used is:-
# NOTE(review): hard-coded, machine-specific working directory.
setwd("C:/Users/lenovo/Desktop/AllAboutR/AssignmentDocs")
# Read the forest-area table from CSV and open it in the data viewer.
ForestArea2010<-read.csv("Forest2010.csv")
View(ForestArea2010)
# NOTE(review): this call uses `RupandehiLULC19`, not the `ForestArea2010`
# object read above, and passes the name column GaPa_NaPa as the slice values;
# the area column Forest.Area.ha. is never referenced.
pie(RupandehiLULC19$GaPa_NaPa, main="Piechart of Forest Area", las=3, col=hsv(12))
But I couldn't work out how to write the code so that the pie chart shows Forest.Area.ha. by GaPa_NaPa. Please help: how should the code be written?
Here is another option with ggplot2. If you don't want the labels then the geom_text_repel line can be removed.
library(ggplot2)
library(ggrepel)
# A pie chart in ggplot2: one stacked bar (single empty x category) whose
# y axis is wrapped around a circle by coord_polar().
ggplot(
  RupandehiLULC19,
  aes(x = "", y = Forest.Area.ha., fill = GaPa_NaPa)
) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  # The y-axis label doubles as the chart caption.
  labs(x = "", y = "Piechart of Forest Area") +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank()) +
  # Value labels centred within each stacked segment; drop this layer if no
  # labels are wanted.
  geom_text_repel(
    aes(label = Forest.Area.ha.),
    position = position_stack(vjust = 0.5)
  )
Output
Or another option using legend:
# One colour per slice, built once and shared by the pie and its legend.
# (palette(rainbow(16)) would replace the session-wide palette and invisibly
# return the *previous* palette, so the pie and the legend could end up with
# different colours.)
slice_cols <- rainbow(nrow(RupandehiLULC19))
# Unlabelled pie of forest area per municipality.
pie(RupandehiLULC19$Forest.Area.ha., labels = "", main = "Piechart of Forest Area", las = 3, col = slice_cols)
# Legend mapping each colour to its GaPa_NaPa name.
legend(.85, 1.1, RupandehiLULC19$GaPa_NaPa, cex = 0.7, fill = slice_cols, box.col = "white", bg = "white")
Or with the values:
# One colour per slice, built once and shared by the pie and its legend.
# (palette(rainbow(16)) would replace the session-wide palette and invisibly
# return the *previous* palette, so the pie and the legend could end up with
# different colours.)
slice_cols <- rainbow(nrow(RupandehiLULC19))
# Pie of forest area per municipality, each slice labelled with its area value.
pie(RupandehiLULC19$Forest.Area.ha., labels = RupandehiLULC19$Forest.Area.ha., main = "Piechart of Forest Area", las = 3, col = slice_cols)
# Legend mapping each colour to its GaPa_NaPa name.
legend(.85, 1.1, RupandehiLULC19$GaPa_NaPa, cex = 0.7, fill = slice_cols, box.col = "white", bg = "white")

merge elements in forestplot format a single element

I am creating a forestplot using the forestplot package in R, and am having trouble with a few things.
Questions:
Is it possible to merge two adjacent text elements
Is it possible to modify either a single text element font, or the font of an entire row
My Code:
library(forestplot)
# creating text
# Label matrix: one row per plot line, five text columns.
# Rows 1-2 are header rows; rows 3 and 6 are group headings; the remaining
# rows are the individual subgroups.
text <- rbind(c('', 'N (%)', 'SRT', 'ART', 'HR [95% CI]'),
c('', '', '5 year survival %', '5 year survival %', ''),
c('Seminal Vesicle Involvement', '', '', '', ''),
c(' Yes', '10 (20%)', '94', '12', '0.73 [0.36, 1.50]'),
c(' No', '40 (80%)', '96', '10', '1.78 [0.73, 4.35]'),
c('Gender', '', '', '', ''),
c(' Male', '13 (22.5%)', '84', '22', '0.06 [-0.2, 0.86]'),
c(' Female', '37 (77.5%)', '93', '13', '1.89 [0.90, 6.67]'))
# creating the plot
# mean/lower/upper have one entry per row of `text`; they are NA for the
# header and group-heading rows (1, 2, 3 and 6), which are also the rows
# flagged in is.summary.
forestplot(text,
mean = c(NA, NA, NA, 0.73, 1.78, NA, 0.06, 1.89),
lower = c(NA, NA, NA, 0.36, 0.73, NA, -0.2, 0.90),
upper = c(NA, NA, NA, 1.50, 4.35, NA, 0.86, 6.67),
is.summary=c(T, T, T, F, F, T, F, F),
lineheight = unit(0.9, "cm"),
graph.pos = 5,
graphwidth = unit(4, 'cm'),
xticks = c(-1, 0, 1, 2, 3, 4),
ci.vertices = T,
# Font sizes for tick labels, x label, ordinary rows and summary rows.
txt_gp = fpTxtGp(ticks = gpar(cex = 1),
xlab = gpar(cex = 1),
label = gpar(cex = 0.8),
summary = gpar(cex = 0.8)),
# Colours of the individual plot elements.
col=fpColors(box="black",
line="darkgrey",
summary="black",
zero='grey20',
axes='grey20'),
# Horizontal rule keyed to row "2".
hrzl_lines = list("2" = gpar(lwd=1, col = "#000044")))
Output:
Desired:
I would like the two 5 year survival % text bits to be combined into 1 (and centered between the two headings above), and either just those elements or the whole row to be italic font.
I have tried using summary=list(gpar(...)) for the txt_gp option, but that only seems to be able to modify the whole column, and I have found nothing on merging cells at all.
If you make the colgap much smaller in forestplot than usual, you can split the text that is currently duplicated in row 2 in columns 3 and 4 into two parts:
> text[2, 4] <- 'survival % '
> text[2, 3] <- '5 year '
>
> forestplot(text,
+ mean = c(NA, NA, NA, 0.73, 1.78, NA, 0.06, 1.89),
+ lower = c(NA, NA, NA, 0.36, 0.73, NA, -0.2, 0.90),
+ upper = c(NA, NA, NA, 1.50, 4.35, NA, 0.86, 6.67),
+ is.summary=c(T, T, T, F, F, T, F, F),
+ lineheight = unit(0.9, "cm"),
+ graph.pos = 5,
+ graphwidth = unit(4, 'cm'),
+ xticks = c(-1, 0, 1, 2, 3, 4),
+ ci.vertices = T,
# add line---------
colgap=unit(.0011,"npc"),
#
+ txt_gp = fpTxtGp(ticks = gpar(cex = 1),
+ xlab = gpar(cex = 1),
+ label = gpar(cex = 0.8),
+ summary = gpar(cex = 0.8)),
+ col=fpColors(box="black",
+ line="darkgrey",
+ summary="black",
+ zero='grey20',
+ axes='grey20'),
+ hrzl_lines = list("2" = gpar(lwd=1, col = "#000044")))

Repeating x axis R

I have data with the amount of radiation at a specific time (hours, minutes) for three consecutive days. I want to plot this so that the x-axis runs from 0 to 24 three times, i.e. the x-axis repeats itself, with the amount of radiation on the y-axis. I have tried the following script without any success.
# Red line plot of column 2 against column 1, with both default axes and both
# axis titles suppressed.
plot(gegevens[,1],gegevens[,2],type='l',col='red',xaxt='n',yaxt='n',xlab='',ylab='')
# Custom x axis: ticks at 0..74, labelled 0..24 repeated three times, with
# labels drawn perpendicular to the axis (las=2).
axis(1, at=(0:74),labels = rep.int(0:24,3), las=2)
# X-axis title placed in the bottom margin.
mtext('Zonnetijd (u)', side=1,line=3)
The dataset was too big, so I've selected the first two hours from 2 days. The first column is the time and the second is the radiation. The data then looks as follows:
structure(c(0, 0.083333333333333, 0.166666666666667, 0.25, 0.333333333333333,
0.416666666666667, 0.5, 0.583333333333333, 0.666666666666667,
0.75, 0.833333333333333, 0.916666666666667, 1, 1.08333333333333,
1.16666666666667, 1.25, 1.33333333333333, 1.41666666666667, 1.5,
1.58333333333333, 1.66666666666667, 1.75, 1.83333333333333, 1.91666666666667,
0.0158590638878904, 0.0991923972212234, 0.182525730554557, 0.26585906388789,
0.349192397221223, 0.432525730554557, 0.51585906388789, 0.599192397221223,
0.682525730554557, 0.76585906388789, 0.849192397221223, 0.932525730554557,
1.01585906388789, 1.09919239722122, 1.18252573055456, 1.26585906388789,
1.34919239722122, 1.43252573055456, 1.51585906388789, 1.59919239722122,
1.68252573055456, 1.76585906388789, 1.84919239722122, 1.93252573055456,
0.066, 0.066, 0.068, 0.068, 0.068, 0.066, 0.066, 0.066, 0.066,
0.066, 0.066, 0.066, 0.057, 0, 0, 0, -0.002, 0, 0, -0.002, 0,
-0.002, -0.009, -0.011, 0, -0.002, 0, -0.002, 0, -0.002, 0, 0.002,
0, 0, 0, 0, -0.002, -0.002, -0.007, 0, -0.002, 0, 0, 0, -0.002,
-0.002, -0.002, 0), .Dim = c(48L, 2L), .Dimnames = list(NULL,
c("t", "z")))
I think you would be better off to move towards a date/time class for your axis. Then you can have more control on what to plot etc. Below is an example:
# Example data: three days of hourly timestamps plus a random-walk signal.
n_obs <- 24 * 3
start_time <- as.POSIXct("2000-01-01 00:00:00")
df <- data.frame(T = seq(start_time, by = "hours", length.out = n_obs))
df
df$St <- cumsum(rnorm(n_obs))

# Draw into a PNG file; the previous par() settings are saved and restored
# before the device is closed.
png("test.png", width = 8, height = 4, units = "in", res = 200)
op <- par(mar = c(4, 4, 1, 1), ps = 8)
plot(
  St ~ T, data = df,
  type = "l", col = 'red',
  xaxt = 'n', yaxt = 'n', xlab = '', ylab = ''
)
# One tick per observation, labelled with the hour of day only.
hour_labels <- format(df$T, "%H")
axis(1, at = df$T, labels = hour_labels, las = 2)
mtext('Zonnetijd (u)', side = 1, line = 3)
par(op)
dev.off()
You can see that you may have some space issues with the labels when you plot every one.
Here is another example with 3-hour increment labels:
# Alternative plot: label only every third hour on the x axis.
tick_times <- seq(from = min(df$T), to = max(df$T), by = "3 hour")
tick_labels <- format(tick_times, "%H")

png("test2.png", width = 8, height = 4, units = "in", res = 200)
op <- par(mar = c(4, 4, 1, 1), ps = 8)
# Default y axis is kept; only the x axis is drawn by hand.
plot(
  St ~ T, data = df,
  type = "l", col = 'red',
  xlab = '', ylab = '', xaxt = 'n'
)
axis(1, at = tick_times, labels = tick_labels, las = 2)
# Margin titles for the left (side 2) and bottom (side 1) axes.
mtext('Zonnetijd (u)', side = 2, line = 3)
mtext('hour', side = 1, line = 3)
par(op)
dev.off()
Marc has good advice about using a datetime class. Overall, that is a good way to go. See this question for examples of converting decimal times in hours to POSIX datetime class.
If you want to continue with your numeric data, we need the data itself to indicate what day each observation occurs on. Here we create a new column identical to the first, but adding 24 every time the first column has a negative difference between successive rows:
# A negative difference between successive times marks the start of a new day;
# the running count of those rollovers, times 24, is the offset for each row.
day_offset <- 24 * c(0, cumsum(diff(gegevens[, 1]) < 0))
# Append a third column: the original time shifted by its day offset.
gegevens <- cbind(gegevens, gegevens[, 1] + day_offset)
Now when we plot using our new column, the hours are correctly spaced by day:
# Same line plot as before, but using column 3 (time including the per-day
# offset) on the x axis; default axes and titles stay suppressed.
plot(gegevens[, 3], gegevens[, 2], type = 'l', col = 'red', xaxt = 'n', yaxt = 'n', xlab = '', ylab = '')
You have some axis issues as well. There is no 24 hour, we usually call this the 0 hour. And 24 * 3 = 72, not 74, so our maximum hour (starting at 0) is 71:
# X axis with ticks at 0..71, labelled with the hour of day 0..23 repeated for
# each of the three days.
axis(1, at= 0:71, labels = rep.int(0:23,3), las = 2)
Here is the resulting plot on your sample data. It should "work" on your full data, but I agree with Marc that it is probably too many labels. Using a POSIXct date-time format is the best way to flexibly make adjustments.

Change line color depending on y value with ggplot2

I have a question about line colours in ggplot2. I need to plot solar radiation data, but I only have 6-hourly data, so geom_line does not give a "nice" output. I've tried geom_smooth and the result is close to what I need. But I have a new question: is it possible to change the line colour depending on the y value?
The code used for the plot is
library(ggplot2)
library(lubridate)
# Read the data; the string "-99.9" is treated as a missing value.
datos.uvi=read.csv("serie-temporal-1.dat",sep=",",header=T,na.strings="-99.9")
# Parse fecha.hora (year-month-day-hour) into a date-time column named `fecha`.
datos.uvi=within(datos.uvi, fecha <- ymd_h(datos.uvi$fecha.hora))
# geom_smooth
# Smoothed curve of global radiation over time, without a confidence band.
# colour is mapped to the constant string "GLOBAL", not to a data column.
ggplot(data=datos.uvi, aes(x=fecha, y=Rad_Global_.mW.m2., colour="GLOBAL")) +
geom_smooth(se=FALSE, span=0.3)
In the desired output, line should be red for radiation values under 250, green in the 250-500 interval and blue for values higher than 500.
Is it possible with geom_smooth? I've tried to reuse code here, but could not find the point.
Data used for the plot:
dput(datos.uvi)
structure(list(fecha.hora = c(2016012706L, 2016012712L, 2016012718L,
2016012800L, 2016012806L, 2016012812L, 2016012818L, 2016012900L,
2016012906L, 2016012912L, 2016012908L, 2016013000L), latitud = c(37.75,
37.75, 37.75, 37.75, 37.75, 37.75, 37.75, 37.75, 37.75, 37.75,
37.75, 37.75), longitud = c(-1.25, -1.25, -1.25, -1.25, -1.25,
-1.25, -1.25, -1.25, -1.25, -1.25, -1.25, -1.25), altitud = c(300L,
300L, 300L, 300L, 300L, 300L, 300L, 300L, 300L, 300L, 300L, 300L
), cobertura_nubosa = c(0.91, 0.02, 0.62, 1, 0.53, 0.49, 0.01,
0, 0, 0.13, 0.62, 0.84), longitud_de_onda_inicial.nm. = c(284.55,
284.55, 284.55, 284.55, 284.55, 284.55, 284.55, 284.55, 284.55,
284.55, 284.55, 284.55), Rad_Global_.mW.m2. = c(5e-04, 259.2588,
5, 100.5, 1, 886.5742, 110, 40, 20, 331.3857, 0, 0), Rad_Directa_.mW.m2. = c(0,
16.58034, 0, 0, 0, 202.5683, 0, 0, 0, 89.81712, 0, 0), Rad_Difusa_.mW.m2. = c(0,
242.6785, 0, 0, 0, 684.0059, 0, 0, 0, 241.5686, 0, 0), Angulo_zenital_.º. = c(180,
56.681, 180, 180, 180, 56.431, 180, 180, 180, 56.176, 180, 180
), blank = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA),
fecha = structure(c(1453874400, 1453896000, 1453917600, 1453939200,
1453960800, 1453982400, 1454004000, 1454025600, 1454047200,
1454068800, 1454054400, 1454112000), tzone = "UTC", class = c("POSIXct",
"POSIXt"))), row.names = c(NA, -12L), .Names = c("fecha.hora",
"latitud", "longitud", "altitud", "cobertura_nubosa", "longitud_de_onda_inicial.nm.",
"Rad_Global_.mW.m2.", "Rad_Directa_.mW.m2.", "Rad_Difusa_.mW.m2.",
"Angulo_zenital_.º.", "blank", "fecha"), class = "data.frame")
Thanks in advance.
Calculate the smoothing outside ggplot2 and then use geom_segment:
# Fit the smoother outside ggplot2 (loess emits warnings on this data).
smooth_fit <- loess(
  Rad_Global_.mW.m2. ~ as.numeric(fecha),
  data = datos.uvi, span = 0.3
)

# Evaluate the fit on a regular 5-minute grid spanning the observed times.
time_span <- range(datos.uvi$fecha)
grid_x <- seq(time_span[1], time_span[2], by = "5 min")
grid_y <- predict(
  smooth_fit,
  newdata = data.frame(fecha = as.numeric(grid_x))
)

# One row per segment joining consecutive grid points.
n_grid <- length(grid_x)
DF <- data.frame(
  x1 = grid_x[-n_grid], x2 = grid_x[-1],
  y1 = grid_y[-n_grid], y2 = grid_y[-1]
)
# Colour class of each segment, decided by the value at its start point.
DF$col <- cut(DF$y1, breaks = c(-Inf, 250, 500, Inf))

ggplot(DF, aes(x = x1, y = y1, xend = x2, yend = y2, colour = col)) +
  geom_segment(size = 2)
Note what happens at the cut points. It might be more visually appealing to make the x-grid for prediction very fine and then use geom_point instead. However, plotting will then be slow.
This is not really what you asked for, but might serve the same purpose: instead of colouring the line, colour the background. First we create a dataframe of rectangle/limit coordinates.
# Three horizontal bands (0-250, 250-500, 500-max radiation) spanning the full
# time range, each with the colour it should be filled with.
band_lower <- c(0, 250, 500)
band_upper <- c(band_lower[-1], max(datos.uvi$Rad_Global_.mW.m2.))
rect_data <- data.frame(
  xmin = min(datos.uvi$fecha),
  xmax = max(datos.uvi$fecha),
  ymin = band_lower,
  ymax = band_upper,
  col = c("red", "green", "blue")
)
Then we add them to the plot, using scale_fill_identity()
# Black smoothed curve with the translucent colour bands drawn over it.
# The aesthetics stay on each layer because the two layers use different data.
ggplot(data = datos.uvi) +
  geom_smooth(
    aes(x = fecha, y = Rad_Global_.mW.m2.),
    colour = "black", se = FALSE, span = 0.3
  ) +
  geom_rect(
    data = rect_data,
    aes(xmin = xmin, xmax = xmax, ymin = ymin, ymax = ymax, fill = col),
    alpha = 0.1
  ) +
  # Use the colour names stored in `col` literally as fill colours.
  scale_fill_identity()

Resources