Unable to plot heat map - r

library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
Apartment_no <- c("1-SV","1-SV","1-SV","1-SH","1-SH","1-SH","1-1V","1-1V","1-1V","1-1H","1-1H","1-1H","3-SV","3-SV","3-SV","3-1V","3-1V","3-1V","3-1H","3-1H","3-1H")
month <- c("September","October","November","September","October","November","September","October","November","September","October","November","September","October","November","September","October","November","September","October","November")
Days <- c(NA,19,28,2,19,28,2,19,NA,2,19,28,25,31,28,12,NA,24,8,26,19)
Heat_clean <- data.frame(Apartment_no,month,Days)
This is just a sample dataset. In actual, I have around 163 apartment_no and all the months of data. I wish to create an interactive heatmap for it since the data is quite big. I wish to use a special color code i.e whenever the Days==NA, color=Red,1<=Days<=5,color=Blue,6<=Days<=15,color=Orange,16<=Days<=25,color=Pink, 25<=Days<=31,color=Green. I have used the following code but it is not working for me & infact giving me an error message as "`x' must be a numeric matrix". My code is as follows:
> mypallete <- colorRampPalette(Days=na.color,col="Red", 1<=Days<=5,col="Blue", 6<=Days<=15,col="Orange", 16<=Days<=25,col="Pink", 25<=Days<=31,color=Green)
> heatmap.2(as.matrix(Heat_clean),Rowv = F,Colv = F,main = "Heatmeters data",col = mypallete,dendrogram = "none",density.info = "none",trace = "none")
I first defined my own pallete & then implemented it in my code. I wish to get something that can be seen in figure, though with my personalised color coding. Maybe later I can insert the code in plot_ly to get it interactive.

Your ranges are not exclusive (Days <= 25 and 25 <= Days) but that is easily fixed...
I don't think the structure of your data matches what heatmap.2 is expecting, and that is what is giving you the error, not the color map.
Here is one brute-force way to generate the color palette... (note I changed the spelling of mypalette)
mypalette=rep("Green",length(Days))
mypalette[Days <= 25] = "Pink"
mypalette[Days <= 15] = "Orange"
mypalette[Days <= 5] = "Blue"
mypalette[is.na(Days)] = "Red"

Related

Converting SpatVector objects to data frames for use in ggplot2

I would like to convert SpatVector objects to data frames for use in ggplot2.
pkgs <- c("geodata", "raster", "ggplot2", "tidy")
lapply(pkgs, require, character.only = TRUE)
boundary_GB <- geodata::gadm(country = "GB", path = tempdir(), resolution = 2, level = 1)
My current approach takes a long time:
boundary_GB_df <- broom::tidy(methods::as(boundary_GB, "Spatial"))
The plot:
ggplot(data = boundary_GB_df, mapping = aes(x = long, y = lat, group = group)) +
geom_polygon(fill = NA, colour = "black")
I am not experienced with SpatVector objects, is there a faster approach?
I am aware of tidyterra package (i.e., tidyterra::geom_spatvector()).
Thanks
sf objects are also data.frame and you can use a specific geom provided by ggplot2 (geom_sf()). Conversion between spatial vectors classes in R is as simple as:
# From SpatVector to sf
sf::st_as_sf(x_spatvector)
# From sf to SpatVector
terra::vect(x_sf)
# To sp, although for most uses is recommended to stick to sf
as(x_sf, "Spatial")
So if ypu only need to plot the spatial object, why not use ggplot2::geom_sf()/tidyterra::geom_spatvector()? Convert the object to data frame for plotting seems to be just going back and forth, unless you have a good reason for doing that.
See reprex:
library(geodata)
#> Loading required package: terra
#> terra 1.6.17
library(ggplot2)
boundary_GB <- geodata::gadm(country = "GB", path = tempdir(), resolution = 2, level = 1)
class(boundary_GB)
#> [1] "SpatVector"
#> attr(,"package")
#> [1] "terra"
library(sf)
#> Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1; sf_use_s2() is TRUE
boundary_GB_sf <- st_as_sf(boundary_GB)
class(boundary_GB_sf)
#> [1] "sf" "data.frame"
# Is already a data.frame
# sf with geom_sf
ggplot(boundary_GB_sf) +
geom_sf(fill = NA, colour = "black")
# Spatvector with tidyterra
library(tidyterra)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:terra':
#>
#> intersect, union
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#> Loading required package: tibble
#> Loading required package: tidyr
#>
#> Attaching package: 'tidyr'
#> The following object is masked from 'package:terra':
#>
#> extract
ggplot() +
geom_spatvector(data = boundary_GB, fill = NA, colour = "black")
Created on 2022-10-05 with reprex v2.0.2

How I can Speed up code built using purrr- Why is Furrr slower than purrr

I'd like to know if there is a way to speed up a code built with purrr package. I tried to convert it into furr and use the multisession option, but it is even slower than the sequential version.
# rm(list = ls())
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
#> Error: RStudio not running
getwd()
#> [1] "C:/Users/Angela/AppData/Local/Temp/RtmpOqCRC2/reprex-44604912759f-full-husky"
#load required packages
library(mc2d)
#> Loading required package: mvtnorm
#>
#> Attaching package: 'mc2d'
#> The following objects are masked from 'package:base':
#>
#> pmax, pmin
library(gplots)
#>
#> Attaching package: 'gplots'
#> The following object is masked from 'package:stats':
#>
#> lowess
library(RColorBrewer)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tidyverse)
library(furrr)
#> Loading required package: future
library(future) #for parallel computation
#remotes::install_github("HenrikBengtsson/parallelly", ref="develop") #to use multisession
library(parallelly)
library(tictoc)
set.seed(99)
iters<-1000
df<-data.frame(id=c(1:30),cat=c(rep("a",12),rep("b",18)),month=c(1:6,1,6,4,1,5,2,3,2,5,4,6,3:6,4:6,1:5,5),n=rpois(30,5))
df$n[df$n == "0"] <- 3
se<-rbeta(iters,96,6)
epi.a<-rpert(iters,min=1.5, mode=2, max=3)
p=0.2
p2=epi.a*p
df<-as_tibble(df)
# this defined function ensures any `n` from `df` will be itered with 10000 s and a and generated 10000 results
plan(multisession)
tic()
iter_n <- function(n) future_map2_dbl(.x = se, .y = p2, ~ 1 - (1 - .x * .y) ^ n)
list_1 <- df %>% mutate(Result = future_map(n, ~iter_n(.x))) %>% unnest(Result)%>% group_split(month)
toc()
#> 2.22 sec elapsed
plan(sequential)
#the same without parallelization
tic()
iter_n <- function(n) map2_dbl(.x = se, .y = p2, ~ 1 - (1 - .x * .y) ^ n)
list_1 <- df %>% mutate(Result = map(n, ~iter_n(.x))) %>% unnest(Result)%>% group_split(month)
toc()
#> 0.08 sec elapsed
Created on 2022-05-08 by the reprex package (v2.0.1)
I have read about an issue of using furrr with R studio (https://github.com/DavisVaughan/furrr/issues/195), so I followed the advice and downloaded the parallely package, but it doesn't change anything. Furrr is still slower than purrr (which is actually odd)
If someone is wondering what system I am using I am working with a Windows system.
Do you have any suggestion on how to speed up a code using purr or fixing the problem of furrr? This is just an example, but I have a huge database to deal with. Any suggestion is appreciated.

download.file with wildcard matching in R

I'm trying to download all the files that match a pattern from a url directory in R using download.file, but I can't get it working for even a single file. The url is:
https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/
and the pattern match is all files like: AIS_2019_*_18.zip
Here is what I've tried for a single file case:
download.file('https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_04_18.zip',
destfile = "AIS_2019_04_18.zip",
method = "wget", extra = c("-r", "-np", "-L", "--max-redirect=0"))
but I always get 'wget' call had nonzero exit status
I've also tried setting method = internal and mode = w, but get ```scheme not supported in url'
Here's a way to generate all the links that you can then loop through them with a for loop.
library(glue)
library(stringr)
library(lubridate)
#>
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#>
#> date, intersect, setdiff, union
# Setup
month_dates <- glue("2019-{str_pad(1:12, width = 2, pad = '0')}-01")
days_in_months <- days_in_month(as.Date(month_dates))
# Get appropriate number of days and months combinations
months <- rep(1:12, days_in_months)
days <- unlist(mapply(function(x) str_pad(1:x, width = 2, pad = "0"),
days_in_months))
base_url <- "https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019"
# Put everything together
all_files <- glue("{base_url}/AIS_2019_{months}_{days}.zip")
# See results
head(all_files)
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_01.zip
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_02.zip
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_03.zip
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_04.zip
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_05.zip
#> https://coast.noaa.gov/htdata/CMSP/AISDataHandler/2019/AIS_2019_1_06.zip
# Check number of the days in a year is correct
length(all_files)
#> [1] 365
Created on 2021-08-04 by the reprex package (v2.0.0)
Once you have those created, you can do something like:
# Untested
for (file in all_files) {
download.file(file,
destfile = basename(file),
extra = c("-r", "-np", "-L", "--max-redirect=0"))
}

How to make a dashed line and legend using ggplot

I want to know how to add a legend to my graph and also if its possible to make a line half solid half dashed.
I need the red line to become dashed at 28 and the green one at 20 I've been told to use geo_segment but I can't find a way to see the commands I need to input.
If anyone can help and suggest what codes should I use it would be great.
man<-dataset
ggplot(man,aes(Plot))+
geom_line(aes(y=N),color="forestgreen",lwd=0.5)+
geom_ribbon(aes(ymin=NLB,ymax=NUB),alpha=0.2,fill="green")+
geom_line(aes(y=M),color="navy",lwd=0.5)+
geom_ribbon(aes(ymin=MLB,ymax=MUB),alpha=0.2,fill="blue")+
geom_line(aes(y=S),color="brown1",lwd=0.5)+
geom_ribbon(aes(ymin=SLB,ymax=SUB),alpha=0.2,fill="red")+
xlab("Number of Samples")+
ylab("Number of Diametric-Species")
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
df <- data.frame(
x = seq(1:200),
y = log(seq(1:200))
)
df <- df %>% mutate(should_dash = x >= 50)
ggplot(df, aes(x,y)) + geom_line(aes(linetype = should_dash))
Created on 2018-06-15 by the reprex package (v0.2.0).

Failed two methods to subset dataset with R, requesting assistance

I am attempting to make a subset of some data in R (open source statistics scripting language). I attempt two methods, but I am unsuccessful with both. One returns a table with no data, the other returns a table of all "NA" cells, but of the apparently correct dimensions.
I laid out the code pretty clearly commented--
First, I create the list of zip codes I'll use to subset the data. The list of zip codes is from a dataset I'll be using.
The list of zip codes is called "zipCodesOfData"
Next, I download the Crime Data I'll be subsetting. I basically just subset it into the data set that I need.
The last part, section three, shows that I try both %in% and the filter method to filter the Crime Data against the zip code data.
Unfortunately, neither method works. I was hoping someone might be able to point out my mistakes or recommend a different subsetting method for the third section.
(As an aside, in section two, I attempt to turn the list into a dataframe, but it does not work. I'm curious as to why, if anyone can shed light onto this for me.)
Thanks for your time & assistance!
####
#### Section zero: references and dependencies
####
# r's "choroplethr" library creator's blog for reference:
# http://www.arilamstein.com/blog/2015/06/25/learn-to-map-census-data-in-r/
# http://stackoverflow.com/questions/30787877/making-a-zip-code-choropleth-in-r-using-ggplot2-and-ggmap
#
# library(choroplethr)
# library(choroplethrMaps)
# library(ggplot2)
# # use the devtools package from CRAN to install choroplethrZip from github
# # install.packages("devtools")
# library(devtools)
# install_github('arilamstein/choroplethrZip')
# library(choroplethrZip)
# library(data.table)
#
####
#### Section one: the data set providing the zipcode we'll use to subset the crime set
####
austin2014_data_raw <- fread('https://data.austintexas.gov/resource/hcnj-rei3.csv')
names(austin2014_data_raw)
nrow(austin2014_data_raw)
## clean up: make any blank cells in column ZipCode say "NA" instead -> source: http://stackoverflow.com/questions/12763890/exclude-blank-and-na-in-r
austin2014_data_raw[austin2014_data_raw$ZipCode==""] <- NA
# keep only rows that do not have "NA"
austin2014_data <- na.omit(austin2014_data_raw)
nrow(austin2014_data) # now there's one less row.
# selecting the first column, which is ZipCode
zipCodesOfData <- austin2014_data[,1]
View(zipCodesOfData)
# Now we have the zipcodes we need: zipCodesOfData
####
#### Section two: Crime data
####
# Crime by zipcode: https://data.austintexas.gov/dataset/Annual-Crime-2014/7g8v-xxja
# (visualized: https://data.austintexas.gov/dataset/Annual-Crime-2014/8mst-ed5t )
# https://data.austintexas.gov/resource/<insertResourceNameHere>.csv w/ resource "7g8v-xxja"
austinCrime2014_data_raw <- fread('https://data.austintexas.gov/resource/7g8v-xxja.csv')
View(austinCrime2014_data_raw)
nrow(austinCrime2014_data_raw)
# First, let's remove the data we don't need
names(austinCrime2014_data_raw)
columnSelection_Crime <- c("GO Location Zip", "GO Highest Offense Desc", "Highest NIBRS/UCR Offense Description")
austinCrime2014_data_selected_columns <- subset(austinCrime2014_data_raw, select=columnSelection_Crime)
names(austinCrime2014_data_selected_columns)
nrow(austinCrime2014_data_selected_columns)
####
#### Section Three: The problem: I am unable to make subsets with the two following methods.
####
# Neither of these methods work:
# Attempt 1:
austinCrime2014_data_selected_columns <- austinCrime2014_data_selected_columns[austinCrime2014_data_selected_columns$`GO Location Zip` %in% zipCodesOfData , ]
View(austinCrime2014_data_selected_columns) # No data in the table
# Attempt 2:
# This initially told me an error:
# Then, I installed dplyr and the error went away.
library(dplyr)
# However, it still doesn't create anything-- just an empty set w/ headers
austinCrime2014_data_selected_zips <- filter(austinCrime2014_data_selected_columns, `GO Location Zip` %in% zipCodesOfData)
View(austinCrime2014_data_selected_zips)
I edited out this section, after realizing it was unnecessary.
####
#### Bad section
####
nrow(austinCrime2014_data_selected_columns)
# Then, let's keep only the zipcodes we need
# doesnt work: austinCrime2014_data_selected_columns_df <- data.frame(austinCrime2014_data_selected_columns)
# typeof(austinCrime2014_data_selected_columns_df)
austinCrime<-do.call("rbind", austinCrime2014_data_selected_columns)
austinCrime_needsTranspose <-as.data.frame(austinCrime)
austinCrime <- t(austinCrime_needsTranspose)
typeof(austinCrime)
View(austinCrime)
names(austinCrime)
####
#### Bad section
####
I think readr and dplyr can solve your problem. It's simple:
library(readr)
library(dplyr)
### SECTION 1
# Import data
austin2014_data_raw <- read_csv('https://data.austintexas.gov/resource/hcnj-rei3.csv', na = '')
glimpse(austin2014_data_raw)
nrow(austin2014_data_raw)
# Remove NAs
austin2014_data <- na.omit(austin2014_data_raw)
nrow(austin2014_data) # now there's one less row.
# Get zip codes
zipCodesOfData <- austin2014_data$`Zip Code`
### SECTION 2
# Import data
austinCrime2014_data_raw <- read_csv('https://data.austintexas.gov/resource/7g8v-xxja.csv', na = '')
glimpse(austinCrime2014_data_raw)
nrow(austinCrime2014_data_raw)
# Select and rename required columns
columnSelection_Crime <- c("GO Location Zip", "GO Highest Offense Desc", "Highest NIBRS/UCR Offense Description")
austinCrime_df <- select(austinCrime2014_data_raw, one_of(columnSelection_Crime))
names(austinCrime_df) <- c("zipcode", "highestOffenseDesc", "NIBRS_OffenseDesc")
glimpse(austinCrime_df)
nrow(austinCrime_df)
### SECTION 3
# Filter by zipcode
austinCrime2014_data_selected_zips <- filter(austinCrime_df, zipcode %in% zipCodesOfData)
glimpse(austinCrime2014_data_selected_zips)
nrow(austinCrime2014_data_selected_zips)
Here I used read_csv() from the readr package to import data, and the subset methods select() and filter() from the dplyr package to get the required columns and rows.
I'm not sure why you're do.calling and transposing your data. You can just use something like dplyr's semi_join to get only the zipcodes you want:
library(data.table)
library(dplyr)
#> -------------------------------------------------------------------------
#> data.table + dplyr code now lives in dtplyr.
#> Please library(dtplyr)!
#> -------------------------------------------------------------------------
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:data.table':
#>
#> between, first, last
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
zipCodesOfData <- fread('https://data.austintexas.gov/resource/hcnj-rei3.csv') %>%
mutate(`Zip Code` = ifelse(`Zip Code` == "", NA, `Zip Code`)) %>%
na.omit() %>%
select(`Zip Code`)
austinCrime2014_data_raw <- fread('https://data.austintexas.gov/resource/7g8v-xxja.csv') %>%
select(`GO Location Zip`, `GO Highest Offense Desc`, `Highest NIBRS/UCR Offense Description`) %>%
semi_join(zipCodesOfData, by = c("GO Location Zip" = "Zip Code")) %>%
rename(zipcode = `GO Location Zip`,
highestOffenseDesc = `GO Highest Offense Desc`,
NIBRS_OffenseDesc = `Highest NIBRS/UCR Offense Description`)

Resources