Colorado mis-plot

Colorado mis-plot - r

I'm trying the R cartography package. Had to work to find a US state shapefile that would like to work with the cartography stuff - many seemed too big, etc.. I seemed to get everything going well, but the state of Colorado misplots.
library(cartography)
library(sf)
library(RColorBrewer)
library(maps)
library(ggplot2)
rm(list = ls())
# USA shape file
states <- st_as_sf(map("state", plot = F, fill = TRUE))
#seems to plot correctly here
#ggplot(states) + geom_sf(aes(fill = ID))
usa <- st_transform(states,
CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96"))
# still seems to plot fine
#ggplot(usa) + geom_sf(aes(fill = ID))
usa <- st_buffer(usa, dist=0)
datamap <- usa
datamap$randoVar <- sample(1:3, length(datamap$ID), replace = T)
datamap_pencil <- getPencilLayer(
x = datamap,
buffer = 500,
size = 400,
lefthanded = F
)
plot(st_geometry(usa), col = "white", border = "black", bg = "lightblue1")
typoLayer(
x = datamap_pencil,
var="randoVar",
col = c("aquamarine4", "yellow3","#3c5cb0"),
lwd = .7,
legend.values.order = 1:3,
legend.pos = "bottomleft",
legend.title.txt = "",
add = TRUE
)
labelLayer(x = datamap, txt = "ID",
cex = 0.9, halo = TRUE, r = 0.15)
I first noticed because when I tried to merge in a data file and do a fill with that feature, colorado came up as "No Data". Likewise, the code above seems to indicate the state gemometry or ID is off. I don't know enough GIS to understand why. I did have to change the CRS projection so that I could buffer the map file (getPencilLayer kept throwing a self-intercection error, which seems to be common with R mapping).
Any ideas on what to do?

Well, I ended up fixing by using a shapefile from the US Census
https://www2.census.gov/geo/tiger/TIGER2017/STATE/
states <- st_read("#mypath#/tl_2017_us_state/tl_2017_us_state.shp")
states <- states[!(states$NAME %in%
c("Commonwealth of the Northern Mariana Islands", "United States Virgin Islands",
"Puerto Rico", "American Samoa", "Hawaii", "Guam", "Alaska")
), ]
usa <- st_transform(states,
CRS("+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96"))
usa <- st_buffer(usa, dist=0)
Not sure how to adjust for getting the geodate from map("state"...) but this worked for me.
Used these buffer and size settings (later after merging in data)
datamap_pencil <- getPencilLayer(
x = datamap,
buffer = 500,
size = 400,
lefthanded = F
)

Already answered here:
https://gis.stackexchange.com/a/351910/142200
The problem is that the initial map object is non-valid
library(cartography)
library(sf)
library(RColorBrewer)
library(maps)
library(ggplot2)
rm(list = ls())
# USA shape file
states <- st_as_sf(map("state", plot = F, fill = TRUE))
usa <- st_transform(states,
"+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96")
datamap <- usa
# Check validity----
st_is_valid(datamap)
#> [1] TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [13] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [25] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
#> [37] TRUE TRUE TRUE FALSE FALSE FALSE TRUE TRUE FALSE TRUE TRUE TRUE
#> [49] TRUE
#Make valid
library(lwgeom)
datamap<-st_make_valid(datamap)
st_is_valid(datamap)
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [46] TRUE TRUE TRUE TRUE
# Start cartography
datamap$randoVar <- sample(1:3, length(datamap$ID), replace = T)
datamap_pencil <- getPencilLayer(
x = datamap,
buffer = 500,
size = 400,
lefthanded = F
)
plot(st_geometry(usa), col = "white", border = "black", bg = "lightblue1")
typoLayer(
x = datamap_pencil,
var="randoVar",
col = c("aquamarine4", "yellow3","#3c5cb0"),
lwd = .7,
legend.values.order = 1:3,
legend.pos = "bottomleft",
legend.title.txt = "",
add = TRUE
)
Created on 2020-02-25 by the reprex package (v0.3.0)

Related

Delete string from names in sublists

I have a list of lists I need to delete "gene-" everywhere where it happens.
I tried
lapply(net, FUN = function(x) setNames(x, sub("gene-","", x)))
but I get the error
Error in names(object) <- nm : attempt to set an attribute on NULL
head(net)
$colors
gene-AAAS gene-AAK1 gene-AAMDC gene-AAMP gene-AARS1 gene-AASDH
"magenta" "brown" "purple" "darkgrey" "brown" "blue"
gene-AASDHPPT gene-AASS gene-AATK gene-ABAT
[ reached getOption("max.print") -- omitted 8990 entries ]
$unmergedColors
gene-AAAS gene-AAK1 gene-AAMDC gene-AAMP gene-AARS1 gene-AASDH
"darkgrey" "blue" "magenta" "darkolivegreen" "blue" "brown"
gene-AASDHPPT gene-AASS gene-AATK gene-ABAT gene-ABCA1 gene-ABCA12
"lightyellow" "lightgreen" "turquoise" "darkred" "turquoise" "grey60"
[ reached getOption("max.print") -- omitted 8990 entries ]
$MEs
MEblack MEgreenyellow MElightcyan MEyellow MEturquoise MEpink MEwhite MEdarkred
M5 -0.17423916 0.141440817 0.23401244 0.36358728 -0.0220835 -0.18126013 0.05942248 -0.45035371
N3 0.47690393 0.428961135 0.07241255 -0.02557197 0.2238352 0.06742087 -0.09574663 0.52201599
$goodSamples
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
$goodGenes
[1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[ reached getOption("max.print") -- omitted 8990 entries ]
$dendrograms
$dendrograms[[1]]
Call:
fastcluster::hclust(d = as.dist(dissTom), method = "average")
Cluster method : average
Number of objects: 9990
dput(net)
166L, 5768L, 2346L, 7132L, 625L, 4848L, 736L, 7001L,
1721L, 6626L, 7674L, 2543L, 7013L, 8667L, 4593L, 2804L,
....
7435L, 4895L, 8462L, 1732L, 3160L, 8529L), labels = NULL,
method = "average", call = fastcluster::hclust(d = as.dist(dissTom),
method = "average"), dist.method = NULL), class = "hclust")),
TOMFiles = NULL, blockGenes = list(1:9990), blocks = c(1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
...
1, 1), MEsOK = TRUE)

Your code almost works, you need two changes:
You want to modify the names, so your inner function needs to read names(x):
function(x) setNames(x, sub("gene-", "", names(x)))
net contains a lot of members. You only want to replace the colors and unmergedColors members, so apply your function to only those. Oh, and you need to assign the result back to your object:
which = c("colors", "unmergedColors")
net[which] = lapply(net[which], function(x) setNames(x, sub("gene-", "", names(x))))

Is there any way to customize legend histogram using tm_layout

Please see the map I drew below using the tmap package. I did not any find parameters that I can use to customize the font of the histogram legend. From the code below, you can see that I've already set the legend.text.fontface = 'bold'. However, this did not work.
psp1 <- tm_shape(province) +
tm_borders(col = 'black') +
tm_shape(county) +
tm_polygons(col = '+1 °C', title = 'Changes in %', style = 'pretty', aes.palette = 'div', n=5, legend.hist = T) +
tm_compass(north = 0, type = 'arrow', show.labels =0, position = c('right','top')) +
tm_layout(legend.format = list(fun = function(x) formatC(x, digits = 1, format = "f")),
legend.outside = T, legend.outside.position = 'bottom',
legend.hist.width = 1,
legend.hist.height = 0.5,
legend.stack = 'horizontal',
legend.title.fontface = 'bold',
legend.text.fontface = 'bold')

Very interesting question. Indeed, it does not seem possible to change the font of the labels for the histogram using legend.text.fontface = 'bold'
Hopefully, it is possible to change this using the base R library grid on which the tmap library is based on.
So, please find below one possible solution to your request (hoping that this answer does not come too late and that it will still be useful to you)
Preliminary note for other Stackoverflow users: to run the reprex below correctly you will need to first download the data made available by the OP in this post.
Reprex
STEP 1 - BUILDING THE MAP WITH THE LEGEND
library(sf)
library(tmap)
library(RColorBrewer)
setwd("Add the path to your working directory")
# Import data
province <- st_read("province.shp")
county <- st_read("county.shp")
# Split the 'sf' object 'county' into a list of five 'sf' objects
county_warm_list <- split(county , f = county$warming)
# Build the map with the legend
psp1 <- tm_shape(province) +
tm_borders(col = 'black') +
tm_shape(st_sf(county_warm_list[[3]])) + # using the scenario +3°C
tm_polygons(col = 'estimate',
title = 'Changes in %',
style = 'pretty',
aes.palette = 'div',
n=5,
legend.hist = TRUE,
midpoint = 0) +
tm_compass(north = 0,
type = 'arrow',
show.labels =0,
position = c('right','top')) +
tm_layout(legend.show = TRUE,
legend.format = list(fun = function(x) formatC(x, digits = 1, format = "f")),
legend.outside = TRUE,
legend.outside.position = 'bottom',
legend.hist.width = 1,
legend.hist.height = 0.5,
legend.stack = 'horizontal',
legend.title.fontface = 'bold',
legend.text.fontface = 'bold')
STEP 2 - BOLD ALL THE LABELS IN THE LEGEND (i.e. including those in the histogram)
library(grid)
# Convert the 'tmap' object psp1 into a 'grob' object ('grob' = 'grid graphical object')
psp1 <- tmap_grob(psp1)
# Find the name of the element we want to change using 'grid.list()' which
# returns a listing of 'grobs' (including gTree)
grid.ls(psp1)
#> GRID.gTree.41
#> multiple_1
#> BG
#> mapBG
#> mapElements
#> GRID.gTree.11
#> tm_polygons_1_2
#> GRID.gTree.12
#> tm_polygons_1_3
#> GRID.rect.13
#> meta_with_bg
#> meta
#> GRID.gTree.16
#> GRID.gTree.15
#> compass
#> GRID.polygon.14
#> outside_legend !!!! "outside_legend" element !!!!
#> meta_with_bg
#> meta
#> legend
#> GRID.rect.39
#> GRID.gTree.40
#> GRID.gTree.19
#> GRID.gTree.18
#> GRID.text.17
#> GRID.gTree.23
#> GRID.gTree.22
#> GRID.rect.20
#> GRID.text.21
#> GRID.gTree.38
#> GRID.gTree.37
#> GRID.gTree.36
#> GRID.gTree.25
#> GRID.rect.24
#> GRID.gTree.27
#> GRID.polyline.26
#> GRID.gTree.29
#> GRID.text.28
#> GRID.gTree.33
#> GRID.gTree.30
#> GRID.lines.31
#> GRID.polyline.32
#> GRID.gTree.35
#> GRID.text.34
In the listing of grob objects just above, you can see an element named "outside_legend". So, we will modify it to bold the fonts of the legend:
# Edit the 'outside_legend' element of the 'grob' object 'psp1' using
# 'editGrob()' and save it in the new 'grob' object 'my_map'
my_map <- editGrob(psp1, gPath("outside_legend"), gp = gpar(fontface = "bold"))
# Draw the 'grob' object 'my_map'
# !!!! NB: may take a few seconds to be displayed in the graphic device !!!!
grid.draw(my_map)
STEP 3 - SAVING THE MAP EITHER MANUALLY OR PROGRAMMATICALLY
(in the latter case, you need to install the rstudioapi library)
rstudioapi::savePlotAsImage(
"my_map.png", # add the path if different of the working directory
format = "png", # other possible formats: "jpeg", "bmp", "tiff", "emf", "svg", "eps"
width = 670,
height = 710
)
And that's it :-)
Created on 2022-01-30 by the reprex package (v2.0.1)

Is it possible to subset facets in a polyfreq in GGplot?

I was wondering if it was possible to use subset on a geom_polyfreq()?
I am running a topic model and in order to report the facets properly i want to remove 4 out of 10 facets.
My code is as follows:
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
> ggplot(subset(dat, Topics %in% c(3, 4, 5, 7, 8, 9)),
aes(x = date,
fill = topic)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=9)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")
However, when I try to subset the data, I get an error that says:
Fejl: Faceting variables must have at least one value
Does anybody know what the issue is?
I hope this makes sense.
The full code is down below.
article.data <- article.data[!is.na(article.data$fulltext), ]
## Get date
article.data$date <- as.Date(article.data$date, "%Y-%m-%d")
#all of 2018
dat <- article.data[article.data$date > as.Date("2018-01-01", "%Y-%m-%d") &
article.data$date < as.Date("2018-12-01", "%Y-%m-%d"), ]
## 'tokenize' fulltext
quanteda_options("language_stemmer" = "danish")
texts <- gsub(":", " ", dat$fulltext, fixed = T)
texts <- tokens(texts, what = "word",
remove_numbers = T,
remove_punct = T,
remove_symbols = T,
remove_separators = T,
remove_hyphens = T,
remove_url = T,
verbose = T)
texts <- tokens_tolower(texts)
texts <- tokens_remove(texts, stopwords("danish"))
texts <- tokens_wordstem(texts)
texts <- tokens_remove(texts, stopwords("danish"))
# get actual dfm from tokens
txt.mat <- dfm(texts)
#remove frequent words with no substance
txt.mat <- txt.mat %>% dfm_remove(c("ad",
"af","aldrig","alene","alle",
"allerede","alligevel","alt",
"altid","anden","andet","andre",
"at","bag","bare", "bedre", "begge","bl.a.",
"blandt", "blev", "blevet", "blive","bliver",
"burde", "bør","ca.", "com", "da",
"dag", "dansk", "danske", "de",
"dem", "den", "denne","dens",
"der","derefter","deres","derfor",
"derfra","deri","dermed","derpå",
"derved","det","dette","dig",
"din","dine","disse","dit",
"dog","du","efter","egen",
"ej","eller","ellers","en",
"end","endnu","ene","eneste","enhver","ens",
"enten","er","et","f.eks.","far","fem",
"fik","fire","flere","flest",
"fleste","for", "foran",
"fordi","forrige","fra", "fx",
"få","får","før","først",
"gennem","gjorde","gjort","god",
"godt","gør","gøre","gørende",
"ham","han","hans","har",
"havde","have","hej","hel",
"heller","helt","hen","hende",
"hendes","henover","her",
"herefter","heri","hermed",
"herpå","hos","hun","hvad",
"hvem","hver","hvilke","hvilken",
"hvilkes","hvis",
"hvor", "hvordan","hvorefter","hvorfor",
"hvorfra","hvorhen","hvori","hvorimod",
"hvornår","hvorved","i", "ifølge", "igen",
"igennem","ikke","imellem","imens",
"imod","ind","indtil","ingen",
"intet","ja","jeg","jer","jeres",
"jo","kan","kom","komme",
"kommer", "kroner", "kun","kunne","lad",
"langs", "lang", "langt", "lav","lave","lavet",
"lidt","lige","ligesom","lille",
"længere","man","mand","mange",
"med","meget","mellem","men", "mener",
"mens","mere","mest","mig",
"min","mindre","mindst","mine",
"mit","mod","må","måske",
"ned","nej","nemlig","ni",
"nogen","nogensinde","noget",
"nogle","nok","nu","ny", "nye",
"nyt","når","nær","næste",
"næsten","og","også","okay",
"om","omkring","op","os",
"otte","over","overalt","pga.", "partier",
"partiets", "partiers", "politiske",
"procent", "på", "ritzau", "samme",
"sammen","se","seks","selv","selvom",
"senere","ser","ses","siden","sig",
"sige", "siger", "sin","sine","sit",
"skal","skulle","som","stadig",
"stor","store","synes","syntes",
"syv","så","sådan","således",
"tag","tage","temmelig","thi",
"ti","tidligere","til","tilbage",
"tit","to","tre","ud","uden",
"udover","under","undtagen","var",
"ved","vi","via","vil","ville", "viser",
"vor","vore","vores","vær","være",
"været","øvrigt","facebook","http", "https",
"www","millioner", "frem", "lars", "lars_løkke",
"rasmussen", "løkke_rasmussen", "statsminister", "politik",
"formand", "partiet", "år", "tid", "and", "fler",
"sid", "regeringen", "giv", "politisk", "folketing", "mer",
"ifølg"))
############################################################
## FEATURE SELECTION
############################################################
# check out top-appearing features in dfm
topfeatures(txt.mat)
# keep features (words) appearing in >2 documents
txt.mat <- dfm_trim(txt.mat, min_termfreq = 4)
# filter out one-character words
txt.mat <- txt.mat[, str_length(colnames(txt.mat)) > 2]
# filter out some html trash features
#txt.mat <- txt.mat[, !grepl("[[:digit:]]+px", colnames(txt.mat))]
#txt.mat <- txt.mat[, !grepl(".", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("_", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
#txt.mat <- txt.mat[, !grepl("#", colnames(txt.mat), fixed = T)]
############################################################
## SELECT FEATURES BY TF-IDF
############################################################
# Create tf_idf-weighted dfm
ti <- dfm_tfidf(txt.mat)
# Select from main dfm using its top features
txt.mat <- dfm_keep(txt.mat, names(topfeatures(ti, n = 1000)))
############################################################
## RUN TOPIC MODEL
############################################################
# convert quanteda dfm to tm 'dtm'
dtm <- convert(txt.mat, to = "topicmodels")
# run lda with 8 topics
lda <- LDA(dtm, k = 8)
# review terms by topic
terms(lda, 10)
############################################################
## LOOK FOR 'OPTIMAL' k
############################################################
# randomly sample test data
set.seed(61218)
select <- sample(1:nrow(dtm), size = 100)
test <- dtm[select, ]
train <- dtm[!(1:nrow(dtm) %in% select), ]
n.tops <- 3:14
metrics <- data.frame(topics = n.tops,
perplexity = NA)
for(i in n.tops) { # NB: takes awhile to run
print(i)
est <- LDA(train, k = i)
metrics[(i - 1), "perplexity"] <- perplexity(est, newdata = test)
}
save(metrics, file = "lda_perplexity2018.RData")
qplot(data = metrics, x = topics, y = perplexity, geom = "line",
xlab = "Number of topics",
ylab = "Perplexity on test data") + theme_bw()
#We found that 8 topics was one of those of lowest perplexity but
#also the ones which made the most sense
############################################################
## RERUN WITH BETTER CHOICE OF k
############################################################
# run lda with 10 topics
lda <- LDA(dtm, k = 10)
save(lda, file = "dr_ft_keep2018.RData")
# examine output
terms(lda, 20)
# put topics into original data
dat$topic <- topics(lda)
# add labels
#dat$date <- factor(dat$date,
#levels = 1:12,
#labels = c("januar","februar", "marts","april", "maj", "juni", "juli", "august", "september", "oktober", "november", "decemeber"))
dat$Topics <- factor(dat$topic,
levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
# frequency
qplot(data = dat, x = Topics,
geom = "bar", xlab = "",
ylab = "Topic Frequency", fill=Topics, main = "Figure 1: Main Topics in 2018 - DR") +
theme_bw() +
theme(axis.text.x = element_text(angle = 90))
#Make visualization showing topics over time
ggplot(data = dat,
aes(x = date,
fill = Topics[1])) +
geom_freqpoly(binwidth = 30) +
facet_wrap(Topics ~ ., scales = "free")+
theme_classic() +
scale_x_date(breaks = as.Date(c( "2018-02-01", "2018-04-01", "2018-06-01", "2018-08-01", "2018-10-01", "2018-12-01", date_labels="%B"))) +
theme(axis.text.x = element_text(angle = 90))
ggplot(data = dat,
aes(x = date,
fill = Topics)) +
geom_freqpoly(binwidth = 3) +
labs(x = "",
y = "Topic Count",
title = "Mentions of Topic On a Monthly Basis")+
scale_x_date(date_breaks = "months", date_labels="%b")+
theme(text = element_text(size=8)) +
theme(axis.text.x = element_text(angle = 45))+
facet_wrap(Topics ~ ., scales = "free")

It's best practice on this forum to make your question reproducible, so that others can try it and test their solutions to confirm they work. It's also good if you can make it minimal, both to respect potential answerers' time and to help clarify your own understanding of the problem.
How to make a great R reproducible example
In this case, the error message suggests that your subsetting is removing all your data, which breaks the faceting. It can't plot any facets if the faceting variable has no values.
It looks like dat$Topics is a factor, but your loop is referring to Topics like they're numeric with Topics %in% c(3, 4, 5, 7, 8, 9). For example, I could define a factor vector with the same levels as your Topics variable:
Topics <- factor(1:10, levels = 1:10,
labels = c("Topc 1", "Topic 2", "Integration", "Taxation", "Burka Prohibition",
"Topic 6", "Justice", "Foreign Affairs", "Housing", "Topic 10"))
Compare the output of these three lines:
Topics %in% c(1, 2)
# [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
as.numeric(Topics) %in% c(1, 2)
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
Topics %in% c("Topc 1", "Topic 2")
# [1] TRUE TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
In the top case, none of the data matches the test, so using that to subset the data would give you an empty data set and seems like a plausible cause for the error you got.
To refer to the Topics by their underlying level, we can refer to as.numeric(Topics) %in% c(1, 2). If we want to refer to the Topics by their labels, I could use Topics %in% c("Topc 1", "Topic 2").
Since I don't have your data, I can't confirm this exact syntax will work for you, but I hope something along these lines will.
For more on how to work with factors in R, I recommend: https://r4ds.had.co.nz/factors.html

Split a large character column with different amount of values into multiple columns

I have a character column with a different amount of values per row. This is just a small example:
GoodForMeal %>% head(5)
# A tibble: 5 x 1
GoodForMeal
<chr>
1 dessert': False, 'latenight': False, 'lunch': True, 'dinner': True
2 dessert': False, 'latenight': False, 'lunch': True, 'dinner': True
3 <NA>
4 dessert': False, 'latenight': False, 'lunch': True, 'dinner': True
5 dessert': False, 'latenight': False, 'lunch': True, 'dinner': True
Here is a dput() of the first row of the column:
structure(list(GoodForMeal = "dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}"), .Names = "GoodForMeal", row.names = c(NA,
-1L), class = c("tbl_df", "tbl", "data.frame"))
I want to assign the values before the colon as column names and the values after the colon as the values of the respective column.
Example:
desert latenight lunch diner
1 False False True True
2 False False True True
3 NA NA NA NA
4 False False True True
5 False False True True
I tried it with the tidyr packadge and the separate and the spread function:
separate(GoodForMeal, c("key", "value"), sep = ":", extra = "merge") %>% spread(key, value)
The problem is the r is not splitting all the values before the colon but just the first value.
So the result looks like this:
GoodForMeal %>% str()
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 4464 obs. of 2 variables:
$ dessert': chr " False, 'latenight': False, 'lunch': True, 'dinner': False, 'breakfast': False, 'brunch': False}" " False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}" " False, 'latenight': False, 'lunch': False, 'dinner': False, 'breakfast': False, 'brunch': False}" " False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}" ...
$ <NA> : chr NA NA NA NA ...
Any Idea how to split the values so that it´s looking like in the example? THX

Working with the test data you've provided, I would use mutate first to rid the column of characters such ' and :, along with the meal time keywords. This allows you to split on the comma that separates the various meal times. The following is an illustration:
df <- structure(list(GoodForMeal = "dessert': False, 'latenight': False, 'lunch': True, 'dinner': True, 'breakfast': False, 'brunch': False}"),
.Names = "GoodForMeal", row.names = c(NA, -1L),
class = c("tbl_df", "tbl", "data.frame"))
df %>%
mutate(GoodForMeal = trimws(gsub("[':]|dessert|lunch|dinner|latenight|brunch",
"",
GoodForMeal))) %>%
separate(GoodForMeal,
c("dessert", "latenight", "lunch", "dinner"),
", ",
extra="drop")
It should yield:
# A tibble: 1 x 4
# dessert latenight lunch dinner
# * <chr> <chr> <chr> <chr>
# False False True True
I hope this proves useful.

This is not an elegant solution (and long) but seems to work. I did change the data to make it more general. Hope this can be a good start.
# i made some changes in the data; remove lunch entry in the 4th element and remove dessert in the 1st
sampleData <- c("'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True",
"'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True",
NA,
"'dessert': False, 'latenight': False, 'dinner': True",
"'latenight': False, 'lunch': True, 'dinner': True")
# [1] "'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True"
# [2] "'dessert': False, 'latenight': False, 'lunch': True, 'dinner': True"
# [3] NA
# [4] "'dessert': False, 'latenight': False, 'dinner': True"
# [5] "'latenight': False, 'lunch': True, 'dinner': True"
# not sure if this is necessary, but jsut to clean the data
sampleData <- gsub(x = sampleData, pattern = "'| ", replacement = "")
# i'm a data.table user, so i'll jsut use tstrsplit
# split the pairs within each elements first
x <- data.table::tstrsplit(sampleData, ",")
# split the header and the entry
test <- lapply(x, function(x) data.table::tstrsplit(x, ":", fixed = TRUE))
# get the headers
indexHeader <- do.call("rbind", lapply(test, function(x) x[[1]]))
# get the entries
indexValue <- do.call("rbind",
lapply(test, function(x){if(length(x) > 1){ return(x[[2]])}else{ return(x[[1]])} }))
# get unique headers
colNames <- unique(as.vector(indexHeader))
colNames <- colNames[!is.na(colNames)]
# determine the order of the entries using the header matrix
indexUse <- apply(indexHeader, 2, function(x) match(colNames, x))
# index the entry matrix using the above matching
resA <- mapply(FUN = function(x,y) x[y],
x = as.data.frame(indexValue),
y = as.data.frame(indexUse))
# convert to data frame
final <- as.data.frame(t(resA))
# rename columns
colnames(final) <- colNames
# should give something like this
final
# dessert latenight lunch dinner
# False False True True
# False False True True
# <NA> <NA> <NA> <NA>
# False False <NA> True
# <NA> False True True

How to make the edges of the plot smooth in NCAR command language ( NCL)?

How to make the edges of the plot created by gsn_csm_contour_map smooth instead of using those blue squares?
I have tried different fill mode, none of them helped.
Is there a active contour function in ncl? Is is possible to create a contour between null and non-null value?
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_code.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/gsn_csm.ncl"
load "$NCARG_ROOT/lib/ncarg/nclscripts/csm/contributed.ncl"
;======================================================================
; The main code
;======================================================================
begin
;---Read desired data
sfile = addfile(f,"r")
var = sfile->var1
;---For zooming in on map
minlat = foo
maxlat = bar
minlon = foo1
maxlon = bar1
;---Get dimentions
dims = dimsizes(var)
nlev = dims(0)
time = var&time
date_str_i = getDate(time)
;---Set some resources
res = True
res#cnFillOn = True
;res#cnFillMode = "RasterFill"
;res#cnRasterSmoothingOn =True
res#cnLinesOn = False
res#cnLineLabelsOn = True
res#cnLevelSelectionMode = "ManualLevels"
res#cnMinLevelValF = -100
res#cnMaxLevelValF = 3000
res#cnLevelSpacingF = 200 ; 300 ; 50 ; 150
res#mpMinLatF = minlat
res#mpMaxLatF = maxlat
res#mpMinLonF = minlon
res#mpMaxLonF = maxlon
res#mpDataBaseVersion = "HighRes"
res#cnSmoothingOn = True
res#cnSmoothingDistanceF = 0.005
res#cnSmoothingTensionF = 0.001
res#mpCenterLonF = (minlon+maxlon)*0.5
res#mpCenterLatF = (minlat+maxlat)*0.5
res#pmTickMarkDisplayMode = "Always"
res#lbLabelFontHeightF = 0.01
res#gsnAddCyclic = False ; this is regional data
;---Loop across each level and plot to a different PNG file every time
do n=4,nlev-1
wks_type = "png"
wks_type#wkWidth = 2000
wks_type#wkHeight = 2000
wks = gsn_open_wks(wks_type,fname(0)+sprinti("%03i",n)) ;
res#gsnRightString = "Time:" + date_str_i(n)
res#gsnStringFontHeightF = 0.010
plot = gsn_csm_contour_map_ce(wks,var(n,:,:),res)
delete(wks)
end do
end
But I kind of doubt it is code related.

You have to use:
res#cnFillMode = "AreaFill"
res#cnFillOn = True

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Colorado mis-plot - r

Related

Delete string from names in sublists

Is there any way to customize legend histogram using tm_layout

Is it possible to subset facets in a polyfreq in GGplot?

Split a large character column with different amount of values into multiple columns

How to make the edges of the plot smooth in NCAR command language ( NCL)?

Categories

Resources