tidytext -- how to do commonality and comparsion word clouds - r

Let me start with the following and fully working code from Introduction to tidytext # CRAN
library(janeaustenr)
library(dplyr)
library(stringr)
original_books <- austen_books() %>%
group_by(book) %>%
mutate(linenumber = row_number(),
chapter = cumsum(str_detect(text, regex("^chapter [\\divxlc]",
ignore_case = TRUE)))) %>%
ungroup()
original_books
library(tidytext)
tidy_books <- original_books %>%
unnest_tokens(word, text)
tidy_books
data("stop_words")
cleaned_books <- tidy_books %>%
anti_join(stop_words)
All good so far. I have a tibble with six Jane Austen novels, with the standard junk words removed.
unique(cleaned_books$book)
Which gets me: Sense & Sensibility, Pride & Prejudice, Mansfield Park, Emma, Northanger Abbey, Persuasion.
So if I want to do a standard TF word cloud of all six, no problem. Just like this (color added):
library(wordcloud)
library(RColorBrewer)
dark2 <- brewer.pal(8, "Dark2")
cleaned_books %>%
count(word) %>%
with(wordcloud(word, n, color = dark2, max.words = 100))
Works beautifully. But how do I then do a commonality.cloud() with all six novels, and a comparison.cloud() with the same?
All the data I need is in cleaned_books -- but I can't figure out how to reshape it. Your help appreciated!
Got it. Thanks.
Will leave up in case anyone else has a similar issue.
The above code &
set1 <- brewer.pal(8, "Set1") ## a second color just for other cloud type
library(reshape2)
# title size and scale optional, obviously
cleaned_books %>%
group_by(book) %>%
count(word) %>%
acast(word ~ book, value.var = "n", fill = 0) %>%
comparison.cloud(color = dark2, title.size = 1, scale = c(3, 0.3), random.order = FALSE, max.words = 100)
cleaned_books %>%
group_by(book) %>%
count(word) %>%
acast(word ~ book, value.var = "n", fill = 0) %>%
commonality.cloud(color = set1, title.size = 1, scale = c(3, 0.3), random.order = FALSE, max.words = 100)
That worked out nicely.

Got it. Thanks. Run the above code. Then
set1 <- brewer.pal(8, "Set1") ## a second color just for other cloud type
library(reshape2)
Color is optional. reshape2 essential. Then just group by book, and go.
cleaned_books %>%
group_by(book) %>%
count(word) %>%
acast(word ~ book, value.var = "n", fill = 0) %>%
comparison.cloud(color = dark2, title.size = 1, scale = c(3, 0.3), random.order = FALSE, max.words = 100)
cleaned_books %>%
group_by(book) %>%
count(word) %>%
acast(word ~ book, value.var = "n", fill = 0) %>%
commonality.cloud(color = set1, title.size = 1, scale = c(3, 0.3), random.order = FALSE, max.words = 100)
All good!

Related

kableExtra table repeat_header being strange

I've finally made my dream table with kableExtra, but it's doing very weird things with kable_styling(latex_options=c("repeat_header")), where it's writing the headings over my column labels. Here I'm using the example dataset diamonds. It is also moving the first column below where it should be (red arrows in picture).
(Also can't download LaTex because of work authorization issues, so that's not an option)
Thanks for all the help!
library(knitr)
library(kableExtra)
library(dplyr)
options(
knitr.table.toprule = '\\toprule',
knitr.table.midrule = '\\midrule',
knitr.table.bottomrule = '\\bottomrule'
)
df1 <- read.csv("diamonds.csv")
a11<-df1%>%
group_by(cut, color, clarity)%>%
summarize_at( .vars=c("price"),
.funs=~mean(.,na.rm=TRUE)) %>%
mutate(sort = 3)
b11<-df1%>%
group_by(cut, color) %>%
summarize_at( .vars=c("price"),
.funs=~mean(.,na.rm=TRUE)) %>%
mutate(clarity="", sort = 2) %>%
select(cut, color, clarity,everything())
c11<-df1 %>%
group_by(cut) %>%
summarize_at( .vars=c("price"),
.funs=~mean(.,na.rm=TRUE)) %>%
mutate(color="", clarity= "", sort = 1) %>%
select(cut, color,everything())
table3<-rbind(a11,b11,c11)
table3%>%
arrange(cut, color, clarity) %>%
select(-sort)%>%
filter(price>=3000)%>%
kbl(
caption = "Table",
longtable=T, booktabs=T) %>%
add_header_above(header = c("Table 1." = 4))%>%
kable_styling(latex_options=c("repeat_header"))%>%
kable_paper(full_width = F) %>%
column_spec(1, bold = T) %>%
column_spec(3, italic = T) %>%
collapse_rows(columns = 1:3, valign = "top")

Mapview highlight SpatialLines upon hover

I want to highlight all lines going to a node/marker on a map in mapview. In the example code here, the nodes represent capital cities. Upon hovering on one of the cities, I would like all 4 lines going to/from that city to become highlighted. The hover option inside mapview had no effect, when I tried it. Thanks.
library(dplyr)
library(readr)
library(janitor)
library(sp)
library(purrr)
cc = read_csv("http://techslides.com/demos/country-capitals.csv")
nodes =
cc %>%
clean_names() %>%
mutate(capital_latitude = as.numeric(capital_latitude)) %>%
select(capital_name, capital_longitude, capital_latitude) %>%
filter(capital_name %in% c("Warsaw", "El-AaiĂșn", "Jamestown", "Antananarivo", "Manama"))
edges =
full_join(
nodes %>% rename(from = capital_name, from_lon = capital_longitude, from_lat = capital_latitude) %>% mutate(index = 1),
nodes %>% rename(to = capital_name, to_lon = capital_longitude, to_lat = capital_latitude) %>% mutate(index = 1),
by = "index") %>%
mutate(from_to = paste(from, "_", to)) %>%
filter(from != to) %>%
select(-index) %>%
rowwise() %>%
mutate(capital_lines = pmap(list(from_lon = from_lon, from_lat = from_lat, to_lon = to_lon, to_lat = to_lat, from_to = from_to),
function(from_lon, from_lat, to_lon, to_lat, from_to) {
Line(cbind(c(from_lon, to_lon),
c(from_lat, to_lat))) %>%
Lines(., ID = from_to)}
)) %>%
mutate(capital_lines = list(SpatialLines(list(capital_lines))))
mapview(nodes, xcol = "capital_longitude", ycol = "capital_latitude") +
mapview(do.call(rbind, edges$capital_lines))
library(mapview)
mapviewOptions(fgb = FALSE)
mapview(shp, highlight = leaflet::highlightOptions(color = "red", weight = 2, sendToBack = TRUE))
This works for me.
See details in https://github.com/r-spatial/mapview/issues/392.

how to make customised pretty flexable function

I am loving flextable however, incorporating it within my workflow is causing issues in that I am not able to write general purpose functions.
I want a function that would automatically highlight the header and the last row of the table. I am able to do this but I have to specify the name of the first column name. This is simply too much work, is there a work around?
library(tidyverse)
require(flextable)
require(rlang)
# Function that works
my_table <- function(x){
require(flextable)
require(rlang)
x %>%
flextable() %>%
# Header colour and bold
bg(bg = "#e05297", part = "header") %>%
flextable::color(color = "white", part = "header") %>%
# Last row bold and highlight
bold(i = ~rowname == "Total", bold = TRUE) %>%
bg(i = ~rowname == "Total",
bg = "grey",
part = "body")
}
mtcars %>%
rownames_to_column() %>%
adorn_totals("row") %>%
my_table()
# This is a general purpose function which is not working
my_table <- function(x){
require(flextable)
require(rlang)
first_col_name <- colnames(x) %>% .[1]
x %>%
flextable() %>%
# Header colour and bold
bg(bg = "#e05297", part = "header") %>%
flextable::color(color = "white", part = "header") #%>%
# Last row bold and highlight
bold(i = ~eval(rlang::sym(first_col_name)) == "Total", bold = TRUE) %>%
bg(i = ~eval(rlang::sym(first_col_name)) == "Total",
bg = "grey",
part = "body")
}
Any ideas how to make the general purpose my_table function to work
i argument also accepts position (row number) of the dataframe to highlight so you may use nrow to get the last row in the dataframe.
library(flextable)
library(janitor)
my_table <- function(x){
x %>%
flextable() %>%
# Header colour and bold
bg(bg = "#e05297", part = "header") %>%
flextable::color(color = "white", part = "header") %>%
bold(i = nrow(x), bold = TRUE) %>%
bg(i = nrow(x),bg = "grey",part = "body")
}
mtcars %>%
rownames_to_column() %>%
adorn_totals("row") %>%
my_table()

r ggvis axis format with percentage and decimal places

I want to ask, if someone knows answer regarding axis format in r package ggvis.
I am using % axis format, but I want to add som decimal places depending on spread of CVaR.
image here
Here is my code :
Datovka %>% ggvis(x = ~yShort,y = ~xShort,strokeWidth:=1, stroke:="blue", strokeWidth.hover := 3) %>% layer_paths() %>% layer_paths(x = ~yNoShort, y = ~xNoShort, data = DatovkaNoShort , stroke :="green",strokeWidth:=1, strokeWidth.hover := 3) %>%
layer_points(~vol,~r,data = vynosyR, fill := ~isinP, key := ~id , size := 60, size.hover :=120 ) %>%
# hide_legend("fill") %>%
add_tooltip(all_values, "hover") %>%
add_axis("x", title = "CVaR",format = "%" , title_offset = 50) %>%
add_axis("y", title = "Expected return", format = "%" , title_offset = 50) %>%
scale_numeric("y", nice = FALSE, clamp = TRUE) %>%
scale_numeric("x", nice = FALSE, clamp = TRUE) }
Thank you
You can read about formating here: https://github.com/d3/d3-3.x-api-reference/blob/master/Formatting.md
df <- data.frame(x = seq(0.1, 1, 0.1) / 100, y = runif(10))
df %>% ggvis(~x, ~y) %>% layer_paths() %>% add_axis("x", format = '.2%')

Creating negative stacking bar chart with Highcharter(Likert chart)

The dream is that Jkunst responds, but I am trying to create a negative stacking bar chart with highcharter like the highcharts demo here -http://jsfiddle.net/KV5KV/
I've tried swapping all of the options and what not, but I can't seem to get it to show more than one series at a time, if anybody can help it would be wonderful. Here is what I am trying to run in R.
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "stuff") %>%
hc_yAxis(title = list(text = ""),
labels = list(format = "{value}"), min=0) %>%
hc_plotOptions(column = list(
series=list(stacking='normal'),
dataLabels = list(enabled = FALSE),
enableMouseTracking = TRUE)) %>%
hc_legend(enabled = FALSE) %>%
hc_xAxis(reversed=FALSE,opposite=TRUE,reversed=FALSE, linkedTo=0) %>%
hc_series(list(name="Value",color=c("#766A62"),data=list(-10, -5, -6))) %>%
hc_add_series(list(name="Value",color=c("#766A62"),data=list(-2, -5, -3))) %>%
hc_add_series(list(name="neutral",id='neutral',color=c("#766A62"),data=list(-2, -5, -3))) %>%
hc_add_series(list(name="Value",color=c("#766A62"),data=list(5, 1,6))) %>%
hc_add_series(list(name="Value",color=c("#766A62"),data=list(2, 5, 3))) %>%
hc_add_series(list(linkedTo='neutral',name="neutral",color=c("#766A62"),data=list(6, 8, 2)))
I HAVE DONE IT.
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "Experimental Survey Questions breakdown") %>%
hc_yAxis(title = list(text = ""),
labels = list(format = "{value}")) %>%
hc_plotOptions(series=list(stacking='normal'),column = list(
dataLabels = list(enabled = FALSE),
enableMouseTracking = TRUE)) %>%
hc_legend(enabled = FALSE) %>%
hc_xAxis(reversed=FALSE,opposite=TRUE,reversed=FALSE) %>%
hc_add_series(name="Value",color=c("rgb(205,35,35)"),data=list(-10, -5, -6)) %>%
hc_add_series(name="Values",color=c("rgb(165,85,85)"),data=list(-2, -5, -3)) %>%
hc_add_series(name="neutral",id='neutral',color=c("#766A62"),data=list(-2, -5, -3)) %>%
hc_add_series(name="Valuess",color=c("rgb(35,35,205)"),data=list(5, 1,6)) %>%
hc_add_series(name="Valuesss",color=c("rgb(85,85,165)"),data=list(2, 5, 3)) %>%
hc_add_series(name="Neutral",linkedTo='neutral',color=c("#766A62"),data=list(6, 8, 2))
^Cut and dry for those looking for answer, Special thanks to good ol jbkunst, he made this amazing library and has completely changed the way I present my research( you dah best they evah wuz ;)
I still have the question of how to get the actual values to link together(neutral has a line through it, which is lame.)
I'm still fiddling with it but this seems to be closer. I got rid of the colors for now. I was hoping to get the mirrored x-axis but I'll edit that in if I can figure it out. It does show all 3 series though
highchart() %>%
hc_chart(type = "bar") %>%
hc_title(text = "stuff") %>%
hc_yAxis(title = list(text = ""),
labels = list(format = "{value}"), min=0) %>%
hc_plotOptions(
series=list(stacking='normal'),
dataLabels = list(enabled = TRUE),
enableMouseTracking = TRUE) %>%
hc_legend(enabled = TRUE) %>%
hc_xAxis(reversed=FALSE,opposite=TRUE,reversed=FALSE, linkedTo=0) %>%
hc_add_series(name="Value", data=list(-10, -5, -6)) %>%
hc_add_series(name="foo",data=list(-2, -15, -3)) %>%
hc_add_series(name="neutral",id='neutral',data=list(-2, -5, -4)) %>%
hc_add_series(name="Value",data=list(5, 1, 7)) %>%
hc_add_series(name="Value", linkedTo = "neutral", data=list(2, 5, 3))

Resources