Classifying the words as per emotions & counts in the song lyrics

Classifying the words as per emotions & counts in the song lyrics - r

library(rvest)
library(dplyr)
library(tidyr)
library(spotifyr)
library(tidytext)
library(textdata)
Using the above libraries I scraped artist data from Spotify using the API token.
I've got the data of words with sentiments (i.e. anger let's say) and the details about the songs.
I now want to run it in loop over multiple such word category (eg. anger) and see which words are most used in that particular emotion, and in general too I want to plot a histogram for the words used in the songs.
So I use the following functions:
data %>%
unnest() %>%
unnest_tokens(word, lyric) %>%
anti_join(stop_words, by = "word") %>%
left_join(angry_words, by = "word") %>%
group_by(track_name, energy, album_name, duration_ms, valence) %>%
summarize(angry_words = sum(anger, na.rm = TRUE)) %>%
ungroup() %>%
select(track_name, album_name, angry_words) %>%
arrange(desc(angry_words))
Every-time I run the code I get the following error:
Error in `fn()`:
! In row 64, can't recycle input of size 3 to size 2.
Run `rlang::last_error()` to see where the error occurred.
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(album_images, artists, available_markets)`
All suggestions will be helpful.
Here the data and angry_words data frames are:
data <- structure(list(artist_name = c("María José Llergo", "María José Llergo"
), artist_id = c("70GBRlKEGjfueop2lfdQ4Q", "70GBRlKEGjfueop2lfdQ4Q"
), album_id = c("6BMyWViSAgXtUVlPfXiGES", "6BMyWViSAgXtUVlPfXiGES"
), album_type = c("album", "album"), album_images = list(structure(list(
height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L)), structure(list(height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L))), album_release_date = c("2020-01-31", "2020-01-31"), album_release_year = c(2020,
2020), album_release_date_precision = c("day", "day"), danceability = c(0.612,
0.5), energy = c(0.342, 0.267), key = c(4L, 7L), loudness = c(-9.193,
-11.736), mode = 0:1, speechiness = c(0.0419, 0.0448), acousticness = c(0.358,
0.815), instrumentalness = c(0.000502, 2.66e-06), liveness = c(0.257,
0.0981), valence = c(0.122, 0.264), tempo = c(99.993, 114.192
), track_id = c("7pB0e4E78UfAmKBPzQPo8a", "1sgH6adzL1BBaIXRC7NOYI"
), analysis_url = c("https://api.spotify.com/v1/audio-analysis/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/audio-analysis/1sgH6adzL1BBaIXRC7NOYI"
), time_signature = 3:4, artists = list(structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L),
structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L)),
available_markets = list(c("AD", "AE", "AG", "AL", "AM",
"AO", "AR", "AT", "AU", "AZ", "BA", "BB", "BD", "BE", "BF",
"BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW",
"BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO",
"CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",
"DZ", "EC", "EE", "EG", "ES", "FI", "FJ", "FM", "FR", "GA",
"GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW",
"GY", "HK", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN",
"IQ", "IS", "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
"KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK",
"LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME",
"MG", "MH", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV",
"MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", "NO",
"NP", "NR", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL",
"PS", "PT", "PW", "PY", "QA", "RO", "RS", "RW", "SA", "SB",
"SC", "SE", "SG", "SI", "SK", "SL", "SM", "SN", "SR", "ST",
"SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TO", "TR",
"TT", "TV", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC",
"VE", "VN", "VU", "WS", "XK", "ZA", "ZM", "ZW"), c("AD",
"AE", "AG", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BA",
"BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO",
"BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH",
"CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE",
"DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "FI",
"FJ", "FM", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN",
"GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", "HT", "HU",
"ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "JP",
"KE", "KG", "KH", "KI", "KM", "KN", "KR", "KW", "KZ", "LA",
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",
"MA", "MC", "MD", "ME", "MG", "MH", "MK", "ML", "MN", "MO",
"MR", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", "NE",
"NG", "NI", "NL", "NO", "NP", "NR", "NZ", "OM", "PA", "PE",
"PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO",
"RS", "RW", "SA", "SB", "SC", "SE", "SG", "SI", "SK", "SL",
"SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ",
"TL", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG",
"US", "UY", "UZ", "VC", "VE", "VN", "VU", "WS", "XK", "ZA",
"ZM", "ZW")), disc_number = c(1L, 1L), duration_ms = c(197316L,
313028L), explicit = c(FALSE, FALSE), track_href = c("https://api.spotify.com/v1/tracks/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/tracks/1sgH6adzL1BBaIXRC7NOYI"
), is_local = c(FALSE, FALSE), track_name = c("¿De Qué Me Sirve Llorar?",
"Niña De Las Dunas"), track_preview_url = c("https://p.scdn.co/mp3-preview/1ed3fba536f1813af99c88f69893dfe6272df847?cid=cf686ca455c74783b8f27d0c35dfc5b0",
"https://p.scdn.co/mp3-preview/e4f9386ef79ff5027800aa9ccd8560a622df28d0?cid=cf686ca455c74783b8f27d0c35dfc5b0"
), track_number = 1:2, type = c("track", "track"), track_uri = c("spotify:track:7pB0e4E78UfAmKBPzQPo8a",
"spotify:track:1sgH6adzL1BBaIXRC7NOYI"), external_urls.spotify = c("https://open.spotify.com/track/7pB0e4E78UfAmKBPzQPo8a",
"https://open.spotify.com/track/1sgH6adzL1BBaIXRC7NOYI"),
album_name = c("Sanación", "Sanación"), key_name = c("E",
"G"), mode_name = c("minor", "major"), key_mode = c("E minor",
"G major")), row.names = 1:2, class = "data.frame")
angry_words <- structure(list(word = c("abandoned", "abandonment", "abhor",
"abhorrent", "abolish", "abomination", "abuse", "accursed", "accusation",
"accused", "accuser", "accusing", "actionable", "adder", "adversary",
"adverse", "adversity", "advocacy", "affront", "aftermath", "aggravated",
"aggravating", "aggravation", "aggression", "aggressive", "aggressor",
"agitated", "agitation", "agony", "alcoholism", "alienate", "alienation",
"allegation", "altercation", "ambush", "anarchism", "anarchist",
"anarchy", "anathema", "anger", "angry", "anguish", "animosity",
"animus", "annihilate", "annihilated", "annihilation", "annoy",
"annoyance", "annoying", "antagonism", "antagonist", "antagonistic",
"antichrist", "antipathy", "antisocial", "antithesis", "anxiety",
"argue", "argument", "argumentation", "arguments", "armament",
"armed", "arraignment", "arrogant", "arson", "assail", "assailant",
"assassin", "assassinate", "assassination", "assault", "asshole",
"atrocious", "atrocity", "attack", "attacking", "attorney", "avarice"
), anger = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)), row.names = c(NA, -80L), class = c("tbl_df",
"tbl", "data.frame"))

Related

Creating a tdm with only one variable

So I need to prepare a term-document-matrix for each of the sets of text I want to subsequently run against each other in a classification procedure (rolling.classify() in Stylo package).
So I created a tdm of the whole text corpus, then want to make two sets only of selected texts, one which should contain one text only. So multiple texts works fine (a), but one texts only does not (b), I cannot do this?
freq.list <- make.frequency.list(words, head = 265) # Creating frequency list using only the frequencies of the
# selected features from word-list (words)
word.frequencies <- make.table.of.frequencies(corpus = x, features = freq.list)
# Document-term matrix of whole corpus and matching frequencies.
# Making two subsets now:
a <- word.frequencies[c(1,2,3,17,19,20,21,22,23), 1:263]
dim(a) # Double-check that it is the right no. of texts
b <- word.frequencies[18,1:263]
dim(b) # Double-check
> dim(a)
[1] 9 263
> dim(b)
NULL
data:
(used dput())
x <- structure(list(middleFr_Calmative_1946 = c("the", "calmative",
"i", "don’t", "know", "when", "i", "died", ".", "it", "always",
"seemed", "to", "me", "i", "died", "old", ",", "about", "ninety",
"years", "old", ",", "and", "what", "years", ",", "and", "that",
"my", "body", "bore", "it", "out", ",", "from", "head", "to",
"foot", ".", "but", "this", "evening", ",", "alone", "in", "my",
"icy", "bed", ",", "i", "have", "the", "feeling", "i’ll", "be",
"older", "than", "the", "day", ",", "the", "night", ",", "when",
"the", "sky", "with", "all", "its", "lights", "fell", "upon",
"me", ",", "the", "same", "i", "had", "so", "often", "gazed",
"resolved", "to", "speak", "to", "him", ".", "so", "i", "marshalled",
"the", "words", "and", "opened", "my", "mouth", ",", "thinking",
"i", "would", "hear", "them", ".", "but", "all", "i", "heard",
"was", "a", "kind", "of", "rattle", ",", "unintelligible", "even",
"have", "a", "penny", "in", "my", "pocket", ",", "nor", "anything",
"resembling", "it", "."), middleFr_End_1946 = c("the", "end",
"they", "clothed", "me", "and", "gave", "me", "money", ".", "i",
"back", "mine", ".", "i", "added", ",", "give", "me", "back",
"my", "greatcoat", ".", "they", "replied", "that", "they", "had",
"burnt", "them", ",", "together", "with", "my", "other", "clothes",
".", "i", "understood", "then", "that", "the", "end", "was",
"near", ",", "at", "least", "fairly", "near", ".", "later", "on",
"i", "tried", "to", "exchange", "this", "hat", "for", "a", "cap",
",", "or", "a", "slouch", "which", "could", "be", "pulled", "down",
"over", "my", "face", ",", "but", "without", "much", "success",
".", "and", "yet", "i", "could", "not", "go", "about", "bare",
"-", "headed", ",", "with", "my", "skull", "in", "the", "state",
"it", "was", ".", "at", "first", "this", "hat", "was", "too",
"small", ",", "then", "it", "got", "used", "to", "me", ".", "they",
"gave", "me", "a", "tie", ",", "after", "long", "discussion",
".", "it", "seemed", "a", "pretty", "tie", "to", "me", ",", "but",
"i", "didn’t", "like", "it", ".", "when", "it", "came", "at",
"last", "i", "was", "too", "tired", "to", "send", "it", "back",
".", "but", "in", "the", "end", "it", "came", "in", "useful",
".", "it", "was", "blue", ",", "with", "kinds", "of", "little",
"stars", ".", "i", "didn’t", "feel", "well", ",", "but", "they",
"told", "me", "i", "was", "well", "enough", "."), middleFr_Expelled_1946 = c("the",
"expelled", "there", "were", "not", "many", "steps", ".", "i",
"had", "counted", "them", "a", "thousand", "times", ",", "both",
"going", "up", "and", "coming", "down", ",", "but", "the", "figure",
"has", "gone", "from", "my", "mind", ".", "i", "have", "never",
"known", "whether", "you", "should", "say", "one", "with", "your",
"every", "day", "several", "times", "a", "day", ",", "until",
"they", "sink", "forever", "in", "the", "mud", ".", "that’s",
"an", "order", ".")), class = "stylo.corpus", call = load.corpus.and.parse(files = "all",
corpus.dir = "x", markup.type = "plain", corpus.lang = "English.all",
splitting.rule = ("[ \t\n]+"), sampling = "no.sampling",
features = "w", ngram.size = 1, preserve.case = FALSE, encoding = "UTF-8"))
freq.list <- c("", "-", "—", ",", ";", ":", "!", "?", ".", "’", "\"",
"(", ")", "a", "about", "above", "across", "after", "again",
"against", "ah", "all", "almost", "along", "Already", "also",
"always", "am", "among", "an", "and", "another", "any", "anything",
"are", "as", "at", "away", "back", "be", "because", "been", "before",
"behind", "being", "best", "better", "between", "beyond", "both",
"but", "by", "came", "can", "can't", "can’t", "cannot", "come",
"comes", "could", "did", "didn’t", "different", "do", "does",
"doing", "don't", "don’t", "done", "down", "each", "either",
"else", "even", "ever", "every", "everything", "except", "far",
"few", "fifteen", "first", "five", "for", "forward", "four",
"from", "get", "go", "goes", "going", "got", "great", "had",
"half", "has", "have", "having", "he", "her", "here", "herself",
"him", "himself", "his", "how", "however", "hundred", "i", "i'll",
"i'm", "i’ll", "if", "in", "indeed", "instead", "into", "is",
"it", "it's", "it’s", "its", "itself", "just", "last", "late",
"least", "left", "less", "let", "like", "little", "long", "made",
"make", "many", "may", "me", "merely", "might", "mine", "more",
"most", "moved", "much", "must", "my", "myself", "near", "neither",
"never", "next", "no", "none", "nor", "not", "nothing", "now",
"of", "off", "often", "oh", "on", "once", "one", "only", "or",
"other", "others", "otherwise", "our", "out", "over", "own",
"perhaps", "place", "quite", "rather", "really", "right", "said",
"same", "say", "second", "shall", "she", "should", "since", "six",
"small", "so", "some", "someone", "something", "sometimes", "somewhere",
"soon", "still", "such", "ten", "than", "that", "that's", "that’s",
"the", "their", "them", "themselves", "then", "there", "therefore",
"these", "they", "thing", "things", "third", "this", "those",
"though", "three", "through", "thus", "till", "time", "times",
"to", "together", "too", "towards", "two", "under", "unless",
"until", "up", "upon", "us", "very", "was", "way", "we", "well",
"went", "were", "what", "whatever", "when", "where", "whether",
"which", "while", "who", "whom", "whose", "why", "will", "with",
"within", "without", "won't", "would", "yes", "yet", "you", "your",
"yourself")

You can do:
b <- word.frequencies[18,1:263, drop = F]
dim(b)
# [1] 1 263

Getting simple table to a word list with Kable

I am trying to get a simple table for a strings (words) list using Kable in a Rstudio markdown. With MSword i get this example:
I am trying to get this with Rstudio/Markdown in differents ways, but only get this:
library(kableExtra)
table= data.frame(words=c("uno", "dos", "tres", "cuatro", "cinco"))
kable(table)
table2= as.vector(table)
kable(table2)
table3=c("uno", "dos", "tres", "cuatro", "cinco")
kable(table3)
EDIT (I add another example) :
#manro reply, work fine but not work for long list of string words.
table_long= data.frame(words=c("uno", "hoy", "a", "al", "desconcierto", "nos", "estan", "por", "era", "otra", "con", "de", "hacia", "_"son", "h"sobre", "hasta", "quienes", "tratar", "nuestro", "y", "porque", "su", "eres" , "otro", "al", "mi", "aos", ".", "vueltas", "", "hacer"utilizando", "haciendo", "estimado", "realizando", "baja", "hago", "existen", "resto", "traves", "todas", "realice", "saber"posteriormente", "presentaron", "mandarlo", "veces", "ya", "posee", "b", "cosas", "hechos", "ud", "durante", "eran", "coste, "de", "o", "que", "todo","te", "ni", "tu", "antes", "pag", "bajo", "asi", "fue","una", "muy", "les", "ha sido", "desde", "puede", "a", rso", "sugiero", "oceanografia", "grabe", "ella", "casi", "fin", "da", "sr", "ahora","sido", "la", "a", "en", "y", "seguir", "nada"pues", "pudo", "profesor", "profesora", "punto", "profe", "temas", "unos", "dia", "gabriele", "a", "al", "de", "vuelven", "hizo", "de", "de", "que", "se", "y", "a", "un", "a", "al ", " al ", "ha", "no", "se", "los", "por", "al", "yo", "enviar", "usted", "cristian", s", "del", "lo", "es", "para", "como", "direccion", "toda", "toco", "ser", "este", "pero", "donde", "el", "entre", "estos", "antes", "s, "ya", "o", "e", "mas", "dos", "han", "dirección", "decir", "le", "ex", "ante", "tras", "http", "noviembre", "borde", "segun", "quien", emas", "sobre", "esta", "dijo", "debe", "tengan", "parte", "tips", "tipo", "habia", "solo", "mil", "mucha", "mucho", "tener", "tiene", "si", "me", "ese", "eso", "hace", "hablaba", "ellos", "esto", "cuando", "esa", "va", "nos", "cada", "ahi", "tus", "uso", "dafne", "tanto", "vez", "año", "años", "tienen", "mayor", "hyperlink", "tenian", "tenia", "tan", "sea", "podria", "aunque", "mismo", "creo", "n", "tres", "ayer", "otros", "tambien", "https", "p", "nop", "dado", "alguna", "algunas", "estaba", "deberia", "cualquier", "hora", "poquito", "parecen", "grande", "hilo", "cerrar", "paso", "algo", "ocasion", "respecto", "sola", "segundo", "llegar", "parecer", "dice", "todos", "luego", "c", "fueron", "mientras", "pag", "alfo", "dentro", "alla", "cual", "caso", "dejado", "haces", "estar", "dan", "vaya", "pocos", "nuestra", "una", "hemos", "estamos", "decia", "tuviera", "aveces", "tenido", "u", "vemos", "he", "aula", "mar", "topado", "sino", "cabo", "ac", "esas", "alfinal", "dante", "dan", "hofer", "juan", "deben", "estas", "ti", "quizas", "valio", "tenga", "siga", "pone", "via", "etc", "èl", "fui", "den", "ah", "ir", "tuve", "claudio", "has", "joaquim"))

You can try this:
```{r, echo = FALSE}
library(kableExtra)
library(data.table)
table3 <- data.frame("uno, dos, tres, cuatro, cinco")
table_t3 <- transpose(table3)
colnames(table_t3) <- NULL
kable(table_t3, format = "latex", align ="|c|", booktabs = T) %>%
kable_styling(latex_options =c("striped", "hold_position"))
```
For the long list:
add the full_width = TRUE to the kable_styling
kable_styling(latex_options =c("striped", "hold_position"), full_width = TRUE)
But borders don't work in this case.

Filling in the values of a column based on matching strings from the column of another dataset [duplicate]

This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I am working with the following two datasets :
will_can
structure(list(will_can.REGION = c("AB", "B", "B", "B", "BB",
"BB", "BD", "BH", "BH", "BH", "BR", "BS", "BS", "BT", "BT", "CF",
"CF", "CM", "CO", "CV", "CV", "CV", "CW", "DA", "DA", "DD", "DE",
"DE", "DG", "DG", "DG", "DG", "DL", "DN", "DT", "E", "E", "E",
"EH", "EH", "EH", "EH", "EH", "EH", "EH", "EX", "EX", "EX", "FK",
"FK", "FY", "G", "G", "G", "GL", "GL", "HA", "HD", "HD", "IV",
"KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA",
"KA", "KA", "KA", "KA", "KA", "KA", "KT", "KY", "KY", "KY", "L",
"L", "L", "LA", "LA", "LE", "LE", "M", "M", "ME", "ME", "MK",
"ML", "N", "N", "N", "NE", "NG", "NN", "NN", "NR", "NW", "OL",
"OX", "OX", "PH", "PO", "PR", "RG", "RH", "RM", "RM", "S", "S",
"S", "S", "SA", "SE", "SE", "SE", "SE", "SE", "SG", "SL", "SN",
"SN", "SO", "SO", "SO", "SS", "ST", "ST", "ST", "ST", "SW", "SW",
"SW", "SW", "SY", "SY", "SY", "TA", "TD", "TD", "TN", "TW", "UB",
"UB", "W", "W", "W", "W", "WA", "WC", "WD")), class = "data.frame", row.names = c(NA,
-156L))
will_can_region_norm
structure(list(norm = c(67.3112073766083, 0, 62.9924341677094,
0, 134.940019161483, 86.0271073135687, 233.710968710152, 0, 0,
136.210220315945, 72.0106074505199, 54.9624828839958, 0, 0, 46.5061888459603,
0, 51.9149234846709, 85.3970454501009, 0, 0, 141.438961332615,
122.50716299382, 197.887432921107, 96.646567080111, 108.996678489718,
873.779493880704, 0, 109.106806944561, 56.7421763178016, 249.99781251914,
0, 106.993398828272, 0, 182.997053590583, 0, 225.716259764203,
217.655353412983, 98.8344746903195, 70.3435951664196, 106.870878390986,
0, 0, 113.255439262354, 226.344150395729, 0, 0, 0, 0, 0, 0, 0,
0, 0, 92.5698187029358, 0, 1159.88543061088, 59.5746039659052,
0, 217.977759293264, 88.627745595238, 155.299651064979, 0, 70.3301130229532,
0, 0, 0, 0, 36.166169734453, 162.12380892704, 74.7710230881704,
112.29824076945, 120.249189991435, 25.6209421071498, 36.7120335621411,
115.238964414265, 0, 50.4621322067494, 59.9490876378327, 82.9160720202368,
132.342362545417, 0, 0, 209.987774511768, 0, 45.0104437732687,
59.5244437425851, 54.7420581590574, 77.921490980977, 132.545922191567,
100.083647410414, 51.5757713324224, 102.602449571922, 98.8984492920948,
0, 129.885834248271, 0, 189.332549749021, 149.846130500895, 0,
0, 73.4653456617979, 220.103517986062, 111.317004279081, 375.711503660056,
156.229153172374, 760.35739839154, 0, 83.1515916711375, 0, 0,
0, 73.5483180088058, 269.518568414391, 102.141462145838, 55.2886923953334,
151.949727736478, 148.297412239816, 0, 0, 0, 0, 0, 0, 0), REGION = c("AB",
"AL", "B", "BA", "BB", "BD", "BH", "BL", "BN", "BR", "BS", "BT",
"CA", "CB", "CF", "CH", "CM", "CO", "CR", "CT", "CV", "CW", "DA",
"DD", "DE", "DG", "DH", "DL", "DN", "DT", "DY", "E", "EC", "EH",
"EN", "EX", "FK", "FY", "G", "GL", "GU", "GY", "HA", "HD", "HG",
"HP", "HR", "HS", "HU", "HX", "IG", "IM", "IP", "IV", "JE", "KA",
"KT", "KW", "KY", "L", "LA", "LD", "LE", "LL", "LN", "LS", "LU",
"M", "ME", "MK", "ML", "N", "NE", "NG", "NN", "NP", "NR", "NW",
"OL", "OX", "PA", "PE", "PH", "PL", "PO", "PR", "RG", "RH", "RM",
"S", "SA", "SE", "SG", "SK", "SL", "SM", "SN", "SO", "SP", "SR",
"SS", "ST", "SW", "SY", "TA", "TD", "TF", "TN", "TQ", "TR", "TS",
"TW", "UB", "W", "WA", "WC", "WD", "WF", "WN", "WR", "WS", "WV",
"YO", "ZE")), row.names = c(NA, -124L), class = "data.frame")
I am trying to add a new column, will_can$norm, which would contain values of will_can_region_norm$norm based on matching values of the variable "REGION" which is the same in both datasets. So gaps from the second dataset to the first would be automatically filled based on matching strings of REGION
Based on another question in the forum I tried the following function:
will_can2 <- merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE)
But I get the following error:
Error in fix.by(by.y, y) :
'by' must specify a unique correct column [translated from French]
Is there something I'm missing here? Would be grateful for some help !
Cameron

For your merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE) command to work, both data.frames would need a column called REGION. In your example:
will_can doesn't have this column, but it does have one called will_can.REGION.
You've extracted a single column from will_can_norm called norm, and tried to merge based on that single column. Unfortunately, the merge() command never sees the REGION column of will_can_norm.
In your case, try something like
merge(will_can, will_can_region_norm, by.x = "will_can.REGION", by.y="REGION", all=TRUE)

How do I build a UK postcode area map in R?

Input
I have count data by first 2 letter UK postcode in this form:
Postcode Count
BD 45
DE 123
L8 90
Desired output
I would like to create a postcode map using a shapefile based on the 2 letter postcode, and colour the map based on count, similar to this:
My question is, how would I be able to produce a map like this in R?

Similar result using the same Shapefile but plotting with ggplot:
library(ggplot2)
library(rgdal)
library(maptools)
if (!require(gpclib)) install.packages("gpclib", type="source");library(gpclib)
gpclibPermit() # Gives maptool permisssion to use gpclib
# Download UK postcode polygon Shapefile
download.file(
"http://www.opendoorlogistics.com/wp-content/uploads/Data/UK-postcode-boundaries-Jan-2015.zip",
"postal_shapefile"
)
unzip("postal_shapefile")
# Read the downloaded Shapefile from disk
postal <- maptools::readShapeSpatial("./Distribution/Areas")
# Assign each "region" an unique id
postal.count <- nrow(postal#data)
postal#data$id <- 1:postal.count
# Transform SpatialPolygonsDataFrame to regular data.frame in ggplot format
postal.fort <- ggplot2::fortify(postal, region='id')
# Generate random data for each postal area
some_area_codes <- c("AB","AL","B","BA","BB","BD","BH","BL","BN","BR","BS","CA","CB","CF","CH","CM","CO","CR","CT","CV","CW","DA","DD","DE","DG","DH","DL","DN","DT","DY","E","EC","EH","EN","EX","FK","FY","G","GL","GU","HA","HD","HG","HP","HR","HS","HU","HX","IG","IP","IV","KA","KT","KW","KY","L","LA","LD","LE","LL","LN","LS","LU","M","ME","MK","ML","N","NE","NG","NN","NP","NR","NW","OL","OX","PA","PE","PH","PL","PO","PR","RG","RH","RM","S","SA","SE","SG","SK","SL","SM","SN","SO","SP","SR","SS","ST","SW","SY","TA","TD","TF","TN","TQ","TR","TS","TW","UB","W","WA","WC","WD","WF","WN","WR","WS","WV","YO","ZE","BT","GY","IM","JE")
df <- data.frame(postal_area_code=some_area_codes, freq=sample.int(100, length(some_area_codes), replace=TRUE))
# Add "region" id to frequency data
df <- merge(df, postal#data, by.x="postal_area_code", by.y="name")
# Merge frequency data onto geogrphical postal polygons
postal.fort <- merge(postal.fort, df, by="id", all.x=T, all.y=F)
postal.fort <- postal.fort[order(postal.fort$order),] # Reordering since ggplot expect data.fram in same order as "order" column
ggplot(postal.fort) +
geom_polygon(aes(x = long, y = lat, group = group, fill=freq), colour='white') +
coord_fixed()

Is this what you had in mind? Make sure that you have a value for every postal code and that the column containing the name of each postal code is called name.
library(tidyverse)
library(maptools)
library(raster)
library(plotrix)
# Generate dummy data
dta <-
tibble(
name = c(
"AB",
"AL",
"B",
"BA",
"BB",
"BD",
"BH",
"BL",
"BN",
"BR",
"BS",
"CA",
"CB",
"CF",
"CH",
"CM",
"CO",
"CR",
"CT",
"CV",
"CW",
"DA",
"DD",
"DE",
"DG",
"DH",
"DL",
"DN",
"DT",
"DY",
"E",
"EC",
"EH",
"EN",
"EX",
"FK",
"FY",
"G",
"GL",
"GU",
"HA",
"HD",
"HG",
"HP",
"HR",
"HS",
"HU",
"HX",
"IG",
"IP",
"IV",
"KA",
"KT",
"KW",
"KY",
"L",
"LA",
"LD",
"LE",
"LL",
"LN",
"LS",
"LU",
"M",
"ME",
"MK",
"ML",
"N",
"NE",
"NG",
"NN",
"NP",
"NR",
"NW",
"OL",
"OX",
"PA",
"PE",
"PH",
"PL",
"PO",
"PR",
"RG",
"RH",
"RM",
"S",
"SA",
"SE",
"SG",
"SK",
"SL",
"SM",
"SN",
"SO",
"SP",
"SR",
"SS",
"ST",
"SW",
"SY",
"TA",
"TD",
"TF",
"TN",
"TQ",
"TR",
"TS",
"TW",
"UB",
"W",
"WA",
"WC",
"WD",
"WF",
"WN",
"WR",
"WS",
"WV",
"YO",
"ZE",
"BT",
"GY",
"IM",
"JE"
),
value = rnorm(124)
)
# Make sure your postal codes are stored in a column called name
# Example:
# dta <- rename(dta, name = name)
# OPTIONAL: Depending on your data, you may need to rescale it for the color ramp to work
dta$value <- rescale(dta$value, newrange = c(0, 1))
# Download a shapefile of postal codes into your working directory
download.file(
"http://www.opendoorlogistics.com/wp-content/uploads/Data/UK-postcode-boundaries-Jan-2015.zip",
"postal_shapefile"
)
# Unzip the shapefile
unzip("postal_shapefile")
# Read the shapefile
postal <- readShapeSpatial("./Distribution/Areas")
# Join your data to the shapefile
postal <- raster::merge(postal, dta, by = "name")
# Use the gray function to determine the proper black-and-white color for each postal code
plot(postal, col = gray(postal$value))

Trouble using gcheckboxgroup - use.table =TRUE (gWigets) (R)

I have a long list of elements that I want to display vertically as checkboxes in my GUI.
Here is my code:
library(gWidgets)
library(gWidgetstcltk)
window <- gwindow("ITRAX Data Analysis")
Frame_Elements <- gframe( text="Elements", horizontal=FALSE, container=window)
Group7 <-ggroup(container = Frame_Elements, horizontal = TRUE)
Element_Options <- c( "Ac","Ag","Al", "Ar", "As", "At", "Au", "Ba", "Bi", "Br", "Ca", "Cd", "Ce", "Cl", "Co", "Cr", "Cs", "Cu",
"Dy", "Er", "Eu", "Fe", "Fr", "Ga", "Gd", "Ge", "Hf", "Hg", "Ho", "I", "In", "Ir", "K", "La", "Lu", "Mg",
"Mn", "Mo", "Nb", "Nd", "Ni", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Ra", "Rb", "Re", "Rh",
"Ru", "S", "Sb", "Sc", "Si", "Sm", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm", "U","V", "W", "Y",
"Yb", "Zn", "Zr")
Choose_Elements <- gcheckboxgroup(Element_Options, container = Group7, horizontal = FALSE, checked = FALSE, use.table = TRUE)
My problem is that the scroll table does not always open when I run the code. Sometimes I need to run it multiple times (without actually editing the code in any way) in order for the scroll box to appear. In other words, the window and frame of the GUI will appear, but will be empty.
Also, when the scroll box does open, the scroll bar doesn't actually work.
Any help with resolving this issue would be highly appreciated.

Develop Reference

r css asp.net wordpress firebase qt symfony nginx http apache-flex

Classifying the words as per emotions & counts in the song lyrics - r

Related

Creating a tdm with only one variable

Getting simple table to a word list with Kable

Filling in the values of a column based on matching strings from the column of another dataset [duplicate]

How do I build a UK postcode area map in R?

Trouble using gcheckboxgroup - use.table =TRUE (gWigets) (R)

Categories

Resources