Getting simple table to a word list with Kable - r

I am trying to get a simple table for a list of strings (words) using kable in an RStudio markdown document. With MS Word I get this example:
I am trying to get this with RStudio/Markdown in different ways, but I only get this:
library(kableExtra)

# Attempt 1: a one-column data frame; kable() renders it as a vertical table.
# (Renamed from `table` to avoid shadowing base::table(); use `<-` for assignment.)
word_table <- data.frame(words = c("uno", "dos", "tres", "cuatro", "cinco"))
kable(word_table)

# Attempt 2: as.vector() on a data frame returns a list, so this renders
# the same vertical layout as the data frame above.
word_table2 <- as.vector(word_table)
kable(word_table2)

# Attempt 3: a plain character vector is also rendered as a single column.
word_table3 <- c("uno", "dos", "tres", "cuatro", "cinco")
kable(word_table3)
EDIT (I add another example) :
#manro's reply works fine, but does not work for a long list of words.
# NOTE(review): the original paste contained garbled tokens (stray/unbalanced
# quotes such as `"_"son"`, `"h"sobre"`, `"coste,`, bare `s"`, `emas"`); they
# have been repaired into plausible single words so the code parses — confirm
# against the original data.
table_long <- data.frame(words = c(
  "uno", "hoy", "a", "al", "desconcierto", "nos", "estan", "por", "era",
  "otra", "con", "de", "hacia", "son", "sobre", "hasta", "quienes", "tratar",
  "nuestro", "y", "porque", "su", "eres", "otro", "al", "mi", "aos", ".",
  "vueltas", "", "hacer", "utilizando", "haciendo", "estimado", "realizando",
  "baja", "hago", "existen", "resto", "traves", "todas", "realice", "saber",
  "posteriormente", "presentaron", "mandarlo", "veces", "ya", "posee", "b",
  "cosas", "hechos", "ud", "durante", "eran", "coste", "de", "o", "que",
  "todo", "te", "ni", "tu", "antes", "pag", "bajo", "asi", "fue", "una",
  "muy", "les", "ha sido", "desde", "puede", "a", "rso", "sugiero",
  "oceanografia", "grabe", "ella", "casi", "fin", "da", "sr", "ahora",
  "sido", "la", "a", "en", "y", "seguir", "nada", "pues", "pudo",
  "profesor", "profesora", "punto", "profe", "temas", "unos", "dia",
  "gabriele", "a", "al", "de", "vuelven", "hizo", "de", "de", "que", "se",
  "y", "a", "un", "a", "al ", " al ", "ha", "no", "se", "los", "por", "al",
  "yo", "enviar", "usted", "cristian", "s", "del", "lo", "es", "para",
  "como", "direccion", "toda", "toco", "ser", "este", "pero", "donde",
  "el", "entre", "estos", "antes", "s", "ya", "o", "e", "mas", "dos",
  "han", "dirección", "decir", "le", "ex", "ante", "tras", "http",
  "noviembre", "borde", "segun", "quien", "emas", "sobre", "esta", "dijo",
  "debe", "tengan", "parte", "tips", "tipo", "habia", "solo", "mil",
  "mucha", "mucho", "tener", "tiene", "si", "me", "ese", "eso", "hace",
  "hablaba", "ellos", "esto", "cuando", "esa", "va", "nos", "cada", "ahi",
  "tus", "uso", "dafne", "tanto", "vez", "año", "años", "tienen", "mayor",
  "hyperlink", "tenian", "tenia", "tan", "sea", "podria", "aunque",
  "mismo", "creo", "n", "tres", "ayer", "otros", "tambien", "https", "p",
  "nop", "dado", "alguna", "algunas", "estaba", "deberia", "cualquier",
  "hora", "poquito", "parecen", "grande", "hilo", "cerrar", "paso",
  "algo", "ocasion", "respecto", "sola", "segundo", "llegar", "parecer",
  "dice", "todos", "luego", "c", "fueron", "mientras", "pag", "alfo",
  "dentro", "alla", "cual", "caso", "dejado", "haces", "estar", "dan",
  "vaya", "pocos", "nuestra", "una", "hemos", "estamos", "decia",
  "tuviera", "aveces", "tenido", "u", "vemos", "he", "aula", "mar",
  "topado", "sino", "cabo", "ac", "esas", "alfinal", "dante", "dan",
  "hofer", "juan", "deben", "estas", "ti", "quizas", "valio", "tenga",
  "siga", "pone", "via", "etc", "èl", "fui", "den", "ah", "ir", "tuve",
  "claudio", "has", "joaquim"
))

You can try this:
```{r, echo = FALSE}
library(kableExtra)
library(data.table)

# Build a one-column data frame (one word per row), then transpose it so the
# words appear as a single horizontal row. The original passed one
# comma-joined string ("uno, dos, ...") to data.frame(), which makes a 1x1
# cell — transpose() then still yields a single cell, not one column per word.
table3 <- data.frame(words = c("uno", "dos", "tres", "cuatro", "cinco"))
table_t3 <- transpose(table3)
colnames(table_t3) <- NULL  # drop the auto-generated V1..V5 column names

# booktabs = TRUE spelled out (T is a reassignable variable, not a keyword).
kable(table_t3, format = "latex", align = "|c|", booktabs = TRUE) %>%
  kable_styling(latex_options = c("striped", "hold_position"))
```
For the long list:
add the full_width = TRUE to the kable_styling
kable_styling(latex_options =c("striped", "hold_position"), full_width = TRUE)
But borders don't work in this case.

Related

Classifying the words as per emotions & counts in the song lyrics

library(rvest)
library(dplyr)
library(tidyr)
library(spotifyr)
library(tidytext)
library(textdata)
Using the above libraries I scraped artist data from Spotify using the API token.
I've got the data of words with sentiments (i.e. anger let's say) and the details about the songs.
I now want to run it in loop over multiple such word category (eg. anger) and see which words are most used in that particular emotion, and in general too I want to plot a histogram for the words used in the songs.
So I use the following functions:
data %>%
  # The nested Spotify metadata list-columns are not used by this summary,
  # and unnesting them together is what fails: per the warning below, modern
  # tidyr requires `cols = c(album_images, artists, available_markets)`, and
  # those columns have different per-row lengths (e.g. 3 image sizes vs. 1
  # artist), which triggers "can't recycle input of size 3 to size 2".
  # Dropping them avoids both problems.
  select(-c(album_images, artists, available_markets)) %>%
  # One row per word of each track's lyrics.
  unnest_tokens(word, lyric) %>%
  # Remove common stop words, then tag words found in the anger lexicon.
  anti_join(stop_words, by = "word") %>%
  left_join(angry_words, by = "word") %>%
  # Count angry words per track; na.rm drops words absent from the lexicon.
  group_by(track_name, energy, album_name, duration_ms, valence) %>%
  summarize(angry_words = sum(anger, na.rm = TRUE)) %>%
  ungroup() %>%
  select(track_name, album_name, angry_words) %>%
  arrange(desc(angry_words))
Every-time I run the code I get the following error:
Error in `fn()`:
! In row 64, can't recycle input of size 3 to size 2.
Run `rlang::last_error()` to see where the error occurred.
Warning message:
`cols` is now required when using unnest().
Please use `cols = c(album_images, artists, available_markets)`
All suggestions will be helpful.
Here the data and angry_words data frames are:
data <- structure(list(artist_name = c("María José Llergo", "María José Llergo"
), artist_id = c("70GBRlKEGjfueop2lfdQ4Q", "70GBRlKEGjfueop2lfdQ4Q"
), album_id = c("6BMyWViSAgXtUVlPfXiGES", "6BMyWViSAgXtUVlPfXiGES"
), album_type = c("album", "album"), album_images = list(structure(list(
height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L)), structure(list(height = c(640L, 300L, 64L), url = c("https://i.scdn.co/image/ab67616d0000b2735f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d00001e025f3d845e18e06df1bbe95178",
"https://i.scdn.co/image/ab67616d000048515f3d845e18e06df1bbe95178"
), width = c(640L, 300L, 64L)), class = "data.frame", row.names = c(NA,
3L))), album_release_date = c("2020-01-31", "2020-01-31"), album_release_year = c(2020,
2020), album_release_date_precision = c("day", "day"), danceability = c(0.612,
0.5), energy = c(0.342, 0.267), key = c(4L, 7L), loudness = c(-9.193,
-11.736), mode = 0:1, speechiness = c(0.0419, 0.0448), acousticness = c(0.358,
0.815), instrumentalness = c(0.000502, 2.66e-06), liveness = c(0.257,
0.0981), valence = c(0.122, 0.264), tempo = c(99.993, 114.192
), track_id = c("7pB0e4E78UfAmKBPzQPo8a", "1sgH6adzL1BBaIXRC7NOYI"
), analysis_url = c("https://api.spotify.com/v1/audio-analysis/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/audio-analysis/1sgH6adzL1BBaIXRC7NOYI"
), time_signature = 3:4, artists = list(structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L),
structure(list(href = "https://api.spotify.com/v1/artists/70GBRlKEGjfueop2lfdQ4Q",
id = "70GBRlKEGjfueop2lfdQ4Q", name = "María José Llergo",
type = "artist", uri = "spotify:artist:70GBRlKEGjfueop2lfdQ4Q",
external_urls.spotify = "https://open.spotify.com/artist/70GBRlKEGjfueop2lfdQ4Q"), class = "data.frame", row.names = 1L)),
available_markets = list(c("AD", "AE", "AG", "AL", "AM",
"AO", "AR", "AT", "AU", "AZ", "BA", "BB", "BD", "BE", "BF",
"BG", "BH", "BI", "BJ", "BN", "BO", "BR", "BS", "BT", "BW",
"BY", "BZ", "CA", "CD", "CG", "CH", "CI", "CL", "CM", "CO",
"CR", "CV", "CW", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO",
"DZ", "EC", "EE", "EG", "ES", "FI", "FJ", "FM", "FR", "GA",
"GB", "GD", "GE", "GH", "GM", "GN", "GQ", "GR", "GT", "GW",
"GY", "HK", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN",
"IQ", "IS", "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI",
"KM", "KN", "KR", "KW", "KZ", "LA", "LB", "LC", "LI", "LK",
"LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "ME",
"MG", "MH", "MK", "ML", "MN", "MO", "MR", "MT", "MU", "MV",
"MW", "MX", "MY", "MZ", "NA", "NE", "NG", "NI", "NL", "NO",
"NP", "NR", "NZ", "OM", "PA", "PE", "PG", "PH", "PK", "PL",
"PS", "PT", "PW", "PY", "QA", "RO", "RS", "RW", "SA", "SB",
"SC", "SE", "SG", "SI", "SK", "SL", "SM", "SN", "SR", "ST",
"SV", "SZ", "TD", "TG", "TH", "TJ", "TL", "TN", "TO", "TR",
"TT", "TV", "TW", "TZ", "UA", "UG", "US", "UY", "UZ", "VC",
"VE", "VN", "VU", "WS", "XK", "ZA", "ZM", "ZW"), c("AD",
"AE", "AG", "AL", "AM", "AO", "AR", "AT", "AU", "AZ", "BA",
"BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BN", "BO",
"BR", "BS", "BT", "BW", "BY", "BZ", "CA", "CD", "CG", "CH",
"CI", "CL", "CM", "CO", "CR", "CV", "CW", "CY", "CZ", "DE",
"DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "ES", "FI",
"FJ", "FM", "FR", "GA", "GB", "GD", "GE", "GH", "GM", "GN",
"GQ", "GR", "GT", "GW", "GY", "HK", "HN", "HR", "HT", "HU",
"ID", "IE", "IL", "IN", "IQ", "IS", "IT", "JM", "JO", "JP",
"KE", "KG", "KH", "KI", "KM", "KN", "KR", "KW", "KZ", "LA",
"LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY",
"MA", "MC", "MD", "ME", "MG", "MH", "MK", "ML", "MN", "MO",
"MR", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", "NE",
"NG", "NI", "NL", "NO", "NP", "NR", "NZ", "OM", "PA", "PE",
"PG", "PH", "PK", "PL", "PS", "PT", "PW", "PY", "QA", "RO",
"RS", "RW", "SA", "SB", "SC", "SE", "SG", "SI", "SK", "SL",
"SM", "SN", "SR", "ST", "SV", "SZ", "TD", "TG", "TH", "TJ",
"TL", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG",
"US", "UY", "UZ", "VC", "VE", "VN", "VU", "WS", "XK", "ZA",
"ZM", "ZW")), disc_number = c(1L, 1L), duration_ms = c(197316L,
313028L), explicit = c(FALSE, FALSE), track_href = c("https://api.spotify.com/v1/tracks/7pB0e4E78UfAmKBPzQPo8a",
"https://api.spotify.com/v1/tracks/1sgH6adzL1BBaIXRC7NOYI"
), is_local = c(FALSE, FALSE), track_name = c("¿De Qué Me Sirve Llorar?",
"Niña De Las Dunas"), track_preview_url = c("https://p.scdn.co/mp3-preview/1ed3fba536f1813af99c88f69893dfe6272df847?cid=cf686ca455c74783b8f27d0c35dfc5b0",
"https://p.scdn.co/mp3-preview/e4f9386ef79ff5027800aa9ccd8560a622df28d0?cid=cf686ca455c74783b8f27d0c35dfc5b0"
), track_number = 1:2, type = c("track", "track"), track_uri = c("spotify:track:7pB0e4E78UfAmKBPzQPo8a",
"spotify:track:1sgH6adzL1BBaIXRC7NOYI"), external_urls.spotify = c("https://open.spotify.com/track/7pB0e4E78UfAmKBPzQPo8a",
"https://open.spotify.com/track/1sgH6adzL1BBaIXRC7NOYI"),
album_name = c("Sanación", "Sanación"), key_name = c("E",
"G"), mode_name = c("minor", "major"), key_mode = c("E minor",
"G major")), row.names = 1:2, class = "data.frame")
angry_words <- structure(list(word = c("abandoned", "abandonment", "abhor",
"abhorrent", "abolish", "abomination", "abuse", "accursed", "accusation",
"accused", "accuser", "accusing", "actionable", "adder", "adversary",
"adverse", "adversity", "advocacy", "affront", "aftermath", "aggravated",
"aggravating", "aggravation", "aggression", "aggressive", "aggressor",
"agitated", "agitation", "agony", "alcoholism", "alienate", "alienation",
"allegation", "altercation", "ambush", "anarchism", "anarchist",
"anarchy", "anathema", "anger", "angry", "anguish", "animosity",
"animus", "annihilate", "annihilated", "annihilation", "annoy",
"annoyance", "annoying", "antagonism", "antagonist", "antagonistic",
"antichrist", "antipathy", "antisocial", "antithesis", "anxiety",
"argue", "argument", "argumentation", "arguments", "armament",
"armed", "arraignment", "arrogant", "arson", "assail", "assailant",
"assassin", "assassinate", "assassination", "assault", "asshole",
"atrocious", "atrocity", "attack", "attacking", "attorney", "avarice"
), anger = c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
TRUE, TRUE, TRUE, TRUE, TRUE, TRUE)), row.names = c(NA, -80L), class = c("tbl_df",
"tbl", "data.frame"))

Creating a tdm with only one variable

So I need to prepare a term-document-matrix for each of the sets of text I want to subsequently run against each other in a classification procedure (rolling.classify() in Stylo package).
So I created a tdm of the whole text corpus, then want to make two sets of selected texts only, one of which should contain a single text. Multiple texts work fine (a), but a single text does not (b) — can I not do this?
# Build a frequency list of the 265 most frequent features, then a
# document-term matrix over the whole corpus (stylo package helpers).
freq.list <- make.frequency.list(words, head = 265) # Creating frequency list using only the frequencies of the
# selected features from word-list (words)
word.frequencies <- make.table.of.frequencies(corpus = x, features = freq.list)
# Document-term matrix of whole corpus and matching frequencies.
# Making two subsets now:
a <- word.frequencies[c(1,2,3,17,19,20,21,22,23), 1:263]
dim(a) # Double-check that it is the right no. of texts
# NOTE(review): selecting a single row with `[` drops dimensions, so `b`
# becomes a plain vector and dim(b) returns NULL; adding `drop = FALSE`
# keeps a 1-row matrix.
b <- word.frequencies[18,1:263]
dim(b) # Double-check
> dim(a)
[1] 9 263
> dim(b)
NULL
data:
(used dput())
x <- structure(list(middleFr_Calmative_1946 = c("the", "calmative",
"i", "don’t", "know", "when", "i", "died", ".", "it", "always",
"seemed", "to", "me", "i", "died", "old", ",", "about", "ninety",
"years", "old", ",", "and", "what", "years", ",", "and", "that",
"my", "body", "bore", "it", "out", ",", "from", "head", "to",
"foot", ".", "but", "this", "evening", ",", "alone", "in", "my",
"icy", "bed", ",", "i", "have", "the", "feeling", "i’ll", "be",
"older", "than", "the", "day", ",", "the", "night", ",", "when",
"the", "sky", "with", "all", "its", "lights", "fell", "upon",
"me", ",", "the", "same", "i", "had", "so", "often", "gazed",
"resolved", "to", "speak", "to", "him", ".", "so", "i", "marshalled",
"the", "words", "and", "opened", "my", "mouth", ",", "thinking",
"i", "would", "hear", "them", ".", "but", "all", "i", "heard",
"was", "a", "kind", "of", "rattle", ",", "unintelligible", "even",
"have", "a", "penny", "in", "my", "pocket", ",", "nor", "anything",
"resembling", "it", "."), middleFr_End_1946 = c("the", "end",
"they", "clothed", "me", "and", "gave", "me", "money", ".", "i",
"back", "mine", ".", "i", "added", ",", "give", "me", "back",
"my", "greatcoat", ".", "they", "replied", "that", "they", "had",
"burnt", "them", ",", "together", "with", "my", "other", "clothes",
".", "i", "understood", "then", "that", "the", "end", "was",
"near", ",", "at", "least", "fairly", "near", ".", "later", "on",
"i", "tried", "to", "exchange", "this", "hat", "for", "a", "cap",
",", "or", "a", "slouch", "which", "could", "be", "pulled", "down",
"over", "my", "face", ",", "but", "without", "much", "success",
".", "and", "yet", "i", "could", "not", "go", "about", "bare",
"-", "headed", ",", "with", "my", "skull", "in", "the", "state",
"it", "was", ".", "at", "first", "this", "hat", "was", "too",
"small", ",", "then", "it", "got", "used", "to", "me", ".", "they",
"gave", "me", "a", "tie", ",", "after", "long", "discussion",
".", "it", "seemed", "a", "pretty", "tie", "to", "me", ",", "but",
"i", "didn’t", "like", "it", ".", "when", "it", "came", "at",
"last", "i", "was", "too", "tired", "to", "send", "it", "back",
".", "but", "in", "the", "end", "it", "came", "in", "useful",
".", "it", "was", "blue", ",", "with", "kinds", "of", "little",
"stars", ".", "i", "didn’t", "feel", "well", ",", "but", "they",
"told", "me", "i", "was", "well", "enough", "."), middleFr_Expelled_1946 = c("the",
"expelled", "there", "were", "not", "many", "steps", ".", "i",
"had", "counted", "them", "a", "thousand", "times", ",", "both",
"going", "up", "and", "coming", "down", ",", "but", "the", "figure",
"has", "gone", "from", "my", "mind", ".", "i", "have", "never",
"known", "whether", "you", "should", "say", "one", "with", "your",
"every", "day", "several", "times", "a", "day", ",", "until",
"they", "sink", "forever", "in", "the", "mud", ".", "that’s",
"an", "order", ".")), class = "stylo.corpus", call = load.corpus.and.parse(files = "all",
corpus.dir = "x", markup.type = "plain", corpus.lang = "English.all",
splitting.rule = ("[ \t\n]+"), sampling = "no.sampling",
features = "w", ngram.size = 1, preserve.case = FALSE, encoding = "UTF-8"))
freq.list <- c("", "-", "—", ",", ";", ":", "!", "?", ".", "’", "\"",
"(", ")", "a", "about", "above", "across", "after", "again",
"against", "ah", "all", "almost", "along", "Already", "also",
"always", "am", "among", "an", "and", "another", "any", "anything",
"are", "as", "at", "away", "back", "be", "because", "been", "before",
"behind", "being", "best", "better", "between", "beyond", "both",
"but", "by", "came", "can", "can't", "can’t", "cannot", "come",
"comes", "could", "did", "didn’t", "different", "do", "does",
"doing", "don't", "don’t", "done", "down", "each", "either",
"else", "even", "ever", "every", "everything", "except", "far",
"few", "fifteen", "first", "five", "for", "forward", "four",
"from", "get", "go", "goes", "going", "got", "great", "had",
"half", "has", "have", "having", "he", "her", "here", "herself",
"him", "himself", "his", "how", "however", "hundred", "i", "i'll",
"i'm", "i’ll", "if", "in", "indeed", "instead", "into", "is",
"it", "it's", "it’s", "its", "itself", "just", "last", "late",
"least", "left", "less", "let", "like", "little", "long", "made",
"make", "many", "may", "me", "merely", "might", "mine", "more",
"most", "moved", "much", "must", "my", "myself", "near", "neither",
"never", "next", "no", "none", "nor", "not", "nothing", "now",
"of", "off", "often", "oh", "on", "once", "one", "only", "or",
"other", "others", "otherwise", "our", "out", "over", "own",
"perhaps", "place", "quite", "rather", "really", "right", "said",
"same", "say", "second", "shall", "she", "should", "since", "six",
"small", "so", "some", "someone", "something", "sometimes", "somewhere",
"soon", "still", "such", "ten", "than", "that", "that's", "that’s",
"the", "their", "them", "themselves", "then", "there", "therefore",
"these", "they", "thing", "things", "third", "this", "those",
"though", "three", "through", "thus", "till", "time", "times",
"to", "together", "too", "towards", "two", "under", "unless",
"until", "up", "upon", "us", "very", "was", "way", "we", "well",
"went", "were", "what", "whatever", "when", "where", "whether",
"which", "while", "who", "whom", "whose", "why", "will", "with",
"within", "without", "won't", "would", "yes", "yet", "you", "your",
"yourself")
You can do:
# `[` on a matrix drops dimensions when a single row is selected;
# drop = FALSE keeps `b` as a 1 x 263 matrix. FALSE is spelled out —
# T/F are ordinary variables that can be reassigned, not reserved words.
b <- word.frequencies[18, 1:263, drop = FALSE]
dim(b)
# [1] 1 263

Filling in the values of a column based on matching strings from the column of another dataset [duplicate]

This question already has answers here:
How to join (merge) data frames (inner, outer, left, right)
(13 answers)
Closed 2 years ago.
I am working with the following two datasets :
will_can
structure(list(will_can.REGION = c("AB", "B", "B", "B", "BB",
"BB", "BD", "BH", "BH", "BH", "BR", "BS", "BS", "BT", "BT", "CF",
"CF", "CM", "CO", "CV", "CV", "CV", "CW", "DA", "DA", "DD", "DE",
"DE", "DG", "DG", "DG", "DG", "DL", "DN", "DT", "E", "E", "E",
"EH", "EH", "EH", "EH", "EH", "EH", "EH", "EX", "EX", "EX", "FK",
"FK", "FY", "G", "G", "G", "GL", "GL", "HA", "HD", "HD", "IV",
"KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA", "KA",
"KA", "KA", "KA", "KA", "KA", "KA", "KT", "KY", "KY", "KY", "L",
"L", "L", "LA", "LA", "LE", "LE", "M", "M", "ME", "ME", "MK",
"ML", "N", "N", "N", "NE", "NG", "NN", "NN", "NR", "NW", "OL",
"OX", "OX", "PH", "PO", "PR", "RG", "RH", "RM", "RM", "S", "S",
"S", "S", "SA", "SE", "SE", "SE", "SE", "SE", "SG", "SL", "SN",
"SN", "SO", "SO", "SO", "SS", "ST", "ST", "ST", "ST", "SW", "SW",
"SW", "SW", "SY", "SY", "SY", "TA", "TD", "TD", "TN", "TW", "UB",
"UB", "W", "W", "W", "W", "WA", "WC", "WD")), class = "data.frame", row.names = c(NA,
-156L))
will_can_region_norm
structure(list(norm = c(67.3112073766083, 0, 62.9924341677094,
0, 134.940019161483, 86.0271073135687, 233.710968710152, 0, 0,
136.210220315945, 72.0106074505199, 54.9624828839958, 0, 0, 46.5061888459603,
0, 51.9149234846709, 85.3970454501009, 0, 0, 141.438961332615,
122.50716299382, 197.887432921107, 96.646567080111, 108.996678489718,
873.779493880704, 0, 109.106806944561, 56.7421763178016, 249.99781251914,
0, 106.993398828272, 0, 182.997053590583, 0, 225.716259764203,
217.655353412983, 98.8344746903195, 70.3435951664196, 106.870878390986,
0, 0, 113.255439262354, 226.344150395729, 0, 0, 0, 0, 0, 0, 0,
0, 0, 92.5698187029358, 0, 1159.88543061088, 59.5746039659052,
0, 217.977759293264, 88.627745595238, 155.299651064979, 0, 70.3301130229532,
0, 0, 0, 0, 36.166169734453, 162.12380892704, 74.7710230881704,
112.29824076945, 120.249189991435, 25.6209421071498, 36.7120335621411,
115.238964414265, 0, 50.4621322067494, 59.9490876378327, 82.9160720202368,
132.342362545417, 0, 0, 209.987774511768, 0, 45.0104437732687,
59.5244437425851, 54.7420581590574, 77.921490980977, 132.545922191567,
100.083647410414, 51.5757713324224, 102.602449571922, 98.8984492920948,
0, 129.885834248271, 0, 189.332549749021, 149.846130500895, 0,
0, 73.4653456617979, 220.103517986062, 111.317004279081, 375.711503660056,
156.229153172374, 760.35739839154, 0, 83.1515916711375, 0, 0,
0, 73.5483180088058, 269.518568414391, 102.141462145838, 55.2886923953334,
151.949727736478, 148.297412239816, 0, 0, 0, 0, 0, 0, 0), REGION = c("AB",
"AL", "B", "BA", "BB", "BD", "BH", "BL", "BN", "BR", "BS", "BT",
"CA", "CB", "CF", "CH", "CM", "CO", "CR", "CT", "CV", "CW", "DA",
"DD", "DE", "DG", "DH", "DL", "DN", "DT", "DY", "E", "EC", "EH",
"EN", "EX", "FK", "FY", "G", "GL", "GU", "GY", "HA", "HD", "HG",
"HP", "HR", "HS", "HU", "HX", "IG", "IM", "IP", "IV", "JE", "KA",
"KT", "KW", "KY", "L", "LA", "LD", "LE", "LL", "LN", "LS", "LU",
"M", "ME", "MK", "ML", "N", "NE", "NG", "NN", "NP", "NR", "NW",
"OL", "OX", "PA", "PE", "PH", "PL", "PO", "PR", "RG", "RH", "RM",
"S", "SA", "SE", "SG", "SK", "SL", "SM", "SN", "SO", "SP", "SR",
"SS", "ST", "SW", "SY", "TA", "TD", "TF", "TN", "TQ", "TR", "TS",
"TW", "UB", "W", "WA", "WC", "WD", "WF", "WN", "WR", "WS", "WV",
"YO", "ZE")), row.names = c(NA, -124L), class = "data.frame")
I am trying to add a new column, will_can$norm, which would contain values of will_can_region_norm$norm based on matching values of the variable "REGION" which is the same in both datasets. So gaps from the second dataset to the first would be automatically filled based on matching strings of REGION
Based on another question in the forum I tried the following function:
will_can2 <- merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE)
But I get the following error:
Error in fix.by(by.y, y) :
'by' must specify a unique correct column [translated from French]
Is there something I'm missing here? Would be grateful for some help !
Cameron
For your merge(will_can, will_can_region_norm[,"norm"], by = "REGION", all=TRUE) command to work, both data.frames would need a column called REGION. In your example:
will_can doesn't have this column, but it does have one called will_can.REGION.
You've extracted a single column from will_can_norm called norm, and tried to merge based on that single column. Unfortunately, the merge() command never sees the REGION column of will_can_norm.
In your case, try something like
merge(will_can, will_can_region_norm, by.x = "will_can.REGION", by.y="REGION", all=TRUE)

Convert string categorical data in data frame to Numeric data

I have the following values (800) in my data frame
cat1 <- c("bi", "bt", "ch", "fs", "hc", "lh", "mo", "ms", "nn", "ro", "sc", "si", "so", "ti", "ww", "dt", "3et", "a", "a", "a", "a", "a", "a", "aam", "aao", "ac", "acs", "aeo", "aeq", "afm", "aic", "aio", "akq", "am", "am", "am", "am", "amc", "amc", "aoq", "aoq", "aot", "apm", "apo", "apo", "aqf", "ass", "ata", "ata", "atc", "atf", "atq", "atr", "aun", "bae", "baf", "bai", "bcm", "bcs", "bea", "bee", "bef", "bem", "bem", "bem", "bem", "bem", "beo", "beo", "beq", "beq", "beq", "bhm", "bkr", "bm", "bm", "bme", "bmm", "bmm", "bmo", "bmq", "bmq", "brm", "brm", "brq", "bsm", "bsm", "bsm", "bsm", "bso", "bta", "bwa", "clm", "dd", "dm", "ne", "pp", "pv", "rt", "se", "sw")
I want to replace all string values with numeric values so that I can feed them in a neural network eg I want all "am" to be replaced with 5 or 0.5 and all "bem" to be replaced with 7 or 0.7 means according to some logic.
I tried many things but wasn't able to achieve anything.
If you know what the replacement rule is, you can establish a dictionary and use a named vector for lookup.
For instance,
cat1 <- c("bem", "am", "am", "bem", "am")
# Named-vector lookup table: names are the categories, values the numeric
# codes. setNames() is the idiomatic spelling of the original
# `names<-`(c(7, 5), c("bem", "am")) call — same result, clearer intent.
dict <- setNames(c(7, 5), c("bem", "am"))
# Indexing a named vector by a character vector performs the replacement.
res <- dict[cat1]
and you will get
> res
bem am am bem am
7 5 5 7 5

Trouble using gcheckboxgroup - use.table =TRUE (gWigets) (R)

I have a long list of elements that I want to display vertically as checkboxes in my GUI.
Here is my code:
# GUI: a window containing a framed, vertically laid-out checkbox group of
# chemical element symbols (gWidgets with the tcltk toolkit backend).
library(gWidgets)
library(gWidgetstcltk)
window <- gwindow("ITRAX Data Analysis")
Frame_Elements <- gframe( text="Elements", horizontal=FALSE, container=window)
Group7 <-ggroup(container = Frame_Elements, horizontal = TRUE)
# Element symbols offered as the checkbox choices.
Element_Options <- c( "Ac","Ag","Al", "Ar", "As", "At", "Au", "Ba", "Bi", "Br", "Ca", "Cd", "Ce", "Cl", "Co", "Cr", "Cs", "Cu",
"Dy", "Er", "Eu", "Fe", "Fr", "Ga", "Gd", "Ge", "Hf", "Hg", "Ho", "I", "In", "Ir", "K", "La", "Lu", "Mg",
"Mn", "Mo", "Nb", "Nd", "Ni", "Os", "P", "Pa", "Pb", "Pd", "Pm", "Po", "Pr", "Pt", "Ra", "Rb", "Re", "Rh",
"Ru", "S", "Sb", "Sc", "Si", "Sm", "Sr", "Ta", "Tb", "Tc", "Te", "Th", "Ti", "Tl", "Tm", "U","V", "W", "Y",
"Yb", "Zn", "Zr")
# use.table = TRUE asks for a scrollable table of checkboxes.
# NOTE(review): per the question text below, with the tcltk backend this
# widget sometimes fails to draw on first run and the scroll bar may not
# respond — presumably a toolkit timing/redraw issue; confirm against the
# gWidgetstcltk documentation before relying on it.
Choose_Elements <- gcheckboxgroup(Element_Options, container = Group7, horizontal = FALSE, checked = FALSE, use.table = TRUE)
My problem is that the scroll table does not always open when I run the code. Sometimes I need to run it multiple times (without actually editing the code in any way) in order for the scroll box to appear. In other words, the window and frame of the GUI will appear, but will be empty.
Also, when the scroll box does open, the scroll bar doesn't actually work.
Any help with resolving this issue would be highly appreciated.

Resources