structure(tibble(c("top", "jng", "mid", "bot", "sup"), c("369", "Karsa", "knight", "JackeyLove", "yuyanjia"),
c("Malphite", "Rek'Sai", "Zoe", "Aphelios", "Braum"), c("1", "1", "1", "1", "1"), c("7", "5", "7", "5", "0"),
c("6079-7578", "6079-7578", "6079-7578", "6079-7578", "6079-7578")), .Names = c("position", "player", "champion", "result", "kills", "gameid"))
Output:
# A tibble: 5 x 6
position player champion result kills gameid
* <chr> <chr> <chr> <chr> <chr> <chr>
1 top 369 Malphite 1 7 6079-7578
2 jng Karsa Rek'Sai 1 5 6079-7578
3 mid knight Zoe 1 7 6079-7578
4 bot JackeyLove Aphelios 1 5 6079-7578
5 sup yuyanjia Braum 1 0 6079-7578
My desired output would be:
structure(list(gameid = "6079-7578", result = "1", player_top = "369",
player_jng = "Karsa", player_mid = "knight", player_bot = "JackeyLove",
player_sup = "yuyanjia", champion_top = "Malphite", champion_jng = "Rek'Sai",
champion_mid = "Zoe", champion_bot = "Aphelios", champion_sup = "Braum",
kills_top = "7", kills_jng = "5", kills_mid = "7", kills_bot = "5",
kills_sup = "0"), row.names = c(NA, -1L), class = c("tbl_df",
"tbl", "data.frame"))
which looks like this:
gameid result player_top player_jng player_mid player_bot player_sup champion_top champion_jng champion_mid champion_bot champion_sup
1 6079-7578 1 369 Karsa knight JackeyLove yuyanjia Malphite RekSai Zoe Aphelios Braum
kills_top kills_jng kills_mid kills_bot kills_sup
1 7 5 7 5 0
I know I should use pivot_wider() and something like drop_na, but I don't know how to do pivot_wider() with mutiple columns and collapse the rows at the same time. Any help would be appreciated.
You can use pivot_wider() for this, defining the "position" variable as the variable that the new column names come from in names_from and the three variables with values you want to use to fill those columns with as values_from.
By default the multiple values_from variables are pasted on to the front of new columns names. This can be changed, but in this case that matches the naming structure you want.
All other variables in the original dataset will be used as the id_cols in the order that they appear.
library(tidyr)
pivot_wider(dat,
names_from = "position",
values_from = c("player", "champion", "kills"))
#> result gameid player_top player_jng player_mid player_bot player_sup
#> 1 1 6079-7578 369 Karsa knight JackeyLove yuyanjia
#> champion_top champion_jng champion_mid champion_bot champion_sup kills_top
#> 1 Malphite Rek'Sai Zoe Aphelios Braum 7
#> kills_jng kills_mid kills_bot kills_sup
#> 1 5 7 5 0
You can control the order of your id columns in the output by explicitly writing them out via id_cols. Here's an example, matching your desired output.
pivot_wider(dat, id_cols = c("gameid", "result"),
names_from = "position",
values_from = c("player", "champion", "kills"))
#> gameid result player_top player_jng player_mid player_bot player_sup
#> 1 6079-7578 1 369 Karsa knight JackeyLove yuyanjia
#> champion_top champion_jng champion_mid champion_bot champion_sup kills_top
#> 1 Malphite Rek'Sai Zoe Aphelios Braum 7
#> kills_jng kills_mid kills_bot kills_sup
#> 1 5 7 5 0
Created on 2021-06-24 by the reprex package (v2.0.0)
Using data.table might help here. In dcast() each row will be identified by a unique combo of gameid and result, the columns will be spread by position, and filled with values from the variables listed in value.var.
library(data.table)
library(dplyr)
df <- structure(tibble(c("top", "jng", "mid", "bot", "sup"), c("369", "Karsa", "knight", "JackeyLove", "yuyanjia"),
c("Malphite", "Rek'Sai", "Zoe", "Aphelios", "Braum"), c("1", "1", "1", "1", "1"), c("7", "5", "7", "5", "0"),
c("6079-7578", "6079-7578", "6079-7578", "6079-7578", "6079-7578")), .Names = c("position", "player", "champion", "result", "kills", "gameid"))
df2 <- dcast(setDT(df), gameid + result~position, value.var = list('player','champion','kills'))
Related
We have the following data frame a with something like this:
> a
google_prod Value
1 categoria ML
2 google 120
3 youtube 24
4 categoria AO
5 google 2
6 youtube 0
7 categoria ML
8 google 27
9 youtube 0
10 categoria AO
11 google 5
12 youtube 0
We would like to get this:
categoria google_prod Value
1 ML google 120
2 ML youtube 24
3 AO google 2
4 AO youtube 0
5 ML google 27
6 ML youtube 0
7 AO google 5
8 AO youtube 0
In other words, perform a type of application of the Spread or similar function, where only one value is taken from the google_prod column to apply it, in this case it would be the 'categoria' value.
library(tidyverse)
# getting the data
category <- rep(c("categoria", "google", "youtube"), 4)
value <- c("ML", "120", "24", "AO", "2", "0", "ML", "27", "0", "AO", "5", "0")
df <- tibble(category, value)
df %>%
mutate(helper = rep(1:(nrow(df)/3), each = 3)) %>%
pivot_wider(names_from = category, values_from = value) %>%
select(-helper) %>%
pivot_longer(names_to = "google_prod", values_to = "values", -1)
# # A tibble: 8 x 3
# categoria google_prod values
# <chr> <chr> <chr>
# 1 ML google 120
# 2 ML youtube 24
# 3 AO google 2
# 4 AO youtube 0
# 5 ML google 27
# 6 ML youtube 0
# 7 AO google 5
# 8 AO youtube 0
Here is another idea with creating a group with cumsum and extracting the first element
library(dplyr)
mydf %>%
group_by(grp = cumsum(google_prod == 'categoria')) %>%
mutate(categoria = first(Value)) %>%
slice(-1) %>%
ungroup %>%
select(-grp) %>%
type.convert(as.is = TRUE)
# A tibble: 8 x 3
# google_prod Value categoria
# <chr> <int> <chr>
#1 google 120 ML
#2 youtube 24 ML
#3 google 2 AO
#4 youtube 0 AO
#5 google 27 ML
#6 youtube 0 ML
#7 google 5 AO
#8 youtube 0 AO
data
mydf <- structure(list(google_prod = c("categoria", "google", "youtube",
"categoria", "google", "youtube", "categoria", "google", "youtube",
"categoria", "google", "youtube"), Value = c("ML", "120", "24",
"AO", "2", "0", "ML", "27", "0", "AO", "5", "0")),
class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
One idea would be the following. As far as I see the pattern, you are targeting values that contain two capital letters in Value. I searched where they are using grep() and obtained indice. Using this information, I created a group variable using findIntervals(). For each group, I aggregated data; I extracted and put the capital-letter value in categoria. In similar ways, I created two more columns. They are lists. So I used unnest() in the end to get the output.
library(tidyverse)
ind <- grep(x = mydf$Value, pattern = "[A-Z]+")
group_by(mydf, group = findInterval(x = 1:n(), vec = ind)) %>%
summarize(categoria = Value[google_prod == "categoria"],
Google_prod = list(google_prod[google_prod != "categoria"]),
Value = list(Value[google_prod != "categoria"])) %>%
unnest(cols = Google_prod:Value)
group categoria Google_prod Value
<int> <chr> <chr> <chr>
1 1 ML google 120
2 1 ML youtube 24
3 2 AO google 2
4 2 AO youtube 0
5 3 ML google 27
6 3 ML youtube 0
7 4 AO google 5
8 4 AO youtube 0
DATA
mydf <- structure(list(google_prod = c("categoria", "google", "youtube",
"categoria", "google", "youtube", "categoria", "google", "youtube",
"categoria", "google", "youtube"), Value = c("ML", "120", "24",
"AO", "2", "0", "ML", "27", "0", "AO", "5", "0")), class = "data.frame", row.names = c("1",
"2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12"))
I have a data frame containing a person's stage, as follows (this is only a sample of a very large one):
df = structure(list(DeceasedDate = c(0.283219178082192, 1.12678843226788,
2.02865296803653, 0.892465753424658, NA, 0.88013698630137, NA
), LastClinicalEventMonthEnd = c(0.244862981988838, 1.03637744165398,
10.9464611555048, 0.763698598427194, 3.35011412354135, 0.677397228564181,
3.83687211440893), FirstYStage = c("N/A", "2", "2", "2", "2",
"2", "3.1"), SecondYStage = c("N/A", "N/A", "2", "N/A", "2",
"N/A", "3.1"), ThirdYStage = c("N/A", "N/A", "2", "N/A", "2",
"N/A", "3.1"), FourthYStage = c("N/A", "N/A", "N/A", "N/A", "2",
"N/A", "3.1"), FifthYStage = c("N/A", "N/A", "N/A", "N/A", "N/A",
"N/A", "N/A")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-7L))
The 5 right hand columns are a stage of a person, but do not contain all the information yet. I need to include the information in the first two columns, in which the numbers are in years, as follows:
if the value in column 1 is smaller than a year, FirstYStage should be "Deceased", and also all the next columns (the person is still dead...); if the value is between 1 and 2, it SecondYStage should be "Deceased", and so on.
if the value in column 2 is smaller than a year, SecondYStage should be "EndOfEvents"; if the value is between 1 and 2, it SecondYStage should be "EndOfEvents", and so on.
So the expected output in this case should be:
df_updated = structure(list(DeceasedDate = c(0.283219178082192,
1.12678843226788,
2.02865296803653, 0.892465753424658, NA, 0.88013698630137, NA
), LastClinicalEventMonthEnd = c(0.244862981988838, 1.03637744165398,
10.9464611555048, 0.763698598427194, 3.35011412354135, 0.677397228564181,
3.83687211440893), FirstYStage = c("Deceased", "2", "2", "Deceased",
"2", "Deceased", "3.1"), SecondYStage = c("Deceased", "Deceased",
"2", "Deceased", "2", "Deceased", "3.1"), ThirdYStage = c("Deceased",
"Deceased", "Deceased", "Deceased", "2", "Deceased", "3.1"),
FourthYStage = c("Deceased", "Deceased", "Deceased", "Deceased",
"2", "Deceased", "3.1"), FifthYStage = c("Deceased", "Deceased",
"Deceased", "Deceased", "LastEvent", "Deceased", "LastEvent"
)), row.names = c(NA, -7L), class = c("tbl_df", "tbl", "data.frame"
))
One important point is that "Death" should be given priority, in other words, if there is a clash and on the one hand there is a number and "death" is contradicting it, we should prefer death.
How do I do this in the most efficient way? At the moment I am doing if's but I think it is not the best course of action
This is what I would do:
Reshape from wide to long format
Compute years from column names
Selectively update the value column
Reshape back to wide format
data.table
As I am more fluent in data.table than in dplyr here is the approach implemented in data.table syntax. (Apologies but I will add a dplyr solution if time permits.)
library(data.table)
long <- melt(setDT(df)[, rn := .I], measure.vars = patterns("Stage$"))
long[, year := as.integer(variable)] # column index
long[floor(DeceasedDate) < year, value := "Deceased"]
long[is.na(DeceasedDate) & floor(LastClinicalEventMonthEnd) + 1 < year, value := "EndOfEvents"]
dcast(long, rn + DeceasedDate + LastClinicalEventMonthEnd ~ variable)
rn DeceasedDate LastClinicalEventMonthEnd FirstYStage SecondYStage ThirdYStage FourthYStage FifthYStage
1: 1 0.2832192 0.2448630 Deceased Deceased Deceased Deceased Deceased
2: 2 1.1267884 1.0363774 2 Deceased Deceased Deceased Deceased
3: 3 2.0286530 10.9464612 2 2 Deceased Deceased Deceased
4: 4 0.8924658 0.7636986 Deceased Deceased Deceased Deceased Deceased
5: 5 NA 3.3501141 2 2 2 2 EndOfEvents
6: 6 0.8801370 0.6773972 Deceased Deceased Deceased Deceased Deceased
7: 7 NA 3.8368721 3.1 3.1 3.1 3.1 EndOfEvents
dplyr / tidyr
As promised, here is also a dplyr/tidyr implemention of the same approach:
library(tidyr)
library(dplyr)
df %>%
mutate(rn = row_number()) %>%
gather(key, val, ends_with("Stage"), factor_key = TRUE) %>%
mutate(year = as.integer(key)) %>%
mutate(val = if_else(!is.na(DeceasedDate) & floor(DeceasedDate) < year, "Deceased", val)) %>%
mutate(val = if_else(is.na(DeceasedDate) & floor(LastClinicalEventMonthEnd) + 1 < year, "EndOfEvents", val)) %>%
select(-year) %>%
spread(key, val) %>%
arrange(rn)
DeceasedDate LastClinicalEventMonthEnd rn FirstYStage SecondYStage ThirdYStage FourthYStage FifthYStage
1 0.2832192 0.2448630 1 Deceased Deceased Deceased Deceased Deceased
2 1.1267884 1.0363774 2 2 Deceased Deceased Deceased Deceased
3 2.0286530 10.9464612 3 2 2 Deceased Deceased Deceased
4 0.8924658 0.7636986 4 Deceased Deceased Deceased Deceased Deceased
5 NA 3.3501141 5 2 2 2 2 EndOfEvents
6 0.8801370 0.6773972 6 Deceased Deceased Deceased Deceased Deceased
7 NA 3.8368721 7 3.1 3.1 3.1 3.1 EndOfEvents
or without creating a year column:
df %>%
mutate(rn = row_number()) %>%
gather(key, val, ends_with("Stage"), factor_key = TRUE) %>%
mutate(val = if_else(!is.na(DeceasedDate) & floor(DeceasedDate) < as.integer(key),
"Deceased", val)) %>%
mutate(val = if_else(is.na(DeceasedDate) & floor(LastClinicalEventMonthEnd) + 1 < as.integer(key),
"EndOfEvents", val)) %>%
spread(key, val) %>%
arrange(rn)
After looking at a few other asked questions, and reading a few guides, I'm not able to find a suitable solution to my specific problem. Here's an example of the data to begin:
data <- data.frame(
Date = sample(c("1993-07-05", "1993-07-05", "1993-07-05", "1993-08-30", "1993-08-30", "1993-08-30", "1993-08-30", "1993-09-04", "1993-09-04")),
Site = sample(c("1", "1", "1", "1", "1", "1", "1", "1", "1")),
Station = sample(c("1", "2", "3", "1", "2", "3", "4", "1", "2")),
Oxygen = sample(c("0.9", "0.4", "4.2", "5.6", "7.3", "4.3", "9.5", "5.3", "0.3")))
I want to average all the oxygen values for the stations that are nested within a site that corresponds to a date. My dataset has a couple of thousand rows, and like in the example, there are an uneven number of stations, and the dates are uneven in length.
The output I'm looking for are columns like, "Date -> Site -> Average Oxygen", foregoing the need for a station column altogether in the new version of the time series.
Any help would be greatly appreciated!
After grouping by 'Site', 'Date', get the mean of 'Oxygen' (after converting it to numeric - it is factor column)
library(tidyverse)
data %>%
group_by(Site, Date) %>%
summarise(AverageOxygen = mean(as.numeric(as.character(Oxygen))))
# A tibble: 3 x 3
# Groups: Site [1]
# Site Date AverageOxygen
# <fct> <fct> <dbl>
#1 1 1993-07-05 3.97
#2 1 1993-08-30 5.2
#3 1 1993-09-04 2.55
Try:
library(hablar)
library(tidyverse)
data %>%
retype() %>%
group_by(Site, Date) %>%
summarize(AverageOxygen = mean(Oxygen))
which gives you:
# A tibble: 3 x 3
# Groups: Site [?]
Site Date AverageOxygen
<int> <date> <dbl>
1 1 1993-07-05 4.7
2 1 1993-08-30 3.55
3 1 1993-09-04 4.75
I would like to know how to concatenate string to form sequences of different and varying lengths & varying content according to one condition.
Here is a dataframe example (my DF is actually about 60000 rows).
column index: just an index
to_concat: the string item i want to concatenate
max_seq: one example of the condition for concatenation (to_concat should only concatenate if it is part of the same sequence - and I have indicated the position of the string in the sequence for now.
concat_result: The result I would like to have
index to_concat max_seq concat_result
1 Abc! 1 <abc!+def+_>
2 def 2 <abc!+def+_>
3 _ 3 <abc!+def+_>
4 x93 1 <x93+afza+5609+5609+Abc!+def>
5 afza 2 <x93+afza+5609+5609+Abc!+def>
6 5609 3 <x93+afza+5609+5609+Abc!+def>
7 5609 4 <x93+afza+5609+5609+Abc!+def>
8 Abc! 5 <x93+afza+5609+5609+Abc!+def>
9 def 6 <x93+afza+5609+5609+Abc!+def>
10 _ 1 <_+x93+afza>
11 x93 2 <_+x93+afza>
12 afza 3 <_+x93+afza>
I know of paste & aggregate, length, probably usefull.. But do not see in which order to do that and especially how to formulate the paste.
I suppose I should also include an "second" index better done for max_seq (such as : all strings to be concatenated in the same sequence have the same number so here we would have a 3 sequences " 1 1 1 2 2 2 2 2 2 3 3 3 ".
But I do not know if that is the quickest/easiest solution and also I do not know how to paste varying length...
Could you please help a fellow PhD? Thanks a lot in advance.
Reproductible example:
dput(dat)
> dput(dat)
structure(list(V1 = c("index", "1", "2", "3", "4", "5", "6",
"7", "8", "9", "10", "11", "12"), V2 = c("to_concat", "Abc!",
"def", "_", "x93", "afza", "5609", "5609", "Abc!", "def", "_",
"x93", "afza"), V3 = c("max_seq", "1", "2", "3", "1", "2", "3",
"4", "5", "6", "1", "2", "3"), V4 = c("concat_result", "<abc!+def+_>",
"<abc!+def+_>", "<abc!+def+_>", "<x93+afza+5609+5609+Abc!+def>",
"<x93+afza+5609+5609+Abc!+def>", "<x93+afza+5609+5609+Abc!+def>",
"<x93+afza+5609+5609+Abc!+def>", "<x93+afza+5609+5609+Abc!+def>",
"<x93+afza+5609+5609+Abc!+def>", "<_+x93+afza>", "<_+x93+afza>",
"<_+x93+afza>")), .Names = c("V1", "V2", "V3", "V4"), class = "data.frame", row.names = c(NA,
-13L))
Several options to get the desired result:
1) Using base R:
mydf$grp <- cumsum(mydf$max_seq < c(1,head(mydf$max_seq, -1))) + 1
mydf$concat_result <- ave(mydf$to_concat, mydf$grp,
FUN = function(x) paste0('<',paste(x,collapse='+'),'>'))
which gives:
> mydf
index to_concat max_seq grp concat_result
1 1 Abc! 1 1 <Abc!+def+_>
2 2 def 2 1 <Abc!+def+_>
3 3 _ 3 1 <Abc!+def+_>
4 4 x93 1 2 <x93+afza+5609+5609+Abc!+def>
5 5 afza 2 2 <x93+afza+5609+5609+Abc!+def>
6 6 5609 3 2 <x93+afza+5609+5609+Abc!+def>
7 7 5609 4 2 <x93+afza+5609+5609+Abc!+def>
8 8 Abc! 5 2 <x93+afza+5609+5609+Abc!+def>
9 9 def 6 2 <x93+afza+5609+5609+Abc!+def>
10 10 _ 1 3 <_+x93+afza>
11 11 x93 2 3 <_+x93+afza>
12 12 afza 3 3 <_+x93+afza>
2) Or using the data.table package:
library(data.table)
setDT(mydf)[, grp := cumsum(max_seq < shift(max_seq, fill = 0))+1
][, concat_result := paste0('<',paste(to_concat,collapse='+'),'>'), grp][]
3) Or using the dplyr package:
library(dplyr)
mydf %>%
mutate(grp = cumsum(max_seq < lag(max_seq, n=1, default=0))+1) %>%
group_by(grp) %>%
mutate(concat_result = paste0('<',paste(to_concat,collapse='+'),'>'))
Used data:
mydf <- structure(list(index = 1:12,
to_concat = c("Abc!", "def", "_", "x93", "afza", "5609", "5609", "Abc!", "def", "_", "x93", "afza"),
max_seq = c(1L, 2L, 3L, 1L, 2L, 3L, 4L, 5L, 6L, 1L, 2L, 3L)),
.Names = c("index", "to_concat", "max_seq"), class = "data.frame", row.names = c(NA, -12L))
This question already has answers here:
Transposing a dataframe maintaining the first column as heading
(5 answers)
Closed 1 year ago.
Happy Weekends.
I've been trying to replicate the results from this blog post in R. I am looking for a method of transposing the data without using t, preferably using tidyr or reshape. In example below, metadata is obtained by transposing data.
metadata <- data.frame(colnames(data), t(data[1:4, ]) )
colnames(metadata) <- t(metadata[1,])
metadata <- metadata[-1,]
metadata$Multiplier <- as.numeric(metadata$Multiplier)
Though it achieves what I want, I find it little unskillful. Is there any efficient workflow to transpose the data frame?
dput of data
data <- structure(list(Series.Description = c("Unit:", "Multiplier:",
"Currency:", "Unique Identifier: "), Nominal.Broad.Dollar.Index. = c("Index:_1997_Jan_100",
"1", NA, "H10/H10/JRXWTFB_N.M"), Nominal.Major.Currencies.Dollar.Index. = c("Index:_1973_Mar_100",
"1", NA, "H10/H10/JRXWTFN_N.M"), Nominal.Other.Important.Trading.Partners.Dollar.Index. = c("Index:_1997_Jan_100",
"1", NA, "H10/H10/JRXWTFO_N.M"), AUSTRALIA....SPOT.EXCHANGE.RATE..US..AUSTRALIAN...RECIPROCAL.OF.RXI_N.M.AL. = c("Currency:_Per_AUD",
"1", "USD", "H10/H10/RXI$US_N.M.AL"), SPOT.EXCHANGE.RATE...EURO.AREA. = c("Currency:_Per_EUR",
"1", "USD", "H10/H10/RXI$US_N.M.EU"), NEW.ZEALAND....SPOT.EXCHANGE.RATE..US..NZ...RECIPROCAL.OF.RXI_N.M.NZ.. = c("Currency:_Per_NZD",
"1", "USD", "H10/H10/RXI$US_N.M.NZ"), United.Kingdom....Spot.Exchange.Rate..US..Pound.Sterling.Reciprocal.of.rxi_n.m.uk = c("Currency:_Per_GBP",
"0.01", "USD", "H10/H10/RXI$US_N.M.UK"), BRAZIL....SPOT.EXCHANGE.RATE..REAIS.US.. = c("Currency:_Per_USD",
"1", "BRL", "H10/H10/RXI_N.M.BZ"), CANADA....SPOT.EXCHANGE.RATE..CANADIAN...US.. = c("Currency:_Per_USD",
"1", "CAD", "H10/H10/RXI_N.M.CA"), CHINA....SPOT.EXCHANGE.RATE..YUAN.US.. = c("Currency:_Per_USD",
"1", "CNY", "H10/H10/RXI_N.M.CH"), DENMARK....SPOT.EXCHANGE.RATE..KRONER.US.. = c("Currency:_Per_USD",
"1", "DKK", "H10/H10/RXI_N.M.DN"), HONG.KONG....SPOT.EXCHANGE.RATE..HK..US.. = c("Currency:_Per_USD",
"1", "HKD", "H10/H10/RXI_N.M.HK"), INDIA....SPOT.EXCHANGE.RATE..RUPEES.US. = c("Currency:_Per_USD",
"1", "INR", "H10/H10/RXI_N.M.IN"), JAPAN....SPOT.EXCHANGE.RATE..YEA.US.. = c("Currency:_Per_USD",
"1", "JPY", "H10/H10/RXI_N.M.JA"), KOREA....SPOT.EXCHANGE.RATE..WON.US.. = c("Currency:_Per_USD",
"1", "KRW", "H10/H10/RXI_N.M.KO"), Malaysia...Spot.Exchange.Rate..Ringgit.US.. = c("Currency:_Per_USD",
"1", "MYR", "H10/H10/RXI_N.M.MA"), MEXICO....SPOT.EXCHANGE.RATE..PESOS.US.. = c("Currency:_Per_USD",
"1", "MXN", "H10/H10/RXI_N.M.MX"), NORWAY....SPOT.EXCHANGE.RATE..KRONER.US.. = c("Currency:_Per_USD",
"1", "NOK", "H10/H10/RXI_N.M.NO"), SWEDEN....SPOT.EXCHANGE.RATE..KRONOR.US.. = c("Currency:_Per_USD",
"1", "SEK", "H10/H10/RXI_N.M.SD"), SOUTH.AFRICA....SPOT.EXCHANGE.RATE..RAND.US.. = c("Currency:_Per_USD",
"1", "ZAR", "H10/H10/RXI_N.M.SF"), Singapore...SPOT.EXCHANGE.RATE..SINGAPORE...US.. = c("Currency:_Per_USD",
"1", "SGD", "H10/H10/RXI_N.M.SI"), SRI.LANKA....SPOT.EXCHANGE.RATE..RUPEES.US.. = c("Currency:_Per_USD",
"1", "LKR", "H10/H10/RXI_N.M.SL"), SWITZERLAND....SPOT.EXCHANGE.RATE..FRANCS.US.. = c("Currency:_Per_USD",
"1", "CHF", "H10/H10/RXI_N.M.SZ"), TAIWAN....SPOT.EXCHANGE.RATE..NT..US.. = c("Currency:_Per_USD",
"1", "TWD", "H10/H10/RXI_N.M.TA"), THAILAND....SPOT.EXCHANGE.RATE....THAILAND. = c("Currency:_Per_USD",
"1", "THB", "H10/H10/RXI_N.M.TH"), VENEZUELA....SPOT.EXCHANGE.RATE..BOLIVARES.US.. = c("Currency:_Per_USD",
"1", "VEB", "H10/H10/RXI_N.M.VE")), .Names = c("Series.Description",
"Nominal.Broad.Dollar.Index.", "Nominal.Major.Currencies.Dollar.Index.",
"Nominal.Other.Important.Trading.Partners.Dollar.Index.", "AUSTRALIA....SPOT.EXCHANGE.RATE..US..AUSTRALIAN...RECIPROCAL.OF.RXI_N.M.AL.",
"SPOT.EXCHANGE.RATE...EURO.AREA.", "NEW.ZEALAND....SPOT.EXCHANGE.RATE..US..NZ...RECIPROCAL.OF.RXI_N.M.NZ..",
"United.Kingdom....Spot.Exchange.Rate..US..Pound.Sterling.Reciprocal.of.rxi_n.m.uk",
"BRAZIL....SPOT.EXCHANGE.RATE..REAIS.US..", "CANADA....SPOT.EXCHANGE.RATE..CANADIAN...US..",
"CHINA....SPOT.EXCHANGE.RATE..YUAN.US..", "DENMARK....SPOT.EXCHANGE.RATE..KRONER.US..",
"HONG.KONG....SPOT.EXCHANGE.RATE..HK..US..", "INDIA....SPOT.EXCHANGE.RATE..RUPEES.US.",
"JAPAN....SPOT.EXCHANGE.RATE..YEA.US..", "KOREA....SPOT.EXCHANGE.RATE..WON.US..",
"Malaysia...Spot.Exchange.Rate..Ringgit.US..", "MEXICO....SPOT.EXCHANGE.RATE..PESOS.US..",
"NORWAY....SPOT.EXCHANGE.RATE..KRONER.US..", "SWEDEN....SPOT.EXCHANGE.RATE..KRONOR.US..",
"SOUTH.AFRICA....SPOT.EXCHANGE.RATE..RAND.US..", "Singapore...SPOT.EXCHANGE.RATE..SINGAPORE...US..",
"SRI.LANKA....SPOT.EXCHANGE.RATE..RUPEES.US..", "SWITZERLAND....SPOT.EXCHANGE.RATE..FRANCS.US..",
"TAIWAN....SPOT.EXCHANGE.RATE..NT..US..", "THAILAND....SPOT.EXCHANGE.RATE....THAILAND.",
"VENEZUELA....SPOT.EXCHANGE.RATE..BOLIVARES.US.."), row.names = c(NA,
4L), class = "data.frame")
Using tidyr, you gather all the columns except the first, and then you spread the gathered columns.
Try:
library(dplyr)
library(tidyr)
data %>%
gather(var, val, 2:ncol(data)) %>%
spread(Series.Description, val)
library(dplyr)
# Omitted data <- structure part ...
Here is something that replicates what's in the main answer, but more generically (e.g., works where Series.Description is not the first column of the result) and using the newer pivot_wider/pivot_longer verbs.
df_transpose <- function(df) {
df %>%
tidyr::pivot_longer(-1) %>%
tidyr::pivot_wider(names_from = 1, values_from = value)
}
df_transpose(data)
#> # A tibble: 26 x 5
#> name `Unit:` `Multiplier:` `Currency:` `Unique Identifi…
#> <chr> <chr> <chr> <chr> <chr>
#> 1 Nominal.Broad.Dollar.… Index:_19… 1 <NA> H10/H10/JRXWTFB_…
#> 2 Nominal.Major.Currenc… Index:_19… 1 <NA> H10/H10/JRXWTFN_…
#> 3 Nominal.Other.Importa… Index:_19… 1 <NA> H10/H10/JRXWTFO_…
#> 4 AUSTRALIA....SPOT.EXC… Currency:… 1 USD H10/H10/RXI$US_N…
#> 5 SPOT.EXCHANGE.RATE...… Currency:… 1 USD H10/H10/RXI$US_N…
#> 6 NEW.ZEALAND....SPOT.E… Currency:… 1 USD H10/H10/RXI$US_N…
#> 7 United.Kingdom....Spo… Currency:… 0.01 USD H10/H10/RXI$US_N…
#> 8 BRAZIL....SPOT.EXCHAN… Currency:… 1 BRL H10/H10/RXI_N.M.…
#> 9 CANADA....SPOT.EXCHAN… Currency:… 1 CAD H10/H10/RXI_N.M.…
#> 10 CHINA....SPOT.EXCHANG… Currency:… 1 CNY H10/H10/RXI_N.M.…
#> # … with 16 more rows
But note that (like the answer above) the name of the first column is lost. The following retains this (as, I guess does the spread_(names(data)[1], "val") approach proposed by #jbkunst above).
df_transpose <- function(df) {
first_name <- colnames(df)[1]
temp <-
df %>%
tidyr::pivot_longer(-1) %>%
tidyr::pivot_wider(names_from = 1, values_from = value)
colnames(temp)[1] <- first_name
temp
}
df_transpose(data)
#> # A tibble: 26 x 5
#> Series.Description `Unit:` `Multiplier:` `Currency:` `Unique Identif…
#> <chr> <chr> <chr> <chr> <chr>
#> 1 Nominal.Broad.Dollar.In… Index:_1… 1 <NA> H10/H10/JRXWTFB…
#> 2 Nominal.Major.Currencie… Index:_1… 1 <NA> H10/H10/JRXWTFN…
#> 3 Nominal.Other.Important… Index:_1… 1 <NA> H10/H10/JRXWTFO…
#> 4 AUSTRALIA....SPOT.EXCHA… Currency… 1 USD H10/H10/RXI$US_…
#> 5 SPOT.EXCHANGE.RATE...EU… Currency… 1 USD H10/H10/RXI$US_…
#> 6 NEW.ZEALAND....SPOT.EXC… Currency… 1 USD H10/H10/RXI$US_…
#> 7 United.Kingdom....Spot.… Currency… 0.01 USD H10/H10/RXI$US_…
#> 8 BRAZIL....SPOT.EXCHANGE… Currency… 1 BRL H10/H10/RXI_N.M…
#> 9 CANADA....SPOT.EXCHANGE… Currency… 1 CAD H10/H10/RXI_N.M…
#> 10 CHINA....SPOT.EXCHANGE.… Currency… 1 CNY H10/H10/RXI_N.M…
#> # … with 16 more rows
Created on 2021-05-30 by the reprex package (v2.0.0)