remove quotes from colnames? - r

I have a dataframe of the following form
"column1"
"column2"
1
5
2
6
3
7
How do I remove the quotation mark from the column names? I've tried using gsub but I can't quote quotation marks haha. Also need a way to do this that isn't just names(data) <- c("column1", "column2"). Thank you all!

You can use gsub with single-quotes in order to reference the double-quote character for replacement:
names(df) = gsub('"', "", names(df))
Test:
# Build a three-row example and wrap the first two column names in literal quotes
d <- head(mtcars, 3)[, 1:4]
names(d)[1:2] <- c('"column1"', '"column2"')
names(d)
#> [1] "\"column1\"" "\"column2\"" "disp" "hp"
d
#> "column1" "column2" disp hp
#> Mazda RX4 21.0 6 160 110
#> Mazda RX4 Wag 21.0 6 160 110
#> Datsun 710 22.8 4 108 93
# Strip every double-quote character from the column names
stripped <- gsub('"', "", names(d))
names(d) <- stripped
names(d)
#> [1] "column1" "column2" "disp" "hp"
d
#> column1 column2 disp hp
#> Mazda RX4 21.0 6 160 110
#> Mazda RX4 Wag 21.0 6 160 110
#> Datsun 710 22.8 4 108 93
Created on 2021-01-19 by the reprex package (v0.3.0)

Related

updating variable names in R

I'm still learning some very basic concepts in R. I have an excel file that I import into R, but it has atrocious variable names. I have another file with 2 columns: the first is the original column names in my data file, the second is what I want the variable names to be.
What's the most efficient way to update all column names using this auxiliary file that I have?
Since names or colnames of data frames is a character vector and every column in a data frame is an atomic vector, simply re-assign original with new names.
# INSPECT THE LOOKUP TABLE BEFORE USING IT
str(names_df)
# EXPECTED TWO COLUMNS OF CHR TYPE
# RE-ORDER COLUMNS BY PASSING CHARACTER VECTOR
# (SELECTS THE COLUMNS NAMED IN original_names, IN THAT ORDER)
excel_df <- excel_df[names_df$original_names]
# RE-ASSIGN NAMES: TWO METHODS
# (EITHER LINE ALONE IS SUFFICIENT; BOTH GIVE THE SAME RESULT)
names(excel_df) <- names_df$new_names
excel_df <- setNames(excel_df, names_df$new_names)
This method is a little overkill if all columns are perfectly accounted for and in the correct order. If there are columns out of order or new columns, however, this method is robust by only changing those you intend to change and are found.
# Work on a copy so the built-in mtcars data is left untouched
mt <- mtcars
head(mt, 3)
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# Lookup table of renames; stringsAsFactors = FALSE keeps both columns as
# character on R < 4.0, where data.frame() converted strings to factors
namechange <- data.frame(
  oldname = c("mpg", "cyl", "hp"),
  newname = c("MPG", "CYL", "HP"),
  stringsAsFactors = FALSE
)
namechange
# oldname newname
# 1 mpg MPG
# 2 cyl CYL
# 3 hp HP
# Row of the lookup table for each column of mt (NA = column not listed)
ind <- match(names(mt), namechange$oldname)
ind
# [1] 1 2 NA 3 NA NA NA NA NA NA NA
# Keep the current name where there is no match, otherwise take the new name
new_names <- ifelse(is.na(ind), names(mt), namechange$newname[ind])
new_names
# [1] "MPG" "CYL" "disp" "HP" "drat" "wt" "qsec" "vs" "am" "gear" "carb"
names(mt) <- new_names
head(mt, 3)
# MPG CYL disp HP drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
you can use the rename command
library(dplyr)
# Example with the built-in mtcars data
View(mtcars)
# rename() takes new_name = old_name pairs; all other columns are kept as is
new_mtcars<-mtcars%>%
rename('new_MPG'='mpg', 'new_cyl'='cyl')# Rename the columns mpg and cyl
View(new_mtcars)
output

How to find how many rows beginning with a certain letter?

I don't know how to find how many rows begin with a certain letter in a data frame.
For example.
mtcars
mpg cyl disp hp drat wt ...
Mazda RX4 21.0 6 160 110 3.90 2.62 ...
Mazda RX4 Wag 21.0 6 160 110 3.90 2.88 ...
Datsun 710 22.8 4 108 93 3.85 2.32 ...
I want to find out how many cars begin with the letter 'M'
Thanks
You can do
sum(startsWith(rownames(mtcars), "M"))
# [1] 10
Other less efficient possibilities include
# grepl() flags each row name matching the anchored pattern; sum() counts the TRUEs
sum(grepl("^M", rownames(mtcars)))
# [1] 10
# grep() returns the indices of the matching row names; length() counts them
length(grep("^M", rownames(mtcars)))
# [1] 10
# regexpr() gives the start position of the first match (-1 if there is none)
sum(regexpr("^M", rownames(mtcars)) == 1L)
# [1] 10
# Compare the first character of each row name directly, without a regex
sum(substr(rownames(mtcars), 1, 1) == "M")
# [1] 10
Another good way to do this is with stringr package
# Visually inspect which row names start with "M" (display only, no count)
library(stringr)
str_view(rownames(mtcars),'^M') # to see all results or
# Spell out TRUE: T is an ordinary variable that can be reassigned
str_view(rownames(mtcars),'^M', match = TRUE) # to see only the match results

Maintain rownames when filter a data frame with %>%

Look at the two lines of code below: myup1 keeps the row names, myup2 does not.
myup1<-outdf2[outdf2$label == "Up-Regulated", ]
myup2<-outdf2 %>%filter(label == "Up-Regulated" )
Is there a way to report rownames with %>% approach?
To expand my comment with an example, we can use add_rownames but it is deprecated, so use tibble::rownames_to_column() instead.
library(dplyr)
library(tibble)
df1 <- mtcars[1:5, 1:3]
df1
# mpg cyl disp
# Mazda RX4 21.0 6 160
# Mazda RX4 Wag 21.0 6 160
# Datsun 710 22.8 4 108
# Hornet 4 Drive 21.4 6 258
# Hornet Sportabout 18.7 8 360
df1[ df1$cyl == 6, ]
# mpg cyl disp
# Mazda RX4 21.0 6 160
# Mazda RX4 Wag 21.0 6 160
# Hornet 4 Drive 21.4 6 258
df1 %>%
rownames_to_column("myCars") %>%
filter(cyl == 6)
# # A tibble: 3 x 4
# myCars mpg cyl disp
# <chr> <dbl> <dbl> <dbl>
# 1 Mazda RX4 21.0 6 160
# 2 Mazda RX4 Wag 21.0 6 160
# 3 Hornet 4 Drive 21.4 6 258

Rename Columns with names from another data frame

I'm learning R programming as such have hit a few problems - and with your help have been able to fix them.
But I now have a need to rename columns of a data frame. I have a translation data frame with 2 columns that contains the column names and what the new columns should be called.
Here is my code: my question is how do I select the two columns from the trans dataframe and use them here as trans$old and trans$new variables?
I have 7 columns I'm renaming, and this might be even longer hence the translation table.
# Rename columns of a data frame using a two-column translation table.
#
# data:   data frame whose columns are renamed (defaults to the global
#         `industries`, so the original zero-argument call still works).
# lookup: data frame with columns `old` (current names) and `new`
#         (replacement names); defaults to the global `trans`.
#
# Returns a renamed copy of `data`; the input object is not modified.
replace_header <- function(data = industries, lookup = trans)
{
  # match() finds each old name wherever it sits in names(data); comparing
  # with `==` would only line names up position by position (with recycling)
  positions <- match(as.character(lookup$old), names(data))
  # Old names that are not columns of `data` give NA and are skipped
  present <- !is.na(positions)
  names(data)[positions[present]] <- as.character(lookup$new)[present]
  data
}
replaced_industries <- replace_header()
Here's an example using the built-in mtcars data frame. We'll use the match function to find the indices of the columns names we want to replace and then replace them with new names.
# Copy of built-in data frame
mt = mtcars
head(mt,3)
mpg cyl disp hp drat wt qsec vs am gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# Data frame with column name substitutions
dat = data.frame(old=c("mpg","am"), new=c("new.name1","new.name2"), stringsAsFactors=FALSE)
dat
old new
1 mpg new.name1
2 am new.name2
Use match to find the indices of the "old" names in the mt data frame:
match(dat[,"old"], names(mt))
[1] 1 9
Substitute "old" names with "new" names:
names(mt)[match(dat[,"old"], names(mt))] = dat[,"new"]
head(mt,3)
new.name1 cyl disp hp drat wt qsec vs new.name2 gear carb
Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
I'd recommend setnames from "data.table" for this. Using @eipi10's example:
mt = mtcars
dat = data.frame(old=c("mpg","am"), new=c("new.name1","new.name2"), stringsAsFactors=FALSE)
library(data.table)
setnames(mt, dat$old, dat$new)
names(mt)
# [1] "new.name1" "cyl" "disp" "hp" "drat" "wt"
# [7] "qsec" "vs" "new.name2" "gear" "carb"
If there's a concern, as indicated by @jmbadia, that the data.frame with the old and new names may contain old names that are not present in the data, you can add skip_absent=TRUE to setnames.
Improving a bit on eipi10's answer: if we want to use a "rename dataframe" whose old names are not always present in the mt dataframe (e.g. because mt is provided by different sources, so we don't always know its colnames), we can consider the following code
# Copy of the built-in data frame
mt <- mtcars
head(mt,3)
# mpg cyl disp hp drat wt qsec vs am gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
# dataframe with possible names to replace ("strangeName" is not a column of mt)
dat <- data.frame(old=c("strangeName","am"), new=c("new.name1","new.name2"), stringsAsFactors=FALSE)
# find which old names are present in mt (logical index over the rows of dat)
namesMatched <- dat$old %in% names(mt)
# renaming: only the rows of dat whose old name exists in mt are used
names(mt)[match(dat[namesMatched,"old"], names(mt))] <- dat[namesMatched,"new"]
head(mt,3)
# mpg cyl disp hp drat wt qsec vs new.name2 gear carb
# Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
# Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
# Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1

Column names that contain NAs in R

I have a large data frame that contains 900 variables per row. I am trying to write a function that gives me the name of each column that contains a NA for each row.
For example:
x->
mpg cyl disp hp draw wt
Mazda RX4 21.0 6 160 110 NA 2.62
Mazda RX4 Wag 21.0 6 NA 110 3.90 NA
Datsun 710 22.8 4 NA 93 NA NA
I would like a function to return:
Mazda RX4: "draw"
Mazda RX4 Wag: "disp", "wt"
Datsun 710: "disp","draw","wt"
Run apply by row to select from colnames(x). Probably going to get a list since the result is ragged.
apply(x, 1, function(x2) colnames(x)[ is.na(x2) ] )
$`Mazda RX4`
[1] "draw"
$`Mazda RX4 Wag`
[1] "disp" "wt"
$`Datsun 710`
[1] "disp" "draw" "wt"

Resources