Query Oracle DSN in RStudio - r

I am using RStudio with the RODBC package and the following code:
require(RODBC)
channel <- odbcConnect(dsn = "USA", uid = "AA", pwd = "***")
odbcGetInfo(channel)
This returns all the details, but when I try to run a SQL query
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1)
it returns an error:
Error in odbcFetchRows(channel, max = max, buffsize = buffsize, nullstring = nullstring, :
negative length vectors are not allowed
The same query works if I open Microsoft Access (External Data > ODBC Database > Link to the data source), click Machine Data Source, and select the source, which then allows me to run a select query.
I have also tried using
debug(odbcFetchRows)
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1)
This returns
function (channel, max = 0, buffsize = 1000, nullstring = NA_character_,
    believeNRows = TRUE)
{
    if (!odbcValidChannel(channel))
        stop("first argument is not an open RODBC channel")
    .Call(C_RODBCFetchRows, attr(channel, "handle_ptr"), max,
        buffsize, as.character(nullstring), believeNRows)
}

I got this working by using:
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1, believeNRows = FALSE)
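For anyone hitting the same error: believeNRows = FALSE tells RODBC not to trust the row count reported by the Oracle ODBC driver, which is typically what triggers the "negative length vectors" failure. It can also be set once when the channel is opened; a minimal sketch reusing the DSN and credentials from the question:
require(RODBC)
# Sketch: set believeNRows/rows_at_time at connection time so every later
# sqlQuery() on this channel inherits them
channel <- odbcConnect(dsn = "USA", uid = "AA", pwd = "***",
                       believeNRows = FALSE, rows_at_time = 1)
test <- sqlQuery(channel, "select * from cnty")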

Related

Saving to a date/datetime field using sqlSave

I'm using the following R code to save into the database; however, the date field is not saved properly.
storedata$update = as.character(excelSerialNumToDate(tape$date), "%Y-%m-%d")
sqlSave(channel = dbhandle, dat = storedata, tablename = 'dbo.storeinfo',
        append = T, rownames = F, colnames = F, verbose = T,
        safer = F, fast = F, nastring = NULL)
What is the best way to save into a date field using sqlSave in R?
I wrote my own sqlSave to eliminate these issues.
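For anyone who would rather stay with sqlSave(), one possible fix (a sketch, not the poster's rewrite) is to keep the column as class Date instead of converting it to character, so RODBC maps it to a date type when writing; the varTypes argument can additionally declare the DBMS type in cases where sqlSave() itself creates the table.
# Sketch (untested): keep the date column as class Date instead of character,
# reusing the objects from the question
storedata$update <- as.Date(excelSerialNumToDate(tape$date))
sqlSave(channel = dbhandle, dat = storedata, tablename = 'dbo.storeinfo',
        append = TRUE, rownames = FALSE, colnames = FALSE)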

Insert/Update R data.table into PostgreSQL table

I have a PostgreSQL database set up with a table and columns already defined. The primary key for the table is a combination of the (Id, datetime) columns. I need to periodically INSERT data for different Ids from an R data.table into the database. However, if data for a particular (Id, datetime) combination already exists, it should be UPDATED (overwritten). How can I do this using the RPostgres or RPostgreSQL packages?
When I try to insert a data.table where some (Id, datetime) rows already exist I get an error saying the primary key constraint is violated:
dbWriteTable(con, table, dt, append = TRUE, row.names = FALSE)
Error in connection_copy_data(conn@ptr, sql, value) :
COPY returned error: ERROR: duplicate key value violates unique constraint "interval_data_pkey"
DETAIL: Key (id, dttm_utc)=(a0za000000CSdLoAAL, 2018-10-01 05:15:00+00) already exists.
CONTEXT: COPY interval_data, line 1
You can use my pg package, which has upsert functionality, or just grab the upsert code from there: https://github.com/jangorecki/pg/blob/master/R/pg.R#L249
It is basically what others said in the comments: write the data into a temp table, then insert it into the destination table using an ON CONFLICT clause.
pgSendUpsert = function(stage_name, name, conflict_by, on_conflict = "DO NOTHING",
                        techstamp = TRUE, conn = getOption("pg.conn"),
                        .log = getOption("pg.log", TRUE)){
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp),
            is.character(on_conflict), length(on_conflict) == 1L)
  cols = pgListFields(stage_name)
  cols = setdiff(cols, c("run_id", "r_timestamp")) # remove techstamp to have clean column list, as the fresh one will be used, if any
  # sql
  insert_into = sprintf("INSERT INTO %s.%s (%s)", name[1L], name[2L],
                        paste(if(techstamp) c(cols, c("run_id", "r_timestamp")) else cols, collapse = ", "))
  select = sprintf("SELECT %s", paste(cols, collapse = ", "))
  if(techstamp) select = sprintf("%s, %s::INTEGER run_id, '%s'::TIMESTAMPTZ r_timestamp",
                                 select, get_run_id(), format(Sys.time(), "%Y-%m-%d %H:%M:%OS"))
  from = sprintf("FROM %s.%s", stage_name[1L], stage_name[2L])
  if(!missing(conflict_by)) on_conflict = paste(paste0("(", paste(conflict_by, collapse = ", "), ")"), on_conflict)
  on_conflict = paste("ON CONFLICT", on_conflict)
  sql = paste0(paste(insert_into, select, from, on_conflict), ";")
  pgSendQuery(sql, conn = conn, .log = .log)
}
#' @rdname pg
pgUpsertTable = function(name, value, conflict_by, on_conflict = "DO NOTHING",
                         stage_name, techstamp = TRUE, conn = getOption("pg.conn"),
                         .log = getOption("pg.log", TRUE)){
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp),
            is.character(on_conflict), length(on_conflict) == 1L)
  name = schema_table(name)
  if(!missing(stage_name)){
    stage_name = schema_table(stage_name)
    drop_stage = FALSE
  } else {
    stage_name = name
    stage_name[2L] = paste("tmp", stage_name[2L], sep = "_")
    drop_stage = TRUE
  }
  if(pgExistsTable(stage_name)) pgTruncateTable(name = stage_name, conn = conn, .log = .log)
  pgWriteTable(name = stage_name, value = value, techstamp = techstamp, conn = conn, .log = .log)
  on.exit(if(drop_stage) pgDropTable(stage_name, conn = conn, .log = .log))
  pgSendUpsert(stage_name = stage_name, name = name, conflict_by = conflict_by,
               on_conflict = on_conflict, techstamp = techstamp, conn = conn, .log = .log)
}
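Without the package dependency, the same staging-table-plus-ON-CONFLICT idea can be written directly with DBI/RPostgres. A minimal sketch, assuming con is an open connection, dt is the data.table from the question, and the target interval_data table has a value column alongside the (id, dttm_utc) key:
library(DBI)
# Sketch: stage the rows in a temporary table, then upsert into the target table
dbWriteTable(con, "interval_data_stage", dt, temporary = TRUE, overwrite = TRUE)
dbExecute(con, "
  INSERT INTO interval_data (id, dttm_utc, value)
  SELECT id, dttm_utc, value FROM interval_data_stage
  ON CONFLICT (id, dttm_utc)
  DO UPDATE SET value = EXCLUDED.value;
")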

Truncated updated string with R DBI package

I need to update a wide table on a SQL Server database from R, so the DBI package seems very useful for that.
The problem is that the R data.frame contains strings of more than 3000 characters, and when I use the DBI dbSendQuery function, all strings are truncated to 256 characters.
Here is a code example:
con <- odbc::dbConnect(drv = odbc::odbc(),
                       dsn = '***',
                       UID = '***',
                       PWD = '***')
df = data.frame(TEST = paste(rep("A", 300), collapse = ""),
                TEST_ID = 1068858)
df$TEST = df$TEST %>% as.character
query = paste0('UPDATE MY_TABLE SET "TEST"=? WHERE TEST_ID=?')
update <- DBI::dbSendQuery(con, query)
DBI::dbBind(update, df)
DBI::dbClearResult(update)
odbc::dbDisconnect(con)
Then the following query returns 256 instead of 300:
SELECT LEN(TEST) FROM MY_TABLE WHERE TEST_ID = 1068858
NB : TEST is of type (varchar(max), NULL) and already contains strings of more than 256 chars.
Thanks in advance for any advice
In the end, I chose to do without the sophisticated functions. A solution was to write the table to a .csv file and bulk insert it into the database. Here is an example using the RODBC package:
write.table(x = df,
            file = "/path/DBI_error_test.csv",
            sep = ";",
            row.names = FALSE, col.names = FALSE,
            na = "NULL",
            quote = FALSE)
Query = paste("CREATE TABLE #MY_TABLE_TMP (
                 TEST varchar(max),
                 TEST_ID int
               );
               BULK INSERT #MY_TABLE_TMP
               FROM 'C:\\DBI_error_test.csv'
               WITH
               (
                 FIELDTERMINATOR = ';',
                 ROWTERMINATOR = '\n',
                 BATCHSIZE = 500000,
                 CHECK_CONSTRAINTS
               )
               UPDATE R
               SET R.TEST = #MY_TABLE_TMP.TEST
               FROM MY_TABLE AS R
               INNER JOIN #MY_TABLE_TMP ON #MY_TABLE_TMP.TEST_ID = R.TEST_ID;
               DROP TABLE #MY_TABLE_TMP;
              ")
channel <- RODBC::odbcConnect(dsn = .DB_DSN_NAME,
                              uid = .DB_UID,
                              pwd = .DB_PWD)
RODBC::sqlQuery(channel = channel, query = Query, believeNRows = FALSE)
RODBC::odbcClose(channel = channel)
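A lighter-weight alternative (a sketch, not tested against this table) is to skip parameter binding entirely and build the statement with DBI::sqlInterpolate(), so the driver never has to infer a parameter length; it assumes the same con and one-row df from the question:
# Sketch: interpolate the long string into the UPDATE instead of binding it
sql <- DBI::sqlInterpolate(con,
                           'UPDATE MY_TABLE SET "TEST" = ?test WHERE TEST_ID = ?id',
                           test = df$TEST,
                           id = df$TEST_ID)
DBI::dbExecute(con, sql)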

Issue with src_snowflakedb(): 'src_sql' is not an exported object

I am trying to use dplyr with a Snowflake DB, using the dplyr.snowflakedb package (on GitHub). I am able to install and load the libraries, then set the classpath pointing to the latest JDBC driver (snowflake-jdbc-3.0.9.jar).
# need to load RJDBC, or error 'could not find function ".jinit"' is thrown
library(RJDBC)
library(dplyr)
library(dplyr.snowflakedb)
options(dplyr.jdbc.classpath = "drivers/snowflake-jdbc-3.0.9.jar")
When trying to setup the connection object with src_snowflakedb(), I get the following error message (I removed the account details, but they are correct in the actual code):
> nike_db <- src_snowflakedb(user = "user",
                             password = "user",
                             account = "acme",
                             opts = list(warehouse = "my_wh",
                                         db = "my_db",
                                         schema = "my_schema"))
URL: jdbc:snowflake://acme.snowflakecomputing.com:443/?account=acme&warehouse=my_wh&my_db=db&schema=my_schema
Error: 'src_sql' is not an exported object from 'namespace:dplyr'
Indeed, the current version of dplyr neither exports nor includes any src_sql() function:
> dplyr:::src_sql
Error in get(name, envir = asNamespace(pkg), inherits = FALSE) :
object 'src_sql' not found
Is there any way to fix this?
Please try the sample code below:
Sys.getenv("JAVA_HOME")
Sys.setenv(JAVA_HOME="C:\\Program Files\\Java\\jdk-1.8\\jre")
Sys.getenv("JAVA_HOME")
install.packages(c("rJava"))
install.packages(c("RJDBC", "DBI", "dplyr"))
install.packages("devtools")
devtools::install_github("snowflakedb/dplyr-snowflakedb")
library(RJDBC)
library(dplyr)
library(dplyr.snowflakedb)
options(dplyr.jdbc.classpath = "C:\\Driver\\snowflake-jdbc-3.11.1.jar")
my_db <- src_snowflakedb(user = "USERNAME", password = "PASSWORD", account = "test",
                         host = 'test.us-east-1.snowflakecomputing.com',
                         opts = list(warehouse = "WAREHOUSE_NAME",
                                     db = 'DATABASE_NAME',
                                     schema = 'SCHEMA_NAME'))
tbl(my_db, "TABLE_NAME")
Note:
a) If your Snowflake account URL is like "https://test.snowflakecomputing.com", use the format below:
my_db <- src_snowflakedb(user = "USERNAME", password = "PASSWORD", account = "test",
                         opts = list(warehouse = "WAREHOUSE_NAME",
                                     db = 'DATABASE_NAME',
                                     schema = 'SCHEMA_NAME'))
b) If your Snowflake account URL is like "https://test.us-east-1.snowflakecomputing.com", use the format below:
my_db <- src_snowflakedb(user = "USERNAME", password = "PASSWORD", account = "test",
                         host = 'test.us-east-1.snowflakecomputing.com',
                         opts = list(warehouse = "WAREHOUSE_NAME",
                                     db = 'DATABASE_NAME',
                                     schema = 'SCHEMA_NAME'))
I had the same issue and had to fall back to connecting through the Snowflake JDBC driver directly; you can see the link here:
https://support.snowflake.net/s/article/ka131000000O5Jr/connecting-r-to-snowflake-using-the-jdbc-driver-mac-os-x
All you really need, though, is this:
result <- dbGetQuery(jdbcConnection, "select current_timestamp() as now")
print(result)
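For completeness, the jdbcConnection object used above has to be created first; a minimal RJDBC sketch could look like the following (the jar path, account host and credentials are placeholders to adapt):
library(RJDBC)
# Sketch: build the Snowflake JDBC connection used by dbGetQuery() above
jdbcDriver <- JDBC(driverClass = "net.snowflake.client.jdbc.SnowflakeDriver",
                   classPath = "drivers/snowflake-jdbc-3.0.9.jar")
jdbcConnection <- dbConnect(jdbcDriver,
                            "jdbc:snowflake://acme.snowflakecomputing.com",
                            "USERNAME", "PASSWORD")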

R: Updating an entry in MongoDB using mongolite

I have a mongo database with information that I am passing to some R scripts for analysis. I am currently using the mongolite package to pass the information from mongo to R.
I have a field in each mongo entry called checkedByR, a binary flag that indicates whether the entry has already been analysed by the R scripts. Specifically, I collect a mongo entry by its mongo ID, run the scripts on the entry, set the checkedByR field to 1, and then move on.
For completeness, I am querying the database with the following request:
library(mongolite)
mongoID <- "1234abcd1234abcd1234"
m <- mongolite::mongo(url = "mongodb://localhost:27017",
                      collection = "collection",
                      db = "database")
rawData <- m$find(query = paste0('{"_id": { "$oid" : "', mongoID, '" }}'),
                  fields = '{"_id" : 1,
                             "checkedByR" : 1,
                             "somethingToCheck" : 1}')
checkedByR <- 1
However, I am having trouble successfully updating the mongo entry with the new checkedByR field.
I realise that an update function exists in the mongolite package (see https://cran.r-project.org/web/packages/mongolite/mongolite.pdf), but I am having trouble finding relevant examples to help me complete the updating process.
Any help would be greatly appreciated.
The mongo$update() function takes a query and an update argument. You use the query to find the data you want to update, and the update to tell it which field(s) to set.
Consider this example
library(mongolite)
## create some dummy data and insert into mongodb
df <- data.frame(id = 1:10,
                 value = letters[1:10]
)
mongo <- mongo(collection = "another_test",
               db = "test",
               url = "mongodb://localhost")
mongo$insert(df)
## the 'id' of the document I want to update
mongoID <- "575556825dabbf2aea1d7cc1"
## find some data
rawData <- mongo$find(query = paste0('{"_id": { "$oid" : "', mongoID, '" }}'),
                      fields = '{"_id" : 1,
                                 "id" : 1,
                                 "value" : 1}'
)
## ...
## do whatever you want to do in R...
## ...
## use update to query on your ID, then 'set' to set the 'checkedByR' value to 1
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$set" : { "checkedByR" : 1} }'
)
## in my original data I didn't have a 'checkedByR' value, but it's added anyway
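If you want to flag many documents at once rather than one ID at a time, update() also accepts multiple (and upsert) arguments; a sketch reusing the field name from the question:
## Sketch: set checkedByR = 1 on every document that doesn't have the flag yet;
## multiple = TRUE applies the update to all matching documents
mongo$update(
  query = '{ "checkedByR" : { "$exists" : false } }',
  update = '{ "$set" : { "checkedByR" : 1 } }',
  multiple = TRUE
)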
Update
The rmongodb library is no longer on CRAN, so the code below won't work.
For more complex structures and updates, you can do things like:
library(mongolite)
library(jsonlite)
library(rmongodb) ## used to insert a non-data.frame into mongodb
## create some dummy data and insert into mongodb
lst <- list(id = 1,
            value_doc = data.frame(id = 1:5,
                                   value = letters[1:5],
                                   stringsAsFactors = FALSE),
            value_array = c(letters[6:10])
)
## using rmongodb
mongo <- mongo.create(db = "test")
coll <- "test.another_test"
mongo.insert(mongo,
             ns = coll,
             b = mongo.bson.from.list(lst)
)
mongo.destroy(mongo)
## update document with specific ID
mongoID <- "5755f646ceeb7846c87afd90"
## using mongolite
mongo <- mongo(db = "test",
               coll = "another_test",
               url = "mongodb://localhost"
)
## to add a single value to an array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : "checkedByR" } }'
)
## To add a document to the value_array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : { "checkedByR" : 1} } }'
)
## To add to a nested array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_doc.value" : "checkedByR" } }'
)
rm(mongo); gc()
See the MongoDB update documentation for further details.
