Truncated updated string with R DBI package

I need to update a wide table on a SQL Server database from R, and the DBI package seems well suited for that.
The problem is that the R data.frame contains strings of more than 3,000 characters, and when I use the DBI dbSendQuery function, all strings are truncated to 256 characters.
Here is a code example:
con <- odbc::dbConnect(drv = odbc::odbc(),
                       dsn = '***',
                       UID = '***',
                       PWD = '***')
# A single row with a 300-character string
df <- data.frame(TEST = paste(rep("A", 300), collapse = ""),
                 TEST_ID = 1068858)
df$TEST <- as.character(df$TEST)  # ensure character, without needing the magrittr pipe
query <- 'UPDATE MY_TABLE SET "TEST" = ? WHERE TEST_ID = ?'
update <- DBI::dbSendQuery(con, query)
DBI::dbBind(update, df)
DBI::dbClearResult(update)
odbc::dbDisconnect(con)
Then the following query returns 256 instead of 300:
SELECT LEN(TEST) FROM MY_TABLE WHERE TEST_ID = 1068858
NB: TEST is of type (varchar(max), NULL) and already contains strings of more than 256 characters.
Thanks in advance for any advice.
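(Note: one variant sometimes suggested for varchar(max) truncation, which may or may not help depending on the ODBC driver, is to cast the bound parameter server-side so the driver does not have to infer the target length. A minimal sketch, reusing df and a fresh con as in the example above:)
# Hedged sketch: explicit server-side cast of the bound parameter.
# Whether this avoids the 256-character truncation depends on the driver.
query <- 'UPDATE MY_TABLE SET "TEST" = CAST(? AS VARCHAR(MAX)) WHERE TEST_ID = ?'
update <- DBI::dbSendQuery(con, query)
DBI::dbBind(update, df)
DBI::dbClearResult(update)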

In the end, I chose to drop the more sophisticated functions. A workable solution was to write the table to a .csv file and bulk insert it into the database. Here is an example using the RODBC package:
# Write the data without quotes or headers; the separator must match the
# FIELDTERMINATOR used in the BULK INSERT below (assumes no embedded ';').
write.table(x = df,
            file = "/path/DBI_error_test.csv",
            sep = ";",
            row.names = FALSE, col.names = FALSE,
            na = "NULL",
            quote = FALSE)
# NB: the FROM path in BULK INSERT is resolved on the SQL Server machine,
# so the file must be readable from the server, not from the R client.
query <- paste("CREATE TABLE #MY_TABLE_TMP (
  TEST varchar(max),
  TEST_ID int
);
BULK INSERT #MY_TABLE_TMP
FROM 'C:\\DBI_error_test.csv'
WITH
(
  FIELDTERMINATOR = ';',
  ROWTERMINATOR = '\n',
  BATCHSIZE = 500000,
  CHECK_CONSTRAINTS
);
UPDATE R
SET R.TEST = #MY_TABLE_TMP.TEST
FROM MY_TABLE AS R
INNER JOIN #MY_TABLE_TMP ON #MY_TABLE_TMP.TEST_ID = R.TEST_ID;
DROP TABLE #MY_TABLE_TMP;
")
channel <- RODBC::odbcConnect(dsn = .DB_DSN_NAME,
                              uid = .DB_UID,
                              pwd = .DB_PWD)
RODBC::sqlQuery(channel = channel, query = query, believeNRows = FALSE)
RODBC::odbcClose(channel = channel)

Related

How to query LOB field in Oracle using R

How can I query a LOB field from Oracle using the ROracle library?
library(ROracle)
drv <- dbDriver("Oracle")
connect.string <- paste(
  "(DESCRIPTION=",
  "(ADDRESS=(PROTOCOL=TCP)(HOST=", host, ")(PORT=", PORT, "))",
  "(CONNECT_DATA=(GLOBAL_NAME=", GLOBAL_NAME, ")(SID=", SID, ")))", sep = "")
con <- ROracle::dbConnect(drv, username = username, password = password, dbname = connect.string)
I am a newbie and tried this:
rs <- dbSendQuery(con, "SELECT UTL_ENCODE.BASE64_ENCODE(CAST(LOB_FIELD AS RAW)) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
# Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
#   ORA-00906: missing left parenthesis
rs <- dbSendQuery(con, "SELECT dbms_lob.READ(LOB_FIELD) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
# Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
#   ORA-00904: "DBMS_LOB"."READ": invalid identifier
Try it simply with the query
select LOB_FIELD from tab where OTHER_FIELD = 'something'
where LOB_FIELD is the CLOB column:
df <- dbGetQuery(con, "select LOB_FIELD from tab where OTHER_FIELD = 'something'")
nchar(df$LOB_FIELD)
# [1] 68000

How to include / exclude filter statement in R httr query for Localytics

I can successfully query data from Localytics using R, as in the following example:
library(httr)

r <- POST(url = "https://api.localytics.com/v1/query",
          body = list(app_id = <APP_ID>,
                      metrics = c("occurrences", "users"),
                      dimensions = c("a:URI"),
                      conditions = list(day = c("between", "2020-02-11", "2020-03-12"),
                                        event_name = "Content Viewed",
                                        "a:Item URI" = "testing")),
          encode = "json",
          authenticate(Key, Secret),
          accept("application/json"),
          content_type("application/json"))
stop_for_status(r)
But what I would like to do is create a function so I can do this quickly and not have to copy/paste data.
The issue is the line "a:Item URI" = "testing", which filters all searches to those whose Item URI equals "testing". Sometimes I don't want that filter at all, so I just remove the line entirely.
When I wrote my function, I tried something like the following:
get_localytics <- function(appID, metrics, dimensions, from = Sys.Date() - 30,
                           to = Sys.Date(), eventName = "Content Viewed",
                           Key, Secret, filterDim = NULL, filterCriteria = NULL) {
  r <- httr::POST(url = "https://api.localytics.com/v1/query",
                  body = list(app_id = appID,
                              metrics = metrics,
                              dimensions = dimensions,
                              conditions = list(day = c("between", as.character(from), as.character(to)),
                                                event_name = eventName,
                                                filterDim = filterCriteria)),
                  encode = "json",
                  httr::authenticate(Key, Secret),
                  httr::accept("application/json"),
                  httr::content_type("application/json"))
  httr::stop_for_status(r)
  result <- paste(rawToChar(r$content), collapse = "")
  document <- jsonlite::fromJSON(result)
  df <- document$results
  return(df)
}
But my attempt at adding filterDim and filterCriteria only produces the error Unprocessable Entity. (Keep in mind there are lots of variables I can filter by, not just "a:Item URI", so I need to be able to manipulate that as well.)
How can I include a statement, where if I need to filter, I can incorporate that line, but if I don't need to filter, that line isn't included?
conditions is just a list, so you can conditionally add elements to it. Here we use an if statement to test whether the values are passed and, if so, add them in.
get_localytics <- function(appID, metrics, dimensions, from = Sys.Date() - 30,
                           to = Sys.Date(), eventName = "Content Viewed",
                           Key, Secret, filterDim = NULL, filterCriteria = NULL) {
  # Build the base conditions, then add the optional filter only when
  # both the dimension name and its criteria are supplied.
  conditions <- list(day = c("between", as.character(from), as.character(to)),
                     event_name = eventName)
  if (!is.null(filterDim) && !is.null(filterCriteria)) {
    conditions[[filterDim]] <- filterCriteria
  }
  r <- httr::POST(url = "https://api.localytics.com/v1/query",
                  body = list(app_id = appID,
                              metrics = metrics,
                              dimensions = dimensions,
                              conditions = conditions),
                  encode = "json",
                  httr::authenticate(Key, Secret),
                  httr::accept("application/json"),
                  httr::content_type("application/json"))
  httr::stop_for_status(r)
  result <- paste(rawToChar(r$content), collapse = "")
  document <- jsonlite::fromJSON(result)
  df <- document$results
  return(df)
}
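For illustration, calling the function with and without the optional filter might look like this (the app ID, key, and secret are placeholder values):
# Hypothetical usage; replace the placeholders with real credentials.
df_filtered <- get_localytics(appID = "<APP_ID>", metrics = c("occurrences", "users"),
                              dimensions = c("a:URI"), Key = "<KEY>", Secret = "<SECRET>",
                              filterDim = "a:Item URI", filterCriteria = "testing")

# Without filterDim/filterCriteria, no extra condition is added.
df_all <- get_localytics(appID = "<APP_ID>", metrics = c("occurrences", "users"),
                         dimensions = c("a:URI"), Key = "<KEY>", Secret = "<SECRET>")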

Insert/Update R data.table into PostgreSQL table

I have a PostgreSQL database set up with a table and columns already defined. The primary key for the table is a combination of the (Id, datetime) columns. I need to periodically INSERT data for different Ids from an R data.table into the database. However, if data for a particular (Id, datetime) combination already exists, it should be UPDATED (overwritten). How can I do this using the RPostgres or RPostgreSQL packages?
When I try to insert a data.table where some (Id, datetime) rows already exist I get an error saying the primary key constraint is violated:
dbWriteTable(con, table, dt, append = TRUE, row.names = FALSE)
# Error in connection_copy_data(conn@ptr, sql, value) :
#   COPY returned error: ERROR: duplicate key value violates unique constraint "interval_data_pkey"
#   DETAIL: Key (id, dttm_utc)=(a0za000000CSdLoAAL, 2018-10-01 05:15:00+00) already exists.
#   CONTEXT: COPY interval_data, line 1
You can use my pg package, which has upsert functionality, or just grab the upsert code from there: https://github.com/jangorecki/pg/blob/master/R/pg.R#L249
It is basically what others said in the comments: write the data into a temp table, then insert into the destination table using an ON CONFLICT clause.
pgSendUpsert = function(stage_name, name, conflict_by, on_conflict = "DO NOTHING", techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log", TRUE)) {
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict) == 1L)
  cols = pgListFields(stage_name)
  cols = setdiff(cols, c("run_id", "r_timestamp")) # remove techstamp columns to get a clean column list; fresh ones will be used, if any
  # sql
  insert_into = sprintf("INSERT INTO %s.%s (%s)", name[1L], name[2L], paste(if (techstamp) c(cols, c("run_id", "r_timestamp")) else cols, collapse = ", "))
  select = sprintf("SELECT %s", paste(cols, collapse = ", "))
  if (techstamp) select = sprintf("%s, %s::INTEGER run_id, '%s'::TIMESTAMPTZ r_timestamp", select, get_run_id(), format(Sys.time(), "%Y-%m-%d %H:%M:%OS"))
  from = sprintf("FROM %s.%s", stage_name[1L], stage_name[2L])
  if (!missing(conflict_by)) on_conflict = paste(paste0("(", paste(conflict_by, collapse = ", "), ")"), on_conflict)
  on_conflict = paste("ON CONFLICT", on_conflict)
  sql = paste0(paste(insert_into, select, from, on_conflict), ";")
  pgSendQuery(sql, conn = conn, .log = .log)
}
#' @rdname pg
pgUpsertTable = function(name, value, conflict_by, on_conflict = "DO NOTHING", stage_name, techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log", TRUE)) {
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict) == 1L)
  name = schema_table(name)
  if (!missing(stage_name)) {
    stage_name = schema_table(stage_name)
    drop_stage = FALSE
  } else {
    stage_name = name
    stage_name[2L] = paste("tmp", stage_name[2L], sep = "_")
    drop_stage = TRUE
  }
  if (pgExistsTable(stage_name)) pgTruncateTable(name = stage_name, conn = conn, .log = .log)
  pgWriteTable(name = stage_name, value = value, techstamp = techstamp, conn = conn, .log = .log)
  on.exit(if (drop_stage) pgDropTable(stage_name, conn = conn, .log = .log))
  pgSendUpsert(stage_name = stage_name, name = name, conflict_by = conflict_by, on_conflict = on_conflict, techstamp = techstamp, conn = conn, .log = .log)
}
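For readers who want just the core pattern without the pg package machinery, here is a minimal sketch using plain DBI with RPostgres; the table and key columns (interval_data, id, dttm_utc) follow the question's error message, while the value column and the function name are assumptions for illustration:
library(DBI)

# Minimal temp-table upsert sketch, assuming a destination table
# interval_data(id, dttm_utc, value) with primary key (id, dttm_utc).
upsert_interval_data <- function(con, dt) {
  # 1. Stage the rows in a session-local temporary table.
  dbWriteTable(con, "interval_data_stage", dt, temporary = TRUE, overwrite = TRUE)
  # 2. Insert from the staging table, overwriting the payload on key conflict.
  dbExecute(con, "
    INSERT INTO interval_data (id, dttm_utc, value)
    SELECT id, dttm_utc, value FROM interval_data_stage
    ON CONFLICT (id, dttm_utc) DO UPDATE SET value = EXCLUDED.value;
  ")
}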

Query Oracle DSN in RStudio

I am using RStudio with the RODBC package and the following code:
require(RODBC)
channel<-odbcConnect(dsn = "USA", uid = "AA", pwd = "***" )
odbcGetInfo(channel)
This returns all the details, but when I try to run a SQL query
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1)
it returns an error:
Error in odbcFetchRows(channel, max = max, buffsize = buffsize, nullstring = nullstring, :
  negative length vectors are not allowed
Querying the table works if I open Microsoft Access (External Data > ODBC Database > link to the data source), click Machine Data Source, and select the source, which then allows me to run a select query.
I have also tried using
debug(odbcFetchRows)
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1)
This returns
function (channel, max = 0, buffsize = 1000, nullstring = NA_character_,
          believeNRows = TRUE)
{
  if (!odbcValidChannel(channel))
    stop("first argument is not an open RODBC channel")
  .Call(C_RODBCFetchRows, attr(channel, "handle_ptr"), max,
        buffsize, as.character(nullstring), believeNRows)
}
I got this working by using
test <- sqlQuery(channel, "select * from cnty", rows_at_time = 1, believeNRows = FALSE)
Setting believeNRows = FALSE tells RODBC not to trust the row count reported by the ODBC driver; some Oracle drivers report it incorrectly, which is what produces the negative-length-vector error.

R : Updating an entry in mongodb using mongolite

I have a mongo database with information that I am passing to some R scripts for analysis. I am currently using the mongolite package to pass the information from mongo to R.
I have a field in each mongo entry called checkedByR, a binary flag that indicates whether the entry has already been analysed by the R scripts. Specifically, I collect a mongo entry by its mongo ID, run the scripts on the entry, set the checkedByR field to 1, and then move on.
For completeness, I am querying the database with the following request:
library(mongolite)
mongoID <- "1234abcd1234abcd1234"
m <- mongolite::mongo(url = "mongodb://localhost:27017",
                      collection = "collection",
                      db = "database")
rawData <- m$find(query = paste0('{"_id": { "$oid" : "', mongoID, '" }}'),
                  fields = '{"_id" : 1,
                             "checkedByR" : 1,
                             "somethingToCheck" : 1}')
checkedByR <- 1
However, I am having trouble successfully updating the mongo entry with the new checkedByR field.
I realise that an update function exists in the mongolite package (see https://cran.r-project.org/web/packages/mongolite/mongolite.pdf), but I am having trouble finding relevant examples to help me complete the update.
Any help would be greatly appreciated.
The mongo$update() function takes a query and an update argument. You use the query to find the data you want to update, and the update to tell it which fields to change.
Consider this example
library(mongolite)

## create some dummy data and insert into mongodb
df <- data.frame(id = 1:10,
                 value = letters[1:10])
mongo <- mongo(collection = "another_test",
               db = "test",
               url = "mongodb://localhost")
mongo$insert(df)

## the '_id' of the document I want to update
mongoID <- "575556825dabbf2aea1d7cc1"

## find some data
rawData <- mongo$find(query = paste0('{"_id": { "$oid" : "', mongoID, '" }}'),
                      fields = '{"_id" : 1,
                                 "id" : 1,
                                 "value" : 1}')

## ...
## do whatever you want to do in R...
## ...

## use update to query on your ID, then '$set' to set the 'checkedByR' value to 1
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$set" : { "checkedByR" : 1 } }'
)
## in my original data I didn't have a 'checkedByR' value, but it's added anyway
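To confirm the write, you can simply fetch the field back; a quick check might look like this:
## verify that checkedByR is now set on the document
mongo$find(query = paste0('{"_id": { "$oid" : "', mongoID, '" }}'),
           fields = '{"checkedByR" : 1}')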
Update
The rmongodb library is no longer on CRAN, so the code below will no longer work as-is.
For more complex structures and updates you can do things like:
library(mongolite)
library(jsonlite)
library(rmongodb)  ## used to insert a non-data.frame into mongodb

## create some dummy data and insert into mongodb
lst <- list(id = 1,
            value_doc = data.frame(id = 1:5,
                                   value = letters[1:5],
                                   stringsAsFactors = FALSE),
            value_array = c(letters[6:10]))

## using rmongodb
mongo <- mongo.create(db = "test")
coll <- "test.another_test"
mongo.insert(mongo,
             ns = coll,
             b = mongo.bson.from.list(lst))
mongo.destroy(mongo)

## update document with specific ID
mongoID <- "5755f646ceeb7846c87afd90"

## using mongolite
mongo <- mongo(db = "test",
               collection = "another_test",
               url = "mongodb://localhost")

## to add a single value to an array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : "checkedByR" } }'
)

## to add a document to the value_array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : { "checkedByR" : 1 } } }'
)

## to add to a nested array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_doc.value" : "checkedByR" } }'
)

rm(mongo); gc()
See the MongoDB update documentation for further details.
