Db2 on Cloud: Problem with column in querying from R - r

I created a connection between R and Db2 on Cloud
library(RODBC)

# Connection settings for the Db2 on Cloud instance (sensitive values are masked).
dsn_driver   <- "{IBM DB2 ODBC Driver}"
dsn_database <- "bludb"  # e.g. "bludb"
dsn_hostname <- "**"
dsn_port     <- "***"    # e.g. "32733"
dsn_protocol <- "TCPIP"  # i.e. "TCPIP"
dsn_uid      <- "**"
dsn_pwd      <- "**"
dsn_security <- "ssl"

# Assemble the ODBC connection string as ";"-separated KEY=value pairs.
conn_path <- paste0(
  "DRIVER=", dsn_driver,
  ";DATABASE=", dsn_database,
  ";HOSTNAME=", dsn_hostname,
  ";PORT=", dsn_port,
  ";PROTOCOL=", dsn_protocol,
  ";UID=", dsn_uid,
  ";PWD=", dsn_pwd,
  ";SECURITY=", dsn_security
)

# Open the channel and show it.
conn <- odbcDriverConnect(conn_path)
conn
Then I created the table
myschema <- "**" #
tables <- c("Annual_Crop")

# For every listed table: drop it when it already exists in `myschema`,
# and report what happened either way.
for (table in tables) {
  out <- sqlTables(conn, tableType = "TABLE", schema = myschema,
                   tableName = table)

  # Nothing to drop: report and move on to the next table.
  if (nrow(out) == 0) {
    cat("Table: ", myschema, ".", table, " does not exist\n")
    next
  }

  # errors = FALSE makes sqlDrop() signal failure through its return value.
  err <- sqlDrop(conn, paste0(myschema, ".", table), errors = FALSE)
  if (err == -1) {
    cat("An error has occurred.\n")
    err.msg <- odbcGetErrMsg(conn)
    for (error in err.msg) {
      cat(error, "\n")
    }
  } else {
    cat("Table: ", myschema, ".", table, " was dropped\n")
  }
}
# DDL for the Annual_Crop table; CD_ID is the primary key.
create_stmt <- "CREATE TABLE Annual_Crop(
CD_ID char (6) NOT NULL,
YEAR CHAR (20),
CROP_TYPE varchar (50),
GEO varchar (50),
SEEDED_AREA CHAR (50) ,
HARVESTED_AREA CHAR (50),
PRODUCTION CHAR (50),
AVG_YIELD CHAR (50),
PRIMARY KEY (CD_ID))"

# With errors = FALSE a failure is reported as the return value -1
# instead of an error message.
df1 <- sqlQuery(conn, create_stmt, errors = FALSE)

if (df1 != -1) {
  cat("Table was createdd successfuly.\n")
} else {
  cat("An error has occured.\n")
  msg <- odbcGetErrMsg(conn)
  print(msg)
}
I loaded the dataset from a file into the table
# Read the CSV file into a data frame, then append its rows to the
# already-created Annual_Crop table.
anual_cropdf <- read.csv("/resources/labs/MYDATA/data1.csv")
sqlSave(
  conn, anual_cropdf, "Annual_Crop",
  append = TRUE,
  rownames = FALSE,
  colnames = FALSE,
  verbose = FALSE,
  fast = FALSE
)
Then I tried to fetch from the table and it works
# Read the whole table back into a data frame and show its last rows.
FARMDB <- sqlFetch(conn, "Annual_Crop")
tail(FARMDB)
Finally, when I tried to run a query, it did not work. The result contained only the column names and no rows (a 0 x 8 data frame).
# Build the SELECT statement as a single string and run it.
# NOTE(review): GEO is declared as varchar (50) in the CREATE TABLE statement,
# while the literal 41600 below is written without quotes - confirm whether the
# comparison should use a quoted string value instead.
info <- paste('select * from Annual_Crop
where Geo = 41600')
query <- sqlQuery(conn,info,believeNRows = FALSE)
# Show the result of the query.
query
Why?

Based on your table schema, the data type for Geo is VARCHAR. Have you tried a query like this?
select * from Annual_Crop where Geo = 'Alberta'
or
select * from Annual_Crop where Geo = '41600'
Varchar / string needs to use single quotes for the value.

Related

How to query LOB field in Oracle using R

How can I query a LOB field from Oracle using ROracle library?
library(ROracle)

drv <- dbDriver("Oracle")

# Connect descriptor assembled from host, PORT, GLOBAL_NAME and SID,
# which must already be defined.
connect.string <- paste0(
  "(DESCRIPTION=",
  "(ADDRESS=(PROTOCOL=TCP)(HOST=", host, ")(PORT=", PORT, "))",
  "(CONNECT_DATA=(GLOBAL_NAME=", GLOBAL_NAME, ")(SID=", SID, ")))"
)

con <- ROracle::dbConnect(
  drv,
  username = username,
  password = password,
  dbname = connect.string
)
I am a newbie and tried this:
# Attempt 1: cast the LOB column to RAW and base64-encode it in SQL.
# The error output reproduced below reports ORA-00906.
rs <- dbSendQuery(con, "SELECT UTL_ENCODE.BASE64_ENCODE(CAST(LOB_FIELD AS RAW)) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
# Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
ORA-00906: missing left parenthesis
# Attempt 2: call dbms_lob.READ directly in the SELECT list.
# The error output reproduced below reports ORA-00904.
rs <- dbSendQuery(con, "SELECT dbms_lob.READ(LOB_FIELD) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
#Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
ORA-00904: "DBMS_LOB"."READ": invalid identifier
Try a simple query:
select LOB_FIELD from tab where OTHER_FIELD = 'something'
where LOB_FIELD is the CLOB column
# Select the CLOB column directly; dbGetQuery() sends the query and returns
# the result as a data frame. `con` is the connection opened with
# ROracle::dbConnect() in the question.
df <- dbGetQuery(con, "select LOB_FIELD from tab where OTHER_FIELD = 'something'")
# Number of characters in the fetched LOB value.
nchar(df$LOB_FIELD)
[1] 68000

Insert/Update R data.table into PostgreSQL table

I have a PostgreSQL database set up with a table and columns already defined. The primary key for the table is a combination of (Id, datetime) column. I need to periodically INSERT data for different Ids from R data.table into the database. However, if data for a particular (Id, datetime) combination already exists it should be UPDATED (overwritten). How can I do this using RPostgres or RPostgreSQL packages?
When I try to insert a data.table where some (Id, datetime) rows already exist I get an error saying the primary key constraint is violated:
dbWriteTable(con, table, dt, append = TRUE, row.names = FALSE)
Error in connection_copy_data(conn@ptr, sql, value) :
COPY returned error: ERROR: duplicate key value violates unique constraint "interval_data_pkey"
DETAIL: Key (id, dttm_utc)=(a0za000000CSdLoAAL, 2018-10-01 05:15:00+00) already exists.
CONTEXT: COPY interval_data, line 1
You can use my pg package that has upsert functionality, or just grab code for upsert from there: https://github.com/jangorecki/pg/blob/master/R/pg.R#L249
It is basically what others said in comments. Write data into temp table and then insert into destination table using on conflict clause.
# Build and send an "INSERT INTO <name> (...) SELECT ... FROM <stage_name> ON CONFLICT ..."
# statement that copies the staging table's rows into the destination table.
#
# stage_name, name: indexed as [1L] and [2L] to form "<first>.<second>" table references.
# conflict_by:      optional column names; when supplied they are placed in parentheses
#                   in front of `on_conflict`.
# on_conflict:      single string appended after "ON CONFLICT" (default "DO NOTHING").
# techstamp:        when TRUE, run_id and r_timestamp are written with fresh values
#                   (get_run_id() and the current Sys.time()).
# conn, .log:       forwarded to pgSendQuery().
# Returns whatever pgSendQuery() returns.
pgSendUpsert = function(stage_name, name, conflict_by, on_conflict = "DO NOTHING", techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log",TRUE)){
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict)==1L)

  # Strip the technical columns from the staging column list so that fresh
  # values (if any) are used instead of the staged ones.
  tech_cols = c("run_id","r_timestamp")
  data_cols = setdiff(pgListFields(stage_name), tech_cols)
  target_cols = if (techstamp) c(data_cols, tech_cols) else data_cols

  # Assemble the statement clause by clause.
  insert_clause = sprintf("INSERT INTO %s.%s (%s)", name[1L], name[2L], paste(target_cols, collapse=", "))
  select_clause = sprintf("SELECT %s", paste(data_cols, collapse=", "))
  if (techstamp) {
    select_clause = sprintf("%s, %s::INTEGER run_id, '%s'::TIMESTAMPTZ r_timestamp", select_clause, get_run_id(), format(Sys.time(), "%Y-%m-%d %H:%M:%OS"))
  }
  from_clause = sprintf("FROM %s.%s", stage_name[1L], stage_name[2L])

  conflict_action = if (missing(conflict_by)) {
    on_conflict
  } else {
    paste(paste0("(", paste(conflict_by, collapse=", "), ")"), on_conflict)
  }
  conflict_clause = paste("ON CONFLICT", conflict_action)

  sql = paste0(paste(insert_clause, select_clause, from_clause, conflict_clause), ";")
  pgSendQuery(sql, conn = conn, .log = .log)
}
# Upsert `value` into table `name` by way of a staging table: the staging table
# is truncated if it already exists, `value` is written into it, and its rows
# are then inserted into `name` with the given ON CONFLICT handling.
#
# stage_name: optional staging table. When omitted, the staging table is the
#             destination with "tmp_" prefixed to its second name component,
#             and that table is dropped when the function exits.
# Remaining arguments are forwarded to pgWriteTable() / pgSendUpsert().
# Returns whatever pgSendUpsert() returns.
#' @rdname pg
pgUpsertTable = function(name, value, conflict_by, on_conflict = "DO NOTHING", stage_name, techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log",TRUE)){
  stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict)==1L)
  name = schema_table(name)

  # Only a staging table derived here (no stage_name supplied) is dropped on exit.
  drop_stage = missing(stage_name)
  stage_name = if (drop_stage) {
    replace(name, 2L, paste0("tmp_", name[2L]))
  } else {
    schema_table(stage_name)
  }

  # Start from an empty staging table, then load the new rows into it.
  if (pgExistsTable(stage_name)) {
    pgTruncateTable(name = stage_name, conn = conn, .log = .log)
  }
  pgWriteTable(name = stage_name, value = value, techstamp = techstamp, conn = conn, .log = .log)
  if (drop_stage) {
    on.exit(pgDropTable(stage_name, conn = conn, .log = .log))
  }

  pgSendUpsert(stage_name = stage_name, name = name, conflict_by = conflict_by, on_conflict = on_conflict, techstamp = techstamp, conn = conn, .log = .log)
}

Truncated updated string with R DBI package

I need to update a wide table on an SQL SERVER from R. So the package DBI seems to be very useful for that.
The problem is that the R data.frame contains strings of more than 3000 characters and when I use the DBI dbSendQuery function, all strings are truncated to 256 characters.
Here could be a code example :
con <- odbc::dbConnect(
  drv = odbc::odbc(),
  dsn = '***',
  UID = '***',
  PWD = '***'
)

# One-row data frame: a 300-character string and the id of the row to update.
df <- data.frame(
  TEST = strrep("A", 300),
  TEST_ID = 1068858
)
df$TEST <- as.character(df$TEST)

# Parameterised UPDATE; the data frame columns are bound to the two
# placeholders in order.
query <- 'UPDATE MY_TABLE SET "TEST"=? WHERE TEST_ID=?'
update <- DBI::dbSendQuery(con, query)
DBI::dbBind(update, df)
DBI::dbClearResult(update)

odbc::dbDisconnect(con)
Then the following request returns 256 instead of 300:
SELECT LEN(TEST) FROM MY_TABLE WHERE TEST_ID = 1068858
NB : TEST is of type (varchar(max), NULL) and already contains strings of more than 256 chars.
Thanks in advance for any advice
In the end, I chose to get rid of the sophisticated functions. A solution was to write the table to a .csv file and bulk insert it into the database. Here is an example using the RODBC package:
# Export the data frame as a ';'-separated text file without header, row names
# or quoting; missing values are written as the literal text NULL.
write.table(x = df,
            file = "/path/DBI_error_test.csv",
            sep = ";",
            row.names = FALSE, col.names = FALSE,
            na = "NULL",
            quote = FALSE)

# T-SQL batch: create a temporary table, BULK INSERT the exported file into it,
# update MY_TABLE from it by TEST_ID, then drop the temporary table.
# NOTE(review): the path given to BULK INSERT differs from the path written by
# write.table() above - confirm both refer to the same file as seen by the
# database server.
Query <- "CREATE TABLE #MY_TABLE_TMP (
TEST varchar(max),
TEST_ID int
);
BULK INSERT #MY_TABLE_TMP
FROM 'C:\\DBI_error_test.csv'
WITH
(
FIELDTERMINATOR = ';',
ROWTERMINATOR = '\n',
BATCHSIZE = 500000,
CHECK_CONSTRAINTS
)
UPDATE R
SET R.TEST = #MY_TABLE_TMP.TEST
FROM MY_TABLE AS R
INNER JOIN #MY_TABLE_TMP ON #MY_TABLE_TMP.TEST_ID = R.TEST_ID;
DROP TABLE #MY_TABLE_TMP;
"

channel <- RODBC::odbcConnect(dsn = .DB_DSN_NAME,
                              uid = .DB_UID,
                              pwd = .DB_PWD)
# Send the batch built above. R is case-sensitive, so the object must be
# referenced as `Query`, the name it was assigned to.
RODBC::sqlQuery(channel = channel, query = Query, believeNRows = FALSE)
RODBC::odbcClose(channel = channel)

R : Updating an entry in mongodb using mongolite

I have a mongo database with information that I am passing to some R scripts for analysis. I am currently using the mongolite package to pass the information from mongo to R.
I have a field in each mongo entry called checkedByR, which is a binary that indicates whether the entry has been analysed by the R scripts already. Specifically, I am collecting a mongo entry by its respective mongo ID, running the scripts on the entry, assigning the checkedByR field with a 1, and then moving on.
For completeness, I am querying the database with the following request:
library(mongolite)

mongoID <- "1234abcd1234abcd1234"

# Handle on the collection holding the entries to analyse.
m <- mongolite::mongo(
  collection = "collection",
  db = "database",
  url = "mongodb://localhost:27017"
)

# Look the entry up by its ObjectId and return only the listed fields.
id_query <- paste0('{"_id": { "$oid" : "', mongoID, '" }}')
rawData <- m$find(
  query = id_query,
  fields = '{"_id" : 1,
"checkedByR" : 1,
"somethingToCheck" : 1}'
)

# Flag value to be written back once the entry has been analysed.
checkedByR <- 1
However, I am having trouble successfully updating the mongo entry with the new checkedByR field.
I realise that an update function exists in the mongolite package (please consider : https://cran.r-project.org/web/packages/mongolite/mongolite.pdf), but I am having trouble gathering relevant examples to help me complete the updating process.
Any help would be greatly appreciated.
The mongo$update() function takes a query and an update argument. You use the query to find the data you want to update, and the update to tell it which field to update.
Consider this example
library(mongolite)

## Dummy data: ten rows inserted as ten documents
df <- data.frame(
  id = 1:10,
  value = letters[1:10]
)
mongo <- mongo(
  collection = "another_test",
  db = "test",
  url = "mongodb://localhost"
)
mongo$insert(df)

## ObjectId (as a string) of the document to update
mongoID <- "575556825dabbf2aea1d7cc1"

## Fetch that document, returning only the listed fields
find_query <- paste0('{"_id": { "$oid" : "', mongoID, '" }}')
rawData <- mongo$find(
  query = find_query,
  fields = '{"_id" : 1,
"id" : 1,
"value" : 1}'
)

## ...
## do whatever you want to do in R...
## ...

## Match on the same ObjectId and use "$set" to write checkedByR = 1
update_query <- paste0('{"_id": { "$oid" : "', mongoID, '" } }')
mongo$update(
  query = update_query,
  update = '{ "$set" : { "checkedByR" : 1} }'
)
## in my original data I didn't have a 'checkedByR' value, but it's added anyway
Update
the rmongodb library is no longer on CRAN, so the below code won't work
And for more complex structures & updates you can do things like
library(mongolite)
library(jsonlite)
library(rmongodb) ## used to insert a non-data.frame into mongodb

## create some dummy data: one document holding a scalar, a nested
## data.frame and a character array
lst <- list(id = 1,
            value_doc = data.frame(id = 1:5,
                                   value = letters[1:5],
                                   stringsAsFactors = FALSE),
            value_array = c(letters[6:10])
)

## using rmongodb: insert the list as a single document, then release the
## rmongodb connection
mongo <- mongo.create(db = "test")
coll <- "test.another_test"
mongo.insert(mongo,
             ns = coll,
             b = mongo.bson.from.list(lst)
)
mongo.destroy(mongo)

## update document with specific ID
mongoID <- "5755f646ceeb7846c87afd90"

## using mongolite
## The argument is spelled out as `collection` rather than relying on partial
## matching of an abbreviated name.
mongo <- mongo(db = "test",
               collection = "another_test",
               url = "mongodb://localhost"
)

## to add a single value to an array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : "checkedByR" } }'
)

## To add a document to the value_array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_array" : { "checkedByR" : 1} } }'
)

## To add to a nested array
mongo$update(
  query = paste0('{"_id": { "$oid" : "', mongoID, '" } }'),
  update = '{ "$addToSet" : { "value_doc.value" : "checkedByR" } }'
)

## remove the connection object and trigger garbage collection
rm(mongo); gc()
See the MongoDB update documentation for further details.

Error in switch(code, ic9 = { : EXPR must be a length 1 vector

Help me resolve this error:
Error in switch(code, ic9 = { : EXPR must be a length 1 vector
When I switch from one radio button to another, the app should display the result associated with the selected radio button. That's why I've used a switch statement, but it raises the error above.
CODE:
library(shiny)
library(RPostgreSQL)
# Shiny server: looks up ICD code mappings in PostgreSQL and renders them as a table.
shinyServer(function(input, output){
# Reactive that reads input$code, stores a value in the global `code` via `<<-`,
# and evaluates to input$icd (its last expression).
inputCode <- reactive({
input$code
# The switch branches are the bare symbols ic9 / ic10 (not quoted strings);
# no definition of them is visible in this snippet.
code <<- {switch(input$code,
i9= ic9,
i10= ic10)}
input$icd
})
# Opens a PostgreSQL connection, picks one of two query prefixes based on the
# global `code`, appends input$icd, runs the statement and returns the fetched rows.
# The parameters `inputCode` and `out` are not read inside the body.
dbConn <- function(inputCode,out){
con <- dbConnect(dbDriver("PostgreSQL"), user="postgres", password="xyz", dbname="ICD_9_10_Mapping", host = "localhost", port = 5432)
# Close the connection when the function exits, whatever happens below.
on.exit(dbDisconnect(con), add=TRUE)
query1 <- "SELECT icd_10_codes, description, flags FROM icd_9_10_dia WHERE icd_9_codes ='"
query2 <- "SELECT icd_9_codes, description, flags FROM icd_10_9_dia WHERE icd_10_codes ='"
# Dispatches on the global `code`, not on a function argument.
# NOTE(review): presumably the reported "EXPR must be a length 1 vector" error is
# raised here when `code` is not a single value (e.g. NULL) - confirm.
switch(code,
ic9 = {X <<- query1},
ic10 = {X <<- query2})
invert <- "'"
# NOTE(review): the SQL text is built by concatenating input$icd directly into
# the statement; consider a parameterised query for user-supplied values.
stmt <- paste(X,toString(input$icd, width = 10),invert, sep = "")
res <- dbSendQuery(con, statement = stmt)
# `<<-` assigns `out` outside this function; the fetched rows are also the
# function's value.
out <<- fetch(res, n = -1)
}
# The reactive object itself is passed here (it is not called as inputCode()).
output$out1 <- renderTable(dbConn(inputCode,out))})
To simplify this a bit just use your input$code directly with if statements rather than switch. For example:
# Choose the query prefix from the selected code system:
# 'ic9' looks up ICD-10 codes by ICD-9 code, anything else the reverse.
query <- if (input$code == 'ic9') {
  "SELECT icd_10_codes, description, flags FROM icd_9_10_dia WHERE icd_9_codes ='"
} else {
  "SELECT icd_9_codes, description, flags FROM icd_10_9_dia WHERE icd_10_codes ='"
}
You can use input$code in your function or put it in your call to dbConn (i.e., dbConn(input$code) ). I don't see where 'out' is coming from. It is defined in dbConn but you are using it in a call to dbConn which will not work. Also, the call to the reactive should be inputCode() but with the if statements suggested above you wouldn't need it.

Resources