How to query a LOB field in Oracle using R

How can I query a LOB field from Oracle using the ROracle library?
library(ROracle)
drv <- dbDriver("Oracle")
connect.string <- paste(
"(DESCRIPTION=",
"(ADDRESS=(PROTOCOL=TCP)(HOST=", host, ")(PORT=", PORT, "))",
"(CONNECT_DATA=(GLOBAL_NAME=",GLOBAL_NAME,")(SID=", SID, ")))", sep = "")
con <- ROracle::dbConnect(drv, username = username, password = password, dbname = connect.string)
I am a newbie and tried this:
rs <- dbSendQuery(con, "SELECT UTL_ENCODE.BASE64_ENCODE(CAST(LOB_FIELD AS RAW)) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
# Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
#   ORA-00906: missing left parenthesis
rs <- dbSendQuery(con, "SELECT dbms_lob.READ(LOB_FIELD) FROM TABLEDATA WHERE OTHER_FIELD = 'something'")
data <- ROracle::fetch(rs)
# Error in .oci.SendQuery(conn, statement, data = data, prefetch = prefetch, :
#   ORA-00904: "DBMS_LOB"."READ": invalid identifier

Try simply selecting the column with a plain query:
select LOB_FIELD from tab where OTHER_FIELD = 'something'
where LOB_FIELD is the CLOB column; ROracle returns a CLOB as an ordinary character value:
df <- dbGetQuery(con, "select LOB_FIELD from tab where OTHER_FIELD = 'something'")
nchar(df$LOB_FIELD)
[1] 68000
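If the column were a BLOB rather than a CLOB, ROracle hands the value back as raw bytes instead of text, so any encoding belongs on the R side, not in the SQL. A minimal sketch, assuming a hypothetical BLOB_FIELD column and the base64enc package (neither appears in the original answer):
library(base64enc)
# ROracle returns each BLOB row as a raw vector, so encode in R
# instead of trying to CAST/encode inside the SQL statement
df <- dbGetQuery(con, "select BLOB_FIELD from tab where OTHER_FIELD = 'something'")
base64encode(df$BLOB_FIELD[[1]])  # base64 text of the first row's BLOB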

Related

Db2 on Cloud: Problem with a column when querying from R

I created a connection between R and Db2 on Cloud
library(RODBC)
dsn_driver <- "{IBM DB2 ODBC Driver}"
dsn_database <- "bludb" # e.g. "bludb"
dsn_hostname <- "**"
dsn_port <- "***" # e.g. "32733"
dsn_protocol <- "TCPIP" # i.e. "TCPIP"
dsn_uid <- "**" #
dsn_pwd <- "**" #
dsn_security <- "ssl"
conn_path <- paste("DRIVER=",dsn_driver,
";DATABASE=",dsn_database,
";HOSTNAME=",dsn_hostname,
";PORT=",dsn_port,
";PROTOCOL=",dsn_protocol,
";UID=",dsn_uid,
";PWD=",dsn_pwd,
";SECURITY=",dsn_security,
sep="")
conn <- odbcDriverConnect(conn_path)
conn
Then I created the table
myschema <- "**" #
tables <- c("Annual_Crop")
for (table in tables){
# Drop the table if it already exists
out <- sqlTables(conn, tableType = "TABLE", schema = myschema, tableName =table)
if (nrow(out)>0) {
err <- sqlDrop (conn, paste(myschema,".",table,sep=""), errors=FALSE)
if (err==-1){
cat("An error has occurred.\n")
err.msg <- odbcGetErrMsg(conn)
for (error in err.msg) {
cat(error,"\n")
}
} else {
cat ("Table: ", myschema,".",table," was dropped\n")
}
} else {
cat ("Table: ", myschema,".",table," does not exist\n")
}
}
df1 <- sqlQuery (conn, "CREATE TABLE Annual_Crop(
CD_ID char (6) NOT NULL,
YEAR CHAR (20),
CROP_TYPE varchar (50),
GEO varchar (50),
SEEDED_AREA CHAR (50) ,
HARVESTED_AREA CHAR (50),
PRODUCTION CHAR (50),
AVG_YIELD CHAR (50),
PRIMARY KEY (CD_ID))",
errors = FALSE)
if(df1 == -1){
cat ("An error has occured.\n")
msg <- odbcGetErrMsg(conn)
print (msg)
} else {
cat ("Table was createdd successfuly.\n")
}
I loaded the dataset from a file into the table
anual_cropdf <- read.csv("/resources/labs/MYDATA/data1.csv")
sqlSave(conn, anual_cropdf, 'Annual_Crop', append=TRUE, fast=FALSE, rownames=FALSE, colnames=FALSE, verbose=FALSE)
Then I tried to fetch from the table and it works
FARMDB <- sqlFetch(conn, "Annual_Crop")
tail(FARMDB)
Finally, when I tried to perform a query, it did not work. The result was just the column names (a 0 × 8 data frame).
info <- paste('select * from Annual_Crop
where Geo = 41600')
query <- sqlQuery(conn,info,believeNRows = FALSE)
query
Why?
Based on your table schema, the data type for Geo is VARCHAR. Have you tried a query like this?
select * from Annual_Crop where Geo = 'Alberta'
or
select * from Annual_Crop where Geo = '41600'
A VARCHAR / string comparison needs single quotes around the value.
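For example, with the same connection and the value from the failing query, now quoted:
info <- "select * from Annual_Crop where Geo = '41600'"
query <- sqlQuery(conn, info, believeNRows = FALSE)
query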

Connect to Database Using dbPool RJDBC in R

I am trying to use a pool to connect to my database in R, but I get the error:
Schema must be specified when session schema is not set
How does one specify a schema ? It seems like I need to specify it inside the pool. If that's the case, what's the parameter name for a schema?
pool <- dbPool(
drv = RJDBC::JDBC(
"xxx",
"dir_to_jar", "`"
),
dbname = "db",
schema = "schema" # this didn't work
url = url,
user = user,
password = password,
SSL = 'true'
)
pool %>% tbl("schema.table")
I tried several other methods using DBI::dbConnect combined with Id and it worked:
pool <- DBI::dbConnect(
drv = RJDBC::JDBC(
"xxx",
"dir_to_jar", "`"
),
url = url,
user = user,
password = password,
SSL = 'true'
)
# Didn't work
pool %>% tbl(dbplyr::in_schema("catalog.schema", "table"))
# Works!
s <- Id(catalog = "catalog", schema = "schema", table = "table")
df <- dbReadTable(pool, s)
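If you would rather keep the pool, a hedged workaround is to bypass tbl() and run a fully qualified name through SQL directly; pool objects answer the usual DBI generics, and catalog, schema, and table below are the same placeholders as above:
df <- DBI::dbGetQuery(pool, 'SELECT * FROM catalog.schema."table"')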

Insert/Update R data.table into PostgreSQL table

I have a PostgreSQL database set up with a table and columns already defined. The primary key for the table is the combination of the (Id, datetime) columns. I need to periodically INSERT data for different Ids from an R data.table into the database. However, if data for a particular (Id, datetime) combination already exists, it should be UPDATED (overwritten). How can I do this using the RPostgres or RPostgreSQL packages?
When I try to insert a data.table where some (Id, datetime) rows already exist I get an error saying the primary key constraint is violated:
dbWriteTable(con, table, dt, append = TRUE, row.names = FALSE)
Error in connection_copy_data(conn@ptr, sql, value) :
COPY returned error: ERROR: duplicate key value violates unique constraint "interval_data_pkey"
DETAIL: Key (id, dttm_utc)=(a0za000000CSdLoAAL, 2018-10-01 05:15:00+00) already exists.
CONTEXT: COPY interval_data, line 1
You can use my pg package, which has upsert functionality, or just grab the upsert code from there: https://github.com/jangorecki/pg/blob/master/R/pg.R#L249
It is basically what others said in the comments: write the data into a temp table, then insert into the destination table using an ON CONFLICT clause.
pgSendUpsert = function(stage_name, name, conflict_by, on_conflict = "DO NOTHING", techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log",TRUE)){
stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict)==1L)
cols = pgListFields(stage_name)
cols = setdiff(cols, c("run_id","r_timestamp")) # remove techstamp to have clean column list, as the fresh one will be used, if any
# sql
insert_into = sprintf("INSERT INTO %s.%s (%s)", name[1L], name[2L], paste(if(techstamp) c(cols, c("run_id","r_timestamp")) else cols, collapse=", "))
select = sprintf("SELECT %s", paste(cols, collapse=", "))
if(techstamp) select = sprintf("%s, %s::INTEGER run_id, '%s'::TIMESTAMPTZ r_timestamp", select, get_run_id(), format(Sys.time(), "%Y-%m-%d %H:%M:%OS"))
from = sprintf("FROM %s.%s", stage_name[1L], stage_name[2L])
if(!missing(conflict_by)) on_conflict = paste(paste0("(",paste(conflict_by, collapse=", "),")"), on_conflict)
on_conflict = paste("ON CONFLICT",on_conflict)
sql = paste0(paste(insert_into, select, from, on_conflict), ";")
pgSendQuery(sql, conn = conn, .log = .log)
}
#' @rdname pg
pgUpsertTable = function(name, value, conflict_by, on_conflict = "DO NOTHING", stage_name, techstamp = TRUE, conn = getOption("pg.conn"), .log = getOption("pg.log",TRUE)){
stopifnot(!is.null(conn), is.logical(.log), is.logical(techstamp), is.character(on_conflict), length(on_conflict)==1L)
name = schema_table(name)
if(!missing(stage_name)){
stage_name = schema_table(stage_name)
drop_stage = FALSE
} else {
stage_name = name
stage_name[2L] = paste("tmp", stage_name[2L], sep="_")
drop_stage = TRUE
}
if(pgExistsTable(stage_name)) pgTruncateTable(name = stage_name, conn = conn, .log = .log)
pgWriteTable(name = stage_name, value = value, techstamp = techstamp, conn = conn, .log = .log)
on.exit(if(drop_stage) pgDropTable(stage_name, conn = conn, .log = .log))
pgSendUpsert(stage_name = stage_name, name = name, conflict_by = conflict_by, on_conflict = on_conflict, techstamp = techstamp, conn = conn, .log = .log)
}
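For readers who don't need the full package, here is a minimal sketch of the same staging-table pattern using DBI with RPostgres alone. The table name, key columns, and constraint come from the error message above; the value column is an assumption for illustration:
library(DBI)
# stage the rows in a temporary table that has no constraints, so the COPY succeeds
dbWriteTable(con, "interval_data_stage", dt, temporary = TRUE, overwrite = TRUE)
# upsert: insert new keys, overwrite rows whose (id, dttm_utc) already exists
dbExecute(con, "
  INSERT INTO interval_data (id, dttm_utc, value)
  SELECT id, dttm_utc, value FROM interval_data_stage
  ON CONFLICT (id, dttm_utc)
  DO UPDATE SET value = EXCLUDED.value;
")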

Truncated updated string with R DBI package

I need to update a wide table on a SQL Server from R, and the DBI package seems very useful for that.
The problem is that the R data.frame contains strings of more than 3000 characters, and when I use the DBI dbSendQuery function, all strings are truncated to 256 characters.
Here is a code example:
con <- odbc::dbConnect(drv = odbc::odbc(),
dsn = '***',
UID = '***',
PWD = '***')
df = data.frame(TEST = paste(rep("A", 300), collapse=""),
TEST_ID = 1068858)
df$TEST = df$TEST %>% as.character
query = paste0('UPDATE MY_TABLE SET "TEST"=? WHERE TEST_ID=?')
update <- DBI::dbSendQuery(con, query)
DBI::dbBind(update, df)
DBI::dbClearResult(update)
odbc::dbDisconnect(con)
Then the following query returns 256 instead of 300:
SELECT LEN(TEST) FROM MY_TABLE WHERE TEST_ID = 1068858
NB: TEST is of type varchar(max), NULL, and already contains strings of more than 256 characters.
Thanks in advance for any advice.
In the end, I chose to drop the sophisticated functions. A workaround was to write the table to a .csv file and bulk insert it into the database. Here is an example using the RODBC package:
write.table(x = df,
file = "/path/DBI_error_test.csv",
sep = ";",
row.names = FALSE, col.names = FALSE,
na = "NULL",
quote = FALSE)
Query = paste("CREATE TABLE #MY_TABLE_TMP (
TEST varchar(max),
TEST_ID int
);
BULK INSERT #MY_TABLE_TMP
FROM 'C:\\DBI_error_test.csv'
WITH
(
FIELDTERMINATOR = ';',
ROWTERMINATOR = '\n',
BATCHSIZE = 500000,
CHECK_CONSTRAINTS
)
UPDATE R
SET R.TEST = #MY_TABLE_TMP.TEST
FROM MY_TABLE AS R
INNER JOIN #MY_TABLE_TMP ON #MY_TABLE_TMP.TEST_ID = R.TEST_ID;
DROP TABLE #MY_TABLE_TMP;
")
channel <- RODBC::odbcConnect(dsn = .DB_DSN_NAME,
uid = .DB_UID,
pwd = .DB_PWD)
RODBC::sqlQuery(channel = channel, query = query, believeNRows = FALSE)
RODBC::odbcClose(channel = channel)
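A hedged DBI-only alternative, if you want to avoid the .csv round trip: stage the data with dbWriteTable(), forcing the long column to varchar(max) through field.types so it is not created as a short varchar, then update from the staging table. The staging-table name is made up for this sketch, and it assumes the ODBC driver binds varchar(max) correctly on writes:
DBI::dbWriteTable(con, "MY_TABLE_STAGE", df, overwrite = TRUE,
                  field.types = c(TEST = "varchar(max)", TEST_ID = "int"))
# update the target from the stage, then drop the stage
DBI::dbExecute(con, "UPDATE R SET R.TEST = S.TEST
                     FROM MY_TABLE AS R
                     INNER JOIN MY_TABLE_STAGE AS S ON S.TEST_ID = R.TEST_ID;")
DBI::dbExecute(con, "DROP TABLE MY_TABLE_STAGE;")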

Error in switch(code, ic9 = { : EXPR must be a length 1 vector

Help me resolve this error:
Error in switch(code, ic9 = { : EXPR must be a length 1 vector
When I switch from one radio button to another, it should display the result associated with the selected button. That's why I've used switch, but there is an error in it.
CODE:
library(shiny)
library(RPostgreSQL)
shinyServer(function(input, output){
# Return the requested dataset
inputCode <- reactive({
input$code
code <<- {switch(input$code,
i9= ic9,
i10= ic10)}
input$icd
})
dbConn <- function(inputCode,out){
con <- dbConnect(dbDriver("PostgreSQL"), user="postgres", password="xyz", dbname="ICD_9_10_Mapping", host = "localhost", port = 5432)
on.exit(dbDisconnect(con), add=TRUE)
query1 <- "SELECT icd_10_codes, description, flags FROM icd_9_10_dia WHERE icd_9_codes ='"
query2 <- "SELECT icd_9_codes, description, flags FROM icd_10_9_dia WHERE icd_10_codes ='"
switch(code,
ic9 = {X <<- query1},
ic10 = {X <<- query2})
invert <- "'"
stmt <- paste(X,toString(input$icd, width = 10),invert, sep = "")
res <- dbSendQuery(con, statement = stmt)
out <<- fetch(res, n = -1)
}
output$out1 <- renderTable(dbConn(inputCode,out))})
To simplify this a bit, use input$code directly with if statements rather than switch. For example:
if(input$code == 'ic9') {
query <- "SELECT icd_10_codes, description, flags FROM icd_9_10_dia WHERE icd_9_codes ='"
} else {
query <- "SELECT icd_9_codes, description, flags FROM icd_10_9_dia WHERE icd_10_codes ='"
}
You can use input$code in your function or put it in your call to dbConn (i.e., dbConn(input$code)). I don't see where out is coming from: it is assigned inside dbConn, but you are also passing it as an argument, which will not work. Also, the call to the reactive should be inputCode(), but with the if statements suggested above you wouldn't need it.
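Putting the pieces together, a minimal sketch of the restructured server function; the connection details, tables, and inputs are taken from the question, the rest is illustrative:
library(shiny)
library(RPostgreSQL)
shinyServer(function(input, output) {
  output$out1 <- renderTable({
    con <- dbConnect(dbDriver("PostgreSQL"), user = "postgres", password = "xyz",
                     dbname = "ICD_9_10_Mapping", host = "localhost", port = 5432)
    on.exit(dbDisconnect(con), add = TRUE)
    # pick the mapping direction from the radio button
    if (input$code == "ic9") {
      query <- "SELECT icd_10_codes, description, flags FROM icd_9_10_dia WHERE icd_9_codes = '"
    } else {
      query <- "SELECT icd_9_codes, description, flags FROM icd_10_9_dia WHERE icd_10_codes = '"
    }
    res <- dbSendQuery(con, statement = paste0(query, input$icd, "'"))
    fetch(res, n = -1)  # the data.frame that renderTable displays
  })
})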
