Build a table from XML data file using R language - r

I am a new learner of R programming. I have a sample XML file, shown below:
<Attribute ID="GroupSEO" MultiValued="false" ProductMode="Property" FullTextIndexed="false" ExternallyMaintained="false" Derived="false" Mandatory="false">
<Name>Group SEO Name</Name>
<Validation BaseType="text" MinValue="" MaxValue="" MaxLength="1024" InputMask=""/>
<DimensionLink DimensionID="Language"/>
<MetaData>
<Value AttributeID="Attribute-Group-Order">1</Value>
<Value AttributeID="Enterprise-Label">NAV-GR-SEONAME</Value>
<Value ID="#NAMED" AttributeID="Attribute-Group-Name">#NAMED</Value>
<Value AttributeID="Enterprise-Description">Navigation Group SEO Name</Value>
<Value AttributeID="Attribute-Order">3</Value>
</MetaData>
<AttributeGroupLink AttributeGroupID="HTCategorizationsNavigation"/>
<AttributeGroupLink AttributeGroupID="HTDigitalServicesModifyClassifications"/>
<UserTypeLink UserTypeID="ENT-Group"/>
<UserTypeLink UserTypeID="NAVGRP"/>
<UserTypeLink UserTypeID="ENT-SubCategory"/>
<UserTypeLink UserTypeID="ENT-Category"/>
I want to convert this into a data frame using R. My expected output is:
## FullTextIndexed MultiValued ProductMode ExternallyMaintained Derived Mandatory Attribute-Group-Order Enterprise-Description UserTypeID
1 false false Property false false false 1 Navigation group seo name ENT-Group,ENT-Category,..
I have searched the internet but couldn't find a solution to my problem.
I found the following code online:
# Load the XML package (source of xmlToDataFrame) and methods
library("XML")
library("methods")
# Sample.xml below is read relative to this working directory
setwd("E:/Project")
# Try to flatten the whole document into a data frame in one call
xmldata<-xmlToDataFrame("Sample.xml")
print(xmldata)
but when i execute the code i get the below error
Error in `[<-.data.frame`(`*tmp*`, i, names(nodes[[i]]), value = c(Name = "You YoutubeLink7 (URL)", :
duplicate subscripts for columns
In addition: Warning message:
In names(x) == varNames :
longer object length is not a multiple of shorter object length
> print(xmldata)
Error in print(xmldata) : object 'xmldata' not found
Could anyone help me understand what the error means and suggest a solution to my problem? Sorry for the formatting issues.
Thanks in advance for the solution.
Thanks

Start from well-formed XML (the closing </Attribute> tag has been added at the end of the file):
<?xml version="1.0" encoding="UTF-8"?>
<Attribute ID="GroupSEO" MultiValued="false" ProductMode="Property" FullTextIndexed="false" ExternallyMaintained="false" Derived="false" Mandatory="false">
<Name>Group SEO Name</Name>
<Validation BaseType="text" MinValue="" MaxValue="" MaxLength="1024" InputMask=""/>
<DimensionLink DimensionID="Language"/>
<MetaData>
<Value AttributeID="Attribute-Group-Order">1</Value>
<Value AttributeID="Enterprise-Label">NAV-GR-SEONAME</Value>
<Value ID="#NAMED" AttributeID="Attribute-Group-Name">#NAMED</Value>
<Value AttributeID="Enterprise-Description">Navigation Group SEO Name</Value>
<Value AttributeID="Attribute-Order">3</Value>
</MetaData>
<AttributeGroupLink AttributeGroupID="HTCategorizationsNavigation"/>
<AttributeGroupLink AttributeGroupID="HTDigitalServicesModifyClassifications"/>
<UserTypeLink UserTypeID="ENT-Group"/>
<UserTypeLink UserTypeID="NAVGRP"/>
<UserTypeLink UserTypeID="ENT-SubCategory"/>
<UserTypeLink UserTypeID="ENT-Category"/>
</Attribute>
Then we use xpath to get all we need. Change the path to your xml file in the htmlParse step.
library(XML)

# htmlParse uses the HTML parser, which yields lower-case element and
# attribute names; that is why every XPath below is written in lower case.
data <- htmlParse("C:/Users/.../yourxmlfile.xml")

# Attributes of the <Attribute> element. In XPath, `@name` selects an
# attribute; normalize-space() returns its value as a trimmed string.
fulltextindexed <- xpathSApply(
  data, "normalize-space(//attribute/@fulltextindexed)"
)
multivalued <- xpathSApply(
  data, "normalize-space(//attribute/@multivalued)"
)
productmode <- xpathSApply(
  data, "normalize-space(//attribute/@productmode)"
)
externallymaintained <- xpathSApply(
  data, "normalize-space(//attribute/@externallymaintained)"
)
derived <- xpathSApply(
  data, "normalize-space(//attribute/@derived)"
)
mandatory <- xpathSApply(
  data, "normalize-space(//attribute/@mandatory)"
)

# Text of the <Value> nodes, picked by their AttributeID attribute
attribute.group.order <- xpathSApply(
  data, "//value[@attributeid='Attribute-Group-Order']", xmlValue
)
enterprise.description <- xpathSApply(
  data, "//value[@attributeid='Enterprise-Description']", xmlValue
)

# All UserTypeID values collapsed into a single "|"-separated string
user.type.id <- paste(
  xpathSApply(data, "//usertypelink/@usertypeid"),
  collapse = "|"
)

df <- data.frame(
  fulltextindexed, multivalued, productmode, externallymaintained,
  derived, mandatory, attribute.group.order, enterprise.description,
  user.type.id
)
Result :

Using tidyverse and xml2 (load both first with library(tidyverse) and library(xml2))
DATA
data <- read_xml('<Attribute ID="GroupSEO" MultiValued="false" ProductMode="Property" FullTextIndexed="false" ExternallyMaintained="false" Derived="false" Mandatory="false">
<Name>Group SEO Name</Name>
<Validation BaseType="text" MinValue="" MaxValue="" MaxLength="1024" InputMask=""/>
<DimensionLink DimensionID="Language"/>
<MetaData>
<Value AttributeID="Attribute-Group-Order">1</Value>
<Value AttributeID="Enterprise-Label">NAV-GR-SEONAME</Value>
<Value ID="#NAMED" AttributeID="Attribute-Group-Name">#NAMED</Value>
<Value AttributeID="Enterprise-Description">Navigation Group SEO Name</Value>
<Value AttributeID="Attribute-Order">3</Value>
</MetaData>
<AttributeGroupLink AttributeGroupID="HTCategorizationsNavigation"/>
<AttributeGroupLink AttributeGroupID="HTDigitalServicesModifyClassifications"/>
<UserTypeLink UserTypeID="ENT-Group"/>
<UserTypeLink UserTypeID="NAVGRP"/>
<UserTypeLink UserTypeID="ENT-SubCategory"/>
<UserTypeLink UserTypeID="ENT-Category"/>
</Attribute>')
CODE
# Attributes of every <Attribute> element, one row per element
Attributes <- xml_find_all(data, "//Attribute") %>%
  map(xml_attrs) %>%
  map_df(~ as.list(.))

# <Value> nodes are told apart by their AttributeID attribute
nodes <- xml_find_all(data, "//Value")
AGO <- nodes[xml_attr(nodes, "AttributeID") == "Attribute-Group-Order"]
Attributes["Attribute-Group-Order"] <- xml_text(AGO)
ED <- nodes[xml_attr(nodes, "AttributeID") == "Enterprise-Description"]
Attributes["Enterprise-Description"] <- xml_text(ED)

# <UserTypeLink> tags: collapse all UserTypeID values into one
# comma-separated string. The previous map_chr(~ toString(UserTypeID, .x))
# handed each value to toString() as the separator, not as data.
UserTypeLink <- xml_find_all(data, "//UserTypeLink") %>%
  map(xml_attrs) %>%
  map_df(~ as.list(.)) %>%
  summarise(UserTypeID = toString(UserTypeID))

# Final output: a single row combining both pieces
cbind(Attributes, UserTypeLink)

Related

How to build XML file with package PMML using mlr?

I want to convert a logistic model built by the mlr-package directly into a XML-file using the package pmml. The problem is that the model.learner built by the mlr wrapper doesn't include the model link in the list, like it is in the normal stats::glm function. So here is an example:
library(dplyr)
library(titanic)
library(pmml)
library(ParamHelpers)
library(mlr)

# Outcome plus three predictors; categorical columns become factors and
# rows with missing values are dropped.
Titanic_data <- titanic_train %>%
  select(Survived, Pclass, Sex, Age) %>%
  mutate(across(c(Survived, Sex, Pclass), as.factor)) %>%
  na.omit()

# Logistic regression through the mlr wrapper ...
lrn <- makeLearner("classif.logreg", predict.type = "prob")
task <- makeClassifTask(
  data = Titanic_data,
  target = "Survived",
  positive = "1"
)
model <- train(lrn, task)

# ... and the same model fitted directly with stats::glm
model_glm <- glm(Survived ~ ., data = Titanic_data, family = "binomial")

str(model$learner.model) # list of 29
str(model_glm) # list of 30
As you can see, the structure of both models is a list of different elements and they are all the same, beside the fact that the model is missing in the wrapper. Therefore I get an error message using pmml:
pmml(model$learner.model)
# Error in pmml.glm(model$learner.model) : object 'model.link' not found
The one built by stats::glm is working:
pmml(model_glm)
<PMML version="4.4" xmlns="http://www.dmg.org/PMML-4_4" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_4 http://www.dmg.org/pmml/v4-4/pmml-4-4.xsd">
<Header copyright="Copyright (c) 2020 TBeige" description="Generalized Linear Regression Model">
<Extension name="user" value="TBeige" extender="SoftwareAG PMML Generator"/>
<Application name="SoftwareAG PMML Generator" version="2.3.1"/>
<Timestamp>2020-05-12 09:50:15</Timestamp>
</Header>
<DataDictionary numberOfFields="4">
<DataField name="Survived" optype="categorical" dataType="string">
<Value value="0"/>
<Value value="1"/>
</DataField>
<DataField name="Pclass" optype="categorical" dataType="string">
<Value value="1"/>
<Value value="2"/>
<Value value="3"/>
</DataField>
<DataField name="Sex" optype="categorical" dataType="string">
<Value value="female"/>
<Value value="male"/>
</DataField>
<DataField name="Age" optype="continuous" dataType="double"/>
</DataDictionary>
<GeneralRegressionModel modelName="General_Regression_Model" modelType="generalizedLinear" functionName="classification" algorithmName="glm" distribution="binomial" linkFunction="logit">
<MiningSchema>
<MiningField name="Survived" usageType="predicted" invalidValueTreatment="returnInvalid"/>
<MiningField name="Pclass" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Sex" usageType="active" invalidValueTreatment="returnInvalid"/>
<MiningField name="Age" usageType="active" invalidValueTreatment="returnInvalid"/>
</MiningSchema>
<Output>
<OutputField name="Probability_1" targetField="Survived" feature="probability" value="1" optype="continuous" dataType="double"/>
<OutputField name="Predicted_Survived" feature="predictedValue" optype="categorical" dataType="string"/>
</Output>
<ParameterList>
<Parameter name="p0" label="(Intercept)"/>
<Parameter name="p1" label="Pclass2"/>
<Parameter name="p2" label="Pclass3"/>
<Parameter name="p3" label="Sexmale"/>
<Parameter name="p4" label="Age"/>
</ParameterList>
<FactorList>
<Predictor name="Pclass"/>
<Predictor name="Sex"/>
</FactorList>
<CovariateList>
<Predictor name="Age"/>
</CovariateList>
<PPMatrix>
<PPCell value="2" predictorName="Pclass" parameterName="p1"/>
<PPCell value="3" predictorName="Pclass" parameterName="p2"/>
<PPCell value="male" predictorName="Sex" parameterName="p3"/>
<PPCell value="1" predictorName="Age" parameterName="p4"/>
</PPMatrix>
<ParamMatrix>
<PCell targetCategory="1" parameterName="p0" df="1" beta="3.77701265255885"/>
<PCell targetCategory="1" parameterName="p1" df="1" beta="-1.30979926778885"/>
<PCell targetCategory="1" parameterName="p2" df="1" beta="-2.58062531749203"/>
<PCell targetCategory="1" parameterName="p3" df="1" beta="-2.52278091988034"/>
<PCell targetCategory="1" parameterName="p4" df="1" beta="-0.0369852655754339"/>
</ParamMatrix>
</GeneralRegressionModel>
</PMML>
Any idea how I can use mlr and create an XML file using pmml?
The problem seems to be inside pmml
From pmml::pmml.glm:
if (model$call[[1]] == "glm") {
model.type <- model$family$family
model.link <- model$family$link
}
else {
model.type <- "unknown"
}
In the mlr model we have
model$learner.model$call[[1]]
# stats::glm
So you can just hack
model$learner.model$call[[1]] = "glm"
and then
pmml(model$learner.model)
works.
To be honest it seems to be weird code in the pmml package.

XML parsing with namespaces using XML package in R

I am trying to parse a xml file using XML package of R. Sample XML content is as follows:
<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
<soapenv:Header>
<messageContext xmlns="http://www.deltavista.com/dspone/ordercheck-if/V001">
<credentials>
<user>foobar</user>
<password>barbaz</password>
</credentials>
</messageContext>
</soapenv:Header>
<soapenv:Body>
<ns1:orderCheckResponse xmlns:ns1="http://www.deltavista.com/dspone/ordercheck-if/V001">
<ns1:returnCode>1</ns1:returnCode>
<ns1:product>
<ns1:name>Consumer</ns1:name>
<ns1:country>POL</ns1:country>
<ns1:language>POL</ns1:language>
</ns1:product>
<ns1:archiveID>420</ns1:archiveID>
<ns1:reportCreationTime>201911151220</ns1:reportCreationTime>
<ns1:foundAddress>
<ns1:legalForm>PERSON</ns1:legalForm>
<ns1:address>
<ns1:name>John</ns1:name>
<ns1:firstName>Dow</ns1:firstName>
<ns1:gender>MALE</ns1:gender>
<ns1:dateOfBirth>19960410</ns1:dateOfBirth>
<ns1:location>
<ns1:street>nowhere</ns1:street>
<ns1:house>48</ns1:house>
<ns1:city>farfarland</ns1:city>
<ns1:zip>00-500</ns1:zip>
<ns1:country>POL</ns1:country>
</ns1:location>
</ns1:address>
</ns1:foundAddress>
<ns1:myDecision>
<ns1:decision>YELLOW</ns1:decision>
</ns1:myDecision>
<ns1:personBasicData>
<ns1:knownSince>20181201</ns1:knownSince>
<ns1:contact>
<ns1:item>EMAIL</ns1:item>
<ns1:value>foo@gmail.com</ns1:value>
</ns1:contact>
<ns1:contact>
<ns1:item>PHONE</ns1:item>
<ns1:value>123456789</ns1:value>
</ns1:contact>
</ns1:personBasicData>
<ns1:decisionMatrix>
<ns1:identificationDecision>
<ns1:personStatus xsi:type="ns1:DecisionMatrixItemPersonStatus" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>ADULT</ns1:value>
</ns1:personStatus>
<ns1:identificationType xsi:type="ns1:DecisionMatrixItemIdentificationType" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>IDENTITY_IN_CITY</ns1:value>
</ns1:identificationType>
<ns1:similarHit xsi:type="ns1:DecisionMatrixItemInt" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>0</ns1:value>
</ns1:similarHit>
<ns1:houseType xsi:type="ns1:DecisionMatrixItemString" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>SHARED_USAGE</ns1:value>
</ns1:houseType>
<ns1:nameHint xsi:type="ns1:DecisionMatrixItemNameHint" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>CONFIRMED</ns1:value>
</ns1:nameHint>
<ns1:locationIdentificationType xsi:type="ns1:DecisionMatrixItemLocationIdentificationType" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>HOUSE_CONFIRMED</ns1:value>
</ns1:locationIdentificationType>
</ns1:identificationDecision>
<ns1:solvencyDecision>
<ns1:paymentExperience xsi:type="ns1:DecisionMatrixItemPHS" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>NOPROBLEM</ns1:value>
</ns1:paymentExperience>
<ns1:externalSourcesProcessingStatus xsi:type="ns1:DecisionMatrixItemExternalProcessingStatus" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>OK</ns1:value>
</ns1:externalSourcesProcessingStatus>
</ns1:solvencyDecision>
<ns1:clientExtensionsDecision>
<ns1:applicationFilter xsi:type="ns1:DecisionMatrixItemStringWithOverride" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>GREEN</ns1:partialDecision>
<ns1:value>0</ns1:value>
</ns1:applicationFilter>
<ns1:myScore xsi:type="ns1:DecisionMatrixItemInt" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:partialDecision>YELLOW</ns1:partialDecision>
<ns1:value>401</ns1:value>
</ns1:myScore>
</ns1:clientExtensionsDecision>
</ns1:decisionMatrix>
<ns1:paymentHistory>
<ns1:currency>PLN</ns1:currency>
<ns1:count>0</ns1:count>
<ns1:dateOfLastEntry>20191111</ns1:dateOfLastEntry>
<ns1:amountTotal>0.0</ns1:amountTotal>
<ns1:amountTotalOpen>0.0</ns1:amountTotalOpen>
<ns1:creditStatusMax>0</ns1:creditStatusMax>
<ns1:masterRiskStatus>Brak danych o negatywnej historii</ns1:masterRiskStatus>
</ns1:paymentHistory>
<ns1:normalization>
<ns1:searchedAddress xsi:type="ns1:SearchedAddressN" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<ns1:name>John</ns1:name>
<ns1:firstName>Dow</ns1:firstName>
<ns1:gender>MALE</ns1:gender>
<ns1:dateOfBirth>19960410</ns1:dateOfBirth>
<ns1:location>
<ns1:street>nowhere</ns1:street>
<ns1:house>39</ns1:house>
<ns1:houseExtension/>
<ns1:city>farfarland</ns1:city>
<ns1:zip>00-500</ns1:zip>
<ns1:country>POL</ns1:country>
</ns1:location>
<ns1:addressID>123</ns1:addressID>
<ns1:unitID>123</ns1:unitID>
<ns1:liableID>1231</ns1:liableID>
<ns1:houseID>1232</ns1:houseID>
<ns1:streetID>1233</ns1:streetID>
<ns1:cityID>1234</ns1:cityID>
</ns1:searchedAddress>
<ns1:foundAddress>
<ns1:addressID>1235</ns1:addressID>
<ns1:unitID>1236</ns1:unitID>
<ns1:liableID>1237</ns1:liableID>
<ns1:houseID>1238</ns1:houseID>
<ns1:streetID>1239</ns1:streetID>
<ns1:cityID>1230</ns1:cityID>
</ns1:foundAddress>
</ns1:normalization>
<ns1:clientExtensions>
<ns1:additionalData>
<ns1:name>pesel_verification_status</ns1:name>
<ns1:value>1</ns1:value>
</ns1:additionalData>
<ns1:additionalData>
<ns1:name>pesel_verification_execution_code</ns1:name>
<ns1:value>200</ns1:value>
</ns1:additionalData>
<ns1:additionalData>
<ns1:name>pesel_verification_codes</ns1:name>
<ns1:value>12010; 12013</ns1:value>
</ns1:additionalData>
</ns1:clientExtensions>
<ns1:executionStrategy/>
</ns1:orderCheckResponse>
</soapenv:Body>
</soapenv:Envelope>
Here is the code I am using to read and parse this XML content; str henceforth:
library(XML)

# Parse with useInternalNodes off so the tree can be walked with `$`
foobar <- xmlInternalTreeParse(
  str,
  encoding = "KOI8-R",
  useInternalNodes = FALSE
)

# Names found two levels below <Envelope>
xmlSApply(
  foobar$doc$children$Envelope,
  function(section) xmlSApply(section, names)
)

# Names found three levels below <Envelope>
xmlSApply(
  foobar$doc$children$Envelope,
  function(section) {
    xmlSApply(section, function(child) xmlSApply(child, names))
  }
)
Here I am able to parse the XML content, and I tried iterating over the nodes to at least print their names. However, I couldn't extract the values inside, even though I read many SO questions and tried countless combinations using xpathApply() etc. (reference)
Any hint on what I might be doing wrong here.
Consider a straightforward descendant XPath search with //* that acknowledges namespaces to retrieve all element names and values. However, since this XPath does not reference any namespace prefixes, the namespaces argument is redundant in the xpathSApply calls for this specific search:
doc <- xmlParse(str, asText = TRUE)

# Prefix-to-URI map handed to the XPath calls below
nmsp <- c(
  soapenv = "http://schemas.xmlsoap.org/soap/envelope/",
  doc = "http://www.deltavista.com/dspone/ordercheck-if/V001",
  ns1 = "http://www.deltavista.com/dspone/ordercheck-if/V001"
)

# Named character vector: one entry per element in the document
# (117 here), values are the element text, names are the element names
elem_vals <- setNames(
  xpathSApply(doc, path = "//*", fun = xmlValue, namespaces = nmsp),
  xpathSApply(doc, path = "//*", fun = xmlName, namespaces = nmsp)
)
Output
Names (first 20 items)
head(names(elem_vals), 20)
# [1] "Envelope" "Header" "messageContext" "credentials" "user"
# [6] "password" "Body" "orderCheckResponse" "returnCode" "product"
# [11] "name" "country" "language" "archiveID" "reportCreationTime"
# [16] "foundAddress" "legalForm" "address" "name" "firstName"
Values (last 20 items)
tail(elem_vals, 20)
# streetID cityID
# "1233" "1234"
# foundAddress addressID
# "123512361237123812391230" "1235"
# unitID liableID
# "1236" "1237"
# houseID streetID
# "1238" "1239"
# cityID clientExtensions
# "1230" "pesel_verification_status1pesel_verification_execution_code200pesel_verification_codes12010; 12013"
# additionalData name
# "pesel_verification_status1" "pesel_verification_status"
# value additionalData
# "1" "pesel_verification_execution_code200"
# name value
# "pesel_verification_execution_code" "200"
# additionalData name
# "pesel_verification_codes12010; 12013" "pesel_verification_codes"
# value executionStrategy
# "12010; 12013" ""

How to parse a complex XML file in R into a data frame?

I want to parse a nested XML file with the layout below in R and load it into a data frame. I tried several ways, including the XML and xml2 packages, but could not get it to work.
<?xml version="1.0" encoding="UTF-8"?>
<Targets>
<Target TYPE="myserver.mgmt.Metric" NAME="metric1">
<Attribute NAME="name" VALUE="metric1"></Attribute>
<Attribute NAME="Value" VALUE="2.4"></Attribute>
<Attribute NAME="collectionTime" VALUE="1525118288000"></Attribute>
<Attribute NAME="State" VALUE="normal"></Attribute>
<Attribute NAME="ObjectName" VALUE="obj1"></Attribute>
<Attribute NAME="ValueHistory" VALUE="5072"></Attribute>
</Target>
...
<Target TYPE="myserver.mgmt.Metric" NAME="metric999">
<Attribute NAME="name" VALUE="metric999"></Attribute>
<Attribute NAME="Value" VALUE="60.35"></Attribute>
<Attribute NAME="collectionTime" VALUE="1525118288000"></Attribute>
<Attribute NAME="State" VALUE="normal"></Attribute>
<Attribute NAME="ObjectName" VALUE="obj1"></Attribute>
<Attribute NAME="ValueHistory" VALUE="9550"></Attribute>
</Target>
</Targets>
The final outcome I am looking to get is:
name Value collectionTime State ObjectName ValueHistory
metric1 2.4 1525118288000 normal obj1 5072
metric2 60.35 1525118288000 normal obj2 9550
Any help is appreciated.
We can make use of XML with tidyverse
library(XML)
library(tidyverse)

# One node per <Target>; each node becomes one row of the result
lst1 <- getNodeSet(xml1, path = "//Target")

# For each <Target>, the attributes of its child nodes are read into a
# data frame (presumably two columns, NAME and VALUE, which is the shape
# deframe() needs), turned into a named vector and then a one-row tibble.
# across() replaces the superseded mutate_all().
map_df(seq_along(lst1), ~
  XML:::xmlAttrsToDataFrame(lst1[[.x]]) %>%
    mutate(across(everything(), as.character)) %>%
    deframe() %>%
    as.list() %>%
    as_tibble()) %>%
  # Everything is character at this point; guess proper column types
  mutate(across(everything(), ~ type.convert(.x, as.is = TRUE)))
# A tibble: 2 x 6
# name Value collectionTime State ObjectName ValueHistory
# <chr> <dbl> <dbl> <chr> <chr> <int>
#1 metric1 2.4 1525118288000 normal obj1 5072
#2 metric999 60.4 1525118288000 normal obj1 9550
data
xml1 <- xmlParse('<?xml version="1.0" encoding="UTF-8"?>
<Targets>
<Target TYPE="myserver.mgmt.Metric" NAME="metric1">
<Attribute NAME="name" VALUE="metric1"></Attribute>
<Attribute NAME="Value" VALUE="2.4"></Attribute>
<Attribute NAME="collectionTime" VALUE="1525118288000"></Attribute>
<Attribute NAME="State" VALUE="normal"></Attribute>
<Attribute NAME="ObjectName" VALUE="obj1"></Attribute>
<Attribute NAME="ValueHistory" VALUE="5072"></Attribute>
</Target>
<Target TYPE="myserver.mgmt.Metric" NAME="metric999">
<Attribute NAME="name" VALUE="metric999"></Attribute>
<Attribute NAME="Value" VALUE="60.35"></Attribute>
<Attribute NAME="collectionTime" VALUE="1525118288000"></Attribute>
<Attribute NAME="State" VALUE="normal"></Attribute>
<Attribute NAME="ObjectName" VALUE="obj1"></Attribute>
<Attribute NAME="ValueHistory" VALUE="9550"></Attribute>
</Target>
</Targets>
')

unable to parse xml file in R

Here is the file that I need to parse and eventually convert into a data frame:
<?xml version="1.0" encoding="UTF-8"?>
-<message hash="fb73481d3f3d2b9a70733d69268de71c84f151f8" type="xml" sessionid="https" connector_id="4510010" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<comment/>
-<drive utc_timestamp="2015-09-03T00:09:01.362058" report_name="HTTP Logging to RSP" device_type="ACS 800" sw_appl="ASARF012" sw_type="AS7R7322" serial_number="-">
-<signals timezone="UTC +00:00">
<signal unit="rpm" scale="1" timestamp="2015-09-03T00:07:28.006" name="02_02_01_speed_ref_2" value="0"/>
<signal unit="%" scale="1" timestamp="2015-09-03T00:07:28.511" name="02_01_05_torque" value="0"/>
<signal unit="C" scale="1" timestamp="2015-09-03T00:07:45.352" name="02_01_32_pp_3_temp" value="0"/>
<signal unit="C" scale="1" timestamp="2015-09-03T00:07:46.203" name="02_01_33_pp_4_temp" value="0"/>
<signal unit="C" scale="1" timestamp="2015-09-03T00:07:48.155" name="02_01_35_motor_1_temp" value="0"/>
<signal unit="C" scale="1" timestamp="2015-09-03T00:07:48.911" name="02_01_36_motor_2_temp" value="0"/>
<signal unit="C" scale="1" timestamp="2015-09-03T00:07:52.723" name="02_01_37_motor_temp_est" value="29.999969"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:07:53.638" name="02_03_18_alarm_word_5" value="0"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:07:54.747" name="02_03_19_int_init_fault" value="0"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:07:56.884" name="02_03_11_follower_mcw" value="0"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:07:58.405" name="02_03_13_aux_status_word_3" value="1030"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:07:59.806" name="02_03_14_aux_status_word_4" value="0"/>
<signal unit="" scale="1" timestamp="2015-09-03T00:08:00.485" name="02_03_16_alarm_word_4" value="0"/>
</signals>
</drive>
</message>
I have tried multiple options (XML package is used) but I am not able to convert it into a dataframe/structured form.
## method 1
# Parse the file into an XML document object and print it
result <- xmlParse(file = "test.xml")
print(result)
# method 2
# Ask the XML package to flatten the document into a data frame
xmldataframe <- xmlToDataFrame("test.xml")
print(xmldataframe)
# method 3
# NOTE(review): test.xml is unquoted here, unlike methods 1 and 2 — confirm
xmldoc <- xmlParse(file = test.xml)
rootNode <- xmlRoot(xmldoc)
# Apply xmlValue to the children of each child of the root node
xmlSApply(rootNode,function(x) xmlSApply(x, xmlValue))
# NOTE(review): `data` is not assigned anywhere in this snippet — confirm
cd.catalog <- data.frame(t(data),row.names=NULL)
none of results/xmldataframe/cd.catalog has dataframe/list in structured form that i can use for further analysis.
Hope you are looking for something like this:
library(xml2)
library(dplyr)

xml_doc <- read_xml("test.xml")

# xml_attrs() gives one named character vector per <signal>. Binding the
# rows by attribute name takes column names from the XML itself, so no
# fixed column count or attribute order has to be assumed.
df <- xml_doc %>%
  xml_find_all("//signal") %>%
  xml_attrs() %>%
  lapply(as.list) %>%
  bind_rows() %>%
  as.data.frame(stringsAsFactors = FALSE)

df

Outputting clustering information (dendrogram) in R in a human readable format like XML or JSON

I want to export the clustering information visualized in a dendrogram in a human-readable format (XML, JSON), with distances, nodes and child nodes.
Have a look at the pmml package for a XML-based representation.
# library() stops with an error if pmml is missing; require() would only
# return FALSE and let the script fail later at the pmml() call.
library(pmml)

# a 2-dimensional example: two groups of 50 random points each
x <- rbind(
  matrix(rnorm(100, sd = 0.3), ncol = 2),
  matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(x) <- c("x", "y")

cl <- kmeans(x, 2)
plot(x, col = cl$cluster)

# Print the fitted k-means model as PMML (XML)
pmml(cl, centers = cl$centers)
<PMML version="4.1" xmlns="http://www.dmg.org/PMML-4_1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.dmg.org/PMML-4_1 http://www.dmg.org/v4-1/pmml-4-1.xsd">
<Header copyright="Copyright (c) 2013 edisz" description="KMeans cluster model">
<Extension name="user" value="edisz" extender="Rattle/PMML"/>
<Application name="Rattle/PMML" version="1.4"/>
<Timestamp>2013-11-11 15:02:46</Timestamp>
</Header>
<DataDictionary numberOfFields="2">
<DataField name="x" optype="continuous" dataType="double"/>
<DataField name="y" optype="continuous" dataType="double"/>
</DataDictionary>
<ClusteringModel modelName="KMeans_Model" functionName="clustering" algorithmName="KMeans: Hartigan and Wong" modelClass="centerBased" numberOfClusters="2">
<MiningSchema>
<MiningField name="x"/>
<MiningField name="y"/>
</MiningSchema>
<Output>
<OutputField name="predictedValue" feature="predictedValue"/>
<OutputField name="clusterAffinity_1" feature="clusterAffinity" value="1"/>
<OutputField name="clusterAffinity_2" feature="clusterAffinity" value="2"/>
</Output>
<ComparisonMeasure kind="distance">
<squaredEuclidean/>
</ComparisonMeasure>
<ClusteringField field="x" compareFunction="absDiff"/>
<ClusteringField field="y" compareFunction="absDiff"/>
<Cluster name="1" size="49" id="1">
<Array n="2" type="real">1.08242766097448 0.970387920586825</Array>
</Cluster>
<Cluster name="2" size="51" id="2">
<Array n="2" type="real">0.0261601744749776 0.0786776972701963</Array>
</Cluster>
</ClusteringModel>
</PMML>
But I don't know if this is more human-readable...

Resources