UK Postcode to Census Data using the API - ons-api

Using the Office for National Statistics website I can get a census summary for a UK postcode.
https://neighbourhood.statistics.gov.uk/dissemination/
I expected that I should be able to do the same thing using the API.
https://neighbourhood.statistics.gov.uk/HTMLDocs/downloads/QuickStart-Guide-V2.1.pdf
But it isn't clear to me how to get from the postcode to the neighbourhood (or Lower Layer Super Output Area as the Office for National Statistics calls them). It seems that I need to use the Delivery endpoint like this.
http://neighbourhood.statistics.gov.uk/NDE2/Deli/getChildAreaTables?ParentAreaId=276980&LevelTypeId=141&Datasets=67
But how do I find out which parameters to use for a specific postcode?

It looks like three calls are required to get a dataset.
import xml.etree.ElementTree as ElementTree
import json
import requests
API_KEY = "YOUR_API_KEY"
def get_area_id(level_type, postcode):
    """Get the area id for a postcode at a given geographic level.

    Calls the Neighbourhood Statistics ``FindAreas`` discovery endpoint with
    HierarchyId 27 and scans the returned ``Area`` elements for the requested
    level type. If several areas match, the last one in the response wins.

    :param level_type: The resolution you are interested in. 14 = ward level data.
    :param postcode: A UK postcode
    :return: string area identifier, or '' when no area of that level is returned
    :raises requests.HTTPError: if the service answers with an error status
    """
    base_url = "http://neighbourhood.statistics.gov.uk/NDE2/Disco/FindAreas"
    payload = {'HierarchyId': '27', 'Postcode': postcode}
    response = requests.get(base_url, params=payload, timeout=30)
    # Fail on HTTP errors here rather than trying to parse an error page as XML.
    response.raise_for_status()
    xml = ElementTree.fromstring(response.content)
    namespaces = {'ns1': 'http://neighbourhood.statistics.gov.uk/nde/v1-0/discoverystructs'}
    wanted_level = str(level_type)
    area_id = ''
    for area in xml.findall('.//ns1:Area', namespaces):
        # findtext tolerates an Area element that lacks the child element.
        if area.findtext('ns1:LevelTypeId', namespaces=namespaces) == wanted_level:
            area_id = area.findtext('ns1:AreaId', default='', namespaces=namespaces)
    return area_id
def get_ext_code(area_id):
    """Get the ext code for an area id.

    Calls the Neighbourhood Statistics ``GetAreaDetail`` discovery endpoint
    and returns the text of the first ``ExtCode`` element in the response.

    :param area_id: the area id for a postcode
    :return: the ext code for an area (I think is the GSS code)
    :raises requests.HTTPError: if the service answers with an error status
    :raises ValueError: if the response contains no ``ExtCode`` element
    """
    base_url = "http://neighbourhood.statistics.gov.uk/NDE2/Disco/GetAreaDetail"
    payload = {'AreaId': area_id}
    response = requests.get(base_url, params=payload, timeout=30)
    # Fail on HTTP errors here rather than trying to parse an error page as XML.
    response.raise_for_status()
    xml = ElementTree.fromstring(response.content)
    namespaces = {'ns1': 'http://neighbourhood.statistics.gov.uk/nde/v1-0/discoverystructs'}
    ext_code = xml.find('.//ns1:ExtCode', namespaces)
    if ext_code is None:
        raise ValueError("No ExtCode element returned for area id %r" % (area_id,))
    return ext_code.text
def get_data(data_set, geog_code):
    """Get the data for a geographical code.

    Requests ``<data_set>.json`` from the ONS data API in json-stat form for
    the 2011WARDH geography, restricted to ``geog_code``.

    :param data_set: string identifier from http://www.nomisweb.co.uk/census/2011/quick_statistics
    :param geog_code: the ext code for the geographical area
    :return: a json object with the data
    :raises requests.HTTPError: if the service answers with an error status
    """
    base_url = "http://data.ons.gov.uk/ons/api/data/dataset/"
    payload = {'apikey': API_KEY, 'context': 'Census', 'geog': '2011WARDH', 'dm/2011WARDH': geog_code,
               'totals': 'false', 'jsontype': 'json-stat'}
    # base_url already ends with "/", so no extra separator is inserted
    # (the previous code produced ".../dataset//<data_set>.json").
    response = requests.get(base_url + data_set + ".json", params=payload, timeout=30)
    response.raise_for_status()
    return json.loads(response.text)
def process(json_object, data_set):
    """Flatten one dimension of a json-stat dataset into ``{label: value}``.

    The dimension used is the second entry of the dataset's dimension ``id``
    list. Each category's ``index`` gives its position in ``value``.

    :param json_object: parsed json-stat response keyed by dataset id
    :param data_set: the dataset id to read from ``json_object``
    :return: dict mapping each category label to its value
    """
    dataset = json_object[data_set]
    dimension = dataset['dimension']
    category = dimension[dimension['id'][1]]['category']
    index = category['index']
    labels = category['label']
    values = dataset['value']
    # json-stat allows "value" to be an object keyed by the stringified
    # position or a plain array; support both.
    keyed_by_string = isinstance(values, dict)
    data = {}
    for key, label in labels.items():
        position = index[key]
        data[label] = values[str(position)] if keyed_by_string else values[position]
    return data
# Example run: resolve the level-14 (ward) area for postcode SW1A 0AA, look up
# its ext code, fetch dataset QS208EW for that code and print the
# label -> value summary.
area_id = get_area_id(14, "SW1A 0AA")
gss_code = get_ext_code(area_id)
data_returned = get_data("QS208EW", gss_code) # QS208EW = religion
print(process(data_returned, "QS208EW"))

Have you tried looking at the code in the VBA example?
' Reads the postcode from cell A2 of the "Query" sheet, fetches the areas for
' it, and passes each area whose level type is 15, 14, 13, 11 or 10 to
' UpdateArea. Shows a message box when no areas come back.
' NOTE(review): GetSheet, GetAreas, GetElements, GetValue and UpdateArea are
' defined elsewhere; their exact behaviour is assumed from how they are used here.
Function RunAreas()
Dim txtResponse
Dim postcode As String
Dim extCode
Set rootSheet = GetSheet("Query")
' areaSheet is set here but not read again in this function.
Set areaSheet = GetSheet("Areas")
' endPoint is assigned but not read again in this function; presumably GetAreas builds its own URL - confirm.
endPoint = "http://neighbourhood.statistics.gov.uk/NDE2/Disco/FindAreas?HierarchyId=27&Postcode="
postcode = rootSheet.Range("A2").Value
Application.StatusBar = "Getting areas for " + postcode
txtResponse = GetAreas(postcode)
' delim is assigned but not read again in this function.
delim = "<delim>"
data = GetElements(txtResponse, "Area")
' A negative upper bound means no Area elements were returned.
If UBound(data) < 0 Then
Application.StatusBar = False
MsgBox "Postcode " + postcode + " not found", vbExclamation
Exit Function
End If
For i = 0 To UBound(data)
curLevelType = GetValue(data(i), "LevelTypeId")
curHierarchy = GetValue(data(i), "HierarchyId")
curId = GetValue(data(i), "AreaId")
curName = GetValue(data(i), "Name")
' Dispatch on level type; the second argument to UpdateArea rises from 2 to 6
' (presumably a row or column position on the sheet - confirm in UpdateArea).
Select Case curLevelType
Case 15
extCode = UpdateArea("Output Area", 2, curId, curName, curHierarchy)
Case 14
extCode = UpdateArea("Ward", 3, curId, curName, curHierarchy)
Case 13
extCode = UpdateArea("LA", 4, curId, curName, curHierarchy)
Case 11
extCode = UpdateArea("Region", 5, curId, curName, curHierarchy)
Case 10
extCode = UpdateArea("Country", 6, curId, curName, curHierarchy)
End Select
Next
MsgBox ("Areas Found")
Application.StatusBar = "Get Areas completed"
End Function

Related

azure kql parse function - unable to parse ? using regex (zero or one time)

I'm trying to parse this line:
01/11/1011 11:11:11: LOG SERVER = 1 URL = /one/one.aspx/ AccountId = 1111 MainId = 1111 UserAgent = Browser = Chrome , Version = 11.0, IsMobile = False, IP = 1.1.1.1 MESSAGE = sample message TRACE = 1
using this parse statement:
// Regex-mode, case-insensitive parse of `message`: the quoted segments are
// patterns matched between the typed captures, in the order written.
// NOTE(review): parse-where is documented to drop rows that do not match the
// whole pattern, so a row missing any one key is filtered out - confirm.
parse-where kind=regex flags=i message with
timestamp:datetime
":.*LOG SERVER = " log_server:string
".*URL = " url:string
".*AccountId = " account_id:string
".*MainId = " main_id:string
".*?UserAgent = " user_agent:string
",.*Version = " version:string
",.*IsMobile = " is_mobile:string
",.*IP = " ip:string
".*MESSAGE = " event:string
".*TRACE = " trace:string
The problem is that some records are missing one "key = value" pair, although the order of the remaining columns stays the same.
To match all kinds of rows, I wanted to make each column's prefix optional by wrapping it as (<name_of_column>)?, for example:
"(,.*Version = )?" version:string
but it fails every time.
I think parse/parse-where operators are more useful when you have well formatted inputs - the potentially missing values in this case would make it tricky/impossible to use these operators.
If you control the formatting of the input strings, consider normalizing it to always include all fields and/or add delimiters and quotes where appropriate.
Otherwise, you could use the extract function to parse it - the following expression would work even if some lines are missing some fields:
// Extract each field independently so that a missing "key = value" pair only
// leaves that one column empty instead of failing the whole row.
| extend
    // Lazy and anchored: stop at the first ": " so a later ": " inside the
    // message text cannot be swallowed into the timestamp.
    timestamp = extract("^(.*?): ", 1, message, typeof(datetime)),
    log_server = extract(".*LOG SERVER = ([^\\s]*).*", 1, message),
    url = extract(".*URL = ([^\\s]*).*", 1, message),
    // account_id is parsed in the question but was missing from this list.
    account_id = extract(".*AccountId = ([^\\s]*).*", 1, message),
    main_id = extract(".*MainId = ([^\\s]*).*", 1, message),
    user_agent = extract(".*UserAgent = ([^,]*).*", 1, message),
    version = extract(".*Version = ([^,]*).*", 1, message),
    is_mobile = extract(".*IsMobile = ([^,]*).*", 1, message),
    ip = extract(".*IP = ([^\\s]*).*", 1, message),
    // MESSAGE runs up to " TRACE" when a trace is present, otherwise to end of line.
    event = iff(message has "TRACE", extract(".*MESSAGE = (.*) TRACE.*", 1, message), extract(".*MESSAGE = (.*)", 1, message)),
    trace = extract(".*TRACE = (.*)", 1, message)

How to get all of the records in COVID-19 Data Lake linelistrecords

I'd like to use the https://api.c3.ai/covid/api/1/linelistrecord/fetch API but only get 2000 records back. I know that there are more than 2000 records -- how do I get them?
Here's my code in R:
library(tidyverse)
library(httr)
library(jsonlite)
# POST an empty spec, i.e. no explicit limit or offset is sent.
resp <- POST(
"https://api.c3.ai/covid/api/1/linelistrecord/fetch",
body = list(
spec = {}
) %>% toJSON(auto_unbox = TRUE),
accept("application/json")
)
# Number of objects in the parsed response body.
length(content(resp)$objs)
I get 2000 records.
The spec you are passing in has the following optional fields, among others:
limit // maximum number of objects to return
offset // offset to use for paged reads
The default value of limit is 2000.
The fetch result that is returned has a boolean field, along with the array of objects, called hasMore, which indicates whether there are more records in the underlying data store.
You can write a loop that ends once hasMore is false. Start with an offset of 0 and a limit of n (say, n = 2000), and then increase the offset by n on each iteration.
library(tidyverse)
library(httr)
library(jsonlite)

# Page through linelistrecord/fetch, `limit` records at a time, until the
# service reports that no more records are available.
limit <- 2000
offset <- 0
hasMore <- TRUE
all_objs <- list()
# isTRUE() also ends the loop cleanly if a response lacks the hasMore field.
while (isTRUE(hasMore)) {
  resp <- POST(
    "https://api.c3.ai/covid/api/1/linelistrecord/fetch",
    body = list(
      spec = list(
        limit = limit,
        offset = offset,
        filter = "contains(location, 'California')" # just as an example, to cut down on the dataset
      )
    ) %>% toJSON(auto_unbox = TRUE),
    accept("application/json")
  )
  # Stop on an HTTP error instead of trying to read fields from an error body.
  stop_for_status(resp)
  # Parse the body once per page rather than once per field.
  page <- content(resp)
  hasMore <- page$hasMore
  offset <- offset + limit
  all_objs <- c(all_objs, page$objs)
}
length(all_objs)
You could also do something similar in Python too. Here is a code snippet for doing the same in Python
import requests
headers = {'Accept': 'application/json'}
import io
import pandas as pd
def read_data(url, payload, headers = headers):
    """Fetch every page from a fetch endpoint and return them as one DataFrame.

    Posts ``payload`` to ``url`` repeatedly, starting at offset 0 and advancing
    the offset by the number of records received, until the response's
    ``hasMore`` flag is false.

    :param url: endpoint to POST to
    :param payload: request body; its ``spec`` entry receives the paging offset.
        The caller's dict is not modified.
    :param headers: HTTP headers sent with every request
    :return: pandas DataFrame with the records of all pages concatenated
    :raises requests.HTTPError: if the service answers with an error status
    """
    # Work on copies so the caller's payload and its nested spec stay untouched.
    spec = dict(payload.get('spec', {}))
    spec['offset'] = 0
    request_body = {**payload, 'spec': spec}
    pages = []
    has_more = True
    while has_more:
        # Use the url argument (the previous code ignored it for a hard-coded URL).
        response = requests.post(url, json=request_body, headers=headers)
        response.raise_for_status()
        result = response.json()  # parse the body once per page
        page = pd.DataFrame.from_dict(result['objs'])
        pages.append(page)
        received = page.shape[0]
        spec['offset'] += received
        # An empty page cannot advance the offset; stop rather than loop forever.
        has_more = bool(result['hasMore']) and received > 0
    return pd.concat(pages)
# Example call: the spec passes a filter expression and a comma-separated
# include list to the service; read_data adds the paging offset.
url = 'https://api.c3.ai/covid/api/1/linelistrecord/fetch'
payload = {
"spec":{
"filter": "exists(hospitalAdmissionDate)",
"include": "caseConfirmationDate, outcomeDate, hospitalAdmissionDate, age"
}
}
df = read_data(url, payload)

How to include / exclude filter statement in R httr query for Localytics

I can successfully query data from Localytics using R, such as the following example:
# Query Localytics for occurrences and users per URI, restricted to
# "Content Viewed" events in the given date range whose Item URI is "testing".
# <APP_ID> is a placeholder for your own application id.
r <- POST(url = "https://api.localytics.com/v1/query",
          body = list(app_id = <APP_ID>,
                      metrics = c("occurrences", "users"),
                      dimensions = c('a:URI'),
                      conditions = list(day = c("between", "2020-02-11", "2020-03-12"),
                                        event_name = "Content Viewed",
                                        "a:Item URI" = "testing")
          ),
          encode = "json",
          authenticate(Key, Secret),
          accept("application/json"),
          content_type("application/json"))
stop_for_status(r)
But what I would like to do is create a function so I can do this quickly and not have to copy/paste data.
The issue I am running into is with the line "a:Item URI" = "testing", where I am filtering all searches by the Item URI where they all equal "testing", but sometimes, I don't want to include the filter statement, so I just remove that line entirely.
When I wrote my function, I tried something like the following:
# Question's attempt: wraps the Localytics query in a function with an
# optional filter dimension and criteria.
get_localytics <- function(appID, metrics, dimensions, from = Sys.Date()-30,
to = Sys.Date(), eventName = "Content Viewed",
Key, Secret, filterDim = NULL, filterCriteria = NULL){
r <- httr::POST(url = "https://api.localytics.com/v1/query",
body = list(app_id = appID,
metrics = metrics,
dimensions = dimensions,
conditions = list(day = c("between", as.character(from), as.character(to)),
event_name = eventName,
# In list(), the name left of `=` is taken literally: this creates an element
# called "filterDim", not one named by the value of the filterDim argument.
filterDim = filterCriteria)
),
encode="json",
authenticate(Key, Secret),
accept("application/json"),
content_type("application/json"))
stop_for_status(r)
# Decode the raw response body to text, parse it as JSON, return its results.
result <- paste(rawToChar(r$content),collapse = "")
document <- fromJSON(result)
df <- document$results
return(df)
}
But my attempt at adding filterDim and filterCriteria only produces the error Unprocessable Entity. (Keep in mind, there are lots of variables I can filter by, not just "a:Item URI", so I need to be able to change the filter name as well.)
How can I include a statement, where if I need to filter, I can incorporate that line, but if I don't need to filter, that line isn't included?
conditions is just a list, so you can conditionally add elements to it. Here we use an if statement to test whether both values were passed and, if so, add them to the list.
# Query Localytics and return the `results` element of the JSON response.
#
# appID, metrics, dimensions: passed through to the query body.
# from, to: date range for the "day" condition (default: the last 30 days).
# eventName: value for the "event_name" condition.
# Key, Secret: credentials for HTTP authentication.
# filterDim, filterCriteria: optional extra condition; it is added only when
#   both are supplied, using the value of filterDim as the condition name.
get_localytics <- function(appID, metrics, dimensions, from = Sys.Date()-30,
                           to = Sys.Date(), eventName = "Content Viewed",
                           Key, Secret, filterDim = NULL, filterCriteria = NULL){
  conditions <- list(day = c("between", as.character(from), as.character(to)),
                     event_name = eventName)
  # && is the scalar, short-circuit form that belongs in an if() condition.
  if (!is.null(filterDim) && !is.null(filterCriteria)) {
    # [[<- names the element by the value of filterDim.
    conditions[[filterDim]] <- filterCriteria
  }
  r <- httr::POST(url = "https://api.localytics.com/v1/query",
                  body = list(app_id = appID,
                              metrics = metrics,
                              dimensions = dimensions,
                              conditions = conditions),
                  encode="json",
                  authenticate(Key, Secret),
                  accept("application/json"),
                  content_type("application/json"))
  stop_for_status(r)
  # Decode the raw response body to text, then parse it as JSON.
  result <- paste(rawToChar(r$content),collapse = "")
  document <- fromJSON(result)
  df <- document$results
  return(df)
}

How to update record value in SML?

I am writing an SML program to update records in a list. For example, I have the type person_name.
type person_name = {fname:string, lname:string, mname:string}
Then I have person_bio which has person_name embedded in it.
type person_bio = {age:real, gender:string, name:person_name, status:string}
Next I have employee which has person_bio.
type employee = {p:person_bio, payrate:real, whours:real} list;
Now, I have to define function 'updateLastName' by passing the first name.
As of now, created one record 'e1' with below data.
{p={age=40.0,gender="M",name{fname="rob",lname="sen",mname=""},status="M"},
payrate=30.0,whours=10.0}
But I am having trouble traversing the list and then updating one field in a record.
(* Question's attempt; it does not compile as written. It walks the list by
   head and tail and tries to assign to a record field in place. Note that
   the parameter is `l` while the emptiness test uses `L`, and that the `//`
   remarks are not SML comment syntax. *)
fun updateLastName(x:string,l:employee)=
if (L=[]) then []
else if (x= #fname(#name(#p hd l)) //cheking name of 1st record in list
//not getting how to update,this kind of line did not work
#fname(#name(#p hd l) = "abc"
else updateLastName(x,tl(l)); // hope this is right
Please suggest.
You have stumbled upon something difficult: Updating a deeply nested record.
For records you have getters, so #fname (#name (#p employee)) gets the field that you're checking against to know that this is the employee whose last name you are going to update. But records don't grant you equivalent setters, so you have to make those. If you're curious, lenses (Haskell) are a general way to solve this, but I don't know of any implementation of lenses for Standard ML.
I'll go ahead and remove the list part in your employee type; you should probably want an employee list if you want multiple employees modelled, rather than to say that an employee is multiple persons.
(* Record types, innermost first: a name, a bio that contains a name, and an
   employee that contains a bio. Here `employee` is a single record, not a list. *)
type person_name = { fname:string, lname:string, mname:string }
type person_bio = { age:real, gender:string, name:person_name, status:string }
type employee = { p:person_bio, payrate:real, whours:real }
(* First sample employee, built from its name and bio. *)
val name1 = { fname = "John", lname = "Doe", mname = "W." } : person_name
val bio1 = { age = 42.0, gender = "M", name = name1, status = "?" } : person_bio
val my_employee1 = { p = bio1, payrate = 1000.0, whours = 37.0 } : employee
(* Second sample employee. *)
val name2 = { fname = "Freddy", lname = "Mercury", mname = "X." } : person_name
val bio2 = { age = 45.0, gender = "M", name = name2, status = "?" } : person_bio
val my_employee2 = { p = bio2, payrate = 2000.0, whours = 37.0 } : employee
(* Multiple employees are modelled as an `employee list`. *)
val my_employees = [ my_employee1, my_employee2 ] : employee list
As for the setters (the ones that you could automatically derive using lenses),
(* Return a copy of the employee with its bio replaced by p. *)
fun setP (p : person_bio, {payrate, whours, ...} : employee) : employee =
  { p = p, payrate = payrate, whours = whours }

(* Return a copy of the bio with its name replaced. *)
fun setName (name : person_name, {age, gender, status, ...} : person_bio) : person_bio =
  { age = age, gender = gender, name = name, status = status }

(* Return a copy of the name with its last name replaced. *)
fun setLname (lname, {fname, mname, ...} : person_name) : person_name =
  { fname = fname, lname = lname, mname = mname }
you can compose these, e.g. like:
- setP (setName (setLname ("Johnson", #name (#p my_employee1)), #p my_employee1), my_employee1)
> val it =
{p =
{age = 42.0, gender = "M",
name = {fname = "John", lname = "Johnson", mname = "W."},
status = "?"}, payrate = 1000.0, whours = 37.0} :
{p :
{age : real, gender : string,
name : {fname : string, lname : string, mname : string},
status : string}, payrate : real, whours : real}
Or you can split that line a little apart to make it more readable:
(* Return the employee list with the last name set to lname for every
   employee whose first name equals fname; other employees are unchanged. *)
fun updateLname (fname, lname, employees) =
  let
    fun hasFirstName (e : employee) = #fname (#name (#p e)) = fname
    (* Rebuild the record from the inside out: name, then bio, then employee. *)
    fun rename (e : employee) =
      setP (setName (setLname (lname, #name (#p e)), #p e), e)
  in
    List.map (fn e => if hasFirstName e then rename e else e) employees
  end
Trying this out:
- updateLname ("Freddy", "Johnson", my_employees);
> val it =
[{p = ... {fname = "John", lname = "Doe", mname = "W."}, ... },
{p = ... {fname = "Freddy", lname = "Johnson", mname = "X."}, ... }]
- updateLname ("John", "Johnson", my_employees);
> val it =
[{p = ... {fname = "John", lname = "Johnson", mname = "W."}, ... },
{p = ... {fname = "Freddy", lname = "Mercury", mname = "X."}, ... }]
Depending on your situation, references may be appropriate here.
For any values you may need to change, you can make them a reference, i.e.
(* The last name is held in a ref cell so it can be overwritten in place. *)
type person_name = {fname:string, lname:string ref, mname:string}
type person_bio = {age:real, gender:string, name:person_name, status:string}
(* Overwrite the contents of the lname ref cell reachable from bio. *)
fun change_lname(new_lname: string, bio: person_bio) = (#lname (#name bio)) := new_lname
val p1 = ...
(* `!` is an ordinary function, so the dereference must be parenthesised:
   `print !x` would parse as `(print !) x`. *)
;print (!(#lname (#name p1)));     (* ==> LastName1 *)
change_lname("LastName2", p1);
print (!(#lname (#name p1)));      (* ==> LastName2 *)
If you plan on modifying data in a record a lot, it's probably a good idea to make it a reference so that your program is not rewriting memory every time it needs to change one value (though in many situations the compiler/interpreter will be able to optimize this). It also saves you from having to rewrite setter functions if the signature of your record changes. The downside is that you'll be introducing complexity into your program by using references.
For example, in the above code, we're not actually modifying p1's last name, instead p1 and a copy (passed to the function) both point to the same string, and we modify that string in the function. At no point are we actually changing any of the data in either record, we're only changing data that the records point to. It's a subtle difference, and it doesn't really make a difference in this example, but it can lead to strange bugs that are hard to debug.

How to get transaction history without certain state

I try to get transaction history on corda.
I need to get the amount of the transaction for a certain period
My api for this :
/**
 * Returns the states recorded in the vault between one hour before and one
 * hour after the current instant, regardless of status.
 *
 * Only the first page of up to 100 states is returned.
 */
@GET
@Path("transactions")
@Produces(MediaType.APPLICATION_JSON)
fun gettransatcions(): List<StateAndRef<ContractState>> {
    val now = Instant.now()
    val pagingSpec = PageSpecification(DEFAULT_PAGE_NUM, 100)
    val start = now.minus(1, ChronoUnit.HOURS)
    val end = now.plus(1, ChronoUnit.HOURS)
    val recordedBetweenExpression = QueryCriteria.TimeCondition(
        QueryCriteria.TimeInstantType.RECORDED,
        ColumnPredicate.Between(start, end))
    val criteria = QueryCriteria.VaultQueryCriteria(
        timeCondition = recordedBetweenExpression,
        status = Vault.StateStatus.ALL)
    // Return the filtered page; the previous code ran this query, discarded
    // the result and returned a second, unfiltered query instead.
    return rpcOps.vaultQueryBy<ContractState>(criteria, paging = pagingSpec).states
}
where:
val rpcOps: CordaRPCOps
I can explicitly specify States for which to receive transactions like:
val criteria = VaultQueryCriteria(contractStateTypes = setOf(Cash.State::class.java, DealState::class.java))
but, I need to get transactions across all states except for a certain.
Does Corda have any mechanism for this?
There is no type of query criteria that specifically excludes certain states. However, you can define a query criteria that specifically includes certain states, then combine that with your existing criteria using an AND composition:
val TODAY = Instant.now()
val pagingSpec = PageSpecification(DEFAULT_PAGE_NUM, 100)
// Window from one hour before to one hour after the current instant.
val start = TODAY.minus(1, ChronoUnit.HOURS)
val end = TODAY.plus(1, ChronoUnit.HOURS)
val recordedBetweenExpression = QueryCriteria.TimeCondition(
QueryCriteria.TimeInstantType.RECORDED,
ColumnPredicate.Between(start, end))
// Time criteria: recorded inside the window, any status.
val timeCriteria = QueryCriteria.VaultQueryCriteria(timeCondition = recordedBetweenExpression, status = Vault.StateStatus.ALL)
// Type criteria: only the listed state classes (State1 and State2 presumably stand in for your own types).
val typeCriteria = QueryCriteria.VaultQueryCriteria(contractStateTypes = setOf(State1::class.java, State2::class.java), status = Vault.StateStatus.ALL)
// Both criteria must hold for a state to be returned.
val combinedCriteria = timeCriteria.and(typeCriteria)
val results = rpcOps.vaultQueryBy<ContractState>(combinedCriteria, paging = pagingSpec)
This will retrieve all the states that meet both your time criteria and your type criteria.

Resources