DC.js histogram of crossfilter dimension counts - crossfilter

I have a crossfilter with the following data structure being inputted.
project | subproject | cost
// Sample rows, one per subproject, in the column order: project | subproject | cost.
// Declared with `var` so the snippet does not create an implicit global
// (which is a ReferenceError in strict mode / ES modules).
var data = [
    ["PrA", "SubPr1", 100],
    ["PrA", "SubPr2", 150],
    ["PrA", "SubPr3", 100],
    ["PrB", "SubPr4", 300],
    ["PrB", "SubPr5", 500],
    ["PrC", "SubPr6", 450]
];
I can create a barchart that has the summed cost per project:
var ndx = crossfilter(data);
// Dimension keyed by the project of each row.
var projDim = ndx.dimension(function (d) { return d.project; });
// One bin per project holding the summed budget of its rows.
// The group has to be derived from projDim; no `costDim` exists in this snippet.
var projGroup = projDim.group().reduceSum(function (d) { return d.budget; });
What I want to do is create a dc.js histogram by project cost...so {450: 2, 300: 1}, etc. As far as I can tell, crossfilter can have only attributes of each row be input for a dimension. Is there a way around this?

Accepting the challenge!
It is true, crossfilter does not support this kind of double-reduction, but if you are willing to accept a slight loss of efficiency, you can create "fake dimensions" and "fake groups" with the desired behavior. Luckily, dc.js doesn't use very much of the crossfilter API, so you don't have to implement too many methods.
The first part of the trick is to duplicate the dimension and group so that the new dimension and old dimension will each observe filtering on the other.
The second part is to create the fake groups and dimensions, which walk the bins of the copied group and rebin and refilter based on the values instead of the keys.
A start of a general solution is below. For some charts it is also necessary to implement group.top(), and it is usually okay to just forward that to group.all().
/**
 * Builds a "fake dimension" that filters `dim` by the reduced *values* of
 * `group` rather than by the dimension's keys.
 *
 * @param {Object} dim   Dimension-like object exposing filter() and filterFunction().
 * @param {Object} group Group-like object whose all() returns [{key, value}, ...].
 * @returns {Object} Object with the filter() and filterFunction() methods.
 */
function values_dimension(dim, group) {
    var fake = {};

    // Only clearing the filter (null) is supported; anything else throws.
    fake.filter = function (v) {
        if (v === null) {
            return dim.filter(null);
        }
        throw new Error("don't know how to do this!");
    };

    // Translate a predicate on bin values into a predicate on bin keys.
    fake.filterFunction = function (f) {
        var acceptedKeys = group.all()
            .filter(function (bin) { return f(bin.value); })
            .map(function (bin) { return bin.key; });
        dim.filterFunction(function (k) {
            return acceptedKeys.indexOf(k) !== -1;
        });
        return this;
    };

    return fake;
}
/**
 * Builds a "fake group" that re-bins `group` by value: each resulting bin's
 * key is a reduced value of the wrapped group and its value is the number of
 * original bins that reduced to exactly that value.
 *
 * Bins whose value is 0 are skipped, as are values that are not finite
 * numbers. The tally is kept in a plain object rather than a sparse array so
 * that non-integer and negative values are counted too (array iteration only
 * visits non-negative integer indices and would silently drop them).
 *
 * @param {Object} group Group-like object whose all() returns [{key, value}, ...].
 * @returns {Object} Object with all(), returning [{key, value}, ...] sorted by
 *     ascending numeric key.
 */
function values_group(group) {
    return {
        all: function () {
            var counts = {};
            group.all().forEach(function (kv) {
                var v = Number(kv.value);
                if (v === 0 || !isFinite(v)) {
                    return;
                }
                counts[v] = (counts[v] || 0) + 1;
            });
            // Object keys are strings; convert back to numbers and sort
            // numerically so bins come out in ascending order.
            return Object.keys(counts)
                .map(Number)
                .sort(function (a, b) { return a - b; })
                .map(function (v) {
                    return { key: v, value: counts[v] };
                });
        }
    };
}
// Second dimension/group pair on the same project key, so that this pair and
// the original pair each observe filtering applied through the other.
var projDim2 = ndx.dimension(function (row) {
    return row.project;
});
var projGroup2 = projDim2.group().reduceSum(function (row) {
    return row.budget;
});
// Fake group and dimension that bin and filter by summed budget instead of by project.
var countBudgetGroup = values_group(projGroup2);
var countBudgetDim = values_dimension(projDim2, projGroup2);
jsfiddle here: http://jsfiddle.net/gordonwoodhull/55zf7L1L/

JSFiddle Link
Denormalize + map-reduce. Note that the data already includes the cost per project as the 4th column (this can easily be pre-calculated). It's a hack, but hopefully an easy one that gets dc.js and crossfilter working without too much change.
// Each row: [project, subproject, budget, pre-computed total cost of the project].
var data = [
    ["PrA", "SubPr1", 100, 450],
    ["PrA", "SubPr2", 150, 450],
    ["PrA", "SubPr3", 200, 450],
    ["PrB", "SubPr4", 300, 800],
    ["PrB", "SubPr5", 500, 800],
    ["PrC", "SubPr6", 450, 450]
];
// Convert the positional rows into named records.
var newdata = data.map(function (d) {
    return {
        project: d[0],
        subproject: d[1],
        budget: d[2],
        cost: d[3]
    };
});
var ndx = crossfilter(newdata),
    costDim = ndx.dimension(function (d) {
        return d.cost;
    }),
    // Each bin tallies its currently included rows per project, so `count`
    // is the number of distinct projects in the bin. Keeping the tally in the
    // bin (rather than in a shared lookup with no remove reducer) lets the
    // count go down and come back up as filters on other dimensions change.
    costGroup = costDim.group().reduce(
        function (p, v) {
            if (!p.rowsByProject[v.project]) {
                console.info(v.project);
                p.rowsByProject[v.project] = 0;
                p.count += 1;
            }
            p.rowsByProject[v.project] += 1;
            return p;
        },
        function (p, v) {
            p.rowsByProject[v.project] -= 1;
            if (p.rowsByProject[v.project] === 0) {
                delete p.rowsByProject[v.project];
                p.count -= 1;
            }
            return p;
        },
        function () {
            // Prototype-less object so project names cannot collide with
            // inherited property names.
            return { count: 0, rowsByProject: Object.create(null) };
        }
    ).order(function (p) {
        // Bin values are objects; order bins by their project count.
        return p.count;
    });
dc.rowChart("#costChart")
    .renderLabel(true)
    .dimension(costDim)
    .group(costGroup)
    // The reduced value is an object; plot its distinct-project count.
    .valueAccessor(function (d) {
        return d.value.count;
    })
    .xAxis().ticks(2);
dc.renderAll();
Map-Reduce can be very powerful and the API can be accessed from here. JSFiddle Link

Related

Computing a pixel-wise R-squared value between two image collections

I have several image collections: september_CCI (chlorophyll-carotenoid index), SDD (snow disappearance date), cumulative_summer_VPD (vapor pressure deficit), and cumulative_summer_precipitation. Each image collection covers the same years (2000-2020) and they are the same spatial dimensions.
CCI is the dependent variable. SDD, cumulative_summer_VPD, and cumulative_summer_precipitation are the independent variables.
I would like to compute the pixel-wise R-squared between CCI and each of the independent variables.
Here's some code that I found elsewhere and have tried to adapt to my case:
// 1. Join datasets. First, define filter. This is based on the date.
var filter = ee.Filter.equals({
    leftField: 'system:time_start',
    rightField: 'system:time_start'
});
// 2. Create the join.
var innerJoin = ee.Join.inner('primary', 'secondary');
// 3. Apply the join.
var merge = innerJoin.apply(septCCI, sdd2, filter);
// 4. Merge both collections into two-band images ('cci', 'sdd').
// The join result is a FeatureCollection, so the mapped result must be cast
// back to an ImageCollection. Without the cast, later map() callbacks receive
// Features and image.toArray() resolves to Feature.toArray(feature, properties).
var mergedCCI_SDD = ee.ImageCollection(merge.map(function(f) {
    var cci = ee.Image(f.get('primary')).rename('cci');
    var sdd = ee.Image(f.get('secondary')).rename('sdd');
    return cci.addBands(sdd).copyProperties(cci);
}));
// Reduces the two selected bands of every image in the collection with
// ee.Reducer.covariance(), after converting each image to an array image.
var covariance = function(mergedCCI_SDD, cci, sdd) {
    return mergedCCI_SDD.select([cci, sdd]).map(function(image) {
        return image.toArray();
    }).reduce(ee.Reducer.covariance(), 8);
};
// Derives the correlation from a covariance array image:
// cov(0,1) / (sqrt(var(0,0)) * sqrt(var(1,1))).
var correlation = function(vcArrayImage) {
    var covariance = ee.Image(vcArrayImage).arrayGet([0, 1]);
    var sd0 = ee.Image(vcArrayImage).arrayGet([0, 0]).sqrt();
    var sd1 = ee.Image(vcArrayImage).arrayGet([1, 1]).sqrt();
    return covariance.divide(sd0).divide(sd1).rename('correlation');
};
// Compute and display cross-covariance.
var covCCI_SDD = covariance(mergedCCI_SDD, 'cci', 'sdd');
Map.addLayer(covCCI_SDD.arrayGet([0, 1]), {}, 'CCI-SDD cov');
// Compute and display cross-correlation.
var corrCCI_SDD = correlation(covCCI_SDD);
Map.addLayer(corrCCI_SDD, {min: -0.5, max: 0.5}, 'CCI-SDD corr');
When I try to run this (within a much larger piece of code), I get the following error, which corresponds to the line on which
there is this code: return image.toArray();
Line 643: Required argument (properties) missing to function: Feature.toArray(feature, properties)
Creates an array from the given properties of an object, which must all be numbers.
Args:
feature (Feature): The object from which to select array properties.
properties (List): The property selectors for each array element.
Thanks for any help you can provide.

How can I apply cloud masking to mndwi image in Earth Engine?

I want to apply cloud masking to an MNDWI image, but I get the error message "s2SR.map is not a function". I don't know how to resolve it.
var geometry = ee.Geometry.Polygon([[41.55427215633343, 41.57962485896675],
    [41.607143860434995, 41.57962485896675],
    [41.607143860434995, 41.62429558047125],
    [41.55427215633343, 41.62429558047125],
    [41.55427215633343, 41.57962485896675]]);
// Filtered Sentinel-2 SR scenes, kept as an ImageCollection so that per-image
// functions can be map()-ped over it.
var s2Filtered = ee.ImageCollection('COPERNICUS/S2_SR')
    //filter start and end date
    .filter(ee.Filter.calendarRange(2018, 2018, 'year'))
    .filter(ee.Filter.calendarRange(9, 10, 'month'))
    //filter according to drawn boundary
    .filterBounds(geometry)
    .filterMetadata('CLOUD_COVERAGE_ASSESSMENT', 'less_than', 1);
// Mean composite of the filtered scenes. mean() collapses the collection into
// a single image, which has no map() method.
var s2SR = s2Filtered.mean();
//print(typeof(s2SR))
//print("s2SR", s2SR);
//Map.addLayer(s2SR, {bands: ['B4', 'B3', 'B2'], min: 0, max: 2000}, 'Sentinel ');
Map.centerObject(geometry, 10);
// MNDWI of the mean composite: (Green - SWIR) / (Green + SWIR).
var Green = s2SR.select("B3");
var SWIR = s2SR.select("B11");
var mndwi = Green.subtract(SWIR).divide(Green.add(SWIR)).rename('MNDWI');
//Map.addLayer(mndwi, {min:0, max:1}, 'mndwı');
///// Cloud
// Adds an 'MNDWI' band to a single scene. map() needs a function like this,
// not an already-computed image.
var addMNDWI = function(image) {
    var green = image.select('B3');
    var swir = image.select('B11');
    return image.addBands(green.subtract(swir).divide(green.add(swir)).rename('MNDWI'));
};
// Adds a mask band that is 1 where the scene's own MNDWI is >= 0.95.
var S2maskedVeg = function(image) {
    var MNDWI = image.select(['MNDWI']);
    return image.addBands(ee.Image(1).updateMask(MNDWI.gte(0.95)).rename('MNDVI_mask'));
};
var S2collection = s2Filtered.map(addMNDWI).map(S2maskedVeg);
Map.addLayer(S2collection, {}, 'S2 NDWI mask');
(https://code.earthengine.google.com/6835ca74e985d40df906681039358ee1)
Here, you already calculate the MNDWI over the whole collection:
var mndwi = Green.subtract(SWIR).divide(Green.add(SWIR)).rename('MNDWI');
Therefore, this line does not make much sense:
var S2collection = s2SR.map(mndwi).map(S2maskedVeg)
map() requires a function as input, but it is an ImageCollection. You can just skip this part and map your S2maskedVeg() function over mndwi directly:
var S2collection = mndwi.map(S2maskedVeg)

.pluck returning undefined in Meteor

Trying to pull a list of ratings from a collection of Reviews and then average them to come up with an aggregated average rating for a Plate. When I look at the data output from the ratings variable I get nothing but "undefined undefined undefined".
averageRating: function() {
var reviews = Reviews.findOne({plateId: this._id});
var ratings = _.pluck(reviews, 'rating');
var sum = ratings.reduce(function(pv, cv){return pv + cv;}, 0);
var avg = sum / ratings.length;
//Testing output
var test = "";
var x;
for (x in reviews) {
text += reviews[x] + ',';
}
return test;
}
Sorry if this is a super newbie question, but I've been at this for hours and cannot figure it out.
I figured out the issue. As listed above var reviews gets set to a cursor which apparently .pluck does not work on. By first converting the cursor to an array of objects I was then able to use .pluck. So updated code looks like this:
averageRating: function() {
var reviewsCursor = Reviews.find({plateId: this._id});
//Converts cursor to an array of objects
var reviews = reviewsCursor.fetch();
var ratings = _.pluck(reviews, 'rating');
var sum = ratings.reduce(function(pv, cv){return pv + cv;}, 0);
var avg = (sum / ratings.length).toPrecision(2);
return avg;
}

Is there a good way to use crossfilter to query multiple dimensions for unique values, not aggregates?

I've got a big set of data loaded into crossfilter for a dc.js project I'm working on. Of course a number of my columns have repeated values in them and I'd like to be able to find the unique list of values in one column that correspond to the repeated values in another. The sample data below probably illustrates the point more clearly.
// Sample records, written as [state, city, data] rows and expanded into objects.
var data = [
    ["WA", "Seattle", "a"],
    ["WA", "Seattle", "b"],
    ["WA", "Tacoma", "c"],
    ["OR", "Portland", "d"],
    ["OR", "Bend", "e"],
    ["OR", "Bend", "f"]
].map(function (row) {
    return { state: row[0], city: row[1], data: row[2] };
});
I'd like to be able to filter on a particular state and then find the unique list of cities for that state. So, if the input was "WA", I'd like get back a two element array containing "Seattle" and "Tacoma". The code below actually does exactly that (and also provides the counts, though I really don't care about those) but having to create a second crossfilter object feels very clumsy to me. I also don't know about the performance since I'll end up having to iterate through this several times, once for each state.
// First crossfilter: used only to pull out the rows of one state.
var Ndx = crossfilter(data);
var stateDim = Ndx.dimension(function (row) {
    return row.state;
});
var cityDim = Ndx.dimension(function (row) {
    return row.city;
});
var stateFilter = stateDim.filter("WA");
var stateRows = stateFilter.top(Infinity);
// It seems like there should be a better way than this.
// Second crossfilter over just the filtered rows, grouped by city.
var cityNdx = crossfilter(stateRows);
var cityDim2 = cityNdx.dimension(function (row) {
    return row.city;
});
var cites = cityDim2.group().top(Infinity);
cites.forEach(function (bin) {
    var line = "City: " + bin.key + ", Count: " + bin.value;
    console.log(line);
});
/* --------------------------- *\
Log output:
City: Seattle, Count: 2
City: Tacoma, Count: 1
\* --------------------------- */
It seems like there should be a way to get this kind of result with some filtering, grouping, or reducing strategy, but after spending way too much time trying, I haven't been able to come up with one. All the examples I've seen that use multiple dimensions produce aggregates, but that's not what I need. I need values. Is there a better way to go about this?
I'd use a custom reduce function to keep an array of all city values that have appeared for a given state. Something like the following (completely untested - sorry) should work:
var Ndx = crossfilter(data);
var stateDim = Ndx.dimension(function (d) { return d.state; });
// Per-state bin: total row count plus the list of distinct cities among the
// rows that currently pass the filters. cityCounts tallies rows per city so
// that a city leaves `uniques` once its last row is filtered out.
var stateGroup = stateDim.group().reduce(
    function (p, v) {
        p.count++;
        if (!p.cityCounts[v.city]) {
            p.cityCounts[v.city] = 0;
            p.uniques.push(v.city);
        }
        p.cityCounts[v.city]++;
        // Reducers must return the updated value; it becomes the bin's value.
        return p;
    },
    function (p, v) {
        p.count--;
        p.cityCounts[v.city]--;
        if (p.cityCounts[v.city] === 0) {
            var idx = p.uniques.indexOf(v.city);
            if (idx !== -1) {
                p.uniques.splice(idx, 1);
            }
        }
        return p;
    },
    function () {
        // Prototype-less tally so city names cannot collide with inherited
        // property names.
        return { count: 0, uniques: [], cityCounts: Object.create(null) };
    }
).order(function (p) {
    // Bin values are objects; rank bins by row count for top().
    return p.count;
});
stateGroup.top(Infinity).forEach(function (g) {
    console.log("State " + g.key + " has count " + g.value.count);
    console.log("Unique cities in " + g.key + ":");
    g.value.uniques.forEach(function (c) {
        console.log(c);
    });
});

Simple line chart in DC and CrossFilter

I have some data like this
// Sample claims, written as [date, claimNo] pairs and expanded into records.
var data = [
    ['2013/01/01', 1],
    ['2013/01/01', 2],
    ['2013/01/02', 3]
].map(function (pair) {
    return { date: pair[0], claimNo: pair[1] };
})
I want to plot a line chart in DC so that the days are on the X-Axis and the total # of claims are on the Y-Axis.
I have code like this
// Parse the raw `date` string of every record into `dd` and derive its month
// before the records are handed to crossfilter. The records carry `date`, not
// `dd`, so parsing `e.dd` would parse undefined.
data.forEach(function (e) {
    e.dd = dateFormat.parse(e.date);
    e.month = d3.time.month(e.dd);
});
var ndx = crossfilter(data);
// Dimension keyed by the parsed date.
var dateDim = ndx.dimension(function (d) {
    return d.dd;
});
// group() with no reducer counts the records per date.
var datesClaimsGroup = dateDim.group();
var claimsLineChart = dc.lineChart("#claims-line-chart");
claimsLineChart
    .width(200)
    .height(40)
    .renderArea(true)
    .margins({ top: 0, left: -1, right: 2, bottom: 1 })
    .group(datesClaimsGroup)
    .dimension(dateDim)
    // X domain spans the first to the last record; assumes data is in date order.
    .x(d3.time.scale().domain([data[0].dd, data[data.length - 1].dd]))
    .title(function (d) {
        return d.value;
    });
The chart is plotted, but the values on the Y-axis are the date occurrence counts and not the claim counts. I know I am supposed to use a function to count the claims, but I am not getting there.
For datesClaimsGroup you need to provide a reduce function to count the claims. Otherwise just .group() will default to an identity count reduce function as you observed.
var datesClaimsGroup = dateDim.group().reduceSum(...)
or
var datesClaimsGroup = dateDim.group().reduce(...)

Resources