I have around 450K records in the database on which I have to make a Fuzzy match. I have implemented the Parallel.foreach loop to see if incoming request is matching with any of the 450K records. If during the search I found any match, I stop the foreach loop and give response back to caller with true (It says that match has been found).
I am calling database and store all the 450K records in static object (in memory). The logic for Parallel.foreach is working good. The problem is that it takes almost 40-45 seconds to determine whether it is match or not.
I am planning to run multiple parallel.foreach for 100k data on each loop so that overall time is reduced. My challenge is how would I Stop the execution of other parallel.foreach loops once I get match on one parrallel.foreach?
Below is my code for a single parallel.foreach:
public bool Checkmatch(CheckRequest request)
{
bool isPCCMatch = false;
//This will load all the data in Static object to avoid db call for each request
PolicyInformation.Initialize(request.IsCacheRefresher);
try
{
logger.Debug(MethodBase.GetCurrentMethod().Name + ": Matching alogorithm is started");
Parallel.ForEach(PolicyInformation.Policies, (policy, loopState) =>
{
double state=0, stateF=0;
if (!request.stateF)
{
state= GetPrecentageMatch(request.state, policy.state);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (state== OneFieldSSNPercentage)
{
isPCCMatch = true;
loopState.Stop();
}
}
else
{
stateF= GetPrecentageMatch(request.stateF, policy.stateF);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (stateF== OneFieldFEINPercentage)
{
isPCCMatch = true;
loopState.Stop();
}
}
double SEIN = GetPrecentageMatch(request.SEIN, policy.FEIN);
//this if condtion is for checkibnf first field condtion earily so that it will not get all the perecentage
if (SEIN == OneFieldSEINPercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double WCIRB = GetPrecentageMatch(request.WCIRB, policy.WCIRB);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (WCIRB == OneFieldWCIRBPercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double DUN = GetPrecentageMatch(request.DUNS, policy.DUNS);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (DUN == OneFieldDUNSPercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double legalNames = GetPrecentageMatch(request.LegalName, policy.LegalName);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (legalNames == OneFieldLegalNamePercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double tradeNames = GetPrecentageMatch(request.TradeName, policy.TradeName);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (tradeNames == OneFieldTradeNamePercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double mailingname = GetPrecentageMatch(request.MailingName, policy.MailingName);
//this if condition is for checkibnf first field condition early so that it will not get all the percentage
if (mailingname == OneFieldMailingNamePercentage)
{
isPCCMatch = true;
loopState.Stop();
}
double ownerInfo = GetPrecentageMatch(request.OwnerName, policy.Ownership);
int partitalmatchcount = 0;
int addressmatchcount = 0;
// condtion 2
// get the partial count. if it is not more than 2 then compare the address an
if (GetPartialMatchFieldCount(SSN, FEIN, SEIN, DUN, legalNames, tradeNames, mailingname, ownerInfo, out partitalmatchcount) < 2)
{
// it will be hit if IsAddressmatch is true or count of partitalmatchcount & addressmatchcount is equal or more then 2
if (IsAddressmatch(request, policy, out addressmatchcount, partitalmatchcount) || (addressmatchcount + partitalmatchcount >= 2))
{
logger.Debug(MethodBase.GetCurrentMethod().Name + ": policy matched 2nd condition " + policy.ID);
isPCCMatch = true;
loopState.Stop();
}
}
else
{
isPCCMatch = true;
loopState.Stop();
}
// check for the 3 fields
if (GroupAmatchcount(SSN, FEIN, SEIN) + GroupBmatchcount(legalNames, tradeNames, mailingname) + GroupCMatchCount(request,
policy) + GroupDMatchCount(ownerInfo) >= 3)
{
logger.Debug(MethodBase.GetCurrentMethod().Name + ": policy matched 3rd condition " + policy.ID);
isPCCMatch = true;
loopState.Stop();
}
//else if ()
});
logger.Debug(MethodBase.GetCurrentMethod().Name + ": Matching algorithm is ended");
}
catch (Exception ex)
{
logger.ErrorException(MethodBase.GetCurrentMethod().Name, ex);
throw;
}
// }
return isPCCMatch;
}
I want to run the above loop for 100k records, so it will have 4 parallel.foreach loops. Once I found a match on one loop, I want to stop the execution of other parallel.foreach loops.
Related
I just started learning C# and while loops are confusing me. Unlike Java, where I can use a while loop to loop a program if a user entered a invalid input, it's not acting the same way in C#.
using System;
namespace first {
class Program {
static void Main(string[] args) {
Console.WriteLine("Hi! What is your name");
string userName = Console.ReadLine();
Console.WriteLine("oh! you are:" + userName);
Console.WriteLine("let play a game");
string answer="Y";
while (answer == "Y") {
Random random = new Random();
int correntNumber = random.Next(1, 2);
int guess = 0;
Console.WriteLine("Guess a number");
while (guess != correntNumber) {
string userGuess = Console.ReadLine();
//validate input method 1
try {
guess = int.Parse(userGuess);
} catch (Exception e) {
Console.WriteLine("Invalid inout", e);
}
//validate input method 2
//if(!int.TryParse(userGuess, out guess)) {
// Console.WriteLine("invalid input");
//}
if (guess != correntNumber) {
Console.WriteLine("try again!");
}
}
Console.WriteLine("Yes! corrector");
Console.WriteLine("Play again?");
//string answer;
answer = Console.ReadLine().ToUpper();
if(answer == "Y") {
continue;
} else if (answer == "N") {
Console.WriteLine("bye");
return;
} else if (answer != "Y" || answer != "N") {
Console.WriteLine("y or n");
answer = Console.ReadLine().ToUpper();
continue;
}
}
}
}
}
When I enter a value other than y or n, the message appears,Console.WriteLine("Y or n only");, but the game restarts while it shouldn't.
I am sorry this is a simple and rather silly question, but I can't pin point where I am going wrong.
the problem is that after printing to the user "y or n only" message you take the input but you don't actually do anything with it
so the loop just restarts regardless of the input , to fix this issue you could replace the last if part with this code
while(answer != 'Y' && answer != 'N'){
Console.WriteLine("y or n only");
answer = Convert.ToChar(Console.ReadLine().ToUpper());
}
if(answer == 'Y')
{
continue;
}
else if(answer == 'N')
{
Console.WriteLine("goodbye");
return;
}
so after you read the first input answer of him for repeating or no you check if it's a valid input or not and if it's not you keep asking him for "y or n only" till he enters "Y" or "N" and then you process this answer for whether it's a "Y" or "N" in the if part
heres my code below...
TableColumn tc = new TableColumn();
TableColumn[] tc2 = new TableColumn[10];
for(int i=0; i<5, i++){
tc.getColumns().add(tc2[i]);
}
and i try to override commit method for editing cells.
public void commit(Object val) {
// Get the table
TableView<MainTable> t = this.getTableView();
// Get the selected row/column
MainTable selectedRow = t.getItems().get(this.getTableRow().getIndex());
TableColumn<MainTable, ?> selectedColumn = t.getColumns().get(t.getColumns().indexOf(this.getTableColumn()));
// Get current property name
String propertyName = ((PropertyValueFactory) selectedColumn.getCellValueFactory()).getProperty();
// Create a method name conforming to java standards ( setProperty )
propertyName = ("" + propertyName.charAt(0)).toUpperCase() + propertyName.substring(1);
// Try to run the update
try {
// Type specific checks - could be done inside each setProperty() method
if(val instanceof Double) {
Method method = selectedRow.getClass().getMethod("set" + propertyName, double.class);
method.invoke(selectedRow, (double) val);
}
if(val instanceof String) {
Method method = selectedRow.getClass().getMethod("set" + propertyName, String.class);
method.invoke(selectedRow, (String) val);
}
if(val instanceof Integer) {
Method method = selectedRow.getClass().getMethod("set" + propertyName, int.class);
method.invoke(selectedRow, (int) val);
}
} catch (Exception e) {
e.printStackTrace();
}
// CommitEdit for good luck
commitEdit((String) val);
}
and i got ArrayIndexOutofBoundsException on console view.
so my question is
how can i select getcolumns added other column???
TableColumn<MainTable, ?> selectedColumn = t.getColumns().get(t.getColumns().indexOf(this.getTableColumn()));
i think this code has to be changed...
anyone got ideas??
Nested columns are not part of the TableView.columns list.
If you need the corresponding TableView column, just go up through the hierarchy until you reach a column without a parentColumn:
TableColumn<MainTable, ?> selectedColumn = this.getTableColumn();
TableColumn<MainTable, ?> c = selectedColumn;
while ((c = selectedColumn.getParentColumn()) != null) {
selectedColumn = c;
}
If you just need the column itself, simply use this.getTableColumn(), instead of finding the index of the column in the columns list and then accessing that index in the same list. (I guess the latter is what you need.)
Furthermore, if PropertyValueFactory returns properties of the item class, you could use this property to set the value instead of using reflection:
ObservableValue obs = selectedColumn.getCellObservableValue(this.getIndex());
if (obs instanceof WritableValue) {
((WritableValue) obs).setValue(val);
} else {
// reflecitive approach
}
Furthermore you shouldn't add null as a nested column, but you're doing it here:
TableColumn[] tc2 = new TableColumn[10];
for(int i=0; i<5, i++){
tc.getColumns().add(tc2[i]);
}
Note, this question was previously very different. This is now the real issue. Which is...
When making a call to executeStoredProcedure() using the node.js client I get a 408 code, RequestTimeout and I get no data back from the sproc's "body". This seems to occur at about 5 seconds, but when I time bound things from inside the sproc itself, any value over say 700 milliseconds causes me to get a network timeout (although I don't see it until about 5 seconds have passed).
Note, I can have longer running sprocs with read operations. This only seems to occur when I have a lot of createDocument() operations, so I don't think it's on the client side. I think something is happening on the server side.
It's still possible that my original thought is true and I'm not getting a false back from a createDocument() call which causes my sproc to keep running past its timeout and that's what's causing the 408.
Here is the time limited version of my create documents sproc
generateData = function(memo) {
var collection, collectionLink, nowTime, row, startTime, timeout;
if ((memo != null ? memo.remaining : void 0) == null) {
throw new Error('generateData must be called with an object containing a `remaining` field.');
}
if (memo.totalCount == null) {
memo.totalCount = 0;
}
memo.countForThisRun = 0;
timeout = memo.timeout || 600; // Works at 600. Fails at 800.
startTime = new Date();
memo.stillTime = true;
collection = getContext().getCollection();
collectionLink = collection.getSelfLink();
memo.stillQueueing = true;
while (memo.remaining > 0 && memo.stillQueueing && memo.stillTime) {
row = {
a: 1,
b: 2
};
getContext().getResponse().setBody(memo);
memo.stillQueueing = collection.createDocument(collectionLink, row);
if (memo.stillQueueing) {
memo.remaining--;
memo.countForThisRun++;
memo.totalCount++;
}
nowTime = new Date();
memo.nowTime = nowTime;
memo.startTime = startTime;
memo.stillTime = (nowTime - startTime) < timeout;
if (memo.stillTime) {
memo.continuation = null;
} else {
memo.continuation = 'Value does not matter';
}
}
getContext().getResponse().setBody(memo);
return memo;
};
The stored procedure above queues document creates in a while loop until the API returns false.
Keep in mind that createDocument() is an asynchronous method. The boolean returned represents whether it is time to wrap up execution right there and then. The return value isn't "smart" enough to estimate and account for how much time the async call will take; so it can't be used for queueing a bunch of calls in a while() loop.
As a result, the stored procedure above doesn't terminate gracefully when the boolean returns false because it has a bunch of createDocument() calls that are still running. The end result is a timeout (which eventually leads to blacklisting on repeated attempts).
In short, avoid this pattern:
while (stillQueueing) {
stillQueueing = collection.createDocument(collectionLink, row);
}
Instead, you should use the callback for control flow. Here is the refactored code:
function(memo) {
var collection = getContext().getCollection();
var collectionLink = collection.getSelfLink();
var row = {
a: 1,
b: 2
};
if ((memo != null ? memo.remaining : void 0) == null) {
throw new Error('generateData must be called with an object containing a `remaining` field.');
}
if (memo.totalCount == null) {
memo.totalCount = 0;
}
memo.countForThisRun = 0;
createMemo();
function createMemo() {
var isAccepted = collection.createDocument(collectionLink, row, function(err, createdDoc) {
if (err) throw err;
memo.remaining--;
memo.countForThisRun++;
memo.totalCount++;
if (memo.remaining > 0) {
createMemo();
} else {
getContext().getResponse().setBody(memo);
}
});
if (!isAccepted) {
getContext().getResponse().setBody(memo);
}
}
};
At the end of my submit button click handler, Resharper warns that, "Not all code paths return a value."
What value would it be expecting from an event handler?
In deference to full disclosure, this is that event handler:
$("#submit_button").click(function() {
// http://stackoverflow.com/questions/18192288/how-can-i-compare-date-time-values-using-the-jqueryui-datepicker-and-html5-time
var begD = $.datepicker.parseDate('mm/dd/yy', $('#BeginDate').val());
var endD = $.datepicker.parseDate('mm/dd/yy', $('#EndDate').val());
if (begD > endD) {
alert('Begin date must be before End date');
$('#BeginDate').focus();
return false;
}
else if (begD.toString() == endD.toString()) {
var dteString = begD.getFullYear() + "/" + (begD.getMonth() + 1) + "/" + begD.getDate();
var begT = new Date(dteString + " " + $('#BeginTime').val());
var endT = new Date(dteString + " " + $('#EndTime').val());
if (begT > endT) {
alert('Begin date must be before End date');
$('#BeginTime').focus();
return false;
}
}
$("#NumberOfResults").css("visibility", "visible");
$("#NumberOfResults").html("Please wait...");
EnableButton("submit_button", false);
// If all are selected, don't enumerate them; just set it at "All" (change of case shows that the logic did execute)
var deptsList = $('#depts').checkedBoxes();
if (deptsList.length < deptsArray.length) {
$('#deptHeader span').html(deptsList.join(", "));
}
else if (deptsList.length == deptsArray.length) {
$('#deptHeader span').html("All");
}
// " "
var sitesList = $('#sites').checkedBoxes();
$('#sitesHeader span').html(sitesList.join(", "));
if (sitesList.length < sitesArray.length) {
$('#sitesHeader span').html(sitesList.join(", "));
}
else if (sitesList.length == sitesArray.length) {
$('#sitesHeader span').html("All");
}
$('#hiddenDepts').val(deptsList);
$('#hiddenSites').val(sitesList);
var UPCs = $('#UPC').val();
if (UPCs == "All") {
$('#UPC').val("1"); // take everything (1 and greater)
}
var resultsText = jQuery.trim($("#spanNumberOfResults").text());
if (resultsText != "") {
$("#NumberOfResults").css("visibility", "visible");
if (resultsText == "0") {
$("#NumberOfResults").css("color", "red");
} else {
var href = '/#ConfigurationManager.AppSettings["ThisApp"]/CCRCriteria/LoadReport';
// report_parms (sic) is referenced from LoadReport
var report_parms = {
GUID: "#Model.GUID",
SerialNumber: "#Model.SerialNumber",
ReportName: "#Model.ReportName"
};
window.open(href, "report_window", "resizable=1, width=850, left=" + (screen.width / 2 - 425));
}
}
}); // end of submit button click
Resharper isn't aware of event handlers.
It sees that your function will sometimes return false and sometimes won't return anything, and it complains.
It doesn't realize that this pattern is perfectly fine for event handlers.
Ignore it. Click handlers "can" return a boolean value indicating whether to process the click normally (true) or ignore it (false).
Resharper sees any return in the function as a clue that it should always return something.
The code below works, but I know it can't be the most efficient. Is there another way to ask if there are any rows rather than using Any()?
I'd like to have the NoResults Div hidden by default and only turned on when no rows are present, likewise have the repeater show up by default and only hidden when no results are listed.
using (AgileEntities context = new AgileEntities())
{
int StoryID = Convert.ToInt32(Request["StoryID"]);
var tasks = from t in context.Tasks
where t.StoryId == StoryID
orderby t.Number
select t;
rptTasks.DataSource = tasks;
rptTasks.DataBind();
if (tasks.Any())
{
rptTasks.Visible = true;
NoResults.Visible = false;
}
else
{
rptTasks.Visible = false;
NoResults.Visible = true;
}
}
Caution - calling .Any() may re-execute your query
I would do this a bit 'safer' to ensure single execution.
//force execution once
var taskList = tasks.ToList();
rptTasks.Visible = taskList.Count>0;
NoResults.Visible = taskList.Count==0;
And
rptTasks.DataSource = tasksList;
rptTasks.DataBind();
The problem with Any() and Count() is they cause your code to execute over and over - a test case
static void Main(string[] args)
{
//Populate the test class
List list = new List(1000);
for (int i=0; i o.CreateDate.AddSeconds(5) > DateTime.Now);
while (newList.Any())
{
//Note - are actual count keeps decreasing.. showing our 'execute' is running every time we call count.
Console.WriteLine(newList.Any());
System.Threading.Thread.Sleep(500);
}
}
You can replace Any() with Count() above to show. Basically the code keeps evaluating the query when you call Any() - I'm not sure if this applies to Linq to Sql though if there is any different caching mechanism.
var tasks = from t in context.Tasks
where t.StoryId == StoryID
orderby t.Number
select t;
var tasksList = tasks.ToList();
rptTasks.DataSource = tasksList;
rptTasks.DataBind();
if (tasksList.Count > 0)
{
rptTasks.Visible = true;
NoResults.Visible = false;
}
else
{
rptTasks.Visible = false;
NoResults.Visible = true;
}
The ToList() call will execute the query and create a list of tasks objects
Your DataBind() call has already caused the query to be executed, so calling Any() on top of that shouldn't cost you anything further.
You can change this with :
rptTasks.Visible = tasks.Any();
NoResults.Visible = !rptTasks.Visible;