dates showing up blank in materialized view? - oracle11g
I have a materialized view that I created as such:
DROP MATERIALIZED VIEW DMSN.SFMV_DS3R_FH_1XRTT_BTS_LVL;
CREATE MATERIALIZED VIEW DMSN.SFMV_DS3R_FH_1XRTT_BTS_LVL (D_DTM_DAY,MRKT_NM,BSC_NM,CLUSTER_NM,CSCD_NM,BTS_ID,REGION_NM,V_ATT_CNT,V_MBL_ORG_CNT,V_MBL_TER_CNT,V_SILENT_RETRY_CNT,V_CUST_BLK_CNT,V_AXS_F_CNT,V_CE_BLK_CNT,V_WCD_BLK_CNT,V_T1_BHL_BLK_CNT,V_PWR_BLK_CNT,V_NON_BTS_EQ_BLK_CNT,V_SFUL_CALL_CNT,V_DRP_CALL_CNT,D_ATT_CNT,D_MBL_ORG_CNT,D_MBL_TER_CNT,D_SILENT_RETRY_CNT,D_CUST_BLK_CNT,D_AXS_F_CNT,D_CE_BLK_CNT,D_WCD_BLK_CNT,D_T1_BHL_BLK_CNT,D_PWR_BLK_CNT,D_NON_BTS_EQ_BLK_CNT,D_SFUL_CALL_CNT,D_DRP_CALL_CNT,V_PRIM_CALL_ERL,V_MOU_TMS,D_PRIM_CALL_ERL,SMS_ATT_CNT,SMS_SXS_CNT,V_HHI_ATT_CNT,V_HHI_BAD_FRM_CNT,V_HHI_CALL_SETUP_SXS_CNT,D_HHI_ATT_CNT,D_HHI_BAD_FRM_CNT,D_HHI_CALL_SETUP_SXS_CNT,PGN_CH_MSG_SZ,AVLBL_PGN_CH_CNT,AVLBL_AXS_CH_CNT,AXS_CH_MSG_SZ,V_IVHHO_ATT_CNT,V_IVHHO_SXS_CNT,D_IVHHO_ATT_CNT,D_IVHHO_SXS_CNT,IBHO_ATT_CNT,IBHO_SXS_CNT,IFHHO_ATT_CNT,IFHHO_SXS_CNT,SHO_ATT_CNT,SHO_SXS_CNT,SRHO_ATT_CNT,SRHO_SXS_CNT,MAX_LOD_SPD,GPM_SDB_DAB_CAL,IBHO_DOWN_ATT_CNT,IBHO_UP_ATT_CNT,IBHO_UP_SUC_CNT,IBHO_DOWN_SUC_CNT,PGN_CH_MAX,PGN_CH_AVG,AXS_CH_AVG,AXS_CH_MAX,PGN_CH_OPCY_NEW,PGN_CH_OPCY_NEW_MAX)
TABLESPACE DMD_SN_01
PCTUSED 0
PCTFREE 10
INITRANS 2
MAXTRANS 255
STORAGE (
INITIAL 64K
NEXT 1M
MINEXTENTS 1
MAXEXTENTS UNLIMITED
PCTINCREASE 0
BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT
CELL_FLASH_CACHE DEFAULT
)
NOCACHE
LOGGING
BUILD IMMEDIATE
REFRESH FORCE ON DEMAND
WITH PRIMARY KEY
AS
/* Formatted on 10/1/2013 9:06:16 PM (QP5 v5.215.12089.38647) */
SELECT DDTMDAY AS D_DTM_DAY,
MRKTNM AS MRKT_NM,
BSMNM AS BSC_NM,
CLNM AS CLUSTER_NM,
CSCDNM AS CSCD_NM,
BTSID AS BTS_ID,
REGIONNM AS REGION_NM,
SUM (VATTCNT) AS V_ATT_CNT,
SUM (VMBLORGCNT) AS V_MBL_ORG_CNT,
SUM (VMBLTERCNT) AS V_MBL_TER_CNT,
SUM (VSILENTRETRYCNT) AS V_SILENT_RETRY_CNT,
SUM (VCUSTBLKCNT) AS V_CUST_BLK_CNT,
SUM (VAXSFCNT) AS V_AXS_F_CNT,
SUM (VCEBLKCNT) AS V_CE_BLK_CNT,
SUM (VWCDBLKCNT) AS V_WCD_BLK_CNT,
SUM (VT1BHLBLKCNT) AS V_T1_BHL_BLK_CNT,
SUM (VPWRBLKCNT) AS V_PWR_BLK_CNT,
SUM (VNONBTSEQBLKCNT) AS V_NON_BTS_EQ_BLK_CNT,
SUM (VSFULCALLCNT) AS V_SFUL_CALL_CNT,
SUM (VDRPCALLCNT) AS V_DRP_CALL_CNT,
SUM (DATTCNT) AS D_ATT_CNT,
SUM (DMBLORGCNT) AS D_MBL_ORG_CNT,
SUM (DMBLTERCNT) AS D_MBL_TER_CNT,
SUM (DSILENTRETRYCNT) AS D_SILENT_RETRY_CNT,
SUM (DCUSTBLKCNT) AS D_CUST_BLK_CNT,
SUM (DAXSFCNT) AS D_AXS_F_CNT,
SUM (DCEBLKCNT) AS D_CE_BLK_CNT,
SUM (DWCDBLKCNT) AS D_WCD_BLK_CNT,
SUM (DT1BHLBLKCNT) AS D_T1_BHL_BLK_CNT,
SUM (DPWRBLKCNT) AS D_PWR_BLK_CNT,
SUM (DNONBTSEQBLKCNT) AS D_NON_BTS_EQ_BLK_CNT,
SUM (DSFULCALLCNT) AS D_SFUL_CALL_CNT,
SUM (DDRPCALLCNT) AS D_DRP_CALL_CNT,
SUM (VPRIMCALLERL) AS V_PRIM_CALL_ERL,
SUM (VMOUTMS) AS V_MOU_TMS,
SUM (DPRIMCALLERL) AS D_PRIM_CALL_ERL,
SUM (SMSATTCNT) AS SMS_ATT_CNT,
SUM (SMSSXSCNT) AS SMS_SXS_CNT,
SUM (VHHIATTCNT) AS V_HHI_ATT_CNT,
SUM (VHHIBADFRMCNT) AS V_HHI_BAD_FRM_CNT,
SUM (VHHICALLSETUPSXSCNT) AS V_HHI_CALL_SETUP_SXS_CNT,
SUM (DHHIATTCNT) AS D_HHI_ATT_CNT,
SUM (DHHIBADFRMCNT) AS D_HHI_BAD_FRM_CNT,
SUM (DHHICALLSETUPSXSCNT) AS D_HHI_CALL_SETUP_SXS_CNT,
SUM (PGNCHMSGSZ) AS PGN_CH_MSG_SZ,
SUM (AXSCHMSGSZ) AS AVLBL_PGN_CH_CNT,
SUM (VIVHHOATTCNT) AS AVLBL_AXS_CH_CNT,
SUM (VIVHHOSXSCNT) AS AXS_CH_MSG_SZ,
SUM (DIVHHOATTCNT) AS V_IVHHO_ATT_CNT,
SUM (DIVHHOSXSCNT) AS V_IVHHO_SXS_CNT,
SUM (IBHOATTCNT) AS D_IVHHO_ATT_CNT,
SUM (IBHOSXSCNT) AS D_IVHHO_SXS_CNT,
SUM (IFHHOATTCNT) AS IBHO_ATT_CNT,
SUM (IFHHOSXSCNT) AS IBHO_SXS_CNT,
SUM (SHOATTCNT) AS IFHHO_ATT_CNT,
SUM (SHOSXSCNT) AS IFHHO_SXS_CNT,
SUM (SRHOATTCNT) AS SHO_ATT_CNT,
SUM (SRHOSXSCNT) AS SHO_SXS_CNT,
MAX (AVLBLPGNCHCNT) AS SRHO_ATT_CNT,
MAX (AVLBLAXSCHCNT) AS SRHO_SXS_CNT,
SUM (MAXLODSPD) AS MAX_LOD_SPD,
SUM (GPMSDBDABCAL) AS GPM_SDB_DAB_CAL,
SUM (IBHODOWNATTCNT) AS IBHO_DOWN_ATT_CNT,
SUM (IBHOUPATTCNT) AS IBHO_UP_ATT_CNT,
SUM (IBHOUPSUCCNT) AS IBHO_UP_SUC_CNT,
SUM (IBHODOWNSUCCNT) AS IBHO_DOWN_SUC_CNT,
MAX (PGNCHMAX) AS PGN_CH_MAX,
AVG (PGNCHAVG) AS PGN_CH_AVG,
AVG (AXSCHAVG) AS AXS_CH_AVG,
MAX (AXSCHMAX) AS AXS_CH_MAX,
AVG (PGNCHOPCYNEW) AS PGN_CH_OPCY_NEW,
MAX (PGNCHOPCYNEWMAX) AS PGN_CH_OPCY_NEW_MAX
FROM (SELECT TRUNC (D1."D_DTM", 'fmdd') AS "DDTMDAY",
D2."MRKT_NM" AS "MRKTNM",
D3."BSC_NM" AS "BSMNM",
D3."BSC_NM" AS "BSCNM",
D2."CLUSTER_NM" AS "CLNM",
D1."CSCD_NM" AS "CSCDNM",
D1."BTS_ID" AS "BTSID",
D1."V_ATT_CNT" AS "VATTCNT",
D1."V_MBL_ORG_CNT" AS "VMBLORGCNT",
D1."V_MBL_TER_CNT" AS "VMBLTERCNT",
D1."V_SILENT_RETRY_CNT" AS "VSILENTRETRYCNT",
D1."V_CUST_BLK_CNT" AS "VCUSTBLKCNT",
D1."V_AXS_F_CNT" AS "VAXSFCNT",
D1."V_CE_BLK_CNT" AS "VCEBLKCNT",
D1."V_WCD_BLK_CNT" AS "VWCDBLKCNT",
D1."V_T1_BHL_BLK_CNT" AS "VT1BHLBLKCNT",
D1."V_PWR_BLK_CNT" AS "VPWRBLKCNT",
D1."V_NON_BTS_EQ_BLK_CNT" AS "VNONBTSEQBLKCNT",
D1."V_SFUL_CALL_CNT" AS "VSFULCALLCNT",
D1."V_DRP_CALL_CNT" AS "VDRPCALLCNT",
D1."D_ATT_CNT" AS "DATTCNT",
D1."D_MBL_ORG_CNT" AS "DMBLORGCNT",
D1."D_MBL_TER_CNT" AS "DMBLTERCNT",
D1."D_SILENT_RETRY_CNT" AS "DSILENTRETRYCNT",
D1."D_CUST_BLK_CNT" AS "DCUSTBLKCNT",
D1."D_AXS_F_CNT" AS "DAXSFCNT",
D1."D_CE_BLK_CNT" AS "DCEBLKCNT",
D1."D_WCD_BLK_CNT" AS "DWCDBLKCNT",
D1."D_T1_BHL_BLK_CNT" AS "DT1BHLBLKCNT",
D1."D_PWR_BLK_CNT" AS "DPWRBLKCNT",
D1."D_NON_BTS_EQ_BLK_CNT" AS "DNONBTSEQBLKCNT",
D1."D_SFUL_CALL_CNT" AS "DSFULCALLCNT",
D1."D_DRP_CALL_CNT" AS "DDRPCALLCNT",
D1."V_PRIM_CALL_ERL" AS "VPRIMCALLERL",
D1."V_MOU_TMS" AS "VMOUTMS",
D1."D_PRIM_CALL_ERL" AS "DPRIMCALLERL",
D1."SMS_ATT_CNT" AS "SMSATTCNT",
D1."SMS_SXS_CNT" AS "SMSSXSCNT",
D1."V_HHI_ATT_CNT" AS "VHHIATTCNT",
D1."V_HHI_BAD_FRM_CNT" AS "VHHIBADFRMCNT",
D1."V_HHI_CALL_SETUP_SXS_CNT" AS "VHHICALLSETUPSXSCNT",
D1."D_HHI_ATT_CNT" AS "DHHIATTCNT",
D1."D_HHI_BAD_FRM_CNT" AS "DHHIBADFRMCNT",
D1."D_HHI_CALL_SETUP_SXS_CNT" AS "DHHICALLSETUPSXSCNT",
D1."PGN_CH_MSG_SZ" AS "PGNCHMSGSZ",
D1."AVLBL_PGN_CH_CNT" AS "AVLBLPGNCHCNT",
D1."AVLBL_AXS_CH_CNT" AS "AVLBLAXSCHCNT",
D1."AXS_CH_MSG_SZ" AS "AXSCHMSGSZ",
D1."V_IVHHO_ATT_CNT" AS "VIVHHOATTCNT",
D1."V_IVHHO_SXS_CNT" AS "VIVHHOSXSCNT",
D1."D_IVHHO_ATT_CNT" AS "DIVHHOATTCNT",
D1."D_IVHHO_SXS_CNT" AS "DIVHHOSXSCNT",
D1."IBHO_ATT_CNT" AS "IBHOATTCNT",
D1."IBHO_SXS_CNT" AS "IBHOSXSCNT",
D1."IFHHO_ATT_CNT" AS "IFHHOATTCNT",
D1."IFHHO_SXS_CNT" AS "IFHHOSXSCNT",
D1."SHO_ATT_CNT" AS "SHOATTCNT",
D1."SHO_SXS_CNT" AS "SHOSXSCNT",
D1."SRHO_ATT_CNT" AS "SRHOATTCNT",
D1."SRHO_SXS_CNT" AS "SRHOSXSCNT",
D1."MAX_LOD_SPD" AS "MAXLODSPD",
D1."GPM_SDB_DAB_CAL" AS "GPMSDBDABCAL",
D1."IBHO_DOWN_ATT_CNT" AS "IBHODOWNATTCNT",
D1."IBHO_UP_ATT_CNT" AS "IBHOUPATTCNT",
D1."IBHO_UP_SUC_CNT" AS "IBHOUPSUCCNT",
D1."IBHO_DOWN_SUC_CNT" AS "IBHODOWNSUCCNT",
D1."PGN_CH_MAX" AS "PGNCHMAX",
D1."PGN_CH_AVG" AS "PGNCHAVG",
D1."AXS_CH_AVG" AS "AXSCHAVG",
D1."AXS_CH_MAX" AS "AXSCHMAX",
D1."PGN_CH_OPCY_NEW" AS "PGNCHOPCYNEW",
D1."PGN_CH_OPCY_NEW_MAX" AS "PGNCHOPCYNEWMAX",
D2."MRKT_NM" AS "MRKTNM2",
D2."REGION_NM" AS "REGIONNM"
FROM "DMSN"."DS3R_FH_1XRTT_BTS_LVL_KPI" D1
LEFT OUTER JOIN "DMSN"."SITES_GEO_HIERARCHY" D2
ON D1."BTS_ID" = D2."BTS_ID"
AND D1."CSCD_NM" = D2."CSCD_NM"
LEFT OUTER JOIN "DMSN"."SITES_SYS_HIERARCHY" D3
ON D1."BTS_ID" = D3."BTS_ID"
AND D1."CSCD_NM" = D3."CSCD_NM"
WHERE D1."D_DTM" >= TRUNC (SYSDATE, 'dd') - 91) T
GROUP BY DDTMDAY,
MRKTNM,
BSMNM,
CLNM,
CSCDNM,
BTSID,
REGIONNM;
COMMENT ON MATERIALIZED VIEW DMSN.SFMV_DS3R_FH_1XRTT_BTS_LVL IS 'snapshot table for snapshot DMSN.SFMV_DS3R_FH_1XRTT_BTS_LVL';
CREATE INDEX DMSN.IDX1_SFMV_FH_1XRTT_BTS_LVL ON DMSN.SFMV_DS3R_FH_1XRTT_BTS_LVL
(D_DTM_DAY, CSCD_NM, BTS_ID)
LOGGING
TABLESPACE DMD_SN_01
PCTFREE 10
INITRANS 2
MAXTRANS 255
STORAGE (
INITIAL 64K
NEXT 1M
MINEXTENTS 1
MAXEXTENTS UNLIMITED
PCTINCREASE 0
BUFFER_POOL DEFAULT
FLASH_CACHE DEFAULT
CELL_FLASH_CACHE DEFAULT
);
but when I do a simple select * on that in toad, the dates are showing up blanks. In toad when I double click any of the date field in the column, I get a value back. I'm not sure what is going on here but the application isn't even able to query the date column for some reason.
when I run select * in sqlplus, it also shows up blank....
SQL> SELECT ROWNUM, T1.D_DTM_DAY FROM SFMV_DS3R_FH_1XRTT_BTS_LVL T1 WHERE ROWNUM < 6;
ROWNUM D_DTM_DAY
1
2
3
4
5
Any ideas on why this is happening?
This code:
TRUNC(D1."D_DTM", 'fmdd')
... is possibly intended to be:
TRUNC(D1."D_DTM")
... or:
to_number(to_char(D1."D_DTM", 'fmdd'))
Related
Remove all rows above and below a value in R
We have citizen scientist recording data for us using In-Situ Aqua troll 600 instruments. It is similar to a CTD but not. The data format is a little different. Different enough that I cannot use CTD trim from the OCE package in R. I need to remove all the rows of data during the soak time (time in the water before they start lowering the instrument) and the up cast from the data. That is all the rows after they reached the max depth. So I just need that center portion of my dataframe. My Data Date Time Salinity (ppt) (672441) Chlorophyll-a Fluorescence (RFU) (671721) RDO Concentration (mg/L) (672144) Temperature (°C) (676121) Depth (ft) (671051) 16:29.0 0 0.01089297 7.257619 31.91303 0.008220486 16:31.0 0 0.01765913 7.246986 31.93175 0.1499496 16:33.0 0 0.0130412 7.258863 31.93253 0.5387784 16:35.0 0 0.01299242 7.274049 31.93806 0.6187978 16:37.0 0 0.01429801 7.26965 31.94401 0.6640261 16:39.0 0 0.01342988 7.271608 31.93595 0.681709 16:41.0 0 0.01337719 7.271549 31.93503 0.684597 16:43.0 7.087267 0.007094439 6.98015 31.89018 1.598019 16:45.0 28.3442 0.007111916 6.268753 31.83806 1.687673 16:47.0 31.06357 0.007945394 6.197834 31.77821 1.418773 16:49.0 32.07076 0.0080788 6.166986 31.76881 1.382685 16:51.0 31.95504 0.004382414 6.191305 31.72906 1.358556 16:53.0 36.21165 0.01983912 5.732656 29.3942 123.4148 16:55.0 36.37849 0.02243886 5.626586 28.82502 125.2927 16:57.0 36.43061 0.02416219 5.450325 28.23787 126.7997 16:59.0 36.44484 0.02441683 5.421676 28.14037 127.0321 17:01.0 36.46815 4.510316 5.318929 28.09501 127.2064 17:03.0 36.41381 4.012657 5.241654 28.14595 127.2227 17:05.0 36.42724 0.7891375 5.174401 28.20383 127.2019 17:07.0 36.41064 0.4351442 5.120181 28.18592 127.197 17:09.0 36.38155 0.2253969 5.033384 28.21021 127.1895 17:11.0 36.37671 0.2089337 5.019629 28.21222 127.1885 17:13.0 36.43813 0.08728585 4.981099 28.17526 127.2223 17:15.0 36.47644 0.904435 4.951878 28.13579 127.2108 17:17.0 36.54742 0.1230291 4.93056 28.06166 127.2307 17:19.0 36.60466 10.04291 4.908442 27.9397 126.6003 17:21.0 36.61511 11.33922 4.904828 27.92038 126.5161 17:23.0 36.68179 0.6680982 4.87018 27.78319 123.707 17:25.0 36.74612 0.06539913 4.848994 27.72977 119.906 17:27.0 36.75729 0.02414635 4.826871 27.72545 114.9537 17:29.0 37.1578 0.01556828 4.804105 27.81129 113.3405 > depthmax<- max(WS$`Depth (ft) (671051)`, na.rm = TRUE) > output <- WS[WS$"Depth (ft) (671051)" < depthmax,] > Output2 <- output[output$"Depth (ft) (671051)" > 1,] I tried these and got output2 to work but can't seam to get output to work. Is there a more elegant way to do this? Just to recap I need to remove all rows after the depthmax (127.2307) and all the rows before the depth when they start lowering the instrument (~2.41).
Your code does remove the maximum depth, but not the rows after the maximum depth is reached. You want to locate the row index of the the maximum depth and delete that row and the ones after: start <- tail(which(na.omit(WS$`Depth (ft) (671051)`) < 2.41), 1) + 1 end<- which.max(na.omit(WS$`Depth (ft) (671051)`)) - 1 output <- WS[start:end, ] The first line finds the index of the last row less than 2.41 and adds 1 to get the starting row. The second line finds the index of the maximum depth and subtracts 1 to get the row before that.
GameTheory package: Convert data frame of games to Coalition Set
I am looking to explore the GameTheory package from CRAN, but I would appreciate help in converting my data (in the form of a data frame of unique combinations and results) in to the required coalition object. The precursor to this I believe to be an ordered list of all coalition values (https://cran.r-project.org/web/packages/GameTheory/vignettes/GameTheory.pdf). My real data has n ~ 30 'players', and unique combinations = large (say 1000 unique combinations), for which I have 1 and 0 identifiers to describe the combinations. This data is sparsely populated in that I do not have data for all combinations, but will assume combinations not described have zero value. I plan to have one specific 'player' who will appear in all combinations, and act as a baseline. By way of example this is the data frame I am starting with: require(GameTheory) games <- read.csv('C:\\Users\\me\\Desktop\\SampleGames.csv', header = TRUE, row.names = 1) games n1 n2 n3 n4 Stakes Wins Success_Rate 1 1 1 0 0 800 60 7.50% 2 1 0 1 0 850 45 5.29% 3 1 0 0 1 150000 10 0.01% 4 1 1 1 0 300 25 8.33% 5 1 1 0 1 1800 65 3.61% 6 1 0 1 1 1900 55 2.89% 7 1 1 1 1 700 40 5.71% 8 1 0 0 0 3000000 10 0.00333% where n1 is my universal player, and in this instance, I have described all combinations. To calculate my 'base' coalition value from player {1} alone, I am looking to perform the calculation: 0.00333% (success rate) * all stakes, i.e. 0.00333% * (800 + 850 + 150000 + 300 + 1800 + 1900 + 700 + 3000000) = 105 I'll then have zero values for {2}, {3} and {4} as they never "play" alone in this example. To calculate my first pair coalition value, I am looking to perform the calculation: 7.5%(800 + 300 + 1800 + 700) + 0.00333%(850 + 150000 + 1900 + 3000000) = 375 This is calculated as players {1,2} base win rate (7.5%) by the stakes they feature in, plus player {1} base win rate (0.00333%) by the combinations he features in that player {2} does not - i.e. exclusive sets. This logic is repeated for the other unique combinations. For example row 4 would be the combination of {1,2,3} so the calculation is: 7.5%(800+1800) + 5.29%(850+1900) + 8.33%(300+700) + 0.00333%(3000000+150000) = 529 which descriptively is set {1,2} success rate% by Stakes for the combinations it appears in that {3} does not, {1,3} by where {2} does not feature, {1,2,3} by their occurrences, and the base player {1} by examples where neither {2} nor {3} occur. My expected outcome therefore should look like this I believe: c(105,0,0,0, 375,304,110,0,0,0, 529,283,246,0, 400) where the first four numbers are the single player combinations {1} {2} {3} and {4}, the next six numbers are two player combinations {1,2} {1,3} {1,4} (and the null cases {2,3} {2,4} {3,4} which don't exist), then the next four are the three player combinations {1,2,3} {1,2,4} {1,3,4} and the null case {2,3,4}, and lastly the full combination set {1,2,3,4}. I'd then feed this in to the DefineGame function of the package to create my coalitions object. Appreciate any help: I have tried to be as descriptive as possible. I really don't know where to start on generating the necessary sets and set exclusions.
Crystal Reports Difference of group total
I have a report which has two groups. Group B always has only 2 values. I want to get the difference of total values of Item Type 01 and Item Type 02 to the Group B footer (Tot type01 - tot type02). Help me to achieve this. I tried few formulas but non of them works for me Month01 Month2 Group A Group B Item Type 01 ab 10 10 ac 20 30 ad 30 30 **Total** 60 70 Item Type 02 ab 10 20 ac 10 15 ad 20 5 **Total** 40 30 **Difference 20 40** I want something like this NumberVar sum01 := 0; Numbervar sum02 := 0; GroupName ({DataTable1.IncomeType}) = Type 01 Then sum01 := Sum ({DataTable1.Month01}, {DataTable1.IncomeType}) if GroupName ({DataTable1.IncomeType}) = Type 02 Then sum02 := Sum ({DataTable1.Month01}, {DataTable1.IncomeType}) sum01 - sum02 I know this isn't correct. I used it to explain my question for you as much as possible. Really appreciate your guidence
You can do this using arrays.. Take 2 arrays and store values for Month1 and Month2 and in group footer retrive and add those. Create a formula #Month1Array and place in Item Type group footer after Month1 summary Shared Numbervar array x; x:=x+sum(Month1,Item GRoup); 1; Create a formula #Month2Array and place in Item Type group footer after Month2 summary Shared Numbervar array y; y:=y+sum(Month2,Item GRoup); 1; Now in the footer where you want to see the difference write below formula for Create a formula #Month1 Shared Numbervar array x; x[1]-x[2] Create a formula #Month2 Shared Numbervar array y; y[1]-y[2]
Hash Table + Binary Search
I'm using an Hash Table to store some values. Here are the details: There will be roughly 1M items to store (not known before, so no perfect-hash possible). Table is 10M large. Hash function is MurMurHash3. I did some tests and storing 1M values I get 350,000 collisions and 30 elements at the most-colliding hash table's slot. Are these result good? Would it make sense to implement Binary Search for lists that get created at colliding hash-table's slots? What' your advice to improve performances? EDIT: Here is my code var HashList: array [0..10000000 - 1] of Integer; for I := 0 to High(HashList) do HashList[I] := 0; for I := 1 to 1000000 do begin Y := MurmurHash3(UIntToStr(I)); Y := Y mod Length(HashList); Inc(HashList[Y]); if HashList[Y] > 1 then Inc(TotalCollisionsCount); if HashList[Y] > MostCollidingSlotItemCount then MostCollidingSlotItemCount := HashList[Y]; end; Writeln('Total: ' + IntToStr(TotalCollisionsCount) + ' Max: ' + IntToStr(MostCollidingSlotItemCount)); Here is the result I get: Total: 48169 Max: 5 Am I missing something?
This is what you get when you put 1M items randomly into 10M cells calendar_size=10000000 nperson = 1000000 E/cell| Ncell | frac | Nelem | frac |h/cell| hops | Cumhops ----+---------+--------+----------+--------+------+--------+-------- 0: 9048262 (0.904826) 0 (0.000000) 0 0 0 1: 905064 (0.090506) 905064 (0.905064) 1 905064 905064 2: 45136 (0.004514) 90272 (0.090272) 3 135408 1040472 3: 1488 (0.000149) 4464 (0.004464) 6 8928 1049400 4: 50 (0.000005) 200 (0.000200) 10 500 1049900 ----+---------+--------+----------+--------+------+--------+-------- 5: 10000000 1000000 1.049900 1049900 The left column is the number of items in a cell. The second: the number of cells having this itemcount. WRT the binary search: it is obvious that for small tables like this (maximum chain length=4, but most chains are of length=1), linear search outperforms binary search. The takeover-point is probably somewhere between 10 and 100.
PIG - Scalar has more than one row in the output. 1s
I have data set in the following format: 100000853384|RETAIL|OTHER|4.625|280000|360|02/2012|04/2012|31|31|1|23|801|NO|CASH-OUT REFINANCE|SF|1|INVESTOR|CA|945||FRM 100003735682|RETAIL|SUNTRUST MORTGAGE INC.|3.99|466000|360|01/2012|03/2012|80|80|2|30|788|NO|PURCHASE|SF|1|PRINCIPAL|MD|208||FRM 100006367485|CORRESPONDENT|PHH MORTGAGE CORPORATION|4|229000|360|02/2012|04/2012|67|67|2|36|794|NO|NO CASH-OUT REFINANCE|SF|1|PRINCIPAL|CA|959||FRM 4th record is the ORIGINAL_INTEREST_RATE. Now My Question is What is the interest rate for which most number of people have taken a loan. I write following codes LOAD DATA SET loanAqiData = LOAD 'hdfs://masterNode:8020/home/hadoop/hadoop_data/LOAN_Acquisition_DATA/Acquisition_2012Q1.txt' USING PigStorage('|') AS ( LOAN_IDENTIFIER:chararray , CHANNEL:chararray , SELLER_NAME:chararray , ORIGINAL_INTEREST_RATE:float , ORIGINAL_UNPAID_PRINCIPAL_BALANCE :float , ORIGINAL_LOAN_TERM :float , ORIGINATION_DATE:chararray , FIRST_PAYMENT_DATE:chararray , ORIGINAL_LOAN_TO_VALUE:float , ORIGINAL_COMBINED_LOAN_TO_VALUE :float , NUMBER_OF_BORROWERS:float , DEBT_TO_INCOME_RATIO:float , CREDIT_SCORE:float , FIRST_TIME_HOME_BUYER_INDICATOR:chararray , LOAN_PURPOSE:chararray , PROPERTY_TYPE:chararray , NUMBER_OF_UNITS:chararray , OCCUPANCY_STATUS:chararray , PROPERTY_STATE:chararray , ZIP:chararray , MORTGAGE_INSURANCE_PERCENTAGE:float , PRODUCT_TYPE:chararray ); //- Group By Interest Rate grouped_by_interest_rate = group loanAqiData by ORIGINAL_INTEREST_RATE; No of Counts for individual Interest Rate count_for_specific_interest = FOREACH grouped_by_interest_rate GENERATE group as INTEREST_RATE, COUNT(loanAqiData) as NO_OF_PEOPLE; Dump dump count_for_specific_interest Output (3.625,1) (3.75,2) (3.875,26) (3.99,8) (4.0,21) (4.1,1) (4.125,15) (4.25,16) (4.375,15) (4.376,26) (4.5,10) (4.625,3) But I want to get (3.875,26) and (4.376,26) How Can I get ? Also If I want to get the Loan Interest for which minimum No of people has taken Loan ..
I'd suggest you use the MAX() function (http://pig.apache.org/docs/r0.11.0/func.html#max) to determine the highest number of people and then filter by this number. Here is an example of code that should work (not tested) : FOREACH count_for_specific_interest { max_value= MAX($1.NO_OF_PEOPLE); GENERATE INTEREST_RATE, NO_OF_PEOPLE, max_value; } RESULT = FILTER count_for_specific_interest BY NO_OF_PEOPLE==max_value; For the min you would be able to use exactly the same script replacing MAX() by MIN()
Finally this is resolved. let me write down the steps 1) Load 2) Group by Interest grp = group loanAqiData by ORIGINAL_INTEREST_RATE; 3) Count No of people against each Interest cntForEachGrp = FOREACH grp GENERATE group as INTEREST_RATE, COUNT(loanAqiData) as NO_OF_PEOPLE; Output (3.625,1) (3.75,2) (3.875,26) (3.99,8) (4.0,21) (4.1,1) (4.125,15) (4.25,16) (4.375,15) (4.376,26) (4.5,10) (4.625,3) 4) Group them all to put in the same BAG grpALL = GROUP cntForEachGrp ALL; (all,{(3.625,1),(3.75,2),(3.875,26),(3.99,8),(4.0,21),(4.1,1),(4.125,15),(4.25,16),(4.375,15),(4.376,1),(4.5,10),(4.625,3),(4.75,5),(4.875,4),(5.0,2),(5.25,1)}) 5) Calculate Max No of people from the BAG maxVal = FOREACH grpALL { max_value= MAX(cntForEachGrp.NO_OF_PEOPLE); GENERATE cntForEachGrp.INTEREST_RATE, cntForEachGrp.NO_OF_PEOPLE, max_value as max_no; } grunt> describe maxVal; maxVal: {{(INTEREST_RATE: float)},{(NO_OF_PEOPLE: long)},max_no: long} dump maxVal; ({(3.625),(3.75),(3.875),(3.99),(4.0),(4.1),(4.125),(4.25),(4.375),(4.376),(4.5),(4.625),(4.75),(4.875),(5.0),(5.25)},{(1),(2),(26),(8),(21),(1),(15),(16),(15),(1),(10),(3),(5),(4),(2),(1)},26) 6)Filter out Loan interest having Max no of people RESULT=FILTER cntForEachGrp BY NO_OF_PEOPLE == maxVal.max_no ; After dump we get interest Rate -3.875 has max no of people 26. Why we have to do grpALL = GROUP cntForEachGrp ALL; and what is the inner meaning of the nested foreach in (5)