Skip to content

Commit

Permalink
Merge pull request #109 from nikmagini/submissiontool
Browse files Browse the repository at this point in the history
Add new MViews to exlcude production jobs from PopDB queries


New CRAB PopDB version including:

1) DB schema changes: add new MViews in which wmagent file accesses are filtered out
2) DB collector changes: add new MViews to MView refresh script
3) Web changes: new 'includewmagent' parameter to toggle between old MViews (including WMAgent file accesses) and new MViews (excluding WMAgent). Default value is 'n'
  • Loading branch information
nikmagini committed May 24, 2016
2 parents dd1a324 + e12bebe commit bcc9d0e
Show file tree
Hide file tree
Showing 9 changed files with 310 additions and 16 deletions.
134 changes: 134 additions & 0 deletions DataPopularity/popdb.crab/collector/config/db/CreateMVDSStat1.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
--- aggregation: day, sitename, dataset
--- exclude production
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName, collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, siteName, collName
;

--- aggregation: day, dataset
--- exclude production
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, collName
;

--- aggregation: day, sitename, dataTier
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days
--- excludes wmagent jobs

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName, substr(collName,INSTR(collName, '/',-1)+1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, siteName, substr(collName,INSTR(collName, '/',-1)+1)
;


--- aggregation: day, datatier
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days
--- excludes wmagent jobs

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , substr(collName,INSTR(collName, '/',-1)+1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, substr(collName,INSTR(collName, '/',-1)+1)
;


--- First aggregation: day, sitename, dataset namespace (ReReco, Summer2011, etc)
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days
--- excludes wmagent jobs

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName,
substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where
substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER'
and
collname not like 'unknown'
and submissiontool!='wmagent'
GROUP BY TDay, siteName, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1)
;


--- Second aggregation: day, dataset namespace (ReReco, Summer2011, etc)
--- count distinct users per day, aggregating later more days will
--- double count the same users that submitted in several days
--- excludes wmagent jobs

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay ,
substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where
substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER'
and
collname not like 'unknown'
and submissiontool!='wmagent'
GROUP BY TDay, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1)
;

grant select on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_R;

grant select, insert, update, delete on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_W;
Original file line number Diff line number Diff line change
Expand Up @@ -237,11 +237,12 @@ BUILD IMMEDIATE
REFRESH FAST
ENABLE QUERY REWRITE
AS
select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote,
select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote,
submissiontool,
count(*) as numAccesses, sum(CPUTIME) as totCPU
from raw_file
where fileexitflag=1
GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote
GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool
;
commit;

Expand Down
124 changes: 124 additions & 0 deletions DataPopularity/popdb.crab/collector/config/db/MigratePopDB03.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
-- SQL USED FOR POPDB SCHEMA MIGRATION ON 2015-05-24
-- NOT TO BE USED FOR A FRESH SCHEMA DEPLOYMENT
-- TO DEPLOY SCHEMA FROM SCRATCH, RUN CreateMVTable
-- and CreateMVDSStat1

DROP MATERIALIZED VIEW MV_DS_stat0;

CREATE MATERIALIZED VIEW MV_DS_stat0
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
REFRESH FAST
ENABLE QUERY REWRITE
AS
select trunc(finishedtimestamp) as TDay , siteName, userid, inputcollection as collName, isRemote,
submissiontool,
count(*) as numAccesses, sum(CPUTIME) as totCPU
from raw_file
where fileexitflag=1
GROUP BY trunc(finishedtimestamp), siteName, userid, inputcollection, isRemote, submissiontool
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName, collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, siteName, collName
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr1_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count(distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, collName
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName, substr(collName,INSTR(collName, '/',-1)+1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, siteName, substr(collName,INSTR(collName, '/',-1)+1)
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr2_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , substr(collName,INSTR(collName, '/',-1)+1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where submissiontool!='wmagent'
GROUP BY TDay, substr(collName,INSTR(collName, '/',-1)+1)
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay , siteName,
substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where
substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER'
and
collname not like 'unknown'
and submissiontool!='wmagent'
GROUP BY TDay, siteName, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1)
;

CREATE MATERIALIZED VIEW MV_DS_stat1_aggr4_summ
COMPRESS
PCTFREE 0
BUILD IMMEDIATE
ENABLE QUERY REWRITE
AS
select TDay ,
substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1) as collName,
sum(numAccesses) as numAccesses, sum(totCPU) as totCPU, count (distinct userid) as numUsers
from MV_DS_stat0
where
substr(collname,INSTR(collname, '/',-1,1)+1) not like 'USER'
and
collname not like 'unknown'
and submissiontool!='wmagent'
GROUP BY TDay, substr(collname,INSTR(collname, '/',-1,2)+1,INSTR(collname, '/',-1,1)-INSTR(collname, '/',-1,2)-1)
;

grant select on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_R;
grant select on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_R;

grant select, insert, update, delete on MV_DS_STAT0 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR1 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR1_SUMM to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR2 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR2_SUMM to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR4 to CMS_POPULARITY_SYSTEM_W;
grant select, insert, update, delete on MV_DS_STAT1_AGGR4_SUMM to CMS_POPULARITY_SYSTEM_W;
6 changes: 4 additions & 2 deletions DataPopularity/popdb.crab/collector/lib/popdbRefresh.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,10 @@ def _refresh_MV(self):

_refresh_with_alter('MV_DS_STAT0', 'F')

mvPool = Pool(10)
table_input=['MV_DS_STAT0_AGGR1', 'MV_DS_STAT0_AGGR2', 'MV_DS_STAT0_AGGR1_SUMM', 'MV_DS_STAT0_AGGR2_SUMM', 'MV_DS_STAT0_AGGR3', 'MV_DS_STAT0_AGGR4', 'MV_DS_STAT0_AGGR4_SUMM', 'MV_DS_CountFiles', 'MV_block_stat0_aggr_5_weeks', 'MV_DS_stat0_remote']
mvPool = Pool(16)
table_input=['MV_DS_STAT0_AGGR1', 'MV_DS_STAT0_AGGR2', 'MV_DS_STAT0_AGGR1_SUMM', 'MV_DS_STAT0_AGGR2_SUMM', 'MV_DS_STAT0_AGGR3', 'MV_DS_STAT0_AGGR4', 'MV_DS_STAT0_AGGR4_SUMM',
'MV_DS_STAT1_AGGR1', 'MV_DS_STAT1_AGGR2', 'MV_DS_STAT1_AGGR1_SUMM', 'MV_DS_STAT1_AGGR2_SUMM', 'MV_DS_STAT1_AGGR4', 'MV_DS_STAT1_AGGR4_SUMM',
'MV_DS_CountFiles', 'MV_block_stat0_aggr_5_weeks', 'MV_DS_stat0_remote']
map_input = [ (x, 'C') for x in table_input ]
mvPool.map(_refresh_SingleMV_Wrapper, map_input)
mvPool.close()
Expand Down
2 changes: 1 addition & 1 deletion DataPopularity/popdb.crab/collector/module.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = popularity-crab-cron
author = CMS Data Popularity Team (formerly in CERN IT-SDC, IT-ES)
author_email = [email protected]
url = ''
version = 0.2.3
version = 0.3.0
long_description = CMS Data Popularity Collection service to collect the usage of CMS official data from CRAB reports
description = CMS Data Popularity Collection service for CRAB reports

27 changes: 20 additions & 7 deletions DataPopularity/popdb.web/lib/Apps/popularity/database/popDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,20 @@
def DSStatInTimeWindow(params, MView):

#cursor = connection.cursor()
if params.includeWMAgent == 'y':
baseMV = "STAT0"
elif params.includeWMAgent == 'n':
baseMV = "STAT1"

if MView == 'DataTier':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2")
aggrMV = "AGGR2"
elif MView == 'DS':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1")
aggrMV = "AGGR1"
elif MView == 'DSName':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4")

aggrMV = 'AGGR4'

table = "%s.MV_DS_%s_%s" % (DBUSER, baseMV, aggrMV)

vars = '''collName , sum(numAccesses) as nAcc, round(sum(totCPU)/3600,0)
as totCPU, sum(numUsers) as nUsers'''
whereCondition = '''TDay >= to_date('%s','YYYY-MM-DD')
Expand Down Expand Up @@ -73,13 +79,20 @@ def MostPopDSStat(params, MView, collName):

#cursor = connection.cursor()

if params.includeWMAgent == 'y':
baseMV = "STAT0"
elif params.includeWMAgent == 'n':
baseMV = "STAT1"

if MView == 'DataTier':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR2")
aggrMV = "AGGR2"
elif MView == 'DS':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR1")
aggrMV = "AGGR1"
elif MView == 'DSName':
table = "%s.%s" % (DBUSER, "MV_DS_STAT0_AGGR4")
aggrMV = 'AGGR4'

table = "%s.MV_DS_%s_%s" % (DBUSER, baseMV, aggrMV)

#TimeFormats: timeformat acts to the displayed date, timeformatTrunc acts to the truncation of the input dates, and should be keept with the format of the aggregation

timeformat = 'YYYY/MM/DD'
Expand Down
Loading

0 comments on commit bcc9d0e

Please sign in to comment.