From 358deb67cbe12f6a00c449f568923d9b5cf6315b Mon Sep 17 00:00:00 2001 From: strayMat Date: Wed, 29 Jul 2020 17:39:15 +0200 Subject: [PATCH 1/3] Added LPP event and extractor --- .../cnam/etl/events/ProductOrService.scala | 11 +++ .../DcirProductOrServiceExtractor.scala | 80 +++++++++++++++++++ .../extractors/sources/dcir/DcirSource.scala | 3 +- 3 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ProductOrService.scala create mode 100644 src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ProductOrService.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ProductOrService.scala new file mode 100644 index 00000000..5238b4bc --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/events/ProductOrService.scala @@ -0,0 +1,11 @@ +package fr.polytechnique.cmap.cnam.etl.events +import java.sql.Timestamp + +object ProductOrService extends ProductOrService + +trait ProductOrService extends AnyEvent with EventBuilder{ + override val category: EventCategory[ProductOrService] = "product_or_service" + + def apply(patientID: String, groupID: String, name: String, quantity: Double, date: Timestamp): Event[ProductOrService] = + Event(patientID, category, groupID, name, quantity, date, None) +} diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala new file mode 100644 index 00000000..d94c218e --- /dev/null +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala @@ -0,0 +1,80 @@ +// License: BSD 3 clause + +package fr.polytechnique.cmap.cnam.etl.extractors.events.productsorservices + +import 
fr.polytechnique.cmap.cnam.etl.events.{DcirAct, EventBuilder, ProductOrService} +import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy +import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes +import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor +import org.apache.spark.sql.Row + +import scala.util.Try + +/** + * Gets all type of Acts from DCIR. + * + * The main addition of this class is the groupId method that allows to get the + * source of the act: Liberal, PublicAmbulatory, PrivateAmbulatory, Unkown and when + * the information is not available a default DCIRAct. + * + * @param codes : List of Act codes to be tracked in the study or empty to get all the Acts. + */ +final case class DcirProductOrServiceExtractor(codes: SimpleExtractorCodes) extends DcirSimpleExtractor[ProductOrService] + with StartsWithStrategy[ProductOrService] { + + override val columnName: String = ColNames.TipCode + override val eventBuilder: EventBuilder = ProductOrService + + // Implementation of the Extractor Trait + override def getCodes: SimpleExtractorCodes = codes + + // Implementation of the EventRowExtractor + override def usedColumns: List[String] = List( + ColNames.TipCode, ColNames.TipQuantity, + ColNames.InstitutionCode, ColNames.GHSCode, ColNames.Sector + ) ++ super.usedColumns + + // Number of products/services + override def extractWeight(r: Row): Double = r.getAs[Int](ColNames.TipQuantity).toDouble + + final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) + + /** + * Get the information of the origin of DCIR act that is being extracted. It returns a + * Failure[IllegalArgumentException] if the DCIR schema is old, a success if the DCIR schema contains an information. + * + * @param r the row of DCIR to be investigated. 
+ * @return Try[String] + */ + // TODO: REMOVE THIS + override def extractGroupId(r: Row): String = { + Try { + + if (!r.isNullAt(r.fieldIndex(ColNames.Sector)) && getSector(r) == 1) { + DcirAct.groupID.PublicAmbulatory + } + else { + if (r.isNullAt(r.fieldIndex(ColNames.GHSCode))) { + DcirAct.groupID.Liberal + } else { + // Value is not at null, it is not liberal + lazy val ghs = getGHS(r) + lazy val institutionCode = getInstitutionCode(r) + // Check if it is a private ambulatory + if (ghs == 0 && PrivateInstitutionCodes.contains(institutionCode)) { + DcirAct.groupID.PrivateAmbulatory + } + else { + DcirAct.groupID.Unknown + } + } + } + } recover { case _: IllegalArgumentException => DcirAct.groupID.DcirAct } + }.get + + private def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) + + private def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) + + private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala index 52d59b66..0f1f0913 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/sources/dcir/DcirSource.scala @@ -15,6 +15,8 @@ trait DcirSource extends ColumnNames { lazy val CamCode: String = "ER_CAM_F__CAM_PRS_IDE" lazy val BioCode: String = "ER_BIO_F__BIO_PRS_IDE" lazy val GHSCode: String = "ER_ETE_F__ETE_GHS_NUM" + lazy val TipCode: ColName = "ER_TIP_F__TIP_PRS_IDE" + lazy val TipQuantity: ColName = "ER_TIP_F__TIP_ACT_QSN" lazy val InstitutionCode: String = "ER_ETE_F__ETE_TYP_COD" lazy val Sector: String = "ER_ETE_F__PRS_PPU_SEC" lazy val NaturePrestation: ColName = "PRS_NAT_REF" @@ -26,7 +28,6 @@ trait DcirSource extends ColumnNames { lazy val FlowEmitterNumber: ColName = 
"FLX_EMT_ORD" lazy val OrgId: ColName = "ORG_CLE_NUM" lazy val OrderId: ColName = "DCT_ORD_NUM" - } } From c771a65e659b0e76017d83b0b44a8ef60e698928 Mon Sep 17 00:00:00 2001 From: strayMat Date: Wed, 29 Jul 2020 17:41:35 +0200 Subject: [PATCH 2/3] Added LPP (ProductOrServices) extractor tests : - needed to add ER_TIP_F table to the test data (changed DCIR_bio to DCIR_all, a broader version of the original DCIR test table where biology and LPP (ER_TIP_F) were added. --- src/test/resources/DCIR/ER_TIP_F.csv | 3 ++ .../resources/test-input/DCIR_all.parquet | Bin 0 -> 47367 bytes .../etl/events/ProductOrServiceSuite.scala | 26 +++++++++++++ .../events/acts/DcirBiologyActsSuite.scala | 2 +- ...itionerClaimSpecialityExtractorSuite.scala | 6 +-- .../DcirProductOrServiceSuite.scala | 36 ++++++++++++++++++ 6 files changed, 69 insertions(+), 4 deletions(-) create mode 100644 src/test/resources/DCIR/ER_TIP_F.csv create mode 100644 src/test/resources/test-input/DCIR_all.parquet create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/events/ProductOrServiceSuite.scala create mode 100644 src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceSuite.scala diff --git a/src/test/resources/DCIR/ER_TIP_F.csv b/src/test/resources/DCIR/ER_TIP_F.csv new file mode 100644 index 00000000..37dd2d9c --- /dev/null +++ b/src/test/resources/DCIR/ER_TIP_F.csv @@ -0,0 +1,3 @@ +LPP_ECT_MNT,LPP_ECU_MNT,TIP_ACL_DTD,TIP_ACL_DTF,TIP_ACT_PRU,TIP_ACT_QSN,TIP_ORD_NUM,TIP_PRS_IDE,TIP_PRS_TYP,TIP_PUB_PRX,TIP_SIR_NUM,ORG_CLE_NEW,DCT_ORD_NUM,FLX_DIS_DTD,FLX_EMT_NUM,FLX_EMT_ORD,FLX_EMT_TYP,FLX_TRT_DTD,ORG_CLE_NUM,PRS_ORD_NUM,REM_TYP_AFF +0,0,2006-02-01,2006-02-01,100,1,1,5589,6,100,12345,CODE1234,1000,2007-02-01,5,412,3,2007-01-10,CODE1234,5,2 +0,0,2006-02-01,2006-02-01,100,1,1,5600,6,100,12345,CODE1234,10,2006-02-01,3,2,1,2006-01-20,CODE1234,2,1 diff --git a/src/test/resources/test-input/DCIR_all.parquet b/src/test/resources/test-input/DCIR_all.parquet 
new file mode 100644 index 0000000000000000000000000000000000000000..532f6d2545340a75e45f3c8087948c9c88545a36 GIT binary patch literal 47367 zcmeHweUMz$m0$NvBZiSy$gH~kw4TMRR!jy~R(eLGk;IPUX-)Tx2Bv3PGb1c8-e!=r zm_i6F32giUgxFfLQH>QsDJ&s`5QPwhP%K4=<7K@H%MwK#_x2P0_9g0$ji||L{ za#4<*-??Asy!&pqru(&BN!4~~X8N9c@9&&@&bc4&zI(1}rMba;E+6Ilrt>TExlCr& z#&2xSWFE!Oe;>|d@|pbFe6%sY8f8}Gd-|drQKKA{DP#sjDwE0dZs@}A?)$sCkLOW- zAir@s-%FTlr*A>|yYTY>e*Oy(zJtHOF64hhVpC3(qq2m}DWI+#(AeGG5A|OL@HP3h z)A$4Mp1v$`#SZ{4%NHadQKKB?6CkHRxa0~AnC#&!cP*8sF8vpI8HT%BQ}pi zgd`kMqa5WE9H$a+$~s-zIz7l8pDkAghc*ub5_%8~?y)(M`vz$Dov8rn>C+0xbOWZG z-$^hTW8}~6xK!wkQ>eE7TSjmA-ix1o_&E=it@&G-3>Bmw&-kQJszi1mrNL+c<7;(V zTX717h4Ci{+&4afC)G<-Ha@37jvK!xziK+K)SFv%?qUTMS8I-_k@`)R;uN5S8jZ^< zuUtF50-3`@H^NTztvGn&=HV^AkRzFa6}n8cLYGJgr+QImA!U2XHjv6|FHzaiIR&z4 zZzjJgzlPmrWHqu^tR+Qv`&Rlih<_VDz5=2gQKKB?lWI8yLflH=Wvrxm%*-U@?wXm2 z@Kfj+Fy8=TYfo};nMj2gWg$kU4SYr8KLtdiXxWs9B&7iew}doYAOm>PRX2_M29 zN@APQUHQoGFzPD$P0!`8$Zs&3TQf@icX0dGE&A`4kp6O%AK~|w z?Vc8M`EB`81J!qcMv&U#k=ynkJW?+X#!^Fp=JmR=LlJ);Ot1*rA|z+X&hhbuWXh`_ ze{vyel%ss|Tuy-y_uQ-Vo6O{zuTftr7KiE2_U+s0&)^`9~5S0f~PJt{&gSgtr!3>%=CsPii zaSG(P+6MCdcKwj+$y}1%NO)A9nh7br#RE-@5xJx?2d6;b`D1LD77uc@^LbSzlerZ0 zm#?@eM^pyo6hK*x$H^+PgFH9+!>_GU7zj!tG&}>z>O=R$L2&mwrrtmAV38l zqm-{_yGK1X-~J!FzI*va8^*4>={v9d?}6(AY-~={W6KhoB(*7<$Nq>uE5~+Hqb5rM zHZ~{fv1N%(lA2)SPvj^-Mu;m*t>4%P+W)8|KI}0V-eT(l%Ka36l5j+2I8K4w91e?s z031;nj#DwXn*-93izcOG$E8t)Qx}Azo{@yp{R>&DS8yr@7t-Ai*M#7R%8lie2banB z<*(qCLDU1sf%o)`@wdXNfdCaatA|w|&)F(y)BmerNskK;dF--eQ%-qWOTtZ}MAD{2 zWg~DZ2KQCeTlVs7FRF$`ryL%wIpy(+i%xSQ6-|n+0qmT~DGx4_-;f`W{!^sKoh>6< zqaOQ>{KtN0DsV0?z5vnRq2T-3p98%H(|pVg<`l@>U>Z+{^TTEx z`5O`Gxinb3C_6l8zGAIYEQTJqE&mGRIY(6NWzeQ#FZKXk%10W`kNCq$HoI+5 zjwgf7#*@J5*eoA*58IMmfqSrE)5!RO){635NWPJ~Z$_u`y3u=6sa7 z%JV(@RAQe~67~-~tB~*6^88#wg2lF4rRd~ZBz|vDblsgPpIyu}mb;C5_>%n@#VwvU zvk(`aSvUnUi%4VW<@xnyaJzIHF)Y!q8Tz%uSMUhB@=TO1A&)I&j#A2l*cxu z6h3SN45bMVr$rrolIr7!ngZ0vDHLY)ox})4UBh&7vHcg?XUYb*(K^^dw_sDday@;d z7IMf&3Zpz$7^U!Gh52xlOOTD=D4*ddg^z^GhyeBAX 
zeIsj79$SM__(*HS;V73N8^KXN!%+$!hNA(_ADgNuVct=B(}#X5MF<4RDG+$%NQ}W$ zj@%(BqDG)R#-kKIEMmx=!wMwo1&4GWj`)$ICZ!`PKkae~*4@koCSVWEGR1D5v zv?Eh|ZWv3z>Sc812D^+#X}|}l$cRf_bn{xImZlrVPb*fZ?3Y~-3 zm#Qp^8kf5_+kH9gBiH9Y-l{rDTvs`wM(Q`sESv(A8Q;IEb`P2gH&P6}%p&2ClN9d$ zZ1;WMFnBv248De+Mf@ZjG)Fm6j`FaxEm9EDIOKg;qFn^c4vTpgAmzHliU|?Dm6aBe z^6~!Dh)5}Xj7jdwcH@YLUIi&9>)l{%tJN1bj8mRgA{^MG05~=QOEJF)#zvE#m_&_o zluu(Kr&vrFheqrUE0Ts;>5O+nf<(v75j9f3>0yym+*Z9Eme~HdZfrAs{(ScsisGD0 zB{0g_5@tk285$LmRMh8DZfqR{m-98vUTVRGzez{sIg8ZGsTgKCLd7P}62GL(xB(zh zzyL($37u2;CY-IQfbkAv)s^fpdOoInQWd8_;AwLWv@bVhzH;@kzxpK9dfDQV`kR35 zu{lwXElXBTQe$lVCCZ+aBbjV@gcRiUAEHJ%$|tEg<-uJ*L$W8JX4{7NRZLmtJ+C>N zl*Q?ZL2Ul3zmVL#+s%?4X%6t-6;b0dnTlJ*pwHpOL6QoJdGSO*_^1xt8tY$uk7{b= z|KBn_W#1TOZzMa@U)V$1`9|2eePx?V37cm)C5@65KQ(5p?;yq1eoe2!DjY3$@+w3l9~YPYb==qYt1n_ zKDv0b#s67?&T{k{`ci*&3QaTPTo+}+PrQo1KtGFe^qVSjN>n6bvf_s*5#{JNRpgYY zNJLaj-S)27$lBs^9nVV;!%PDBVwUT@>fybm_2#mXC<+1Xjz71m*aQ=y03<4-Q_W=R z9MV3pNyxcJ#Y5yII`JZ-W@?BSUMJp$!_teW%^Y!vV`N~y- zwlKGlpjE01^@SU1q7+ePc2_;h>|7bM8==q6P1eV!D^%u6s7P@vk1y0wLe#6x&3lx& zN|iX(OA`pwpjx?m_b(oDpgZb2<{hYEG%ab0QE7aVtn)n}&WsFcE2wCe8qcxfR)Ln9M3pTqbLPo@2k5ZlS%hcv}Te(fG`NzOt|bnu9*4oP}3b_EhTQ3w!F&ytA^U)h6aGt|~VT|5l!> zuq%6J>(vE{Y$6*>T{~B=RBO7UDQ-lWyM8(*U3H~f`6 zSN3wHXfT5$PD_NYZ3JI%^(yF(bER>?Z<5OCLC|4U7u4_;!-DP5N;532B=p%DMwB^F z96I$!C6($gRHjy*ukRYS-smU`pg3-|n)34)H_|Dj5T(Goex2|Z!kVinUys#h z_%xuJnp!UOHnNTKMn@T+xn6semP_@>iF&ELlhHw&uL-|fStH8Awd{$YVDR>^Yjq#H zRfCwqBawEm}{M3R=%o z98o6VDXNyTE>WseGxdcM#w@1HmbJYdMNMF_G7CE^x|Y(bn;0vfC%rlvjUBSzlXc5; zrFyBVJX)2b5xG2GwlrlqODiu_Xc|}Ls2|VIV)m%2{;0}Xn)Gx_D^(|)jV-ObFyZjX z7wJdhS(?NnnRm{j-wVE$X3CMC@|C%IZD*-IQCEM+TiiGs$lEoKIf2^c<&D=$_1d_a z&jYCC%6LOm%G)&$I|fiGPt4&SxJzcwALiIE;0)yPd6_iDO@4~@`X9_eU&aI^N` zKpwaT^X4nN>T@&Khx2yt&@+P9#(@73$MI5?>_LCjsLdmJAXiz-;>w!yw9ZgI7swqi z?Vy>%dcB8Ro*l>hybih&LaaH$s^%)4Q#b*cd1_1Ym|UB<|<-L z^kjAGt^iNUYuw8D-DbWIw1gg6FraX+G_CY}on-w4e7&-OWmmmkE@NJ@Yk!i{&}n5x zqo7%t*%rE7i~zJ+57w*z1nd1dHJ%1)PVQ8*o}NKsJoTz-p@dch>Q%>S-D}oFUOmb~ 
zTcoXoth8#Gn7@{C1GsK34dl_PLA8?dgx;!U(|VsiA?QLz@}(MlbiRUBm$kc*(6f-W z?T#r-zx6u(q0vaq0D(N`!GS#E!OYQMHsH80RU?kmHLRV$(XHXA9kD#q*DObqM}JD= z({>%%Fjvi~!CY?+jNy735P41mR<9yY^x*(U%601<(MkfSVeY8S?csSvpw3Ut@pFZ! z7uRcH^}4=wKYKab-_w`xTDK+FvtnIr$06TUSY3EBTX+T~Mw{FSvTh?bfpU55S>VwI ze=?c==d&K9*|1QMFJuef%c2M{`R)&T%gw1w0P}+`57TUTD9jVt!V}s454wQKham`) zQz6WUasf=Ncm?J|IbdFdM)5@noC;w+eNg}tJ|Hlkz6hB5h$N|t~+!mI6DLEj*I#e{dDR_!1BjkW(SB6TcR-lG_Kf8hkq2f8y6hN0OI; z1g!D$>KK;WKNDCluU;nBk1pj#t`z%jqOCu=6m8Xgg?>|8ITdQ_Pu3{;w71anjoGpqs=QdC&?6z=Ok_v@r^eV0U_{@JyjO0ek%zn^p}IlIx@~ zhycr1Kk3ys+oG!eeTAbKi9SgH_J)xF#F@y0FuSMs0gfr<7{cr7?l}C;L1HH@m#5zIO1F zeQ^}$|&kTS^ z@}`gm_lE>{{NyT6|Fi(WKEL9j`_@mca=r({>T{FM!Xf=X@;d^P-W`~C83OZ>-vK7y z9YQwcR0#8hjXtJ+8zL}Y*a%EICdR(UsSxH5KI36Zf7WgP!DoQU9!zp_DunsrCJ)oR zkr8r!xCxlLr&)b+Duj8g=wZq}qUAhR1Sa1nLUM8{gn4RE`IesH?E4+`6)Wds+5S_5 z@GX0d2!Er4E=g)^Me^!U0Ni#C2n_I7huQ^yXLA7D_6iOD&gOQ(e>&``I@uk5It*3o z!PlCPQz2EK7-3BN;>@ZU4_7BffXBCxkm)!T!h3P6nsm$q1;2Vzn6yNEajU;L`4Cm? zqo{o|Cq(#gD@536pxb&Y6}csrWIvS{gw}h_B(NVw3X;iT)mMV(D{G^RjO_2}~$w#+2&xu7`N_s(9!phcjb%a!A~E62N+^B-(FY*^2hRRYLpe zzznyaQ=#^Mv_r@yUivD`m(=#jM>}Y_=599OO9_gDA;eQ<50PFC*^Lg3c&e-s5eGyc zLNs54i0@T|>k*oH;jCn&-u+(1cfBVjq!6fL-bbq{^nmrm1c{*U%m}FY@kz1Hpt(i8 z1(}d*na3yL7SQzaZaWF^_+V#<$E<#c`|E7s9qcmgv^-+BnV_3!&M|7p7_VQ;?xNmN zQ# zV;u@pKYpFR9+*5&5hkZXn8){c7BdG!6sC60<9lGSz~dL3Gf55cc;^OBZgbQ`$o@8ih$+``qvOUHf?5Ylu00 zBN}qNt{S2)MhP_Jv71#x^s<#^8FN<0F@5Z2pXpgt!A6v*q7U=9DrXZ_mKLSDut%Zh z0JEQyA<(S6jL@hUv~n3CyKi z9VUS=CZ$4{&wfc^8n?jNDQSb$&5Vq0%DgmP}-R|-7jlOhyQ==a{;3+d{^kWBFROYEWe9VN=Pu<}#muK|X z5Biw;keV?1>jxdCGSCIr0q@_*m=~L4ZVFF#!1wP2o_^V9@i-N7qNOi09*)JigK|Q~ zr7t^7JA-JhhmuSsGY!k_7E_6&LC&!R7lP{d=rg)q+_ z@i5JCKY@Av2r$Kf201wu!aRD9nDxbhL4~Pj{iFBT1^#mGDhc$TvtRLf=p%@V2Oc7@ zv3vF_mdA4LUJ3Ac>RykBdLvrWo37{8c<|J{mPecI`1orc4|7aXwL|mx_-iS7y!8z+ z*0@*OmCq|4`qlPZ-vAGsa#8;o27iN32B;yMpSaKCVGeQ%9w+Wg$>Z(&6%Sqmn1i8; z2d!1!zTaOqJn{gJN!S-y>V=azII8MkI|(n0QD$@`dH+hlA-?x_f#!C0=w%|I;mD~% z(@zH|gYLqYuXtCn1W@06P|9czp5pMRkP&5QI&k-fVD|v`$%G8?zy6+-(W2qlsY25u 
zB+5X>hBvVUWPJSFVHx!kht?y?EK|l;j)rBF&o3zBWDH!z5|HuTKaeuogSl3ZdfyFY zMkA-ihWD`q(2xIpDYHeldezG;l!45A6AKOLoC;a%xj&4{><$hK+oQ}f^?LRn#AVcI zRz~fgC<7V!J{Ed>cIYjuRGo72{=I?DRhe?-+W zhBT^<1KF-R%KFtG|A&o*H)pn8b(Hn1pQGw_1oPT%4t1;Q+E-8-x}`-FKjd;Mq}vZ3 zZY-WT81BSFSzo-9|ERHe@_3W(ZYT}$^jgHOb~qIh@0CY@Z@0a@=F@X1PM<6B^dbeN z;mXPTX2_M3)DXYpCmOeh_(A20pBX+*POOC( zs~vhz9rOC&ysECf0i|2?hMzqO@$Bp?@WX2mluo%oIQ^LB=hPvNd8>`=Sv?ExDcsbL zvZJ{FMqYs9mJy{PI(3a@-H;<0z4Uk!nROt6vH{7Avdv_E_@6e0 zGEoQ48&KBwhBum2H(?2`I?DRhk3P|uyNO$H)lt^3ewwN`P`C+faMe-Pul{k<>M9n& zRYzIB`iuXp8Fv+~;HsmnU;V?T)m21;tB$gM^=F=J!d(YhxauhDSN~Dd>hZRttY7`{ zf8LCHyzMCKS3lRZdc5r@>sLSi6u8@+JsKlKpEUs=s_o?eC=36OU1ub_(vJzMkqhbQ z5T{0z^(A@wUo@_60!UnSl=Z8>OV#DXE*c`|u7ia`{U}Q#Jw^-to9?6FRP2@YTPGXq zM@}J+WDAdG`%!w-U76&*06ux2lGNM>|FSU?6XoKXjRpVyTvppVwot!A^%lX1HjhkuWa$I$k^{fAwsyA>D6S||S zlY^jae6EFq(Bz`mVKE1J=U;~#;y`OCs`bxNIw6^7(MWBMDs-;j0qqTCPGPP zBFJneM(n+U3AKVgOpP&l>{+ihE{4ZNBPm9@H7E^6_Kqn2MvRiw7^9c}t!NF6JC2bH zY!WR(Y1^XM?$hP(ikj@^&n4vZThE{K89(8JSQx!3d=tBYATcGm_L8qc{G&U?heU6IUmgprm6W2M%T; zsWB$+f42b>9i%6iprm6WZ`xuek{V<3!1n|bF_h5sY$E;yBb0WGmT_sD^r6T81dQZb zDH%Sf7@5KGPl!=+P^T6psj(Kl{4yBHsVW(Xs2I^yg|e8c8s6q5<$vx^8^iibB$Nd# z-|hveMJguklizO)E0;(lEy@Cx@BSiKPQ_p!{xeV8$?5Mr2#hZ)P$;jhjs~G|Bub^RV1gtN9h*v|LZGZd}@Y?1{L@y4Sc>#j5Z)S z6_Wp>Kfe&Z-Z<9!LFpFpzx5Z*@Kr>rkRPRizmJwMp=-zjTgQ*S)*Rn%XlnUU8uFcSNI%d1r^fiQTWNfhZV~_V>%f=8j);Tx`xnZNjzD5@vzZ(LUiiz#q7oEl7@=yE zhNyg*8L5I(AyvF~rU||d=N0%U-J&Vq{?Co^O-Qi7M`_^m#b+cxr$X|7_=Z-sKNR?j zdHvQ7rNd)R_)@f_YZkuy7uf=Z5Jp$sdFa+ni+3&JcC=0EqO?tizkFa5ZcO{~-TRMh zI&|k(Hr=uR$l|`mBa6|}?O$JihIsU-a@ax=Ih`5=EfL=iE2&*VS<<$;g=40A#NW;1P?Pv%cfDWzxtv-MrpM@3PcAGGe}#an2=#cX-5=_ z^~IGV=|Rwi(u3g0VoDHwBO-&erCAeN%qZ<>*2EX512GZC?Smjzf-ZJ!AAtPT+yt2p z!~_*HNL!{cVZ@Bmj%m#Olqo^~sN zv@BWn3dKegBcL%o2wo|s2SL0pgS2dB5uuwFMaSf(L@`mgDN#(^ZAz5%E+=AZS(a8^ z&VrOHn9$S| zARLXF4#dQ#rU7w5stnTVph{!AJ|HBX4B7=jm?=xrx{Zw}2rNyBVy=s3l$JdOqL^q> z25CntCR~(3+LA=wGcF#w+ET3v8D&x0X^)8s6(B9`Ymv8^5Kx9_Mz!=46o8MFEP57fgv_g7+Auod%f*J_c#4LFO<(Dijx*nG&TF%5_ng 
zEKBRkb$9Bg1L=f?+{OJYN*i_tijJd9fnp*jQzDrER3H_Ki;hf*V&WlFqL>KClqlvu zNnn&#=R*}Zse>BRftaYpbRZ@yv3(Ht-m3C>25Ge?6Nm{cY#)S{UnX*}eGnV`hyRx% zJqRKQ8Kkv$q&C10VQvi3s#+}wBE=Y_T{$KgqJ238NNaM~z`RuYuplY)K~84=50Lit zVL?*J5e(O3iCP_$l5jY@!f-9mWN|zbeaCn$J5Ve*(M01hT)Ng09HR3WE@f*?xE|xB zYpn_2W4M&!nQ%SEODA6Xg9341p}v7|Jr=LE9lazN^v7T+b!TGlI)|r&@j8Onk%<`U z6X7i;wvNSX!&N-ERBt9c>2wrbr=&BHbe)1Tk#(JdGm&xYyFm(eUh2hfa&cx1j3NEpIp3W2|VveP2AI`+Zbp%RBzjXqN<{A^@)(N;y4NA#)HI}ZO zF;n1mWVRM~iP<`Ze4SEXG5{@|`eJx0;H)EfBEXFCTH7zg>@T93bhKLAn~y%23LRs~ z+A(ZQIQ1VOb_}m0jx6L$IC2{Uy9bBivt!LQ|C?jS@JzT@NAObg+b@1I6J2ZKv_5mk zp)2m$f9UXScOJZAaMNIMQ}K#{TkgK?z`iSrn+F$1Mz#zWi`(~azGd4`5m(<04lNFD u8{Ry;cgwb+TlQ@o-n(`4;#Ks&RrX~vfAjBeG|-X;GQa=rOr} Date: Tue, 4 Aug 2020 11:37:09 +0200 Subject: [PATCH 3/3] Changing inherithence from DcirRowActExtractor - Changed DcirRowActExtractor to accept other event types than MedicalAct - Make DcirProductOrServiceExtractor extends DcirRowActExtractor --- .../events/acts/DcirMedicalActExtractor.scala | 17 ++--- .../DcirProductOrServiceExtractor.scala | 63 ++----------------- .../DcirProductOrServiceSuite.scala | 1 - 3 files changed, 14 insertions(+), 67 deletions(-) diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala index 7e2bd4f0..bf9691c9 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/acts/DcirMedicalActExtractor.scala @@ -3,13 +3,15 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.acts import java.sql.Timestamp + import scala.util.Try import org.apache.spark.sql.Row -import fr.polytechnique.cmap.cnam.etl.events.{BiologyDcirAct, DcirAct, EventBuilder, MedicalAct} +import fr.polytechnique.cmap.cnam.etl.events.{AnyEvent, BiologyDcirAct, DcirAct, EventBuilder, MedicalAct, 
ProductOrService} import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor import fr.polytechnique.cmap.cnam.util.functions.makeTS +import fr.polytechnique.cmap.cnam.etl.events.AnyEvent /** * Gets all type of Acts from DCIR. @@ -19,8 +21,9 @@ import fr.polytechnique.cmap.cnam.util.functions.makeTS * the information is not available a default DCIRAct. * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts. */ -abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends DcirSimpleExtractor[MedicalAct] - with StartsWithStrategy[MedicalAct] { +//TODO This abstract class should be an independent class +abstract class DcirRowActExtractor[EventType <: AnyEvent](codes: SimpleExtractorCodes) extends DcirSimpleExtractor[EventType] + with StartsWithStrategy[EventType] { final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) @@ -44,7 +47,7 @@ abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends D * @param r the row of DCIR to be investigated. * @return Try[String] */ - // TODO: REMOVE THIS + // TODO: Unification of extractGroupId in DCIR override def extractGroupId(r: Row): String = { Try { @@ -82,7 +85,7 @@ abstract sealed class DcirRowActExtractor(codes: SimpleExtractorCodes) extends D * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts.
*/ final case class DcirMedicalActExtractor(codes: SimpleExtractorCodes) - extends DcirRowActExtractor(codes) { + extends DcirRowActExtractor[MedicalAct](codes) { // Implementation of the BasicExtractor Trait override val columnName: String = ColNames.CamCode override val eventBuilder: EventBuilder = DcirAct @@ -93,7 +96,7 @@ final case class DcirMedicalActExtractor(codes: SimpleExtractorCodes) * @param codes: List of Act codes to be tracked in the study or empty to get all the Acts. */ final case class DcirBiologyActExtractor(codes: SimpleExtractorCodes) - extends DcirRowActExtractor(codes) { + extends DcirRowActExtractor[MedicalAct](codes) { // Implementation of the BasicExtractor Trait override val columnName: String = ColNames.BioCode override val eventBuilder: EventBuilder = BiologyDcirAct @@ -101,4 +104,4 @@ final case class DcirBiologyActExtractor(codes: SimpleExtractorCodes) // Because BioCode is a Double override def extractValue(row: Row): String = row.getAs[Double](columnName).toString -} +} \ No newline at end of file diff --git a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala index d94c218e..14d43719 100644 --- a/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala +++ b/src/main/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceExtractor.scala @@ -3,31 +3,17 @@ package fr.polytechnique.cmap.cnam.etl.extractors.events.productsorservices import fr.polytechnique.cmap.cnam.etl.events.{DcirAct, EventBuilder, ProductOrService} -import fr.polytechnique.cmap.cnam.etl.extractors.StartsWithStrategy import fr.polytechnique.cmap.cnam.etl.extractors.codes.SimpleExtractorCodes -import fr.polytechnique.cmap.cnam.etl.extractors.sources.dcir.DcirSimpleExtractor +import 
fr.polytechnique.cmap.cnam.etl.extractors.events.acts.DcirRowActExtractor import org.apache.spark.sql.Row import scala.util.Try -/** - * Gets all type of Acts from DCIR. - * - * The main addition of this class is the groupId method that allows to get the - * source of the act: Liberal, PublicAmbulatory, PrivateAmbulatory, Unkown and when - * the information is not available a default DCIRAct. - * - * @param codes : List of Act codes to be tracked in the study or empty to get all the Acts. - */ -final case class DcirProductOrServiceExtractor(codes: SimpleExtractorCodes) extends DcirSimpleExtractor[ProductOrService] - with StartsWithStrategy[ProductOrService] { - +final case class DcirProductOrServiceExtractor(codes: SimpleExtractorCodes) + extends DcirRowActExtractor[ProductOrService](codes) { override val columnName: String = ColNames.TipCode override val eventBuilder: EventBuilder = ProductOrService - // Implementation of the Extractor Trait - override def getCodes: SimpleExtractorCodes = codes - // Implementation of the EventRowExtractor override def usedColumns: List[String] = List( ColNames.TipCode, ColNames.TipQuantity, @@ -36,45 +22,4 @@ final case class DcirProductOrServiceExtractor(codes: SimpleExtractorCodes) exte // Number of products/services override def extractWeight(r: Row): Double = r.getAs[Int](ColNames.TipQuantity).toDouble - - final val PrivateInstitutionCodes = Set(4D, 5D, 6D, 7D) - - /** - * Get the information of the origin of DCIR act that is being extracted. It returns a - * Failure[IllegalArgumentException] if the DCIR schema is old, a success if the DCIR schema contains an information. - * - * @param r the row of DCIR to be investigated. 
- * @return Try[String] - */ - // TODO: REMOVE THIS - override def extractGroupId(r: Row): String = { - Try { - - if (!r.isNullAt(r.fieldIndex(ColNames.Sector)) && getSector(r) == 1) { - DcirAct.groupID.PublicAmbulatory - } - else { - if (r.isNullAt(r.fieldIndex(ColNames.GHSCode))) { - DcirAct.groupID.Liberal - } else { - // Value is not at null, it is not liberal - lazy val ghs = getGHS(r) - lazy val institutionCode = getInstitutionCode(r) - // Check if it is a private ambulatory - if (ghs == 0 && PrivateInstitutionCodes.contains(institutionCode)) { - DcirAct.groupID.PrivateAmbulatory - } - else { - DcirAct.groupID.Unknown - } - } - } - } recover { case _: IllegalArgumentException => DcirAct.groupID.DcirAct } - }.get - - private def getGHS(r: Row): Double = r.getAs[Double](ColNames.GHSCode) - - private def getInstitutionCode(r: Row): Double = r.getAs[Double](ColNames.InstitutionCode) - - private def getSector(r: Row): Double = r.getAs[Double](ColNames.Sector) -} \ No newline at end of file +} diff --git a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceSuite.scala b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceSuite.scala index 72b8fbf7..d73f9177 100644 --- a/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceSuite.scala +++ b/src/test/scala/fr/polytechnique/cmap/cnam/etl/extractors/events/productsorservices/DcirProductOrServiceSuite.scala @@ -10,7 +10,6 @@ import fr.polytechnique.cmap.cnam.util.functions._ import org.apache.spark.sql.DataFrame class DcirProductOrServiceSuite extends SharedContext { - override val debug: Boolean = true "extract" should "extract lpp products from raw data of the dcir (er_tip_f table)" in {