zinggAI · sonalgoyal · May 1, 2023 · Apr 26, 2023 · Apr 26, 2023 · Apr 26, 2023
diff --git a/common/client/src/main/java/zingg/common/client/util/ColName.java b/common/client/src/main/java/zingg/common/client/util/ColName.java
@@ -19,7 +19,7 @@ public interface ColName {
 	public static final String SCORE_MIN_COL = COL_PREFIX + "minScore";
 	public static final String SCORE_MAX_COL = COL_PREFIX + "maxScore";
 	public static final String SPARK_JOB_ID_COL = COL_PREFIX + "sparkJobId";
-	public static final String SOURCE_COL = COL_PREFIX + "source";
+	public static final String SOURCE_COL = COL_PREFIX + "zsource";
 	public static final String SCORE_KEY_COL = COL_PREFIX + "scorekey";
 	public static final String DENSE_COL = COL_PREFIX + "dense";
 	public static final String UPDATED_AT = COL_PREFIX + "updated";

diff --git a/common/core/src/main/resources/zColumnTemplate.ftlh b/common/core/src/main/resources/zColumnTemplate.ftlh
@@ -32,8 +32,8 @@
       <p>"z_score - the probability of a pair of records matching. The higher the score, the more likely they are a match."</p>
     <#elseif title == "z_isMatch">
       <p>z_isMatch - this is the label provided by the user.</p>
-    <#elseif title == "z_source">
-      <p>z_source - the source of data as set in the name property of the data in the Zingg configuration file.</p>
+    <#elseif title == "z_zsource">
+      <p>z_zsource - the source of data as set in the name property of the data in the Zingg configuration file.</p>
     <#else>
       <p>${title} - this field is internally used by Zingg.</p>
     </#if>

diff --git a/models/100/docs/model.html b/models/100/docs/model.html
@@ -28,7 +28,7 @@
     <th class="border-right border-white" > <a href="state.html"> state </a></th>
     <th class="border-right border-white" > <a href="dob.html"> dob </a></th>
     <th class="border-right border-white" > <a href="ssn.html"> ssn </a></th>
-    <th class="border-right border-white" > <a href="z_source.html"> z_source </a></th>
+    <th class="border-right border-white" > <a href="z_zsource.html"> z_zsource </a></th>
     <th class="border-right border-white" > <a href="z_isMatch.html"> z_isMatch </a></th>
   </tr>
   </thead>

diff --git a/models/100/docs/z_source.html → models/100/docs/z_zsource.html b/models/100/docs/z_source.html → models/100/docs/z_zsource.html
@@ -1,6 +1,6 @@
 <html>
 <head>
-  <title>z_source</title>
+  <title>z_zsource</title>
   <link rel="stylesheet" href="https://stackpath.bootstrapcdn.com/bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha384-MCw98/SFnGE8fJT3GXwEOngsV7Zt27NXFoaoApmYm81iuXoPkFOJwJ8ERdknLPMO" crossorigin="anonymous">
 </head>
 
@@ -9,7 +9,7 @@
     <a class="navbar-brand" href="https://www.zingg.ai">
       <img src="https://github.com/zinggai/zingg/raw/main/assets/zinggWhiteTransparent.png" class="d-inline-block align-top" alt="">
     </a>
-    <h1> Field - z_source </h1>
+    <h1> Field - z_zsource </h1>
     <a href="../model.html">
     <div class="justify-content-end">Model 100</div>
     </a>
@@ -22,7 +22,7 @@ <h1> Field - z_source </h1>
   </tbody>
   </table>
   <p>
-      <p>z_source - the source of data as set in the name property of the data in the Zingg configuration file.</p>
+      <p>z_zsource - the source of data as set in the name property of the data in the Zingg configuration file.</p>
   </p>
 </body>
 </html>

diff --git a/perf/joinPlan.txt b/perf/joinPlan.txt
@@ -1,9 +1,9 @@
 i== Physical Plan ==
-*(11) Project [z_z_zid#597L, z_zid#367L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82, z_id#1103, z_fname#1104, z_lname#1105, z_stNo#1106, z_add1#1107, z_add2#1108, z_city#1109, z_state#1110, z_dob#1111, z_ssn#1112, z_z_source#1113]
+*(11) Project [z_z_zid#597L, z_zid#367L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82, z_id#1103, z_fname#1104, z_lname#1105, z_stNo#1106, z_add1#1107, z_add2#1108, z_city#1109, z_state#1110, z_dob#1111, z_ssn#1112, z_z_zsource#1113]
 +- *(11) SortMergeJoin [z_z_zid#597L], [z_z_zid#1102L], Inner
    :- *(9) Sort [z_z_zid#597L ASC NULLS FIRST], false, 0
    :  +- Exchange hashpartitioning(z_z_zid#597L, 3000), false, [id=#472]
-   :     +- *(8) Project [z_zid#367L, z_z_zid#597L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82]
+   :     +- *(8) Project [z_zid#367L, z_z_zid#597L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82]
    :        +- *(8) SortMergeJoin [z_zid#367L], [z_zid#71L], Inner
    :           :- *(6) Sort [z_zid#367L ASC NULLS FIRST], false, 0
    :           :  +- Exchange hashpartitioning(z_zid#367L, 3000), false, [id=#456]
@@ -13,16 +13,16 @@ i== Physical Plan ==
    :           :           :  +- Exchange hashpartitioning(z_hash#379, 3000), false, [id=#372]
    :           :           :     +- *(1) SerializeFromObject [validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 0, z_zid), LongType) AS z_zid#367L, validateexternaltype(getexternalrowfield(assertnotnull(input[0, org.apache.spark.sql.Row, true]), 12, z_hash), IntegerType) AS z_hash#379]
    :           :           :        +- *(1) MapElements zingg.block.Block$BlockFunction@37e7e089, obj#366: org.apache.spark.sql.Row
-   :           :           :           +- *(1) DeserializeToObject createexternalrow(z_zid#71L, id#72.toString, fname#73.toString, lname#74.toString, stNo#75.toString, add1#76.toString, add2#77.toString, city#78.toString, state#79.toString, dob#80.toString, ssn#81.toString, z_source#82.toString, StructField(z_zid,LongType,false), StructField(id,StringType,true), StructField(fname,StringType,true), StructField(lname,StringType,true), StructField(stNo,StringType,true), StructField(add1,StringType,true), StructField(add2,StringType,true), StructField(city,StringType,true), StructField(state,StringType,true), StructField(dob,StringType,true), StructField(ssn,StringType,true), StructField(z_source,StringType,false)), obj#365: org.apache.spark.sql.Row
+   :           :           :           +- *(1) DeserializeToObject createexternalrow(z_zid#71L, id#72.toString, fname#73.toString, lname#74.toString, stNo#75.toString, add1#76.toString, add2#77.toString, city#78.toString, state#79.toString, dob#80.toString, ssn#81.toString, z_zsource#82.toString, StructField(z_zid,LongType,false), StructField(id,StringType,true), StructField(fname,StringType,true), StructField(lname,StringType,true), StructField(stNo,StringType,true), StructField(add1,StringType,true), StructField(add2,StringType,true), StructField(city,StringType,true), StructField(state,StringType,true), StructField(dob,StringType,true), StructField(ssn,StringType,true), StructField(z_zsource,StringType,false)), obj#365: org.apache.spark.sql.Row
    :           :           :              +- Exchange hashpartitioning(z_zid#71L, 3000), false, [id=#324]
-   :           :           :                 +- InMemoryTableScan [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82]
-   :           :           :                       +- InMemoryRelation [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82], StorageLevel(memory, deserialized, 1 replicas)
+   :           :           :                 +- InMemoryTableScan [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82]
+   :           :           :                       +- InMemoryRelation [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82], StorageLevel(memory, deserialized, 1 replicas)
    :           :           :                             +- Exchange RoundRobinPartitioning(3000), false, [id=#40]
-   :           :           :                                +- *(1) Scan ExistingRDD[z_zid#71L,id#72,fname#73,lname#74,stNo#75,add1#76,add2#77,city#78,state#79,dob#80,ssn#81,z_source#82]
+   :           :           :                                +- *(1) Scan ExistingRDD[z_zid#71L,id#72,fname#73,lname#74,stNo#75,add1#76,add2#77,city#78,state#79,dob#80,ssn#81,z_zsource#82]
    :           :           +- *(4) Sort [z_hash#592 ASC NULLS FIRST], false, 0
    :           :              +- ReusedExchange [z_zid#580L, z_hash#592], Exchange hashpartitioning(z_hash#379, 3000), false, [id=#372]
    :           +- *(7) Sort [z_zid#71L ASC NULLS FIRST], false, 0
-   :              +- ReusedExchange [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82], Exchange hashpartitioning(z_zid#71L, 3000), false, [id=#324]
+   :              +- ReusedExchange [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82], Exchange hashpartitioning(z_zid#71L, 3000), false, [id=#324]
    +- *(10) Sort [z_z_zid#1102L ASC NULLS FIRST], false, 0
-      +- *(10) Project [z_zid#71L AS z_z_zid#1102L, id#72 AS z_id#1103, fname#73 AS z_fname#1104, lname#74 AS z_lname#1105, stNo#75 AS z_stNo#1106, add1#76 AS z_add1#1107, add2#77 AS z_add2#1108, city#78 AS z_city#1109, state#79 AS z_state#1110, dob#80 AS z_dob#1111, ssn#81 AS z_ssn#1112, z_source#82 AS z_z_source#1113]
-         +- ReusedExchange [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_source#82], Exchange hashpartitioning(z_zid#71L, 3000), false, [id=#324]
+      +- *(10) Project [z_zid#71L AS z_z_zid#1102L, id#72 AS z_id#1103, fname#73 AS z_fname#1104, lname#74 AS z_lname#1105, stNo#75 AS z_stNo#1106, add1#76 AS z_add1#1107, add2#77 AS z_add2#1108, city#78 AS z_city#1109, state#79 AS z_state#1110, dob#80 AS z_dob#1111, ssn#81 AS z_ssn#1112, z_zsource#82 AS z_z_zsource#1113]
+         +- ReusedExchange [z_zid#71L, id#72, fname#73, lname#74, stNo#75, add1#76, add2#77, city#78, state#79, dob#80, ssn#81, z_zsource#82], Exchange hashpartitioning(z_zid#71L, 3000), false, [id=#324]
diff --git a/python/phases/exportModel.py b/python/phases/exportModel.py
@@ -32,7 +32,7 @@ def main():
 
 def export_data(labelledData, location):
 
-    baseCols = ['z_cluster', 'z_zid', 'z_prediction', 'z_score', 'z_source', 'z_isMatch']
+    baseCols = ['z_cluster', 'z_zid', 'z_prediction', 'z_score', 'z_zsource', 'z_isMatch']
     sourceDataColumns =  [c for c in labelledData.columns if c not in  baseCols]
     additionalTrainingColumns = ['z_cluster','z_isMatch']
     trainingSampleColumns = [*additionalTrainingColumns, *sourceDataColumns]