Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into cdcUpdateTest3
Browse files Browse the repository at this point in the history
  • Loading branch information
hqpho committed Nov 8, 2024
2 parents 8373289 + d07ffd8 commit 31a2b73
Show file tree
Hide file tree
Showing 47 changed files with 8,230 additions and 11,590 deletions.
2 changes: 1 addition & 1 deletion deploy/nl/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ indexes:
bio_ft:
store_type: MEMORY
source_path: ../../tools/nl/embeddings/input/bio
embeddings_path: gs://datcom-nl-models/bio_ft_2024_06_24_23_40_05/embeddings.csv
embeddings_path: gs://datcom-nl-models/bio_ft_2024_11_05_09_59_39/embeddings.csv
model: ft-final-v20230717230459-all-MiniLM-L6-v2
healthcheck_query: "Gene"
base_uae_lance:
Expand Down
2 changes: 1 addition & 1 deletion mixer
Submodule mixer updated 71 files
+11 −11 deploy/storage/base_bigtable_info.yaml
+1 −1 deploy/storage/bigquery.version
+3 −0 deploy/storage/spanner_graph_info.yaml
+1 −0 go.mod
+2 −0 go.sum
+2 −0 internal/proto/query.pb.go
+861 −849 internal/proto/service/mixer.pb.go
+38 −0 internal/proto/service/mixer_grpc.pb.go
+283 −0 internal/proto/v3/node.pb.go
+35 −0 internal/server/datasource/datasource.go
+45 −0 internal/server/handler_v3.go
+2 −2 internal/server/place/golden/get_related_locations/county.json
+1,033 −8 internal/server/recon/golden/recognize_entities/result.json
+71 −0 internal/server/spanner/client.go
+46 −0 internal/server/spanner/datasource.go
+65 −0 internal/server/spanner/dsutil.go
+156 −0 internal/server/spanner/golden/datasource/node.json
+69 −0 internal/server/spanner/golden/datasource_test.go
+139 −0 internal/server/spanner/golden/query/get_node_edges_by_id.json
+68 −0 internal/server/spanner/golden/query_test.go
+32 −0 internal/server/spanner/model.go
+99 −0 internal/server/spanner/query.go
+2 −1 internal/server/stat/golden/get_stat_all/branch.json
+178 −1,338 internal/server/statvar/golden/search_statvar/female.json
+0 −7,180 internal/server/v0/placestatvar/golden/get_place_stat_vars/alb.json
+0 −160 internal/server/v0/placestatvar/golden/get_place_stat_vars/california.json
+0 −500 internal/server/v0/placestatvar/golden/get_place_stat_vars/santa_clara.json
+1 −1 internal/server/v0/statpoint/golden/get_stat_value/umemployed.json
+1 −1 internal/server/v0/triple/golden/get_triples/limit1.json
+106 −0 internal/server/v0/triple/golden/get_triples/place_type.json
+2 −2 internal/server/v1/info/golden/bulk_variable_group_info/sqlite.json
+9 −127 internal/server/v1/info/golden/bulk_variable_info/bulk_bt_and_sql.json
+9 −127 internal/server/v1/info/golden/bulk_variable_info/bulk_result.json
+2 −2 internal/server/v1/info/golden/variable_group_info/demographics_gbr.json
+1 −2 internal/server/v1/info/golden/variable_group_info/root_mtv_jpn.json
+0 −140 internal/server/v1/observationdates/golden/observation_dates_linked/Earth_Country.json
+4 −4 internal/server/v1/observations/golden/bulk_point/all_latest.json
+4 −4 internal/server/v1/observations/golden/bulk_point/preferred_latest.json
+0 −18 internal/server/v1/observations/golden/bulk_point_linked/all_Country.json
+1,805 −5,788 internal/server/v1/observations/golden/bulk_point_linked/all_epa_facility.json
+0 −8 internal/server/v1/observations/golden/bulk_point_linked/preferred_Country.json
+1,805 −5,788 internal/server/v1/observations/golden/bulk_point_linked/preferred_epa_facility.json
+9 −1 internal/server/v1/observations/golden/bulk_series/all_result.json
+9 −1 internal/server/v1/observations/golden/bulk_series/preferred_result.json
+1,704 −27,639 internal/server/v1/observations/golden/bulk_series_linked/all_epa_facility.json
+1,704 −27,639 internal/server/v1/observations/golden/bulk_series_linked/preferred_epa_facility.json
+0 −146 internal/server/v1/page/golden/place_page/asm.Demographics.json
+11 −38 internal/server/v1/page/golden/place_page/asm.Economics.json
+0 −20 internal/server/v1/page/golden/place_page/asm.Equity.json
+6 −2 internal/server/v1/page/golden/place_page/asm.Health.json
+17 −40 internal/server/v1/page/golden/place_page/asm.Overview.json
+12 −5 internal/server/v1/page/golden/place_page/ca.Economics.json
+5,848 −15,892 internal/server/v1/page/golden/place_page/ca.Environment.json
+5,860 −15,551 internal/server/v1/page/golden/place_page/ca.Overview.json
+5,862 −15,616 internal/server/v1/page/golden/place_page/county.Overview.json
+65 −1 internal/server/v1/propertyvalues/golden/bulk_property_values_in/typeOf.json
+0 −168 internal/server/v1/variables/golden/bulk_variables/california.json
+0 −536 internal/server/v1/variables/golden/bulk_variables/california_and_santa_clara_union.json
+0 −7,180 internal/server/v1/variables/golden/variables/alb.json
+0 −14 internal/server/v2/facet/golden/contained_in_facet/country.json
+1 −28,829 internal/server/v2/observation/golden/contained_in_all/epa_facility.json
+0 −26 internal/server/v2/observation/golden/contained_in_latest/Country.json
+1 −7,527 internal/server/v2/observation/golden/contained_in_latest/epa_facility.json
+13 −5 internal/server/v2/observation/golden/direct/all.json
+8 −8 internal/server/v2/observation/golden/direct/latest.json
+10 −0 proto/service/mixer.proto
+41 −0 proto/v3/node.proto
+59 −0 test/setup.go
+56 −0 test/statvar_ranking/missing_USA_county_rankings.json
+28 −0 test/statvar_ranking/missing_USA_state_rankings.json
+5 −0 tools/migration_testing/mixer_api_requests.py
2 changes: 1 addition & 1 deletion run_cdc_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,4 @@ set -e
source .env/bin/activate
export FLASK_ENV=webdriver

python3 -m pytest --reruns 2 server/webdriver/cdc_tests/
python3 -m pytest -n 5 --reruns 2 server/webdriver/cdc_tests/
24 changes: 20 additions & 4 deletions server/config/nl_page/prop_titles.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,35 @@
{
"<-referenceSNPClusterID{typeOf:GeneticVariantGeneAssociation}->geneSymbol": {
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID": {
"displayName": "associated gene",
"titleFormat": "The associated genes for {entity} are"
},
"<-geneSymbol{typeOf:GeneticVariantGeneAssociation}->referenceSNPClusterID": {
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-diseaseOntologyID{typeOf:DiseaseGeneAssociation}->geneID": {
"<-diseaseID{typeOf:DiseaseGeneAssociation}->geneID": {
"displayName": "associated gene",
"titleFormat": "The associated genes for {entity} are"
},
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseOntologyID": {
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseID": {
"displayName": "associated disease",
"titleFormat": "The associated diseases for {entity} are"
},
"<-diseaseID{typeOf:DiseaseGeneticVariantAssociation}->geneticVariantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-geneticVariantID{typeOf:DiseaseGeneticVariantAssociation}->diseaseID": {
"displayName": "associated disease",
"titleFormat": "The associated diseases for {entity} are"
},
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID": {
"displayName": "associated chemical compound",
"titleFormat": "The associated chemical compounds for {entity} are"
},
"->mRNA": {
"displayName": "mRNA"
Expand Down
19 changes: 17 additions & 2 deletions server/integration_tests/explore_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,13 +119,23 @@ def handle_response(self,
# TODO: Proper fix should be to make NL server more deterministic
if 'variables' in resp:
resp_var_to_score = {}
dbg['sv_matching']['CosineScore'] = _format_scores(
dbg['sv_matching']['CosineScore'])
for i, sv in enumerate(dbg['sv_matching']['SV']):
score = dbg['sv_matching']['CosineScore'][i]
resp_var_to_score[sv] = float("{:.6f}".format(score))
resp_var_to_score[sv] = dbg['sv_matching']['CosineScore'][i]
sorted_variables = sorted(resp['variables'],
key=lambda x: (-resp_var_to_score.get(x, 0), x))
resp['variables'] = sorted_variables

# Round CosineScores to 6 decimals to reduce noisy diffs.
for candidate in dbg['sv_matching']['MultiSV'].get('Candidates', []):
for part in candidate.get('Parts', []):
if multisv_scores := part.get('CosineScore', []):
part['CosineScore'] = _format_scores(multisv_scores)

if props_scores := dbg['props_matching'].get('CosineScore', []):
dbg['props_matching']['CosineScore'] = _format_scores(props_scores)

resp['debug'] = {}
resp['context'] = {}
for category in resp.get('config', {}).get('categories', []):
Expand Down Expand Up @@ -793,3 +803,8 @@ def _del_field(d: dict, path: str):
del tmp[p]
else:
tmp = tmp[p]


# Helper function to consistently format float scores.
def _format_scores(scores):
return [float("{:.6f}".format(score)) for score in scores]
Original file line number Diff line number Diff line change
Expand Up @@ -204,39 +204,6 @@
}
],
"title": "Population"
},
{
"columns": [
{
"tiles": [
{
"barTileSpec": {
"maxPlaces": 15,
"maxVariables": 15,
"sort": "DESCENDING"
},
"comparisonPlaces": [
"country/DZA",
"country/MAR",
"country/LBY",
"country/TUN",
"country/MLI",
"country/ERI",
"country/SEN",
"country/GMB",
"country/MWI",
"country/SSD"
],
"statVarKey": [
"WHO/CM_02_multiple_place_bar_block"
],
"title": "Number of Infant Deaths (${date})",
"type": "BAR"
}
]
}
],
"title": "Number of Infant Deaths"
}
],
"statVarSpec": {
Expand All @@ -260,10 +227,6 @@
"name": "Rate of Population Growth",
"statVar": "GrowthRate_Count_Person"
},
"WHO/CM_02_multiple_place_bar_block": {
"name": "Number Of Infant Deaths",
"statVar": "WHO/CM_02"
},
"sdg/SP_DYN_ADKL.AGE--Y10T14__SEX--F_multiple_place_bar_block": {
"name": "Adolescent birth rate [10 to 14 years old, Female]",
"statVar": "sdg/SP_DYN_ADKL.AGE--Y10T14__SEX--F"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,29 @@
"query_with_places_removed": "what is the phylum of",
"sv_matching": {
"CosineScore": [
0.35510897636413574,
0.3460872769355774,
0.3452577292919159,
0.344845712184906,
0.34113651514053345,
0.3384983241558075,
0.3349902033805847,
0.3316609561443329,
0.3260059356689453,
0.32424432039260864,
0.3206062316894531,
0.31426557898521423,
0.3127653896808624,
0.31262922286987305,
0.31250447034835815,
0.31177398562431335,
0.31033921241760254,
0.31007489562034607,
0.30988338589668274,
0.3080539405345917,
0.30745869874954224,
0.30719631910324097,
0.3039420247077942
0.355109,
0.346087,
0.345258,
0.344846,
0.341137,
0.338498,
0.33499,
0.331661,
0.326006,
0.324244,
0.320606,
0.314266,
0.312765,
0.312629,
0.312504,
0.311774,
0.310339,
0.310075,
0.309883,
0.308054,
0.307459,
0.307196,
0.303942
],
"MultiSV": {},
"Query": "what is the phylum of",
Expand Down Expand Up @@ -64,39 +64,42 @@
},
"props_matching": {
"CosineScore": [
0.9999998807907104,
0.4752839505672455,
0.36154165863990784,
0.34200993180274963,
0.3293309211730957,
0.31845176219940186,
0.31466981768608093,
0.3083951473236084,
0.3079407513141632,
0.30463001132011414,
0.28270864486694336,
0.28187549114227295,
0.2808278203010559,
0.2804529666900635,
0.27429836988449097,
0.27150505781173706,
0.2707182466983795,
0.26954683661460876,
0.2599008083343506,
0.2599008083343506,
0.24827907979488373,
0.24662044644355774,
0.24632112681865692,
0.24616354703903198,
0.24212250113487244,
0.23135541379451752,
0.2296244502067566,
0.22773587703704834
1.0,
0.475284,
0.361542,
0.345431,
0.345431,
0.34201,
0.329331,
0.318452,
0.31467,
0.308395,
0.307941,
0.30463,
0.282709,
0.281875,
0.280828,
0.280453,
0.274298,
0.271505,
0.270718,
0.269547,
0.259901,
0.259901,
0.248279,
0.24662,
0.246321,
0.246164,
0.242123,
0.231355,
0.229624
],
"PROP": [
"phylum",
"chemblID",
"geneID",
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID",
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID",
"virusGenus",
"typeOfGene",
"fullName",
Expand All @@ -112,16 +115,15 @@
"virusHost",
"strandOrientation",
"ncbiDNASequenceName",
"<-geneSymbol{typeOf:GeneticVariantGeneAssociation}->referenceSNPClusterID",
"<-referenceSNPClusterID{typeOf:GeneticVariantGeneAssociation}->geneSymbol",
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID",
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID",
"ncbiProteinAccessionNumber",
"alleleType",
"hg38GenomicLocation",
"ofVirusSpecies",
"hg19GenomicLocation",
"ncbiTaxonID",
"antigenType",
"alleleOrigin"
"antigenType"
]
},
"query_detection_debug_logs": {
Expand Down

Large diffs are not rendered by default.

Loading

0 comments on commit 31a2b73

Please sign in to comment.