Commit

Merge branch 'main' into task/1133-pgp-get-gis-type-change
dcdenu4 authored Dec 14, 2022
2 parents 96db291 + 24a954b commit c81205c
Showing 10 changed files with 159 additions and 120 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
@@ -58,6 +58,8 @@ Unreleased Changes
now reprojected to the ``lulc_cur_path`` raster. This fixes a bug where
rasters with a different SRS would appear to not intersect the
``lulc_cur_path`` even if they did. (https://github.com/natcap/invest/issues/1093)
* Paths in the threats table may now be either absolute or relative to the
threats table.
* HRA
* Fixed a regression relative to InVEST 3.9.0 outputs where spatial
criteria vectors were being rasterized with the ``ALL_TOUCHED=TRUE``
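
The HISTORY.rst entry about threats-table paths describes the behavior this merge standardizes: a path in a model table may be absolute, or relative to the table that references it. A minimal sketch of that resolution rule, with a hypothetical helper and file locations (not the project's actual ``utils.expand_path`` implementation):

import os

def resolve_table_path(path, table_path):
    # Hypothetical helper mirroring the documented rule: absolute paths pass
    # through unchanged; relative paths resolve against the table's directory.
    if os.path.isabs(path):
        return path
    return os.path.abspath(
        os.path.join(os.path.dirname(table_path), path))

# A threats table at /data/threats.csv referencing rasters/crops_c.tif
# resolves to /data/rasters/crops_c.tif, while /gis/crops_c.tif is left as-is.
print(resolve_table_path('rasters/crops_c.tif', '/data/threats.csv'))
print(resolve_table_path('/gis/crops_c.tif', '/data/threats.csv'))
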
2 changes: 1 addition & 1 deletion Makefile
@@ -10,7 +10,7 @@ GIT_TEST_DATA_REPO_REV := f5e651c9ba0a012dc033b9c1d12d51e42f6f87b0

GIT_UG_REPO := https://github.com/natcap/invest.users-guide
GIT_UG_REPO_PATH := doc/users-guide
GIT_UG_REPO_REV := fd7140b4181005bb620b620a701b4cefe1b64a7b
GIT_UG_REPO_REV := b53e67cc61fa91e817a9c50249687e246f5db702

ENV = "./env"
ifeq ($(OS),Windows_NT)
12 changes: 5 additions & 7 deletions src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py
@@ -2127,17 +2127,15 @@ def _extract_snapshots_from_table(csv_path):
paths. These raster paths will be absolute paths.
"""
table = utils.read_csv_to_dataframe(csv_path, index_col=False)
table.columns = table.columns.str.lower()
table = utils.read_csv_to_dataframe(
csv_path, to_lower=True, index_col=False,
expand_path_cols=['raster_path'])

output_dict = {}
table.set_index("snapshot_year", drop=False, inplace=True)
for index, row in table.iterrows():
raster_path = row['raster_path']
if not os.path.isabs(raster_path):
raster_path = os.path.join(os.path.dirname(csv_path), raster_path)
output_dict[int(index)] = os.path.abspath(raster_path)

for index, row in table.iterrows():
output_dict[int(index)] = row['raster_path']
return output_dict


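
The coastal_blue_carbon change above replaces per-row ``isabs``/``join``/``abspath`` logic with the new ``expand_path_cols`` argument to ``utils.read_csv_to_dataframe``. A pandas-only approximation of what that argument is assumed to do for a snapshots table (the CSV contents and location are illustrative, and this is not the natcap.invest implementation):

import io
import os
import pandas as pd

csv_path = '/data/cbc/snapshots.csv'  # hypothetical table location
csv_text = 'snapshot_year,raster_path\n2000,lulc_2000.tif\n2010,/gis/lulc_2010.tif\n'

table = pd.read_csv(io.StringIO(csv_text))
table.columns = table.columns.str.lower()
for col in ['raster_path']:
    # Expand each listed path column relative to the CSV's directory.
    table[col] = table[col].apply(
        lambda p: p if os.path.isabs(p)
        else os.path.abspath(os.path.join(os.path.dirname(csv_path), p)))

# Mirrors _extract_snapshots_from_table's output:
# {2000: '/data/cbc/lulc_2000.tif', 2010: '/gis/lulc_2010.tif'}
print({int(year): path
       for year, path in zip(table['snapshot_year'], table['raster_path'])})
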
29 changes: 11 additions & 18 deletions src/natcap/invest/coastal_vulnerability.py
@@ -1929,27 +1929,25 @@ def _schedule_habitat_tasks(
"""
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, to_lower=True)
habitat_table_path, to_lower=True, expand_path_cols=['path'])
habitat_dataframe = habitat_dataframe.rename(
columns={'protection distance (m)': 'distance'})

habitat_task_list = []
habitat_pickles_list = []
for habitat_row in habitat_dataframe.itertuples():
base_habitat_path = _sanitize_path(
habitat_table_path, habitat_row.path)
target_habitat_pickle_path = os.path.join(
working_dir, f'{habitat_row.id}{file_suffix}.pickle')
habitat_pickles_list.append(target_habitat_pickle_path)
gis_type = pygeoprocessing.get_gis_type(base_habitat_path)
gis_type = pygeoprocessing.get_gis_type(habitat_row.path)
if gis_type == 2:
habitat_task_list.append(task_graph.add_task(
func=search_for_vector_habitat,
args=(base_shore_point_vector_path,
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
base_habitat_path,
habitat_row.path,
target_habitat_pickle_path),
target_path_list=[target_habitat_pickle_path],
task_name=f'searching for {habitat_row.id}'))
@@ -1961,7 +1959,7 @@ def _schedule_habitat_tasks(
habitat_row.distance,
habitat_row.rank,
habitat_row.id,
base_habitat_path,
habitat_row.path,
target_habitat_pickle_path,
model_resolution,
file_suffix),
@@ -3038,13 +3036,6 @@ def _copy_point_vector_geom_to_gpkg(
base_vector = None


def _sanitize_path(base_path, raw_path):
"""Return ``raw_path`` if absolute, or make absolute relative to ``base_path``."""
if os.path.isabs(raw_path):
return raw_path
return os.path.join(os.path.dirname(base_path), raw_path)


def _make_logger_callback(message, logger):
"""Build a timed logger callback that prints ``message`` replaced.
@@ -3086,15 +3077,17 @@ def _validate_habitat_table_paths(habitat_table_path):
Raises:
ValueError if any vector in the ``path`` column cannot be opened.
"""
habitat_dataframe = utils.read_csv_to_dataframe(habitat_table_path)
habitat_dataframe = utils.read_csv_to_dataframe(
habitat_table_path, expand_path_cols=['path'])
bad_paths = []
for habitat_row in habitat_dataframe.itertuples():
base_habitat_path = _sanitize_path(
habitat_table_path, habitat_row.path)
try:
gis_type = pygeoprocessing.get_gis_type(base_habitat_path)
gis_type = pygeoprocessing.get_gis_type(habitat_row.path)
if not gis_type:
# Treating an unknown GIS type the same as a bad filepath
bad_paths.append(habitat_row.path)
except ValueError:
bad_paths.append(base_habitat_path)
bad_paths.append(habitat_row.path)

if bad_paths:
raise ValueError(
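
``_validate_habitat_table_paths`` above treats a ``ValueError`` from ``pygeoprocessing.get_gis_type`` and a falsy (unknown) return value the same way: the path is reported as bad. A small sketch of that validation pattern applied to an arbitrary list of paths (requires pygeoprocessing; the example paths are placeholders):

import pygeoprocessing

def find_bad_gis_paths(paths):
    """Return the subset of paths that are not readable rasters or vectors."""
    bad_paths = []
    for path in paths:
        try:
            gis_type = pygeoprocessing.get_gis_type(path)
        except ValueError:
            bad_paths.append(path)
            continue
        if not gis_type:
            # Treat an unknown GIS type the same as a bad filepath.
            bad_paths.append(path)
    return bad_paths

# e.g. find_bad_gis_paths(['/data/habitats/kelp.shp', '/data/missing.tif'])
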
39 changes: 12 additions & 27 deletions src/natcap/invest/habitat_quality.py
@@ -278,14 +278,11 @@ def execute(args):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True).items()}
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)

# Get the directory path for the Threats CSV, used for locating threat
# rasters, which are relative to this path
threat_csv_dirpath = os.path.dirname(args['threats_table_path'])

half_saturation_constant = float(args['half_saturation_constant'])

# Dictionary for reclassing habitat values
@@ -320,24 +317,17 @@ def execute(args):
# raster which should be found relative to the Threat CSV
for threat in threat_dict:
LOGGER.debug(f"Validating path for threat: {threat}")
# Build absolute threat path from threat table
threat_table_path_col = _THREAT_SCENARIO_MAP[lulc_key]
threat_path_relative = (
threat_dict[threat][threat_table_path_col])
threat_path = os.path.join(
threat_csv_dirpath, threat_path_relative)

threat_path_err_msg = (
'There was an Error locating a threat raster from '
'the path in CSV for column: '
f'{_THREAT_SCENARIO_MAP[lulc_key]} and threat: '
f'{threat}. The path in the CSV column should be '
'relative to the threat CSV table.')
threat_path = threat_dict[threat][threat_table_path_col]

threat_validate_result = _validate_threat_path(
threat_path, lulc_key)
if threat_validate_result == 'error':
raise ValueError(threat_path_err_msg)
raise ValueError(
'There was an Error locating a threat raster from '
'the path in CSV for column: '
f'{_THREAT_SCENARIO_MAP[lulc_key]} and threat: '
f'{threat}.')

threat_path = threat_validate_result

@@ -360,7 +350,7 @@ def execute(args):
task_name=f'check_threat_values{lulc_key}_{threat}')
threat_values_task_lookup[threat_values_task.task_name] = {
'task': threat_values_task,
'path': threat_path_relative,
'path': threat_path,
'table_col': threat_table_path_col}

LOGGER.info("Checking threat raster values are valid ( 0 <= x <= 1 ).")
@@ -1092,7 +1082,8 @@ def validate(args, limit_to=None):
# Get CSVs as dictionaries and ensure the key is a string for threats.
threat_dict = {
str(key): value for key, value in utils.build_lookup_from_csv(
args['threats_table_path'], 'THREAT', to_lower=True).items()}
args['threats_table_path'], 'THREAT', to_lower=True,
expand_path_cols=['cur_path', 'fut_path', 'base_path']).items()}
sensitivity_dict = utils.build_lookup_from_csv(
args['sensitivity_table_path'], 'LULC', to_lower=True)

@@ -1111,10 +1102,6 @@ def validate(args, limit_to=None):

invalid_keys.add('sensitivity_table_path')

# Get the directory path for the Threats CSV, used for locating threat
# rasters, which are relative to this path
threat_csv_dirpath = os.path.dirname(args['threats_table_path'])

# Validate threat raster paths and their nodata values
bad_threat_paths = []
duplicate_paths = []
@@ -1134,9 +1121,7 @@ def validate(args, limit_to=None):
break

# Threat path from threat CSV is relative to CSV
threat_path = os.path.join(
threat_csv_dirpath,
threat_dict[threat][threat_table_path_col])
threat_path = threat_dict[threat][threat_table_path_col]

threat_validate_result = _validate_threat_path(
threat_path, lulc_key)
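
With ``expand_path_cols=['cur_path', 'fut_path', 'base_path']``, the habitat_quality threat lookup is assumed to come back with its path columns already absolute, so ``execute`` and ``validate`` can pass them straight to ``_validate_threat_path`` with no ``os.path.join``. A hypothetical example of the resulting structure (the non-path column names and the scenario-to-column map below are illustrative, not copied from the model):

# Threats table assumed to live at /data/threats.csv and to reference
# crops_c.tif and crops_f.tif stored next to it.
threat_dict = {
    'crops': {
        'threat': 'crops',
        'max_dist': 8.0,
        'weight': 0.7,
        'cur_path': '/data/crops_c.tif',
        'fut_path': '/data/crops_f.tif',
    },
}

# Illustrative stand-in for _THREAT_SCENARIO_MAP: scenario suffix -> column.
scenario_map = {'_c': 'cur_path', '_f': 'fut_path', '_b': 'base_path'}
print(threat_dict['crops'][scenario_map['_c']])  # /data/crops_c.tif
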
31 changes: 10 additions & 21 deletions src/natcap/invest/hra.py
@@ -1782,10 +1782,13 @@ def _open_table_as_dataframe(table_path, **kwargs):
if extension in {'.xls', '.xlsx'}:
excel_df = pandas.read_excel(table_path, **kwargs)
excel_df.columns = excel_df.columns.str.lower()
excel_df['path'] = excel_df['path'].apply(
lambda p: utils.expand_path(p, table_path))
return excel_df
else:
return utils.read_csv_to_dataframe(
table_path, sep=None, to_lower=True, engine='python', **kwargs)
table_path, sep=None, to_lower=True, engine='python',
expand_path_cols=['path'], **kwargs)


def _parse_info_table(info_table_path):
@@ -1814,15 +1817,6 @@ def _parse_info_table(info_table_path):
table = table.set_index('name')
table = table.rename(columns={'stressor buffer (meters)': 'buffer'})

def _make_abspath(row):
path = row['path'].replace('\\', '/')
if os.path.isabs(path):
return path
return os.path.join(
os.path.dirname(info_table_path), path).replace('\\', '/')

table['path'] = table.apply(lambda row: _make_abspath(row), axis=1)

# Drop the buffer column from the habitats list; we don't need it.
habitats = table.loc[table['type'] == 'habitat'].drop(
columns=['type', 'buffer']).to_dict(orient='index')
@@ -1960,11 +1954,8 @@ def _parse_criteria_table(criteria_table_path, target_composite_csv_path):
except ValueError:
# If we can't cast it to a float, assume it's a string path
# to a raster or vector.
attribute_value = attribute_value.replace('\\', '/')
if not os.path.isabs(attribute_value):
attribute_value = os.path.join(
os.path.dirname(criteria_table_path),
attribute_value).replace('\\', '/')
attribute_value = utils.expand_path(
attribute_value, criteria_table_path)

try:
_ = pygeoprocessing.get_gis_type(attribute_value)
@@ -2032,7 +2023,7 @@ def _calculate_decayed_distance(stressor_raster_path, decay_type,
# easier to compute.

def _no_buffer(stressor_presence_array):
"""Trsnslate a stressor presence array to match an EDT.
"""Translate a stressor presence array to match an EDT.
Args:
stressor_presence_array (numpy.array): A numpy byte array with
@@ -2431,10 +2422,8 @@ def _override_datastack_archive_criteria_table_path(
# When value is obviously not a number.
pass

if not os.path.isabs(value):
value = os.path.join(
os.path.dirname(criteria_table_path), value)
value = value.replace("\\", "/")
# Expand the path if it's not absolute
value = utils.expand_path(value, criteria_table_path)
if not os.path.exists(value):
LOGGER.warning(f'File not found: {value}')
continue
@@ -2452,7 +2441,7 @@ def _override_datastack_archive_criteria_table_path(
new_path = datastack._copy_spatial_files(
value, dir_for_this_spatial_data)
criteria_table_array[row, col] = new_path
known_files[value] = new_path.replace('\\', '/')
known_files[value] = new_path

target_output_path = os.path.join(data_dir, f'{args_key}.csv')
numpy.savetxt(target_output_path, criteria_table_array, delimiter=',',
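
In ``_open_table_as_dataframe`` at the top of the hra.py changes, Excel inputs cannot go through ``utils.read_csv_to_dataframe``, so the ``path`` column is expanded explicitly with ``utils.expand_path`` via ``DataFrame.apply``. A self-contained sketch of that pattern, where ``expand_path`` is a stand-in with the behavior assumed from this diff rather than the natcap.invest function:

import os
import pandas as pd

def expand_path(path, base_table_path):
    # Stand-in: leave absolute paths alone, resolve relative ones against the
    # directory of the table that references them.
    if os.path.isabs(path):
        return path
    return os.path.join(os.path.dirname(base_table_path), path)

table_path = '/project/info_table.xlsx'  # hypothetical workbook location
excel_df = pd.DataFrame({'NAME': ['eelgrass'], 'PATH': ['habitat/eelgrass.shp']})
excel_df.columns = excel_df.columns.str.lower()
excel_df['path'] = excel_df['path'].apply(lambda p: expand_path(p, table_path))
print(excel_df['path'][0])  # /project/habitat/eelgrass.shp
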
28 changes: 10 additions & 18 deletions src/natcap/invest/recreation/recmodel_client.py
@@ -737,15 +737,13 @@ def _schedule_predictor_data_processing(
}

predictor_table = utils.build_lookup_from_csv(
predictor_table_path, 'id')
predictor_table_path, 'id', expand_path_cols=['path'])
predictor_task_list = []
predictor_json_list = [] # tracks predictor files to add to shp

for predictor_id in predictor_table:
LOGGER.info(f"Building predictor {predictor_id}")

predictor_path = _sanitize_path(
predictor_table_path, predictor_table[predictor_id]['path'])
predictor_type = predictor_table[predictor_id]['type'].strip()
if predictor_type.startswith('raster'):
# type must be one of raster_sum or raster_mean
@@ -755,8 +753,8 @@
predictor_json_list.append(predictor_target_path)
predictor_task_list.append(task_graph.add_task(
func=_raster_sum_mean,
args=(predictor_path, raster_op_mode, response_vector_path,
predictor_target_path),
args=(predictor_table[predictor_id]['path'], raster_op_mode,
response_vector_path, predictor_target_path),
target_path_list=[predictor_target_path],
task_name=f'predictor {predictor_id}'))
# polygon types are a special case because the polygon_area
@@ -768,7 +766,7 @@
predictor_task_list.append(task_graph.add_task(
func=_polygon_area,
args=(predictor_type, response_polygons_pickle_path,
predictor_path, predictor_target_path),
predictor_table[predictor_id]['path'],
predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
task_name=f'predictor {predictor_id}'))
@@ -778,7 +777,8 @@
predictor_json_list.append(predictor_target_path)
predictor_task_list.append(task_graph.add_task(
func=predictor_functions[predictor_type],
args=(response_polygons_pickle_path, predictor_path,
args=(response_polygons_pickle_path,
predictor_table[predictor_id]['path'],
predictor_target_path),
target_path_list=[predictor_target_path],
dependent_task_list=[prepare_response_polygons_task],
@@ -1499,7 +1499,8 @@ def _validate_same_projection(base_vector_path, table_path):
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
data_paths = utils.read_csv_to_dataframe(
table_path, to_lower=True, squeeze=True)['path'].tolist()
table_path, to_lower=True, squeeze=True, expand_path_cols=['path']
)['path'].tolist()

base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR)
base_layer = base_vector.GetLayer()
@@ -1508,8 +1509,7 @@
base_vector = None

invalid_projections = False
for raw_path in data_paths:
path = _sanitize_path(table_path, raw_path)
for path in data_paths:

def error_handler(err_level, err_no, err_msg):
"""Empty error handler to avoid stderr output."""
@@ -1590,14 +1590,6 @@ def delay_op(last_time, time_delay, func):
return last_time


def _sanitize_path(base_path, raw_path):
"""Return ``path`` if absolute, or make absolute local to ``base_path``."""
if os.path.isabs(raw_path):
return raw_path
else: # assume relative path w.r.t. the response table
return os.path.join(os.path.dirname(base_path), raw_path)


@validation.invest_validator
def validate(args, limit_to=None):
"""Validate args to ensure they conform to ``execute``'s contract.
