Commit 941ad17
first commit, moving project to this repo
0 parents
Showing 20 changed files with 1,300 additions and 0 deletions.
@@ -0,0 +1,13 @@
name: usgs_stac
channels:
  - conda-forge
dependencies:
  - python
  - pip
  - pdal
  - python-pdal
  - shapely
  - dask
  - pyproj
  - pystac
  - boto3
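This is a conda environment spec. Assuming it is saved as environment.yml (the file name is not shown in this diff), the environment can be created with "conda env create -f environment.yml" and activated with "conda activate usgs_stac".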
@@ -0,0 +1,32 @@
[project]
name = "usgs_stac"
requires-python = ">=3.9"
description = "Create STAC from USGS WESM JSON url."
readme = "README.md"
license = { text = "Apache-2.0" }
authors = [ { name = "Kyle Mann", email = "[email protected]" } ]
maintainers = [ { name = "Kyle Mann", email = "[email protected]" } ]
dynamic = ["version", "dependencies"]

[project.urls]
homepage = "https://github.com/hobuinc/silvimetric"
repository = "https://github.com/hobuinc/silvimetric"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.dynamic]
version = { attr = "usgs_stac.__version__" }
dependencies = { file = ["requirements.txt"] }

[build-system]
requires = ["setuptools>=64.0"]
build-backend = "setuptools.build_meta"

[project.scripts]
silvimetric = "silvimetric.cli.cli:cli"

[tool.pytest.ini_options]
testpaths = [ "tests" ]
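A note on usage: with this configuration, setuptools resolves the package version from usgs_stac.__version__ and the install requirements from requirements.txt, so an editable install from the repository root (for example, "pip install -e .") should work, assuming the src/ layout referenced by package-dir is present.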
@@ -0,0 +1,7 @@
# pdal-plugins>=1.6.0
# pdal>=3.4.5
# dask>=2024.9.0
# pyproj>=3.6.1
# pystac>=1.10.1
# shapely>=2.0.6
# aiohttp
@@ -0,0 +1 @@
__version__ = '0.0.1'
7 binary files not shown.
@@ -0,0 +1,100 @@
import os

from urllib.parse import urljoin
from functools import partial
from operator import is_not

import pystac

from dask.diagnostics import ProgressBar
from dask import compute
from distributed import get_client, Client

from .metadata_common import logger, read_json
from .collection import MetaCollection


def do_one(col, cat, idx, total):
    # Build one collection's children, attach them, and write it to disk.
    logger.info(f"{col.id} ({idx}/{total})")
    links: list[pystac.Link] = compute(*col.set_children(), scheduler='threading')
    col.collection.add_links(filter(partial(is_not, None), links))
    col.collection.set_root(cat)
    col.save_local()

    del col


class MetaCatalog:
    """
    MetaCatalog reads the WESM JSON file at the given url and creates a list of
    MetaCollections, which are processed in parallel with dask.
    """
    def __init__(self, url: str, dst: str, href: str, update=False) -> None:
        self.update = update
        self.url = url

        if not str(dst).endswith('/'):
            self.dst = str(dst) + '/'
        else:
            self.dst = str(dst)

        self.children = []
        self.catalog = pystac.Catalog(id='WESM Catalog',
            description='Catalog representing WESM metadata and associated'
            ' point cloud files.')
        self.catalog.set_root(self.catalog)
        self.obj: dict = read_json(self.url)
        self.href = href
        self.catalog.set_self_href(urljoin(href, "catalog.json"))

    def save_local(self):
        """
        Go through the local dst folder and add all collections as child links
        in the catalog. The links reference the destination href rather than
        the local dst path.
        """
        p = os.path.join(self.dst, "catalog.json")

        # Find the collections available in this dst directory
        _, dirs, _ = next(os.walk(self.dst))
        for d in dirs:
            newp = os.path.join(self.dst, d)
            _, _, files = next(os.walk(newp))
            if 'collection.json' in files:
                col_href = urljoin(self.href,
                    os.path.join(d, 'collection.json'))
                link = pystac.Link(rel='child', target=col_href,
                    media_type='application/json', title=d)
                self.catalog.add_link(link)

        self.catalog.save_object(True, p)

    def set_children(self):
        """
        Add child STAC Collections to the overall STAC Catalog.
        """
        meta_collections = [
            MetaCollection(o, self.href, self.dst, self.update)
            for o in self.obj.values()
        ]
        count = len(meta_collections)

        try:
            client = get_client()
        except ValueError:
            # No active distributed client; start a local one.
            client = Client()

        futures = []
        for idx, c in enumerate(meta_collections):
            c: MetaCollection
            futures.append(client.submit(
                do_one, col=c, cat=self.catalog, idx=idx, total=count))

        with ProgressBar():
            client.gather(futures)

    def get_stac(self):
        """
        Return the overall STAC Catalog.
        """
        return self.catalog
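A minimal usage sketch for MetaCatalog, assuming the class above is importable as usgs_stac.catalog (file names are not shown in this diff) and that a local dask distributed Client can be started; the WESM url and output locations are placeholders, not values from this commit.

from usgs_stac.catalog import MetaCatalog  # assumed module path

# Placeholder locations; substitute the real WESM JSON url and output targets.
cat = MetaCatalog(
    url="https://example.com/WESM.json",  # WESM JSON url
    dst="/tmp/wesm_stac/",                # local directory for saved STAC objects
    href="https://example.com/stac/",     # public href written into STAC links
)
cat.set_children()  # builds each MetaCollection in parallel on a dask Client
cat.save_local()    # links the saved collections into catalog.json and writes it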
@@ -0,0 +1,127 @@
import os
import json

from datetime import datetime
from typing import Any
from urllib.parse import urljoin
from urllib.request import urlopen

import pystac

from dask import delayed

from .metadata_common import PCParser, WesmMetadata, session
from .item import MetaItem


class MetaCollection:
    """
    MetaCollection reads one WESM JSON entry, pulls the relevant info from it,
    and creates a list of MetaItems from the directory of LAZ files found at
    the point cloud link in that entry.
    """

    def __init__(self, obj: dict[str, Any], href: str, dst: str, update=False):
        self.update = update
        self.meta = WesmMetadata(**obj)
        self.id = self.meta.FESMProjectID
        self.root_dst = dst
        self.root_href = href

        # A missing lpc_link means there is no point cloud directory to crawl.
        self.valid = self.meta.lpc_link is not None
        lpc_link = self.meta.lpc_link or ''

        self.dl_link_txt = urljoin(lpc_link, '0_file_download_links.txt')
        self.pc_dir = urljoin(lpc_link, 'LAZ/')
        self.sidecar_dir = urljoin(lpc_link, 'metadata/')
        self.href = urljoin(href, os.path.join(self.id, "collection.json"))
        self.dst = os.path.join(dst, self.id, "collection.json")
        extra_fields = {
            "seamless_category": self.meta.seamless_category,
            "seamless_reason": self.meta.seamless_reason,
            "onemeter_category": self.meta.onemeter_category,
            "onemeter_reason": self.meta.onemeter_reason,
            "lpc_category": self.meta.lpc_category,
            "lpc_reason": self.meta.lpc_reason,
            "ql": self.meta.ql
        }

        e = pystac.Extent(
            spatial=pystac.SpatialExtent(bboxes=self.meta.bbox),
            temporal=pystac.TemporalExtent(intervals=[
                datetime.fromisoformat(self.meta.collect_start),
                datetime.fromisoformat(self.meta.collect_end)
            ])
        )
        self.collection = pystac.Collection(
            id=self.id,
            description=f'STAC Collection for USGS Project {self.id} derived'
                ' from WESM JSON.',
            extent=e,
            extra_fields=extra_fields
        )
        if self.meta.metadata_link:
            meta_asset = pystac.Asset(href=self.meta.metadata_link, title='metadata',
                description='Metadata', media_type='text/html')
            self.collection.add_asset(key='metadata', asset=meta_asset)
        if self.meta.lpc_link:
            lpc_asset = pystac.Asset(href=self.meta.lpc_link, title='pointcloud_links',
                description='Pointcloud Links Page', media_type='text/html')
            self.collection.add_asset(key='pointcloud_links', asset=lpc_asset)
        if self.meta.sourcedem_link:
            dem_asset = pystac.Asset(href=self.meta.sourcedem_link, title='sourcedem_links',
                description='DEM Raster Links', media_type='text/html')
            self.collection.add_asset(key='sourcedem_links', asset=dem_asset)

        self.collection.set_self_href(self.href)
        self.pc_paths = []
        self.sidecar_paths = []
        self.link = pystac.Link(rel='collection', target=self.href,
            media_type='application/json')

    def save_local(self) -> None:
        self.collection.save_object(True, self.dst)

    def set_paths(self) -> None:
        # Grab point cloud paths and sidecar metadata paths.
        try:
            # Preferred source: the plain-text download links file.
            self.pc_paths = [p.decode('utf-8').rstrip() for p in urlopen(self.dl_link_txt)]
            self.sidecar_paths = [p.replace('.laz', '.xml').replace('LAZ', 'metadata')
                for p in self.pc_paths]
        except Exception:
            try:
                # Fall back to scraping the LAZ directory listing.
                res = session.get(self.pc_dir)

                parser = PCParser()
                parser.feed(res.text)
                self.pc_paths = [urljoin(self.pc_dir, p) for p in parser.messages]

                meta_messages = [m.replace('.laz', '.xml') for m in parser.messages]
                self.sidecar_paths = [
                    urljoin(self.sidecar_dir, m) for m in meta_messages
                ]
            except Exception:
                self.valid = False

    def set_children(self) -> list:
        """
        Return delayed tasks that build the child MetaItems for this
        project's STAC Collection.
        """
        if not self.valid:
            return []

        self.set_paths()

        pairs = zip(self.pc_paths, self.sidecar_paths)
        obj_list: list[MetaItem] = [
            MetaItem(p, s, self.root_dst, self.root_href, self.meta, self.update)
            for p, s in pairs
        ]

        return [delayed(lambda i: i.process())(item) for item in obj_list]

    def get_stac(self) -> pystac.Collection:
        """
        Return the project STAC Collection.
        """
        return self.collection

    def __repr__(self):
        # WesmMetadata instances are not directly JSON serializable;
        # dump their attribute dict instead.
        return json.dumps(vars(self.meta))
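A sketch of driving a single MetaCollection outside of MetaCatalog, assuming the modules are importable under the usgs_stac package as the relative imports above suggest; the url and paths are placeholders, not values from this commit.

from dask import compute

from usgs_stac.metadata_common import read_json  # assumed module path
from usgs_stac.collection import MetaCollection  # assumed module path

wesm = read_json("https://example.com/WESM.json")  # placeholder WESM JSON url
entry = next(iter(wesm.values()))                  # one project record

col = MetaCollection(entry, href="https://example.com/stac/", dst="/tmp/wesm_stac/")
if col.valid:
    # Realize the delayed MetaItem tasks and attach the resulting links,
    # mirroring what do_one() does for each collection in the catalog module.
    links = compute(*col.set_children(), scheduler='threading')
    col.collection.add_links([l for l in links if l is not None])
    col.save_local()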