first commit, moving project to this repo
kylemann16 committed Nov 7, 2024
0 parents commit 941ad17
Showing 20 changed files with 1,300 additions and 0 deletions.
13 changes: 13 additions & 0 deletions environment.yml
@@ -0,0 +1,13 @@
name: usgs_stac
channels:
- conda-forge
dependencies:
- python
- pip
- pdal
- python-pdal
- shapely
- dask
- pyproj
- pystac
- boto3
32 changes: 32 additions & 0 deletions pyproject.toml
@@ -0,0 +1,32 @@
[project]
name = "usgs_stac"
requires-python = ">=3.9"
description = "Create STAC from USGS WESM JSON url."
readme = "README.md"
license = { text = "Apache-2.0" }
authors = [ { name = "Kyle Mann", email = "[email protected]" } ]
maintainers = [ { name = "Kyle Mann", email = "[email protected]" } ]
dynamic = ["version", "dependencies"]

[project.urls]
homepage = "https://github.com/hobuinc/silvimetric"
repository = "https://github.com/hobuinc/silvimetric"

[tool.setuptools]
package-dir = {"" = "src"}

[tool.setuptools.dynamic]
version = { attr = "usgs_stac.__version__" }
dependencies = {file = ["requirements.txt"]}

[build-system]
requires = ["setuptools>=64.0"]
build-backend = "setuptools.build_meta"

[project.scripts]
silvimetric = "silvimetric.cli.cli:cli"


[tool.pytest.ini_options]
testpaths = [ "tests" ]
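
The version is resolved dynamically from usgs_stac.__version__ (see src/usgs_stac/__init__.py below). A quick sanity-check sketch, assuming the package has been installed into the active environment (for example with an editable pip install); nothing here is part of the commit itself:

from importlib.metadata import version

import usgs_stac

# The installed distribution's version should match the attribute that
# [tool.setuptools.dynamic] points at.
assert version("usgs_stac") == usgs_stac.__version__  # '0.0.1' in this commit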
7 changes: 7 additions & 0 deletions requirements.txt
@@ -0,0 +1,7 @@
# pdal-plugins>=1.6.0
# pdal>=3.4.5
# dask>=2024.9.0
# pyproj>=3.6.1
# pystac>=1.10.1
# shapely>=2.0.6
# aiohttp
1 change: 1 addition & 0 deletions src/usgs_stac/__init__.py
@@ -0,0 +1 @@
__version__='0.0.1'
Binary file added src/usgs_stac/__pycache__/__init__.cpython-312.pyc (binary file not shown)
Binary file added src/usgs_stac/__pycache__/item.cpython-312.pyc (binary file not shown)
Binary file added src/usgs_stac/__pycache__/main.cpython-312.pyc (binary file not shown)
Binary file added src/usgs_stac/__pycache__/metadata.cpython-312.pyc (binary file not shown)
Three additional __pycache__ binary files added (names not captured in this listing; binary files not shown)
100 changes: 100 additions & 0 deletions src/usgs_stac/catalog.py
@@ -0,0 +1,100 @@
import os

from urllib.parse import urljoin
from functools import partial
from operator import is_not

import pystac

from dask.diagnostics import ProgressBar
from dask import compute
from distributed import get_client, Client

from .metadata_common import logger, read_json
from .collection import MetaCollection


def do_one(col, cat, idx, total):
    # Build one MetaCollection's children, attach it to the catalog root,
    # and save it locally. Submitted to a dask worker by MetaCatalog.set_children.
    logger.info(f"{col.id} ({idx}/{total})")
    links: list[pystac.Link] = compute(*col.set_children(), scheduler='threading')
    col.collection.add_links(filter(partial(is_not, None), links))
    col.collection.set_root(cat)
    col.save_local()

    del col

class MetaCatalog:
    """
    MetaCatalog reads the WESM JSON file at the given url and creates a
    MetaCollection for each project it contains. A dask distributed client
    maps over the collections in parallel.
    """
    def __init__(self, url: str, dst: str, href: str, update=False) -> None:
        self.update = update
        self.url = url

        # Ensure the destination path ends with a trailing slash.
        if not str(dst).endswith('/'):
            self.dst = str(dst) + '/'
        else:
            self.dst = str(dst)

        self.children = []
        self.catalog = pystac.Catalog(id='WESM Catalog',
            description='Catalog representing WESM metadata and associated'
            ' point cloud files.')
        self.catalog.set_root(self.catalog)
        self.obj: dict = read_json(self.url)
        self.href = href
        self.catalog.set_self_href(urljoin(href, "catalog.json"))

    def save_local(self):
        """
        Go through the local dst folder and add every collection found there
        as a child link of the catalog. Children are referenced from the dst
        href rather than the local dst path.
        """
        p = os.path.join(self.dst, "catalog.json")

        # Find the collections available in this dst directory.
        _, dirs, _ = next(os.walk(self.dst))
        for d in dirs:
            newp = os.path.join(self.dst, d)
            _, _, files = next(os.walk(newp))
            if 'collection.json' in files:
                col_href = urljoin(self.href,
                    os.path.join(d, 'collection.json'))
                link = pystac.Link(rel='child', target=col_href,
                    media_type='application/json', title=d)
                self.catalog.add_link(link)

        self.catalog.save_object(True, p)

    def set_children(self):
        """
        Add child STAC Collections to the overall STAC Catalog.
        """
        meta_collections = [
            MetaCollection(o, self.href, self.dst, self.update)
            for o in self.obj.values()
        ]
        count = len(meta_collections)

        # Reuse a running distributed client if there is one; otherwise
        # start a local cluster.
        try:
            client = get_client()
        except ValueError:
            client = Client()

        futures = []
        for idx, c in enumerate(meta_collections):
            futures.append(
                client.submit(do_one, col=c, cat=self.catalog, idx=idx, total=count))

        with ProgressBar():
            client.gather(futures)

    def get_stac(self):
        """
        Return the overall STAC Catalog.
        """
        return self.catalog
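
Taken together, MetaCatalog is driven in three steps: construct it against the WESM JSON, fan out the collections with set_children, then write the root catalog with save_local. A minimal driver sketch follows; the WESM url, local output directory, and public href are placeholders, not values from this commit:

from usgs_stac.catalog import MetaCatalog

WESM_URL = "https://example.com/wesm.json"       # placeholder WESM JSON url
LOCAL_DST = "/tmp/wesm_stac/"                    # local directory the JSON files are written to
PUBLIC_HREF = "https://example.com/wesm_stac/"   # href the catalog will be served from

cat = MetaCatalog(url=WESM_URL, dst=LOCAL_DST, href=PUBLIC_HREF, update=False)
cat.set_children()   # one dask future per collection; each writes <dst>/<project>/collection.json
cat.save_local()     # link the written collections and save <dst>/catalog.json
print(cat.get_stac().id)
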
127 changes: 127 additions & 0 deletions src/usgs_stac/collection.py
@@ -0,0 +1,127 @@
import os
import json

from typing import Any
from datetime import datetime
from urllib.parse import urljoin
from urllib.request import urlopen

import pystac

from dask import delayed

from .metadata_common import PCParser, WesmMetadata, session
from .item import MetaItem

class MetaCollection:
    """
    MetaCollection reads one WESM JSON record, pulls the relevant fields from
    it, and creates a list of MetaItems from the directory of laz files found
    at the record's point cloud link.
    """

    def __init__(self, obj: dict[str, Any], href: str, dst: str, update=False):
        self.update = update
        self.meta = WesmMetadata(**obj)
        self.id = self.meta.FESMProjectID
        self.root_dst = dst
        self.root_href = href

        self.valid = True
        if self.meta.lpc_link is None:
            self.valid = False

        self.dl_link_txt = urljoin(self.meta.lpc_link, '0_file_download_links.txt')
        self.pc_dir = urljoin(self.meta.lpc_link, 'LAZ/')
        self.sidecar_dir = urljoin(self.meta.lpc_link, 'metadata/')
        self.href = urljoin(href, os.path.join(self.id, "collection.json"))
        self.dst = os.path.join(dst, self.id, "collection.json")
        extra_fields = {
            "seamless_category": self.meta.seamless_category,
            "seamless_reason": self.meta.seamless_reason,
            "onemeter_category": self.meta.onemeter_category,
            "onemeter_reason": self.meta.onemeter_reason,
            "lpc_category": self.meta.lpc_category,
            "lpc_reason": self.meta.lpc_reason,
            "ql": self.meta.ql
        }

        e = pystac.Extent(
            spatial=pystac.SpatialExtent(bboxes=self.meta.bbox),
            temporal=pystac.TemporalExtent(intervals=[
                datetime.fromisoformat(self.meta.collect_start),
                datetime.fromisoformat(self.meta.collect_end)
            ])
        )
        self.collection = pystac.Collection(
            id=self.id,
            description=f'STAC Collection for USGS Project {self.id} derived'
            ' from WESM JSON.',
            extent=e,
            extra_fields=extra_fields
        )
        if self.meta.metadata_link:
            meta_asset = pystac.Asset(href=self.meta.metadata_link, title='metadata',
                description='Metadata', media_type='text/html')
            self.collection.add_asset(key='metadata', asset=meta_asset)
        if self.meta.lpc_link:
            lpc_asset = pystac.Asset(href=self.meta.lpc_link, title='pointcloud_links',
                description='Pointcloud Links Page', media_type='text/html')
            self.collection.add_asset(key='pointcloud_links', asset=lpc_asset)
        if self.meta.sourcedem_link:
            dem_asset = pystac.Asset(href=self.meta.sourcedem_link, title='sourcedem_links',
                description='DEM Raster Links', media_type='text/html')
            self.collection.add_asset(key='sourcedem_links', asset=dem_asset)

        self.collection.set_self_href(self.href)
        self.pc_paths = []
        self.sidecar_paths = []
        self.link = pystac.Link(rel='collection', target=self.href, media_type='application/json')

    def save_local(self) -> None:
        self.collection.save_object(True, self.dst)

    def set_paths(self) -> None:
        # Grab point cloud paths and sidecar metadata paths. Prefer the
        # 0_file_download_links.txt listing; fall back to scraping the LAZ
        # directory listing if it isn't available.
        try:
            self.pc_paths = [p.decode('utf-8').rstrip() for p in urlopen(self.dl_link_txt)]
            self.sidecar_paths = [
                p.replace('.laz', '.xml').replace('LAZ', 'metadata')
                for p in self.pc_paths
            ]
        except Exception:
            try:
                res = session.get(self.pc_dir)

                parser = PCParser()
                parser.feed(res.text)
                self.pc_paths = [urljoin(self.pc_dir, p) for p in parser.messages]

                meta_messages = [m.replace('.laz', '.xml') for m in parser.messages]
                self.sidecar_paths = [
                    urljoin(self.sidecar_dir, m) for m in meta_messages
                ]
            except Exception:
                self.valid = False

    def set_children(self) -> list:
        """
        Add children to the project STAC Collection, returning one delayed
        task per point cloud file.
        """
        if not self.valid:
            return []

        self.set_paths()

        pairs = zip(self.pc_paths, self.sidecar_paths)
        obj_list: list[MetaItem] = [
            MetaItem(p, s, self.root_dst, self.root_href, self.meta, self.update)
            for p, s in pairs
        ]

        return [delayed(item.process)() for item in obj_list]

    def get_stac(self) -> pystac.Collection:
        """
        Return the project STAC Collection.
        """
        return self.collection

    def __repr__(self):
        # Assumes WesmMetadata is a simple attribute container; vars() plus
        # default=str keeps the dump JSON-serializable.
        return json.dumps(vars(self.meta), default=str)
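
For a single project, the same flow that do_one() applies in catalog.py can be mirrored by hand: compute the delayed children, drop any that returned None, and attach the surviving links before saving. A sketch under the assumption that the WESM url below is a placeholder and that the record's keys match the WesmMetadata fields:

from functools import partial
from operator import is_not

from dask import compute

from usgs_stac.collection import MetaCollection
from usgs_stac.metadata_common import read_json

wesm = read_json("https://example.com/wesm.json")   # placeholder WESM JSON url
record = next(iter(wesm.values()))                  # one project's record

col = MetaCollection(record, href="https://example.com/wesm_stac/",
                     dst="/tmp/wesm_stac/", update=False)

# set_children() returns one dask.delayed task per laz file; each runs
# MetaItem.process() and yields a child link (or None when an item fails).
links = compute(*col.set_children(), scheduler='threading')
col.collection.add_links(list(filter(partial(is_not, None), links)))
col.save_local()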