Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CKAN_SITE_URL urls in sitemap index + refactoring #236

Merged
merged 6 commits into from
Dec 19, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 21 additions & 14 deletions ckanext/geodatagov/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import hashlib
import json
import logging
import os
import sys
import tempfile
import warnings
Expand Down Expand Up @@ -46,12 +47,12 @@ def datagovs3():
class Sitemap:
"""Sitemap object

Accepts filename, start, page_size
Accepts file_num, start, page_size
"""

def __init__(self, filename: str, start: int, page_size: int) -> None:
self.filename = filename
self.filename_s3 = f"sitemap-{filename}.xml"
def __init__(self, file_num: str, start: int, page_size: int) -> None:
self.file_num = file_num
self.filename_s3 = f"sitemap/sitemap-{file_num}.xml"
self.start = start
self.page_size = page_size
self.xml = ""
Expand Down Expand Up @@ -95,6 +96,12 @@ def get_s3() -> None:
.profile file in catalog repo for s3 config.
"""

global CKAN_SITE_URL
try:
CKAN_SITE_URL = os.environ["CKAN_SITE_URL"]
except KeyError:
log.error("Missing environment variable CKAN_SITE_URL")

log.info("Setting S3 globals...")
global S3
global BUCKET_NAME
Expand Down Expand Up @@ -123,8 +130,9 @@ def get_s3() -> None:


def get_content_type(filename: str) -> str:
"""
Attempts to guess MIME type by filename extension
"""Attempts to guess MIME type by filename extension

Returns content-type str
"""

if filename[-3:].lower() == "xml":
Expand Down Expand Up @@ -186,13 +194,13 @@ def upload_sitemap_index(sitemaps: list) -> None:
for sitemap in sitemaps:
# add sitemaps to sitemap index file
sitemap_index.write_xml("<sitemap>")
loc = f"{S3_ENDPOINT_URL}/{BUCKET_NAME}/{sitemap.filename_s3}"
loc = f"{CKAN_SITE_URL}/{sitemap.filename_s3}"
sitemap_index.write_xml(f"<loc>{loc}</loc>")
sitemap_index.write_xml(f"<lastmod>{current_time}</lastmod>")
sitemap_index.write_xml("</sitemap>")
sitemap_index.write_xml("</sitemapindex>")

upload_to_key(sitemap_index.xml, f"{sitemap_index.filename_s3}")
upload_to_key(sitemap_index.xml, sitemap_index.filename_s3)
log.info(
f"Sitemap index upload complete to: \
{S3_ENDPOINT_URL}/{BUCKET_NAME}/{sitemap_index.filename_s3}"
Expand All @@ -204,8 +212,7 @@ def upload_sitemap_files(sitemaps: list) -> None:

log.info(f"Uploading {len(sitemaps)} sitemap files...")
for sitemap in sitemaps:
filename_on_s3 = f"{sitemap.filename_s3}"
upload_to_key(sitemap.xml, filename_on_s3)
upload_to_key(sitemap.xml, sitemap.filename_s3)
log.info(
f"Sitemap file {sitemap.filename_s3} upload complete to: \
{S3_ENDPOINT_URL}/{BUCKET_NAME}/{sitemap.filename_s3}"
Expand All @@ -228,12 +235,12 @@ def sitemap_to_s3(upload_to_s3: bool, page_size: int, max_per_page: int):
return

start = 0
filename = 1
file_num = 1
sitemaps = []

paginations = (count // page_size) + 1
for _ in range(paginations):
sitemap = Sitemap(str(filename), start, page_size)
sitemap = Sitemap(str(file_num), start, page_size)
sitemap.write_sitemap_header()
sitemap.write_pkgs(package_query)
sitemap.write_sitemap_footer()
Expand All @@ -251,7 +258,7 @@ def sitemap_to_s3(upload_to_s3: bool, page_size: int, max_per_page: int):
sitemaps.append(sitemap)

start += page_size
filename += 1
file_num += 1

if upload_to_s3:
log.info("Starting S3 uploads...")
Expand Down Expand Up @@ -503,7 +510,7 @@ def s3_test(file_type: str):
upload_str += "<head><title>Test Upload</title></head>"
upload_str += f"<body><p>{content}</b></body>"
upload_str += "</html>"
elif file_type == 'txt':
elif file_type == "txt":
upload_str = content
else:
raise Exception(f"Unsupported file type: {file_type}")
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

setup(
name="ckanext-geodatagov",
version="0.1.18",
version="0.1.19",
description="",
long_description=long_description,
long_description_content_type='text/markdown',
Expand Down