def post_process(self, row: dict, context: Optional[dict] = None) -> dict:
    """Tag the record as an issue or pull request and sanitise its body.

    Args:
        row: The record to post-process. It is modified in place.
        context: Stream context; not used by this method.

    Returns:
        The same ``row`` dict, with ``type`` set to ``"pull_request"`` when
        the record has a ``pull_request`` key and ``"issue"`` otherwise, and
        with NUL characters removed from a string ``body``.
    """
    row["type"] = "pull_request" if "pull_request" in row else "issue"

    body = row.get("body")
    if isinstance(body, str):
        # Some issue bodies include the NUL control character (\x00), which
        # some targets (such as postgresql) choke on. Strip it before the
        # record is passed on to the target.
        #
        # NOTE: encoding to UTF-8 with errors="ignore" does not help here:
        # \x00 is valid UTF-8, so it survives the encode, and the result
        # would be ``bytes`` rather than ``str``.
        row["body"] = body.replace("\x00", "")
    return row