From 4dabcb4fc21bcd74fe4d3e3ef683bfc58ecbb654 Mon Sep 17 00:00:00 2001 From: Edward Rosinzonsky Date: Wed, 27 Jun 2018 13:26:40 -0700 Subject: [PATCH 1/5] Updated to current hangouts json format 'conversation_state.conversation' changed to 'conversation.conversation', 'events' now one level up, etc --- hangouts_parser.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/hangouts_parser.py b/hangouts_parser.py index bbd6daa..c97b006 100644 --- a/hangouts_parser.py +++ b/hangouts_parser.py @@ -5,6 +5,10 @@ from conversation import Conversation from attachment import Attachment +def ns_to_dict(ns): + if isinstance(ns, list): return [ ns_to_dict(i) for i in ns ] + if isinstance(ns, Namespace): return { k: ns_to_dict(v) for k,v in vars(ns).items() } + return ns class HangoutsParser: """Parses the Google Takeout JSON export for Hangouts SMS/MMS messages.""" @@ -18,13 +22,13 @@ def parse_input_file(self, hangouts_file_name, user_phone_number): """ conversations = [] self_gaia_id = None # gaia_id for the phone owner - with open(hangouts_file_name, 'rb') as data_file: + with open(hangouts_file_name, 'rt') as data_file: # Read the Hangouts JSON file and turn into objects data = json.load(data_file, object_hook=lambda d: Namespace(**d)) - # Iterate through each conversation in the list - for conversation_state in data.conversation_state: + # Iterate through each conversation in the listi + for conversation_state in data.conversations: # Get the nested conversation_state - state = getattr(conversation_state, "conversation_state", None) + state = getattr(conversation_state, "conversation", None) if state is not None: # Get the conversation object conversation = getattr(state, "conversation", None) @@ -54,7 +58,7 @@ def parse_input_file(self, hangouts_file_name, user_phone_number): read_state, user_phone_number, self_gaia_id) # Get the conversation messages - events = getattr(state, "event", None) + events = getattr(conversation_state, "events", 
None) if events is not None: current_conversation.messages = self._process_messages(events) conversations.append(current_conversation) From 5812796181380c98b7ac78922fb25e334c94830f Mon Sep 17 00:00:00 2001 From: u2berggeist Date: Fri, 10 Aug 2018 11:04:15 -0400 Subject: [PATCH 2/5] Change encoding to UTF-8 Addressing adein/hangouts_to_sms#15, changing the open to UTF-8 encoding to allow for basic emoji characters (and other characters). I believe adein/hangouts_to_sms#15 only affected Windows users (as the `open` command would default to using `cp1252` encoding instead of `utf-8`), but the addition of the encoding generally makes the code more robust IMO. --- hangouts_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hangouts_parser.py b/hangouts_parser.py index c97b006..04d72c0 100644 --- a/hangouts_parser.py +++ b/hangouts_parser.py @@ -22,7 +22,7 @@ def parse_input_file(self, hangouts_file_name, user_phone_number): """ conversations = [] self_gaia_id = None # gaia_id for the phone owner - with open(hangouts_file_name, 'rt') as data_file: + with open(hangouts_file_name, 'rt', encoding='utf-8') as data_file: # Read the Hangouts JSON file and turn into objects data = json.load(data_file, object_hook=lambda d: Namespace(**d)) # Iterate through each conversation in the listi From 9101a53a9ae3b1d33477d449c3937ecec704dfff Mon Sep 17 00:00:00 2001 From: MattyBoy Date: Wed, 16 Jan 2019 11:21:48 -0600 Subject: [PATCH 3/5] Added support for PNG's. 
--- titanium_backup_formatter.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/titanium_backup_formatter.py b/titanium_backup_formatter.py index c124bca..7868d83 100644 --- a/titanium_backup_formatter.py +++ b/titanium_backup_formatter.py @@ -134,8 +134,12 @@ def create_output_file(self, conversations, self_gaia_id, output_file_name): if attachment.original_content_url is not None: data = self._convert_url_to_base64_data(attachment.original_content_url) if data is not None: - message_string += MMS_PART.format("image/jpeg", order, "base64", data) - order += 1 + if ".png" in attachment.download_url: + message_string += MMS_PART.format("image/png", order, "base64", data) + order += 1 + else: + message_string += MMS_PART.format("image/jpeg", order, "base64", data) + order += 1 else: print("Error: unable to download image data!") elif attachment.media_type == "ANIMATED_PHOTO": From 800c2c669808a5b6c4c950e727bc6987cb25ed29 Mon Sep 17 00:00:00 2001 From: MattyBoy Date: Wed, 16 Jan 2019 11:24:12 -0600 Subject: [PATCH 4/5] Parser was not reading in photo data --- hangouts_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hangouts_parser.py b/hangouts_parser.py index 04d72c0..1b7fc19 100644 --- a/hangouts_parser.py +++ b/hangouts_parser.py @@ -172,14 +172,14 @@ def _process_message_attachments(self, attachment_list): for current_attachment in attachment_list: embed_item = getattr(current_attachment, "embed_item", None) if embed_item is not None: - plus_photo = getattr(embed_item, "embeds.PlusPhoto.plus_photo", None) + plus_photo = getattr(embed_item, "plus_photo", None) if plus_photo is not None: current_attachment = Attachment() current_attachment.album_id = self._try_int_attribute(plus_photo, "album_id") current_attachment.photo_id = self._try_int_attribute(plus_photo, "photo_id") current_attachment.media_type = getattr(plus_photo, "media_type", None) current_attachment.original_content_url = getattr(plus_photo, 
"original_content_url", None) - current_attachment.download_url = getattr(plus_photo, "download_url", None) + current_attachment.download_url = getattr(plus_photo, "url", None) attachments.append(current_attachment) return attachments From dbed2dc9effe98ed8af21a74bb57ee6cc90ecd77 Mon Sep 17 00:00:00 2001 From: MattyBoy Date: Wed, 16 Jan 2019 11:25:30 -0600 Subject: [PATCH 5/5] Update attachment.py --- attachment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/attachment.py b/attachment.py index 4b0ceac..9341bae 100644 --- a/attachment.py +++ b/attachment.py @@ -7,7 +7,7 @@ class Attachment: photo_id = None # Google Photos ID (not used) media_type = None # Type of attachment: PHOTO, ANIMATED_PHOTO, VIDEO original_content_url = None - download_url = None # Not used, since it doesn't seem to work + download_url = None # This is used to get file extension def __init__(self, album_id=None, photo_id=None, media_type=None, original_content_url=None, download_url=None): self.album_id = album_id