From 38894d014846ac648b5bca70c4465c4db103299a Mon Sep 17 00:00:00 2001 From: Dima Gerasimov Date: Fri, 25 Jul 2025 01:13:46 +0100 Subject: [PATCH] _parse_keep: make defensive and yield exception to respect error policy Before the change, a keep entry formatted in an unexpected way would result in the whole export crashing, even when the error policy isn't 'raise'. This change makes _parse_keep consistent with other parsers that emit exceptions. --- google_takeout_parser/parse_json.py | 75 +++++++++++++++-------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/google_takeout_parser/parse_json.py b/google_takeout_parser/parse_json.py index 37a1cbb..2b522e7 100644 --- a/google_takeout_parser/parse_json.py +++ b/google_takeout_parser/parse_json.py @@ -291,39 +291,42 @@ def _parse_keep(p: Path) -> Iterator[Res[Keep]]: json_data = _read_json_data(p) # For google keep, each note is stored as a separate json file, # so technically there is always just one yield value - yield Keep( - title=json_data["title"], - created_dt=datetime.fromtimestamp( - json_data["createdTimestampUsec"] / 1_000_000, tz=timezone.utc - ), - updated_dt=datetime.fromtimestamp( - json_data["userEditedTimestampUsec"] / 1_000_000, tz=timezone.utc - ), - listContent=[ - KeepListContent( - textHtml=content["textHtml"], - text=content["text"], - isChecked=content["isChecked"] - ) for content in json_data.get("listContent", []) - ], - textContent=( - json_data["textContent"] - if "textContent" in json_data else None - ), - textContentHtml=( - json_data["textContentHtml"] - if "textContentHtml" in json_data else None - ), - color=json_data["color"], - annotations=[ - KeepAnnotation( - description=annotation["description"], - source=annotation["source"], - title=annotation["title"], - url=annotation["url"], - ) for annotation in json_data.get("annotations", []) - ], - isTrashed=json_data["isTrashed"], - isPinned=json_data["isPinned"], - isArchived=json_data["isArchived"] - ) + try: + yield Keep( 
title=json_data["title"], + created_dt=datetime.fromtimestamp( + json_data["createdTimestampUsec"] / 1_000_000, tz=timezone.utc + ), + updated_dt=datetime.fromtimestamp( + json_data["userEditedTimestampUsec"] / 1_000_000, tz=timezone.utc + ), + listContent=[ + KeepListContent( + textHtml=content["textHtml"], + text=content["text"], + isChecked=content["isChecked"] + ) for content in json_data.get("listContent", []) + ], + textContent=( + json_data["textContent"] + if "textContent" in json_data else None + ), + textContentHtml=( + json_data["textContentHtml"] + if "textContentHtml" in json_data else None + ), + color=json_data["color"], + annotations=[ + KeepAnnotation( + description=annotation["description"], + source=annotation["source"], + title=annotation["title"], + url=annotation["url"], + ) for annotation in json_data.get("annotations", []) + ], + isTrashed=json_data["isTrashed"], + isPinned=json_data["isPinned"], + isArchived=json_data["isArchived"] + ) + except Exception as e: + yield e