diff --git a/serialization/python_pickle.ksy b/serialization/python_pickle.ksy
new file mode 100644
index 000000000..c85f187e7
--- /dev/null
+++ b/serialization/python_pickle.ksy
@@ -0,0 +1,585 @@
+meta:
+  id: python_pickle
+  title: Python pickle serialization format
+  application: Python
+  file-extension:
+    - pickle
+    - pkl
+  xref:
+    justsolve: Pickle
+    wikidata: Q7190889
+  license: CC0-1.0
+  endian: le
+doc: |
+  Python Pickle format serializes Python objects to a byte stream, as a sequence
+  of operations to run on the Pickle Virtual Machine.
+
+  The format is mostly implementation defined; there is no formal specification.
+  Pickle data types are closely coupled to the Python object model.
+  Python singletons, and most builtin types (e.g. `None`, `int`, `dict`, `list`)
+  are serialised using dedicated Pickle opcodes.
+  Other builtin types, and all classes (e.g. `set`, `datetime.datetime`) are
+  serialised by encoding the name of a constructor callable.
+  They are deserialised by importing that constructor, and calling it.
+  So, unpickling an arbitrary pickle using Python's stdlib pickle module
+  can cause arbitrary code execution.
+
+  Pickle format has evolved with Python, later protocols add opcodes & types.
+  Later Python releases can pickle to or unpickle from any earlier protocol.
+
+  * Protocol 0: ASCII clean, no explicit version, fields are '\n' terminated.
+  * Protocol 1: Binary, no explicit version, first length prefixed types.
+  * Protocol 2: Python 2.3+. Explicit versioning, more length prefixed types.
+    https://www.python.org/dev/peps/pep-0307/
+  * Protocol 3: Python 3.0+. Dedicated opcodes for `bytes` objects.
+  * Protocol 4: Python 3.4+. Opcodes for 64 bit strings, framing, `set`.
+    https://www.python.org/dev/peps/pep-3154/
+doc-ref: https://github.com/python/cpython/blob/v3.7.3/Lib/pickletools.py
+seq:
+  # TODO: is there a way to declare that PROTO is optional, but only valid at position 0?
+  - id: ops
+    type: op
+    repeat: eos  # keep reading ops until the end of the stream
+  # TODO: is there a way to declare that a trailing STOP is required?
+types:
+  op:
+    seq:
+      - id: code
+        type: u1
+        enum: opcode
+        doc: |
+          Operation code that determines which action should be
+          performed next by the Pickle Virtual Machine. Some opcodes
+          are only available in later versions of the Pickle protocol.
+      - id: arg
+        type:
+          switch-on: code
+          cases:
+            'opcode::int': decimalnl_short
+            'opcode::binint': s4
+            'opcode::binint1': u1
+            'opcode::binint2': u2
+            'opcode::long': decimalnl_long
+            'opcode::long1': long1
+            'opcode::long4': long4
+            'opcode::string': stringnl
+            'opcode::binstring': string4
+            'opcode::short_binstring': string1
+            'opcode::binbytes': bytes4
+            'opcode::short_binbytes': bytes1
+            'opcode::binbytes8': bytes8
+            'opcode::none': no_arg
+            'opcode::newtrue': no_arg
+            'opcode::newfalse': no_arg
+            'opcode::unicode': unicodestringnl
+            'opcode::short_binunicode': unicodestring1
+            'opcode::binunicode': unicodestring4
+            'opcode::binunicode8': unicodestring8
+            'opcode::float': floatnl
+            'opcode::binfloat': f8
+            'opcode::empty_list': no_arg
+            'opcode::append': no_arg
+            'opcode::appends': no_arg
+            'opcode::list': no_arg
+            'opcode::empty_tuple': no_arg
+            'opcode::tuple': no_arg
+            'opcode::tuple1': no_arg
+            'opcode::tuple2': no_arg
+            'opcode::tuple3': no_arg
+            'opcode::empty_dict': no_arg
+            'opcode::dict': no_arg
+            'opcode::setitem': no_arg
+            'opcode::setitems': no_arg
+            'opcode::empty_set': no_arg
+            'opcode::additems': no_arg
+            'opcode::frozenset': no_arg
+            'opcode::pop': no_arg
+            'opcode::dup': no_arg
+            'opcode::mark': no_arg
+            'opcode::pop_mark': no_arg
+            'opcode::get': decimalnl_short
+            'opcode::binget': u1
+            'opcode::long_binget': u4
+            'opcode::put': decimalnl_short
+            'opcode::binput': u1
+            'opcode::long_binput': u4
+            'opcode::memoize': no_arg
+            'opcode::ext1': u1
+            'opcode::ext2': u2
+            'opcode::ext4': u4
+            'opcode::global': stringnl_noescape_pair
+            'opcode::stack_global': no_arg
+            'opcode::reduce': no_arg
+            'opcode::build': no_arg
+            'opcode::inst': stringnl_noescape_pair
+            'opcode::obj': no_arg
+            'opcode::newobj': no_arg
+            'opcode::newobj_ex': no_arg
+            'opcode::proto': u1
+            'opcode::stop': no_arg
+            'opcode::frame': u8
+            'opcode::persid': stringnl_noescape
+            'opcode::binpersid': no_arg
+        doc: |
+          Optional argument for the operation. Data type and length
+          are determined by the value of the opcode.
+
+  decimalnl_short:
+    doc: |
+      Integer or boolean, encoded with the ASCII characters [0-9-].
+
+      The values '00' and '01' encode the Python values `False` and `True`.
+      Normally a value would not contain leading '0' characters.
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ascii
+
+  decimalnl_long:
+    doc: Integer, encoded with the ASCII characters [0-9-], followed by 'L'.
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ascii
+
+  # TODO: can Kaitai express the constraint that these are quoted?
+  stringnl:
+    doc: Quoted string, possibly containing Python string escapes.
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ascii
+
+  stringnl_noescape:
+    doc: Unquoted string, does not contain string escapes.
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ascii
+
+  stringnl_noescape_pair:
+    doc: Pair of unquoted, unescaped strings.
+    seq:
+      - id: val1
+        type: stringnl_noescape
+      - id: val2
+        type: stringnl_noescape
+
+  unicodestringnl:
+    doc: Unquoted raw-unicode-escape string (Latin-1 bytes and \uXXXX escapes, unexpanded).
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ISO-8859-1  # raw-unicode-escape emits code points < 256 as raw bytes
+
+  floatnl:
+    doc: |
+      Double float, encoded with the ASCII characters [0-9.e+-], '-inf', 'inf',
+      or 'nan'.
+    seq:
+      - id: val
+        type: str
+        terminator: 0x0a  # "\n"
+        encoding: ascii
+
+  long1:
+    doc: |
+      Large signed integer, in the range -2**(8*255-1) to 2**(8*255-1)-1,
+      encoded as two's complement.
+    seq:
+      - id: len
+        type: u1
+      - id: val
+        size: len
+
+  long4:
+    doc: |
+      Large signed integer, in the range -2**(8*2**32-1) to 2**(8*2**32-1)-1,
+      encoded as two's complement.
+    seq:
+      - id: len
+        type: u4
+      - id: val
+        size: len
+
+  string1:
+    doc: |
+      Length prefixed string, between 0 and 255 bytes long. Encoding is
+      unspecified.
+
+      The default Python 2.x string type (`str`) is a sequence of bytes.
+      These are pickled as `string1` or `string4`, when protocol == 2.
+      The bytes are written directly, no explicit encoding is performed.
+
+      Python 3.x will not pickle an object as `string1` or `string4`.
+      Instead, opcodes and types with a known encoding are used.
+      When unpickling:
+
+      - `pickle.Unpickler` objects default to ASCII, which can be overridden
+      - `pickletools.dis` uses latin1, and cannot be overridden
+    doc-ref: https://github.com/python/cpython/blob/bb8071a4/Lib/pickle.py#L486-L495
+    seq:
+      - id: len
+        type: u1
+      - id: val
+        size: len
+
+  string4:
+    doc: |
+      Length prefixed string, between 0 and 2**31-1 bytes long. Encoding is
+      unspecified.
+
+      Although the len field is signed, any length < 0 will raise an exception
+      during unpickling.
+
+      See the documentation for `string1` for further detail about encodings.
+    doc-ref: https://github.com/python/cpython/blob/bb8071a4/Lib/pickle.py#L486-L495
+    seq:
+      - id: len
+        # Deliberately signed, not a typo: the length field is a signed integer
+        type: s4
+      - id: val
+        size: len
+
+  bytes1:
+    doc: Length prefixed byte string, between 0 and 255 bytes long.
+    seq:
+      - id: len
+        type: u1
+      - id: val
+        size: len
+
+  bytes4:
+    doc: Length prefixed byte string, between 0 and 2**32-1 bytes long.
+    seq:
+      - id: len
+        type: u4
+      - id: val
+        size: len
+
+  bytes8:
+    doc: |
+      Length prefixed byte string, between 0 and 2**64-1 bytes long.
+
+      Only a 64-bit build of Python would produce a pickle containing byte
+      strings large enough to need this type. Such a pickle could not be
+      unpickled on a 32-bit build of Python, because the byte string would be
+      larger than `sys.maxsize`.
+    seq:
+      - id: len
+        type: u8
+      - id: val
+        size: len
+
+  unicodestring1:
+    doc: Length prefixed UTF-8 string, between 0 and 255 bytes long.
+    seq:
+      - id: len
+        type: u1  # one-byte length: this is the SHORT_BINUNICODE argument
+      - id: val
+        type: str
+        size: len
+        encoding: utf8
+
+  unicodestring4:
+    doc: Length prefixed string, between 0 and 2**32-1 bytes long
+    seq:
+      - id: len
+        type: u4
+      - id: val
+        type: str
+        size: len
+        encoding: utf8
+
+  unicodestring8:
+    doc: |
+      Length prefixed string, between 0 and 2**64-1 bytes long.
+
+      Only a 64-bit build of Python would produce a pickle containing strings
+      large enough to need this type. Such a pickle could not be unpickled on
+      a 32-bit build of Python, because the string would be larger than
+      `sys.maxsize`.
+    seq:
+      - id: len
+        type: u8
+      - id: val
+        type: str
+        size: len
+        encoding: utf8
+
+  no_arg:
+    doc: Empty placeholder type, used for the opcodes that take no argument.
+
+enums:
+  opcode:
+    0x28:  # "("
+      id: "mark"
+      -orig-id: MARK
+      doc: push special markobject on stack
+    0x2e:  # "."
+      id: "stop"
+      -orig-id: STOP
+      doc: every pickle ends with STOP
+    0x30:  # "0"
+      id: "pop"
+      -orig-id: POP
+      doc: discard topmost stack item
+    0x31:  # "1"
+      id: "pop_mark"
+      -orig-id: POP_MARK
+      doc: discard stack top through topmost markobject
+    0x32:  # "2"
+      id: "dup"
+      -orig-id: DUP
+      doc: duplicate top stack item
+    0x46:  # "F"
+      id: "float"
+      -orig-id: FLOAT
+      doc: push float object; decimal string argument
+    0x49:  # "I"
+      id: "int"
+      -orig-id: INT
+      doc: push integer or bool; decimal string argument
+    0x4a:  # "J"
+      id: "binint"
+      -orig-id: BININT
+      doc: push four-byte signed int
+    0x4b:  # "K"
+      id: "binint1"
+      -orig-id: BININT1
+      doc: push 1-byte unsigned int
+    0x4c:  # "L"
+      id: "long"
+      -orig-id: LONG
+      doc: push long; decimal string argument
+    0x4d:  # "M"
+      id: "binint2"
+      -orig-id: BININT2
+      doc: push 2-byte unsigned int
+    0x4e:  # "N"
+      id: "none"
+      -orig-id: NONE
+      doc: push None
+    0x50:  # "P"
+      id: "persid"
+      -orig-id: PERSID
+      doc: push persistent object; id is taken from string arg
+    0x51:  # "Q"
+      id: "binpersid"
+      -orig-id: BINPERSID
+      doc: push persistent object; id is taken from stack
+    0x52:  # "R"
+      id: "reduce"
+      -orig-id: REDUCE
+      doc: apply callable to argtuple, both on stack
+    0x53:  # "S"
+      id: "string"
+      -orig-id: STRING
+      doc: push string; NL-terminated string argument
+    0x54:  # "T"
+      id: "binstring"
+      -orig-id: BINSTRING
+      doc: push string; counted binary string argument
+    0x55:  # "U"
+      id: "short_binstring"
+      -orig-id: SHORT_BINSTRING
+      doc: push string; counted binary string argument < 256 bytes
+    0x56:  # "V"
+      id: "unicode"
+      -orig-id: UNICODE
+      doc: push Unicode string; raw-unicode-escaped argument
+    0x58:  # "X"
+      id: "binunicode"
+      -orig-id: BINUNICODE
+      doc: push Unicode string; counted UTF-8 string argument
+    0x61:  # "a"
+      id: "append"
+      -orig-id: APPEND
+      doc: append stack top to list below it
+    0x62:  # "b"
+      id: "build"
+      -orig-id: BUILD
+      doc: call __setstate__ or __dict__.update()
+    0x63:  # "c"
+      id: "global"
+      -orig-id: GLOBAL
+      doc: push self.find_class(modname, name); 2 string args
+    0x64:  # "d"
+      id: "dict"
+      -orig-id: DICT
+      doc: build a dict from stack items
+    0x7d:  # "}"
+      id: "empty_dict"
+      -orig-id: EMPTY_DICT
+      doc: push empty dict
+    0x65:  # "e"
+      id: "appends"
+      -orig-id: APPENDS
+      doc: extend list on stack by topmost stack slice
+    0x67:  # "g"
+      id: "get"
+      -orig-id: GET
+      doc: push item from memo on stack; index is string arg
+    0x68:  # "h"
+      id: "binget"
+      -orig-id: BINGET
+      doc: push item from memo on stack; index is 1-byte arg
+    0x69:  # "i"
+      id: "inst"
+      -orig-id: INST
+      doc: build & push class instance
+    0x6a:  # "j"
+      id: "long_binget"
+      -orig-id: LONG_BINGET
+      doc: push item from memo on stack; index is 4-byte arg
+    0x6c:  # "l"
+      id: "list"
+      -orig-id: LIST
+      doc: build list from topmost stack items
+    0x5d:  # "]"
+      id: "empty_list"
+      -orig-id: EMPTY_LIST
+      doc: push empty list
+    0x6f:  # "o"
+      id: "obj"
+      -orig-id: OBJ
+      doc: build & push class instance
+    0x70:  # "p"
+      id: "put"
+      -orig-id: PUT
+      doc: store stack top in memo; index is string arg
+    0x71:  # "q"
+      id: "binput"
+      -orig-id: BINPUT
+      doc: store stack top in memo; index is 1-byte arg
+    0x72:  # "r"
+      id: "long_binput"
+      -orig-id: LONG_BINPUT
+      doc: store stack top in memo; index is 4-byte arg
+    0x73:  # "s"
+      id: "setitem"
+      -orig-id: SETITEM
+      doc: add key+value pair to dict
+    0x74:  # "t"
+      id: "tuple"
+      -orig-id: TUPLE
+      doc: build tuple from topmost stack items
+    0x29:  # ")"
+      id: "empty_tuple"
+      -orig-id: EMPTY_TUPLE
+      doc: push empty tuple
+    0x75:  # "u"
+      id: "setitems"
+      -orig-id: SETITEMS
+      doc: modify dict by adding topmost key+value pairs
+    0x47:  # "G"
+      id: "binfloat"
+      -orig-id: BINFLOAT
+      doc: push float; arg is 8-byte float encoding
+
+    # Protocol 2 (Python 2.3+)
+    0x80:
+      id: "proto"
+      -orig-id: PROTO
+      doc: identify pickle protocol
+    0x81:
+      id: "newobj"
+      -orig-id: NEWOBJ
+      doc: build object by applying cls.__new__ to argtuple
+    0x82:
+      id: "ext1"
+      -orig-id: EXT1
+      doc: push object from extension registry; 1-byte index
+    0x83:
+      id: "ext2"
+      -orig-id: EXT2
+      doc: ditto, but 2-byte index
+    0x84:
+      id: "ext4"
+      -orig-id: EXT4
+      doc: ditto, but 4-byte index
+    0x85:
+      id: "tuple1"
+      -orig-id: TUPLE1
+      doc: build 1-tuple from stack top
+    0x86:
+      id: "tuple2"
+      -orig-id: TUPLE2
+      doc: build 2-tuple from two topmost stack items
+    0x87:
+      id: "tuple3"
+      -orig-id: TUPLE3
+      doc: build 3-tuple from three topmost stack items
+    0x88:
+      id: "newtrue"
+      -orig-id: NEWTRUE
+      doc: push True
+    0x89:
+      id: "newfalse"
+      -orig-id: NEWFALSE
+      doc: push False
+    0x8a:
+      id: "long1"
+      -orig-id: LONG1
+      doc: push long from < 256 bytes
+    0x8b:
+      id: "long4"
+      -orig-id: LONG4
+      doc: push really big long
+
+    # Protocol 3 (Python 3.0+)
+    0x42:  # "B"
+      id: "binbytes"
+      -orig-id: BINBYTES
+      doc: push bytes; counted binary string argument
+    0x43:  # "C"
+      id: "short_binbytes"
+      -orig-id: SHORT_BINBYTES
+      doc: push bytes; counted binary string argument < 256 bytes
+
+    # Protocol 4 (Python 3.4+)
+    0x8c:
+      id: "short_binunicode"
+      -orig-id: SHORT_BINUNICODE
+      doc: push short string; UTF-8 length < 256 bytes
+    0x8d:
+      id: "binunicode8"
+      -orig-id: BINUNICODE8
+      doc: push very long string
+    0x8e:
+      id: "binbytes8"
+      -orig-id: BINBYTES8
+      doc: push very long bytes string
+    0x8f:
+      id: "empty_set"
+      -orig-id: EMPTY_SET
+      doc: push empty set on the stack
+    0x90:
+      id: "additems"
+      -orig-id: ADDITEMS
+      doc: modify set by adding topmost stack items
+    0x91:
+      id: "frozenset"
+      -orig-id: FROZENSET
+      doc: build frozenset from topmost stack items
+    0x92:
+      id: "newobj_ex"
+      -orig-id: NEWOBJ_EX
+      doc: like NEWOBJ but work with keyword only arguments
+    0x93:
+      id: "stack_global"
+      -orig-id: STACK_GLOBAL
+      doc: same as GLOBAL but using names on the stacks
+    0x94:
+      id: "memoize"
+      -orig-id: MEMOIZE
+      doc: store top of the stack in memo
+    0x95:
+      id: "frame"
+      -orig-id: FRAME
+      doc: indicate the beginning of a new frame