
Added minor fixes for PR review
tvoinarovskyi committed Oct 12, 2017
1 parent e992fbf commit d10051b
Showing 6 changed files with 20 additions and 27 deletions.
12 changes: 7 additions & 5 deletions benchmarks/record_batch_compose.py
@@ -1,19 +1,21 @@
#!/usr/bin/env python3
from __future__ import print_function
import perf
from kafka.record.memory_records import MemoryRecordsBuilder
import itertools
import random
import hashlib
import itertools
import os
import random

import perf

from kafka.record.memory_records import MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

# With values above v1 record is 100 bytes, so 10_000 bytes for 100 messages
# With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages
MESSAGES_PER_BATCH = 100


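For context on the updated comment: a v1 record adds roughly 34 bytes of header and CRC around the payload, so with KEY_SIZE = 6 and VALUE_SIZE = 60 each record comes to about 100 bytes, and MESSAGES_PER_BATCH = 100 keeps a batch near 10 000 bytes, far below DEFAULT_BATCH_SIZE. A minimal sketch of how such sample data can be generated (the helper name is illustrative, not taken from the benchmark):

import os
import random

KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

def random_sample():
    # Fixed-size random key/value plus a timestamp from the benchmark's range;
    # randomized input keeps the interpreter from reusing one precomputed result.
    return {
        'key': os.urandom(KEY_SIZE),
        'value': os.urandom(VALUE_SIZE),
        'timestamp': random.randint(*TIMESTAMP_RANGE),
    }
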
10 changes: 6 additions & 4 deletions benchmarks/record_batch_read.py
@@ -1,11 +1,13 @@
#!/usr/bin/env python
from __future__ import print_function
import perf
from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder
import itertools
import random
import hashlib
import itertools
import os
import random

import perf

from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
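Beyond the import reshuffle, this benchmark exercises the decode side of the new record module. A minimal sketch of that read loop, assuming MemoryRecords exposes the has_next()/next_batch() iteration style used elsewhere in kafka-python (raw_buffer is an illustrative name for bytes produced by a builder):

from kafka.record.memory_records import MemoryRecords

def read_all(raw_buffer):
    records = MemoryRecords(raw_buffer)
    values = []
    while records.has_next():
        batch = records.next_batch()
        for record in batch:
            # each record carries offset, timestamp, key and value attributes
            values.append(record.value)
    return values
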
2 changes: 1 addition & 1 deletion kafka/producer/kafka.py
@@ -370,7 +370,7 @@ def __init__(self, **configs):
else:
checker, compression_attrs = self._COMPRESSORS[ct]
assert checker(), "Libraries for {} compression codec not found".format(ct)
self.config['compression_type'] = compression_attrs
self.config['compression_attrs'] = compression_attrs

message_version = self._max_usable_produce_magic()
self._accumulator = RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config)
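The one-line change above is the producer's half of the compression_attrs rename: _COMPRESSORS maps the user-facing compression_type string to a (library checker, integer attrs) pair, and the resolved integer is now stored under the key RecordAccumulator expects. A hedged sketch of that resolution step; the table contents are written out here as an assumption (the real code uses codec constants from the record builders), and the checker names follow kafka.codec:

from kafka.codec import has_gzip, has_lz4, has_snappy

# Codec integers as documented in RecordAccumulator: none(0), gzip(1), snappy(2), lz4(3)
_COMPRESSORS = {
    'gzip': (has_gzip, 1),
    'snappy': (has_snappy, 2),
    'lz4': (has_lz4, 3),
    None: (lambda: True, 0),
}

def resolve_compression(config):
    # config['compression_type'] is the user-facing string; the accumulator now
    # receives the already-resolved integer under 'compression_attrs'.
    ct = config.pop('compression_type', None)
    checker, compression_attrs = _COMPRESSORS[ct]
    assert checker(), "Libraries for {} compression codec not found".format(ct)
    config['compression_attrs'] = compression_attrs
    return config
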
18 changes: 3 additions & 15 deletions kafka/producer/record_accumulator.py
@@ -149,7 +149,7 @@ class RecordAccumulator(object):
will block up to max_block_ms, raising an exception on timeout.
In the current implementation, this setting is an approximation.
Default: 33554432 (32MB)
compression_type (int): The compression type for all data generated by
compression_attrs (int): The compression type for all data generated by
the producer. Valid values are gzip(1), snappy(2), lz4(3), or
none(0).
Compression is of full batches of data, so the efficacy of batching
@@ -168,32 +168,20 @@ class RecordAccumulator(object):
DEFAULT_CONFIG = {
'buffer_memory': 33554432,
'batch_size': 16384,
'compression_type': None,
'compression_attrs': 0,
'linger_ms': 0,
'retry_backoff_ms': 100,
'message_version': 0,
'metrics': None,
'metric_group_prefix': 'producer-metrics',
}

_COMPRESSORS = {
'gzip': LegacyRecordBatchBuilder.CODEC_GZIP,
'snappy': LegacyRecordBatchBuilder.CODEC_SNAPPY,
'lz4': LegacyRecordBatchBuilder.CODEC_LZ4,
None: LegacyRecordBatchBuilder.CODEC_NONE
}

def __init__(self, **configs):
self.config = copy.copy(self.DEFAULT_CONFIG)
for key in self.config:
if key in configs:
self.config[key] = configs.pop(key)

# Convert compression type to INT presentation. Mostly for unit tests,
# as Producer should pass already converted values.
ct = self.config["compression_type"]
self.config["compression_type"] = self._COMPRESSORS.get(ct, ct)

self._closed = False
self._flushes_in_progress = AtomicInteger()
self._appends_in_progress = AtomicInteger()
@@ -269,7 +257,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms,

records = MemoryRecordsBuilder(
self.config['message_version'],
self.config['compression_type'],
self.config['compression_attrs'],
self.config['batch_size']
)

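Net effect of the hunks above: RecordAccumulator no longer carries its own _COMPRESSORS table or converts codec names; it expects the producer to pass an already-resolved integer and forwards it to the batch builder unchanged. A minimal sketch of what a direct caller (for example a unit test) now has to do, assuming MemoryRecordsBuilder's positional arguments are (magic, compression_attrs, batch_size) as in the append() hunk:

from kafka.record.memory_records import MemoryRecordsBuilder

# Codec names are gone at this layer; pass the integer directly
# (none=0, gzip=1, snappy=2, lz4=3, per the docstring above).
records = MemoryRecordsBuilder(
    0,       # message_version (magic byte)
    1,       # compression_attrs: 1 == gzip
    16384,   # batch_size, matching DEFAULT_CONFIG['batch_size']
)
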
3 changes: 2 additions & 1 deletion kafka/protocol/message.py
@@ -161,7 +161,8 @@ def encode(cls, items, prepend_size=True):
if prepend_size:
# rewind and return all the bytes
items.seek(items.tell() - 4)
return items.read(size + 4)
size += 4
return items.read(size)

encoded_values = []
for (offset, message) in items:
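The encode() hunk is purely cosmetic (the +4 for the size prefix is now added to size before the read instead of inline), but the pattern it touches is worth spelling out: when handed an already-encoded buffer with prepend_size set, the encoder rewinds over the 4-byte Int32 prefix and returns prefix plus payload in one read. An illustrative sketch of the same pattern, not the library's exact code path:

import io
import struct

def read_with_size_prefix(items, size):
    # items: BytesIO positioned just past a big-endian Int32 size field;
    # size: the payload length that field encodes (decoded earlier by the caller).
    items.seek(items.tell() - 4)   # rewind so the prefix is included in the read
    size += 4                      # prefix + payload, as in the hunk above
    return items.read(size)

# Usage: a 5-byte payload behind its size prefix round-trips intact.
buf = io.BytesIO(struct.pack('>i', 5) + b'hello')
length, = struct.unpack('>i', buf.read(4))
assert read_with_size_prefix(buf, length) == struct.pack('>i', 5) + b'hello'
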
2 changes: 1 addition & 1 deletion kafka/record/abc.py
@@ -47,7 +47,7 @@ def append(self, offset, timestamp, key, value):
Arguments:
offset (int): Relative offset of record, starting from 0
timestamp (int or None): Timestamp in milliseconds since beginning
of the epoch (midnight Jan 1, 1970 (UTC)). If omited, will be
of the epoch (midnight Jan 1, 1970 (UTC)). If omitted, will be
set to current time.
key (bytes or None): Key of the record
value (bytes or None): Value of the record
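A hedged usage sketch of the append() contract documented above; builder stands in for any concrete implementation of this ABC, and the byte literals are illustrative:

import time

def fill_batch(builder):
    now_ms = int(time.time() * 1000)
    builder.append(0, now_ms, b'key-0', b'value-0')  # explicit timestamp, ms since epoch
    builder.append(1, None, None, b'value-1')        # None timestamp -> current time; key may be None
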
