
Added minor fixes for PR review
tvoinarovskyi committed Oct 12, 2017
1 parent e992fbf commit d10051b
Showing 6 changed files with 20 additions and 27 deletions.
12 changes: 7 additions & 5 deletions benchmarks/record_batch_compose.py
@@ -1,19 +1,21 @@
#!/usr/bin/env python3
from __future__ import print_function
import perf
from kafka.record.memory_records import MemoryRecordsBuilder
import itertools
import random
import hashlib
import itertools
import os
import random

import perf

from kafka.record.memory_records import MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

# With values above v1 record is 100 bytes, so 10_000 bytes for 100 messages
# With values above v1 record is 100 bytes, so 10 000 bytes for 100 messages
MESSAGES_PER_BATCH = 100


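For context on the updated comment: a v1 record adds roughly 34 bytes of header and CRC around the payload, so with KEY_SIZE = 6 and VALUE_SIZE = 60 each record comes to about 100 bytes, and MESSAGES_PER_BATCH = 100 keeps a batch near 10 000 bytes, far below DEFAULT_BATCH_SIZE. A minimal sketch of how such sample data can be generated (the helper name is illustrative, not taken from the benchmark):

import os
import random

KEY_SIZE = 6
VALUE_SIZE = 60
TIMESTAMP_RANGE = [1505824130000, 1505824140000]

def random_sample():
    # Fixed-size random key/value plus a timestamp from the benchmark's range;
    # randomized input keeps the interpreter from reusing one precomputed result.
    return {
        'key': os.urandom(KEY_SIZE),
        'value': os.urandom(VALUE_SIZE),
        'timestamp': random.randint(*TIMESTAMP_RANGE),
    }
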
10 changes: 6 additions & 4 deletions benchmarks/record_batch_read.py
@@ -1,11 +1,13 @@
#!/usr/bin/env python
from __future__ import print_function
import perf
from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder
import itertools
import random
import hashlib
import itertools
import os
import random

import perf

from kafka.record.memory_records import MemoryRecords, MemoryRecordsBuilder


DEFAULT_BATCH_SIZE = 1600 * 1024
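Beyond the import reshuffle, this benchmark exercises the decode side of the new record module. A minimal sketch of that read loop, assuming MemoryRecords exposes the has_next()/next_batch() iteration style used elsewhere in kafka-python (raw_buffer is an illustrative name for bytes produced by a builder):

from kafka.record.memory_records import MemoryRecords

def read_all(raw_buffer):
    records = MemoryRecords(raw_buffer)
    values = []
    while records.has_next():
        batch = records.next_batch()
        for record in batch:
            # each record carries offset, timestamp, key and value attributes
            values.append(record.value)
    return values
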
2 changes: 1 addition & 1 deletion kafka/producer/kafka.py
@@ -370,7 +370,7 @@ def __init__(self, **configs):
else:
checker, compression_attrs = self._COMPRESSORS[ct]
assert checker(), "Libraries for {} compression codec not found".format(ct)
self.config['compression_type'] = compression_attrs
self.config['compression_attrs'] = compression_attrs

message_version = self._max_usable_produce_magic()
self._accumulator = RecordAccumulator(message_version=message_version, metrics=self._metrics, **self.config)
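The one-line change above is the producer's half of the compression_attrs rename: _COMPRESSORS maps the user-facing compression_type string to a (library checker, integer attrs) pair, and the resolved integer is now stored under the key RecordAccumulator expects. A hedged sketch of that resolution step; the table contents are written out here as an assumption (the real code uses codec constants from the record builders), and the checker names follow kafka.codec:

from kafka.codec import has_gzip, has_lz4, has_snappy

# Codec integers as documented in RecordAccumulator: none(0), gzip(1), snappy(2), lz4(3)
_COMPRESSORS = {
    'gzip': (has_gzip, 1),
    'snappy': (has_snappy, 2),
    'lz4': (has_lz4, 3),
    None: (lambda: True, 0),
}

def resolve_compression(config):
    # config['compression_type'] is the user-facing string; the accumulator now
    # receives the already-resolved integer under 'compression_attrs'.
    ct = config.pop('compression_type', None)
    checker, compression_attrs = _COMPRESSORS[ct]
    assert checker(), "Libraries for {} compression codec not found".format(ct)
    config['compression_attrs'] = compression_attrs
    return config
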
18 changes: 3 additions & 15 deletions kafka/producer/record_accumulator.py
@@ -149,7 +149,7 @@ class RecordAccumulator(object):
will block up to max_block_ms, raising an exception on timeout.
In the current implementation, this setting is an approximation.
Default: 33554432 (32MB)
compression_type (int): The compression type for all data generated by
compression_attrs (int): The compression type for all data generated by
the producer. Valid values are gzip(1), snappy(2), lz4(3), or
none(0).
Compression is of full batches of data, so the efficacy of batching
@@ -168,32 +168,20 @@ class RecordAccumulator(object):
DEFAULT_CONFIG = {
'buffer_memory': 33554432,
'batch_size': 16384,
'compression_type': None,
'compression_attrs': 0,
'linger_ms': 0,
'retry_backoff_ms': 100,
'message_version': 0,
'metrics': None,
'metric_group_prefix': 'producer-metrics',
}

_COMPRESSORS = {
'gzip': LegacyRecordBatchBuilder.CODEC_GZIP,
'snappy': LegacyRecordBatchBuilder.CODEC_SNAPPY,
'lz4': LegacyRecordBatchBuilder.CODEC_LZ4,
None: LegacyRecordBatchBuilder.CODEC_NONE
}

def __init__(self, **configs):
self.config = copy.copy(self.DEFAULT_CONFIG)
for key in self.config:
if key in configs:
self.config[key] = configs.pop(key)

# Convert compression type to INT presentation. Mostly for unit tests,
# as Producer should pass already converted values.
ct = self.config["compression_type"]
self.config["compression_type"] = self._COMPRESSORS.get(ct, ct)

self._closed = False
self._flushes_in_progress = AtomicInteger()
self._appends_in_progress = AtomicInteger()
@@ -269,7 +257,7 @@ def append(self, tp, timestamp_ms, key, value, max_time_to_block_ms,

records = MemoryRecordsBuilder(
self.config['message_version'],
self.config['compression_type'],
self.config['compression_attrs'],
self.config['batch_size']
)

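Net effect of the hunks above: RecordAccumulator no longer carries its own _COMPRESSORS table or converts codec names; it expects the producer to pass an already-resolved integer and forwards it to the batch builder unchanged. A minimal sketch of what a direct caller (for example a unit test) now has to do, assuming MemoryRecordsBuilder's positional arguments are (magic, compression_attrs, batch_size) as in the append() hunk:

from kafka.record.memory_records import MemoryRecordsBuilder

# Codec names are gone at this layer; pass the integer directly
# (none=0, gzip=1, snappy=2, lz4=3, per the docstring above).
records = MemoryRecordsBuilder(
    0,       # message_version (magic byte)
    1,       # compression_attrs: 1 == gzip
    16384,   # batch_size, matching DEFAULT_CONFIG['batch_size']
)
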
3 changes: 2 additions & 1 deletion kafka/protocol/message.py
@@ -161,7 +161,8 @@ def encode(cls, items, prepend_size=True):
if prepend_size:
# rewind and return all the bytes
items.seek(items.tell() - 4)
return items.read(size + 4)
size += 4
return items.read(size)

encoded_values = []
for (offset, message) in items:
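The encode() hunk is purely cosmetic (the +4 for the size prefix is now added to size before the read instead of inline), but the pattern it touches is worth spelling out: when handed an already-encoded buffer with prepend_size set, the encoder rewinds over the 4-byte Int32 prefix and returns prefix plus payload in one read. An illustrative sketch of the same pattern, not the library's exact code path:

import io
import struct

def read_with_size_prefix(items, size):
    # items: BytesIO positioned just past a big-endian Int32 size field;
    # size: the payload length that field encodes (decoded earlier by the caller).
    items.seek(items.tell() - 4)   # rewind so the prefix is included in the read
    size += 4                      # prefix + payload, as in the hunk above
    return items.read(size)

# Usage: a 5-byte payload behind its size prefix round-trips intact.
buf = io.BytesIO(struct.pack('>i', 5) + b'hello')
length, = struct.unpack('>i', buf.read(4))
assert read_with_size_prefix(buf, length) == struct.pack('>i', 5) + b'hello'
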
2 changes: 1 addition & 1 deletion kafka/record/abc.py
@@ -47,7 +47,7 @@ def append(self, offset, timestamp, key, value):
Arguments:
offset (int): Relative offset of record, starting from 0
timestamp (int or None): Timestamp in milliseconds since beginning
of the epoch (midnight Jan 1, 1970 (UTC)). If omited, will be
of the epoch (midnight Jan 1, 1970 (UTC)). If omitted, will be
set to current time.
key (bytes or None): Key of the record
value (bytes or None): Value of the record
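A hedged usage sketch of the append() contract documented above; builder stands in for any concrete implementation of this ABC, and the byte literals are illustrative:

import time

def fill_batch(builder):
    now_ms = int(time.time() * 1000)
    builder.append(0, now_ms, b'key-0', b'value-0')  # explicit timestamp, ms since epoch
    builder.append(1, None, None, b'value-1')        # None timestamp -> current time; key may be None
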
