Skip to content
This repository has been archived by the owner on Dec 15, 2022. It is now read-only.

Commit

Permalink
Start work on constructing Text from a byte stream
Browse files Browse the repository at this point in the history
Signed-off-by: Nathan Sobo <[email protected]>
  • Loading branch information
maxbrunsfeld authored and Nathan Sobo committed Jan 15, 2017
1 parent b41fc78 commit 0bbafab
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 0 deletions.
8 changes: 8 additions & 0 deletions binding.gyp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,13 @@
"src/core/text.cc",
"src/core/marker-index.cc",
"src/core/buffer-offset-index.cc"
],
"conditions": [
['OS=="mac"', {
'link_settings': {
'libraries': ['libiconv.dylib'],
}
}],
]
},
],
Expand All @@ -46,6 +53,7 @@
"CATCH_CONFIG_CPP11_NO_IS_ENUM"
],
"sources": [
"test/native/text-test.cc",
"test/native/patch-test.cc",
"test/native/tests.cc",
],
Expand Down
52 changes: 52 additions & 0 deletions src/core/text.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,70 @@
#include <errno.h>
#include <iconv.h>
#include <limits.h>
#include <string.h>
#include <memory>
#include <vector>

using std::move;
using std::vector;
using std::unique_ptr;
using std::basic_ostream;
using std::istream;

// Two lines are equal when both their content and their line ending match.
bool Line::operator==(const Line &other) const {
  if (!(content == other.content)) {
    return false;
  }
  return ending == other.ending;
}

Text::Text() : lines {Line{{}, LineEnding::NONE}} {}

// Constructs a Text that holds a copy of the given lines.
Text::Text(const vector<Line> &lines)
  : lines(lines) {
}

// Builds a Text by reading `stream` in chunks, transcoding the bytes from
// `encoding_name` to UTF-16LE with iconv, and splitting the result into lines
// at every '\n'.
//
// Parameters:
//   stream        - byte source; it is read until a read yields no bytes.
//   encoding_name - name of the source encoding, passed to iconv_open().
//   chunk_size    - maximum number of fresh bytes read from the stream per
//                   iteration.
//
// The Text always starts with a single empty line. If iconv does not
// recognize `encoding_name`, construction stops there and the Text stays
// empty.
//
// Decoding problems are handled as follows:
//   * A multibyte sequence that is cut off at the end of a chunk is kept and
//     retried once the next chunk has been appended to it.
//   * An invalid byte is skipped and replaced with U+FFFD.
//   * A sequence still incomplete at end of stream yields a single U+FFFD.
//
// NOTE: the converted bytes are reinterpreted directly as char16_t, which
// assumes a little-endian host (the conversion target is "UTF-16LE").
Text::Text(istream &stream, const char *encoding_name, size_t chunk_size) {
  lines.push_back({u"", LineEnding::NONE});

  iconv_t conversion = iconv_open("UTF-16LE", encoding_name);
  if (conversion == reinterpret_cast<iconv_t>(-1)) {
    return;
  }

  const size_t conversion_failed = static_cast<size_t>(-1);
  const char16_t replacement_character = u'\uFFFD';

  vector<char> byte_vector;
  // Always leave room for at least one surrogate pair in the output buffer.
  vector<char16_t> character_vector(chunk_size > 2 ? chunk_size : 2);

  // Number of bytes at the front of byte_vector left over from the previous
  // chunk because they form the start of an incomplete multibyte sequence.
  size_t pending_byte_count = 0;

  for (;;) {
    byte_vector.resize(pending_byte_count + chunk_size);
    char *byte_buffer = byte_vector.data();

    stream.read(byte_buffer + pending_byte_count, chunk_size);
    size_t bytes_read = stream.gcount();
    if (bytes_read == 0) break;

    char *byte_pointer = byte_buffer;
    size_t bytes_remaining = pending_byte_count + bytes_read;

    while (bytes_remaining > 0) {
      char16_t *character_buffer = character_vector.data();
      char *character_pointer = reinterpret_cast<char *>(character_buffer);
      size_t character_limit = character_vector.size() * sizeof(char16_t);

      size_t result = iconv(
        conversion,
        &byte_pointer,
        &bytes_remaining,
        &character_pointer,
        &character_limit
      );
      // Capture errno right away, before any allocation below can touch it.
      int conversion_errno = (result == conversion_failed) ? errno : 0;

      char16_t *end_character = reinterpret_cast<char16_t *>(character_pointer);
      for (char16_t *character = character_buffer;
           character != end_character;
           character++) {
        switch (*character) {
          case '\n':
            lines.back().ending = LineEnding::LF;
            lines.push_back({u"", LineEnding::NONE});
            break;

          default:
            lines.back().content.push_back(*character);
            break;
        }
      }

      if (result != conversion_failed) break;

      if (conversion_errno == EINVAL) {
        // Incomplete sequence at the end of the input: keep the remaining
        // bytes and retry once more data has been read.
        break;
      } else if (conversion_errno == EILSEQ) {
        // Invalid sequence: substitute it and resume after the bad byte.
        lines.back().content.push_back(replacement_character);
        byte_pointer++;
        bytes_remaining--;
      } else if (conversion_errno == E2BIG) {
        // Output buffer exhausted. It has been drained above, so normally we
        // just convert again; grow it if not even one character fit.
        if (end_character == character_buffer) {
          character_vector.resize(character_vector.size() * 2);
        }
      } else {
        // Unexpected failure: give up on the rest of this chunk.
        bytes_remaining = 0;
      }
    }

    pending_byte_count = bytes_remaining;
    if (pending_byte_count > 0) {
      memmove(byte_buffer, byte_pointer, pending_byte_count);
    }
  }

  // The stream ended in the middle of a multibyte sequence.
  if (pending_byte_count > 0) {
    lines.back().content.push_back(replacement_character);
  }

  iconv_close(conversion);
}

void Text::append(TextSlice slice) {
Line &last_line = lines.back();

Expand Down
3 changes: 3 additions & 0 deletions src/core/text.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <memory>
#include <vector>
#include <ostream>
#include <istream>
#include "point.h"

enum class LineEnding : uint8_t {
Expand All @@ -29,6 +30,8 @@ struct Text {
std::vector<Line> lines;

Text();
Text(const std::vector<Line> &);
Text(std::istream &stream, const char *encoding_name, size_t chunk_size);

bool operator==(const Text &other) const;
Point Extent() const;
Expand Down
16 changes: 16 additions & 0 deletions test/native/text-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include "test-helpers.h"
#include <sstream>
#include "text.h"

using std::stringstream;

// Reads a two-line UTF8 string through the stream constructor using 3-byte
// chunks and checks that it is split into the expected lines.
TEST_CASE("builds a Text from a UTF8 stream") {
  stringstream input("abcdefg\nhijklmnop", std::ios_base::in);
  const Text expected({
    Line{u"abcdefg", LineEnding::LF},
    Line{u"hijklmnop", LineEnding::NONE}
  });

  Text actual(input, "UTF8", 3);

  REQUIRE(actual == expected);
}

0 comments on commit 0bbafab

Please sign in to comment.