Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

src: refactor SplitString in util #48491

Merged
merged 2 commits into from
Jun 22, 2023

Conversation

anonrig
Copy link
Member

@anonrig anonrig commented Jun 18, 2023

See quick-bench.com

Screenshot 2023-06-18 at 12 48 39 PM
benchmark code
#include <string_view>
#include <algorithm>

// Index-based tokenizer built on std::string::find_first_of.
// Splits `str` at every character contained in `delims` and returns the
// non-empty pieces as owning strings; runs of delimiters, as well as leading
// and trailing delimiters, produce no empty tokens.
std::vector<std::string>
split(const std::string& str, const std::string& delims = " ")
{
	std::vector<std::string> tokens;

	for (std::string::size_type begin = 0; begin < str.size();)
	{
		const std::string::size_type end = str.find_first_of(delims, begin);

		if (end == std::string::npos)
		{
			// No delimiter left: everything from `begin` onwards is the last token.
			tokens.emplace_back(str.substr(begin));
			break;
		}

		// `end == begin` means the token would be empty, so it is skipped.
		if (end > begin)
			tokens.emplace_back(str, begin, end - begin);

		begin = end + 1;
	}

	return tokens;
}

// Iterator-based tokenizer built on the std::find_first_of algorithm.
// Returns the non-empty pieces of `str` separated by any character in
// `delims`, each copied into its own std::string.
std::vector<std::string>
splitStd(const std::string& str, const std::string& delims = " ")
{
	std::vector<std::string> tokens;

	const auto stop = std::cend(str);
	const auto delimBegin = std::cbegin(delims);
	const auto delimEnd = std::cend(delims);

	auto cursor = std::cbegin(str);
	while (cursor != stop)
	{
		const auto hit = std::find_first_of(cursor, stop, delimBegin, delimEnd);

		// An empty [cursor, hit) range means two delimiters were adjacent.
		if (hit != cursor)
			tokens.emplace_back(cursor, hit);

		// Step over the delimiter, or finish when none was found.
		cursor = (hit == stop) ? stop : std::next(hit);
	}

	return tokens;
}

// Raw-pointer tokenizer: walks the character buffer of `str` with
// std::find_first_of and copies every non-empty piece between delimiters
// (any character in `delims`) into the result.
std::vector<std::string> splitPtr(const std::string& str, const std::string& delims = " ")
{
	std::vector<std::string> tokens;

	const char* cursor = str.data();
	const char* const stop = cursor + str.size();

	while (cursor != stop) {
		const char* const hit = std::find_first_of(cursor, stop, std::cbegin(delims), std::cend(delims));

		if (hit != cursor)
			tokens.emplace_back(cursor, hit);

		// No delimiter found: the token just stored (if any) was the last one.
		if (hit == stop)
			break;

		cursor = hit + 1;
	}

	return tokens;
}

// Non-allocating (per token) tokenizer built on string_view::find_first_of.
// Returns views into `strv` for every non-empty piece separated by any
// character in `delims`. The views are only valid while the characters
// referenced by `strv` are alive.
std::vector<std::string_view>
splitSV(std::string_view strv, std::string_view delims = " ")
{
	std::vector<std::string_view> tokens;

	// `rest` is the not-yet-tokenized tail of the input; it shrinks from the front.
	for (std::string_view rest = strv; !rest.empty();)
	{
		const auto cut = rest.find_first_of(delims);

		if (cut == std::string_view::npos)
		{
			// No delimiter left: the whole tail is the final token.
			tokens.emplace_back(rest);
			break;
		}

		if (cut != 0)
			tokens.emplace_back(rest.substr(0, cut));

		// Drop the token together with the delimiter that ended it.
		rest.remove_prefix(cut + 1);
	}

	return tokens;
}

// Iterator-based string_view tokenizer built on the std::find_first_of
// algorithm. Returns views into `strv` for every non-empty piece separated by
// any character in `delims`; the views share the lifetime of the characters
// referenced by `strv`.
std::vector<std::string_view>
splitSVStd(std::string_view strv, std::string_view delims = " ")
{
	using size_type = std::string_view::size_type;

	std::vector<std::string_view> tokens;

	const auto origin = strv.begin();
	const auto stop = strv.end();

	for (auto cursor = origin; cursor != stop;)
	{
		const auto hit = std::find_first_of(cursor, stop,
			std::cbegin(delims), std::cend(delims));

		const auto length = std::distance(cursor, hit);
		if (length != 0)
		{
			// Translate the iterator pair back into an (offset, count) pair for substr.
			const auto offset = std::distance(origin, cursor);
			tokens.emplace_back(strv.substr(static_cast<size_type>(offset),
				static_cast<size_type>(length)));
		}

		if (hit == stop)
			break;

		cursor = std::next(hit);
	}

	return tokens;
}

// Raw-pointer string_view tokenizer.
//
// Splits `str` at every character contained in `delims` and returns views
// into `str` for the non-empty pieces (adjacent, leading and trailing
// delimiters yield no empty tokens). The returned views are valid only while
// the characters referenced by `str` are alive.
std::vector<std::string_view> splitSVPtr(std::string_view str, std::string_view delims = " ")
{
	std::vector<std::string_view> output;

	const char* first = str.data();
	const char* const last = first + str.size();

	while (first != last) {
		const char* const second = std::find_first_of(first, last, std::cbegin(delims), std::cend(delims));

		if (first != second)
			output.emplace_back(first, static_cast<std::string_view::size_type>(second - first));

		// Stop here instead of advancing: a string_view is not guaranteed to be
		// null-terminated, so computing `last + 1` could form a pointer beyond
		// one-past-the-end of the underlying array, which is undefined behaviour.
		if (second == last)
			break;

		// Skip the delimiter that terminated the current token.
		first = second + 1;
	}

	return output;
}

// Sample text used as the input of every split benchmark in this snippet.
// The value is assembled from adjacent string literals, which the compiler
// concatenates without inserting any separator. Three of the line breaks have
// no space on either side, so "elit,sed", "auteirure" and
// "pariatur.Excepteur" are each a single token when splitting on ' '.
const std::string_view LoremIpsumStrv{ "Lorem ipsum dolor sit amet, consectetur adipiscing elit,"
"sed do eiusmod tempor incididuntsuperlongwordsuper ut labore et dolore magna aliqua. Ut enim ad minim veniam, "
"quis nostrud exercitation ullamco laboris nisi ut aliquipsuperlongword ex ea commodo consequat. Duis aute"
"irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur."
"Excepteur sint occaecat cupidatatsuperlongword non proident, sunt in culpa qui officia deserunt mollit anim id est laborum." };

// Benchmark: split() (std::string::find_first_of) on an owning std::string.
static void StringSplit(benchmark::State& state) {
  // The std::string copy of the sample text is made once, outside the
  // measured loop.
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only this loop body is measured, once per iteration.
    auto tokens = split(input);
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringSplit);

// Benchmark: splitStd() (iterator-based std::find_first_of) on an owning
// std::string.
static void StringSplitStd(benchmark::State& state) {
  // The std::string copy of the sample text is made once, outside the
  // measured loop.
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only this loop body is measured, once per iteration.
    auto tokens = splitStd(input);
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringSplitStd);

// Benchmark: splitPtr() (raw-pointer std::find_first_of) on an owning
// std::string.
static void StringSplitPtr(benchmark::State& state) {
  // The std::string copy of the sample text is made once, outside the
  // measured loop.
  const std::string input{LoremIpsumStrv};
  for (auto _ : state) {
    // Only this loop body is measured, once per iteration.
    auto tokens = splitPtr(input);
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringSplitPtr);

// Benchmark: splitSV() (string_view::find_first_of) directly on the sample
// string_view, without building a std::string first.
static void StringViewSplit(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSV(LoremIpsumStrv);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringViewSplit);

// Benchmark: splitSVStd() (iterator-based std::find_first_of) directly on the
// sample string_view.
static void StringViewSplitStd(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSVStd(LoremIpsumStrv);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringViewSplitStd);

// Benchmark: splitSVPtr() (raw-pointer std::find_first_of) directly on the
// sample string_view.
static void StringViewSplitPtr(benchmark::State& state) {
  for (auto _ : state) {
    auto tokens = splitSVPtr(LoremIpsumStrv);
    // Keep the result observable so the call is not optimized away.
    benchmark::DoNotOptimize(tokens);
  }
}
// Make the benchmark known to the benchmark runner.
BENCHMARK(StringViewSplitPtr);

@nodejs-github-bot
Copy link
Collaborator

Review requested:

  • @nodejs/security-wg

@nodejs-github-bot nodejs-github-bot added c++ Issues and PRs that require attention from people who are familiar with C++. lib / src Issues and PRs related to general changes in the lib or src directory. needs-ci PRs that need a full CI run. labels Jun 18, 2023
@anonrig anonrig added the performance Issues and PRs related to the performance of Node.js. label Jun 18, 2023
@targos
Copy link
Member

targos commented Jun 18, 2023

Please at least post the benchmark results in the PR. Your link may become dead in the future.

@anonrig anonrig added author ready PRs that have at least one approval, no pending requests for changes, and a CI started. request-ci Add this label to start a Jenkins CI on a PR. labels Jun 21, 2023
@github-actions github-actions bot removed the request-ci Add this label to start a Jenkins CI on a PR. label Jun 21, 2023
@nodejs-github-bot
Copy link
Collaborator

@nodejs-github-bot
Copy link
Collaborator

@anonrig anonrig removed the author ready PRs that have at least one approval, no pending requests for changes, and a CI started. label Jun 22, 2023
src/node_options.cc Outdated Show resolved Hide resolved
src/node_options.cc Outdated Show resolved Hide resolved
src/node_options.cc Outdated Show resolved Hide resolved
src/node_options.cc Outdated Show resolved Hide resolved
src/node_options.cc Outdated Show resolved Hide resolved
src/node_v8_platform-inl.h Outdated Show resolved Hide resolved
@anonrig anonrig force-pushed the refactor-split-string branch from 8304083 to 27d1a87 Compare June 22, 2023 19:27
@anonrig
Copy link
Member Author

anonrig commented Jun 22, 2023

cc @nodejs/cpp-reviewers

@anonrig anonrig force-pushed the refactor-split-string branch from 27d1a87 to 63f8f96 Compare June 22, 2023 19:47
@RafaelGSS RafaelGSS added the request-ci Add this label to start a Jenkins CI on a PR. label Jun 22, 2023
@github-actions github-actions bot removed the request-ci Add this label to start a Jenkins CI on a PR. label Jun 22, 2023
@nodejs-github-bot
Copy link
Collaborator

@anonrig anonrig added commit-queue Add this label to land a pull request using GitHub Actions. commit-queue-squash Add this label to instruct the Commit Queue to squash all the PR commits into the first one. labels Jun 22, 2023
@nodejs-github-bot nodejs-github-bot removed the commit-queue Add this label to land a pull request using GitHub Actions. label Jun 22, 2023
@nodejs-github-bot nodejs-github-bot merged commit 640a791 into nodejs:main Jun 22, 2023
@nodejs-github-bot
Copy link
Collaborator

Landed in 640a791

RafaelGSS pushed a commit that referenced this pull request Jul 3, 2023
PR-URL: #48491
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
@RafaelGSS RafaelGSS mentioned this pull request Jul 3, 2023
Ceres6 pushed a commit to Ceres6/node that referenced this pull request Aug 14, 2023
PR-URL: nodejs#48491
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Ceres6 pushed a commit to Ceres6/node that referenced this pull request Aug 14, 2023
PR-URL: nodejs#48491
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
ruyadorno pushed a commit that referenced this pull request Sep 10, 2023
PR-URL: #48491
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
@ruyadorno ruyadorno mentioned this pull request Sep 10, 2023
ruyadorno pushed a commit that referenced this pull request Sep 13, 2023
PR-URL: #48491
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Stephen Belanger <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
tniessen added a commit to tniessen/node that referenced this pull request Nov 10, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: nodejs#48491
Refs: nodejs#49047
tniessen added a commit to tniessen/node that referenced this pull request Nov 17, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: nodejs#48491
Refs: nodejs#49047
nodejs-github-bot pushed a commit that referenced this pull request Nov 19, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
targos pushed a commit that referenced this pull request Nov 23, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
martenrichter pushed a commit to martenrichter/node that referenced this pull request Nov 26, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: nodejs#48491
Refs: nodejs#49047
PR-URL: nodejs#50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
lucshi pushed a commit to lucshi/node that referenced this pull request Nov 27, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: nodejs#48491
Refs: nodejs#49047
PR-URL: nodejs#50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
RafaelGSS pushed a commit that referenced this pull request Nov 29, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
RafaelGSS pushed a commit that referenced this pull request Nov 30, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
UlisesGascon pushed a commit that referenced this pull request Dec 11, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
UlisesGascon pushed a commit that referenced this pull request Dec 19, 2023
The use of string_view and subsequent copying to a string was supposed
to be a minor optimization in 640a7918, however, since 413c16e, no
string splitting occurs anymore. Therefore, we can simply pass around
some references instead of using string_view or copying strings.

Refs: #48491
Refs: #49047
PR-URL: #50662
Reviewed-By: Yagiz Nizipli <[email protected]>
Reviewed-By: Rafael Gonzaga <[email protected]>
Reviewed-By: Marco Ippolito <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
c++ Issues and PRs that require attention from people who are familiar with C++. commit-queue-squash Add this label to instruct the Commit Queue to squash all the PR commits into the first one. lib / src Issues and PRs related to general changes in the lib or src directory. needs-ci PRs that need a full CI run. performance Issues and PRs related to the performance of Node.js.
Projects
None yet
Development

Successfully merging this pull request may close these issues.

7 participants