-
Notifications
You must be signed in to change notification settings - Fork 86
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
URI encoding/decoding fixes #192
Changes from 2 commits
e2828f8
b2b5e9a
9e409e5
4e9f8ec
41cce87
6a72dd9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -35,7 +35,6 @@ Remotes: | |
r-lib/later | ||
Collate: | ||
'RcppExports.R' | ||
'decode_uri.R' | ||
'httpuv.R' | ||
'server.R' | ||
'static_paths.R' | ||
|
This file was deleted.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -490,7 +490,7 @@ std::string doEncodeURI(std::string value, bool encodeReserved) { | |
for (std::string::const_iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
|
||
if (!needsEscape(*it, encodeReserved)) { | ||
os << *it; | ||
} else { | ||
|
@@ -501,60 +501,66 @@ std::string doEncodeURI(std::string value, bool encodeReserved) { | |
} | ||
|
||
//' URI encoding/decoding | ||
//' | ||
//' | ||
//' Encodes/decodes strings using URI encoding/decoding in the same way that web | ||
//' browsers do. The precise behaviors of these functions can be found at | ||
//' developer.mozilla.org: | ||
//' \href{https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI}{encodeURI}, | ||
//' \href{https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent}{encodeURIComponent}, | ||
//' \href{https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURI}{decodeURI}, | ||
//' \href{https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent}{decodeURIComponent} | ||
//' | ||
//' | ||
//' Intended as a faster replacement for \code{\link[utils]{URLencode}} and | ||
//' \code{\link[utils]{URLdecode}}. | ||
//' | ||
//' | ||
//' encodeURI differs from encodeURIComponent in that the former will not encode | ||
//' reserved characters: \code{;,/?:@@&=+$} | ||
//' | ||
//' | ||
//' decodeURI differs from decodeURIComponent in that it will refuse to decode | ||
//' encoded sequences that decode to a reserved character. (If in doubt, use | ||
//' decodeURIComponent.) | ||
//' | ||
//' | ||
//' The only way these functions differ from web browsers is in the encoding of | ||
//' non-ASCII characters. All non-ASCII characters will be escaped byte-by-byte. | ||
//' If conformant non-ASCII behavior is important, ensure that your input vector | ||
//' is UTF-8 encoded before calling encodeURI or encodeURIComponent. | ||
//' | ||
//' | ||
//' @param value Character vector to be encoded or decoded. | ||
//' @return Encoded or decoded character vector of the same length as the | ||
//' input value. \code{decodeURI} and \code{decodeURIComponent} will return | ||
//' strings that are UTF-8 encoded. | ||
//' | ||
//' @export | ||
// [[Rcpp::export]] | ||
std::vector<std::string> encodeURI(std::vector<std::string> value) { | ||
for (std::vector<std::string>::iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
Rcpp::CharacterVector encodeURI(Rcpp::CharacterVector value) { | ||
Rcpp::CharacterVector out(value.size()); | ||
|
||
*it = doEncodeURI(*it, false); | ||
for (int i = 0; i < value.size(); i++) { | ||
if (value[i] == NA_STRING) { | ||
out[i] = NA_STRING; | ||
} else { | ||
const char* s = doEncodeURI(Rcpp::as<std::string>(value[i]), false).c_str(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't you have an encoding problem here too? What does inline const char* string_utf8(SEXP x, int i) {
return Rf_translateCharUTF8(STRING_ELT(x, i));
} There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, that's true. Our current documentation for
This differs from the behavior of utf8_str <- "\ue5" # "å", in UTF-8
latin1_str <- iconv(utf8_str, "UTF-8", "latin1")
utf8_str
#> [1] "å"
latin1_str
#> [1] "å"
# Look at raw bytes
charToRaw(utf8_str)
#> [1] c3 a5
charToRaw(latin1_str)
#> [1] e5
# base::URLencode
URLencode(utf8_str)
#> [1] "%C3%A5"
URLencode(latin1_str)
#> [1] "%C3%A5"
# httpuv::encodeURI
httpuv::encodeURI(utf8_str)
#> [1] "%C3%A5"
httpuv::encodeURI(latin1_str)
#> [1] "%E5" |
||
out[i] = Rf_mkCharCE(s, CE_UTF8); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this needs a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should not because you're immediately assigning into an object that Rcpp should be PROTECTing. |
||
} | ||
} | ||
|
||
return value; | ||
return out; | ||
} | ||
|
||
//' @rdname encodeURI | ||
//' @export | ||
// [[Rcpp::export]] | ||
std::vector<std::string> encodeURIComponent(std::vector<std::string> value) { | ||
for (std::vector<std::string>::iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
Rcpp::CharacterVector encodeURIComponent(Rcpp::CharacterVector value) { | ||
Rcpp::CharacterVector out(value.size()); | ||
|
||
*it = doEncodeURI(*it, true); | ||
for (int i = 0; i < value.size(); i++) { | ||
if (value[i] == NA_STRING) { | ||
out[i] = NA_STRING; | ||
} else { | ||
const char* s = doEncodeURI(Rcpp::as<std::string>(value[i]), true).c_str(); | ||
out[i] = Rf_mkCharCE(s, CE_UTF8); | ||
} | ||
} | ||
|
||
return value; | ||
return out; | ||
} | ||
|
||
int hexToInt(char c) { | ||
|
@@ -584,14 +590,14 @@ std::string doDecodeURI(std::string value, bool component) { | |
for (std::string::const_iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
|
||
// If there aren't enough characters left for this to be a | ||
// valid escape code, just use the character and move on | ||
if (it > value.end() - 3) { | ||
os << *it; | ||
continue; | ||
} | ||
|
||
if (*it == '%') { | ||
char hi = *(++it); | ||
char lo = *(++it); | ||
|
@@ -612,32 +618,45 @@ std::string doDecodeURI(std::string value, bool component) { | |
os << *it; | ||
} | ||
} | ||
|
||
return os.str(); | ||
} | ||
|
||
|
||
//' @rdname encodeURI | ||
//' @export | ||
// [[Rcpp::export]] | ||
std::vector<std::string> decodeURI_(std::vector<std::string> value) { | ||
for (std::vector<std::string>::iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
Rcpp::CharacterVector decodeURI(Rcpp::CharacterVector value) { | ||
Rcpp::CharacterVector out(value.size()); | ||
|
||
*it = doDecodeURI(*it, false); | ||
for (int i = 0; i < value.size(); i++) { | ||
if (value[i] == NA_STRING) { | ||
out[i] = NA_STRING; | ||
} else { | ||
const char* s = doDecodeURI(Rcpp::as<std::string>(value[i]), false).c_str(); | ||
out[i] = Rf_mkCharCE(s, CE_UTF8); | ||
} | ||
} | ||
return value; | ||
|
||
return out; | ||
} | ||
|
||
//' @rdname encodeURI | ||
//' @export | ||
// [[Rcpp::export]] | ||
std::vector<std::string> decodeURIComponent_(std::vector<std::string> value) { | ||
for (std::vector<std::string>::iterator it = value.begin(); | ||
it != value.end(); | ||
it++) { | ||
Rcpp::CharacterVector decodeURIComponent(Rcpp::CharacterVector value) { | ||
Rcpp::CharacterVector out(value.size()); | ||
|
||
*it = doDecodeURI(*it, true); | ||
for (int i = 0; i < value.size(); i++) { | ||
if (value[i] == NA_STRING) { | ||
out[i] = NA_STRING; | ||
} else { | ||
const char* s = doDecodeURI(Rcpp::as<std::string>(value[i]), true).c_str(); | ||
out[i] = Rf_mkCharCE(s, CE_UTF8); | ||
} | ||
} | ||
return value; | ||
|
||
return out; | ||
} | ||
|
||
//' Check whether an address is IPv4 or IPv6 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You might want to see if the CharacterVector is already filled with `NA`s.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like `CharacterVector(n)` will return a vector filled with `""`, but `CharacterVector(n, NA_STRING)` returns a vector filled with `NA`s.