From 8045692df6af6e6665042d51e2c835b6fafe0e7b Mon Sep 17 00:00:00 2001 From: Francois Marier Date: Tue, 20 Aug 2019 14:36:55 -0700 Subject: [PATCH] Filter user-tracking parameters from query string (fixes brave/brave-browser#4239) If a URL's query string includes one of the parameter names known to track individual users, we remove them. https://support.google.com/analytics/answer/7519794 https://stackoverflow.com/questions/52847475/what-is-fbclid-the-new-facebook-parameter https://about.ads.microsoft.com/en-us/blog/post/january-2018/conversion-tracking-update-on-bing-ads https://developer.mailchimp.com/documentation/mailchimp/guides/getting-started-with-ecommerce/#e-commerce-tracking-and-reports --- ...rave_site_hacks_network_delegate_helper.cc | 84 +++++++++++++++++++ ..._hacks_network_delegate_helper_unittest.cc | 83 ++++++++++++++++++ 2 files changed, 167 insertions(+) diff --git a/browser/net/brave_site_hacks_network_delegate_helper.cc b/browser/net/brave_site_hacks_network_delegate_helper.cc index 6140f5710eec..e7f9ce51f6e1 100644 --- a/browser/net/brave_site_hacks_network_delegate_helper.cc +++ b/browser/net/brave_site_hacks_network_delegate_helper.cc @@ -7,6 +7,7 @@ #include #include +#include #include "base/sequenced_task_runner.h" #include "base/strings/string_util.h" @@ -29,6 +30,10 @@ namespace brave { namespace { +const std::vector query_string_trackers = { + "fbclid", "gclid", "msclkid", "mc_eid" +}; + bool ApplyPotentialReferrerBlock(std::shared_ptr ctx) { DCHECK_CURRENTLY_ON(BrowserThread::IO); GURL target_origin = ctx->request_url.GetOrigin(); @@ -50,12 +55,91 @@ bool ApplyPotentialReferrerBlock(std::shared_ptr ctx) { return false; } +bool ApplyPotentialQueryStringFilter(std::shared_ptr ctx) { + DCHECK_CURRENTLY_ON(BrowserThread::IO); + if (!ctx->request_url.has_query()) { + return false; + } + + std::string new_query = ctx->request_url.query(); + + bool modified = false; + for (auto tracker : query_string_trackers) { + size_t key_size, key_start; + + // Look for the tracker anywhere in the query string + // (e.g. ?...fbclid=1234...). + { + const std::string key = tracker + "="; + key_size = key.size(); + key_start = new_query.find(key, 0); + } + if (key_start == std::string::npos) { + continue; + } + + // Check to see if the tracker is preceded by a '&' + // (e.g. ?...&fbclid=1234...). + if (key_start != 0) { + if (new_query[--key_start] != '&') { + // Tracker is substring of another key (e.g. ?...&abc-fbclid=1234...). + continue; + } + key_size++; + } + + { + // Find the size of the tracking parameter value. + const size_t value_start = key_start + key_size; + size_t value_end = new_query.find("&", key_start + 1); + if (value_end == std::string::npos) { + value_end = new_query.size(); // Tracker is the last query parameter. + } + if (value_end <= value_start) { + DCHECK_EQ(value_end, value_start); + continue; // Empty value, no need to remove tracker. + } + + // Remove tracking parameter and its value from the string. + modified = true; + new_query.erase(key_start, value_end - key_start); + } + + // Remove the trailing '&' if we removed the first parameter + // (e.g. ?fbclid=1234&...). + if (!new_query.empty() && key_start == 0) { + DCHECK_EQ(new_query[0], '&'); + new_query.erase(key_start, 1); + } + + if (new_query.empty()) { + break; // Nothing left to filter. + } + } + + if (modified) { + url::Replacements replacements; + if (new_query.empty()) { + replacements.ClearQuery(); + } else { + replacements.SetQuery(new_query.c_str(), + url::Component(0, new_query.size())); + } + ctx->new_url_spec = + ctx->request_url.ReplaceComponents(replacements).spec(); + return true; + } + + return false; +} + } // namespace int OnBeforeURLRequest_SiteHacksWork( const ResponseCallback& next_callback, std::shared_ptr ctx) { ApplyPotentialReferrerBlock(ctx); + ApplyPotentialQueryStringFilter(ctx); return net::OK; } diff --git a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc index 2e1d62bbcb7b..aef25e7f0291 100644 --- a/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc +++ b/browser/net/brave_site_hacks_network_delegate_helper_unittest.cc @@ -7,6 +7,7 @@ #include #include +#include #include #include "brave/browser/net/url_context.h" @@ -267,4 +268,86 @@ TEST_F(BraveSiteHacksNetworkDelegateHelperTest, }); } +TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringUntouched) { + std::vector urls({ + GURL("https://example.com/"), + GURL("https://example.com/?"), + GURL("https://example.com/?+%20"), + GURL("https://user:pass@example.com/path/file.html?foo=1#fragment"), + GURL("http://user:pass@example.com/path/file.html?foo=1&bar=2#fragment"), + GURL("https://example.com/?file=https%3A%2F%2Fexample.com%2Ftest.pdf"), + GURL("https://example.com/?title=1+2&caption=1%202"), + GURL("https://example.com/?foo=1&&bar=2#fragment"), + GURL("https://example.com/?foo&bar=&#fragment"), + GURL("https://example.com/?foo=1&fbcid=no&gcid=no&mc_cid=no&bar=&#frag"), + GURL("https://example.com/?fbclid=&gclid&=mc_eid&msclkid="), + GURL("https://example.com/?value=fbclid=1¬-gclid=2&foo+mc_eid=3"), + GURL("https://example.com/?+fbclid=1"), + GURL("https://example.com/?%20fbclid=1"), + GURL("https://example.com/#fbclid=1"), + }); + std::for_each(urls.begin(), urls.end(), [this](GURL url){ + net::TestDelegate test_delegate; + std::unique_ptr request = + context()->CreateRequest(url, net::IDLE, &test_delegate, + TRAFFIC_ANNOTATION_FOR_TESTS); + + std::shared_ptr + brave_request_info(new brave::BraveRequestInfo()); + brave::BraveRequestInfo::FillCTXFromRequest(request.get(), + brave_request_info); + brave::ResponseCallback callback; + int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback, + brave_request_info); + EXPECT_EQ(ret, net::OK); + // new_url should not be set + EXPECT_TRUE(brave_request_info->new_url_spec.empty()); + EXPECT_EQ(request->url(), url); + }); +} + +TEST_F(BraveSiteHacksNetworkDelegateHelperTest, QueryStringFiltered) { + std::vector > urls({ + // { original url, expected url after filtering } + { GURL("https://example.com/?fbclid=1234"), GURL("https://example.com/") }, + { GURL("https://example.com/?fbclid=1234&"), GURL("https://example.com/") }, + { GURL("https://example.com/?&fbclid=1234"), GURL("https://example.com/") }, + { GURL("https://example.com/?gclid=1234"), GURL("https://example.com/") }, + { GURL("https://example.com/?fbclid=0&gclid=1&msclkid=a&mc_eid=a1"), + GURL("https://example.com/") }, + { GURL("https://example.com/?fbclid=&foo=1&bar=2&gclid=abc"), + GURL("https://example.com/?fbclid=&foo=1&bar=2") }, + { GURL("https://example.com/?fbclid=&foo=1&gclid=1234&bar=2"), + GURL("https://example.com/?fbclid=&foo=1&bar=2") }, + { GURL("http://u:p@example.com/path/file.html?foo=1&fbclid=abcd#fragment"), + GURL("http://u:p@example.com/path/file.html?foo=1#fragment") }, + // Obscure edge cases that break most parsers: + { GURL("https://example.com/?fbclid&foo&&gclid=2&bar=&%20"), + GURL("https://example.com/?fbclid&foo&&bar=&%20") }, + { GURL("https://example.com/?fbclid=1&1==2&=msclkid&foo=bar&&a=b=c&"), + GURL("https://example.com/?1==2&=msclkid&foo=bar&&a=b=c&") }, + { GURL("https://example.com/?fbclid=1&=2&?foo=yes&bar=2+"), + GURL("https://example.com/?=2&?foo=yes&bar=2+") }, + { GURL("https://example.com/?fbclid=1&a+b+c=some%20thing&1%202=3+4"), + GURL("https://example.com/?a+b+c=some%20thing&1%202=3+4") }, + }); + std::for_each(urls.begin(), urls.end(), [this](std::pair url){ + net::TestDelegate test_delegate; + std::unique_ptr request = + context()->CreateRequest(url.first, net::IDLE, &test_delegate, + TRAFFIC_ANNOTATION_FOR_TESTS); + + std::shared_ptr + brave_request_info(new brave::BraveRequestInfo()); + brave::BraveRequestInfo::FillCTXFromRequest(request.get(), + brave_request_info); + brave::ResponseCallback callback; + int ret = brave::OnBeforeURLRequest_SiteHacksWork(callback, + brave_request_info); + EXPECT_EQ(ret, net::OK); + EXPECT_STREQ(brave_request_info->new_url_spec.c_str(), + url.second.spec().c_str()); + }); +} + } // namespace