-
Notifications
You must be signed in to change notification settings - Fork 274
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add deploy workflow for standalone CORS proxy (#2022)
## Motivation for the change, related issues It is likely that some folks will attempt to abuse the CORS proxy, and if they succeed despite our defensive measures, we would like to limit the risk of that abuse being associated with the Playground web app itself. Let's separate the CORS proxy from the web app. ## Implementation details This PR adds a workflow to deploy the CORS proxy to a dedicated host. We need to help folks transition from the CORS proxy on playground.wordpress.net to the proxy on the dedicated host. We could even consider keeping a more restricted version of the proxy on playground.wordpress.net, though having two instances would likely be confusing. ## Testing Instructions (or ideally a Blueprint) - After merge, manually run the deployment workflow and confirm that the CORS proxy works on the dedicated host.
- Loading branch information
1 parent
0b4eafd
commit 51db66a
Showing
4 changed files
with
374 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
name: Deploy CORS proxy | ||
|
||
on: | ||
workflow_dispatch: | ||
|
||
concurrency: | ||
group: cors-proxy-deployment | ||
|
||
jobs: | ||
build_and_deploy: | ||
# Only run this workflow from the trunk branch and when it's triggered by a maintainer listed below | ||
# TODO: Can we check for group membership? | ||
if: > | ||
github.ref == 'refs/heads/trunk' && ( | ||
github.event_name == 'workflow_run' || | ||
github.event_name == 'workflow_dispatch' || | ||
github.actor == 'adamziel' || | ||
github.actor == 'dmsnell' || | ||
github.actor == 'bgrgicak' || | ||
github.actor == 'brandonpayton' || | ||
github.actor == 'zaerl' || | ||
github.actor == 'akirk' | ||
) | ||
# Specify runner + deployment step | ||
runs-on: ubuntu-latest | ||
environment: | ||
name: playground-wordpress-net-wp-cloud | ||
steps: | ||
- uses: actions/checkout@v4 | ||
with: | ||
sparse-checkout: packages/playground/php-cors-proxy | ||
packages/playground/php-cors-proxy-deployment | ||
|
||
- name: Deploy to CORS proxy server | ||
shell: bash | ||
# TODO: Use completely separate environments for website and CORS proxy deployments | ||
run: | | ||
mkdir -p ~/.ssh | ||
echo "${{ secrets.DEPLOY_WEBSITE_TARGET_HOST_KEY }}" >> ~/.ssh/known_hosts | ||
echo "${{ secrets.DEPLOY_WEBSITE_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 | ||
chmod 0600 ~/.ssh/* | ||
# CORS proxy files | ||
rsync --verbose --archive --compress -e "ssh -i ~/.ssh/id_ed25519" \ | ||
--include '*/' --include '*.php' --exclude '*' \ | ||
--delete --prune-empty-dirs \ | ||
packages/playground/php-cors-proxy/ \ | ||
${{ secrets.DEPLOY_CORS_PROXY_TARGET_USER }}@${{ secrets.DEPLOY_WEBSITE_TARGET_HOST }}:'~/updated-proxy-files' | ||
# Host-specific deployment scripts and server config | ||
rsync --verbose --archive --compress -e "ssh -i ~/.ssh/id_ed25519" --delete \ | ||
packages/playground/php-cors-proxy-deployment/ \ | ||
${{ secrets.DEPLOY_CORS_PROXY_TARGET_USER }}@${{ secrets.DEPLOY_WEBSITE_TARGET_HOST }}:'~/cors-proxy-deployment' | ||
# Apply update | ||
ssh -i ~/.ssh/id_ed25519 \ | ||
${{ secrets.DEPLOY_CORS_PROXY_TARGET_USER }}@${{ secrets.DEPLOY_WEBSITE_TARGET_HOST }} \ | ||
-tt -C '~/cors-proxy-deployment/apply-update.sh' |
19 changes: 19 additions & 0 deletions
19
packages/playground/php-cors-proxy-deployment/apply-update.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
#!/bin/bash | ||
|
||
set -euo pipefail | ||
|
||
echo Syncing staged files to production | ||
rsync -av --delete --no-perms --omit-dir-times ~/updated-proxy-files/ /srv/htdocs/ | ||
|
||
echo Purging edge cache | ||
curl -sS -X POST -H "Auth: $ATOMIC_SITE_API_KEY" "$SITE_API_BASE/edge-cache/$ATOMIC_SITE_ID/purge" \ | ||
> /dev/null \ | ||
&& echo "Edge cache purged" \ | ||
|| (>&2 echo "Failed to purge edge cache" && false) | ||
|
||
echo Applying latest CORS proxy rate-limiting schema | ||
# NOTE: This will reset rate-limiting token buckets, but that should be tolerable | ||
# as long as we're generally discouraging abuse of the proxy. | ||
cat ~/website-deployment/cors-proxy-rate-limiting-table.sql | mysql --database="$DB_NAME" | ||
|
||
echo Done! |
285 changes: 285 additions & 0 deletions
285
packages/playground/php-cors-proxy-deployment/cors-proxy-config.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,285 @@ | ||
<?php | ||
|
||
class PlaygroundCorsProxyTokenBucketConfig { | ||
public function __construct( | ||
public int $capacity, | ||
public int $fill_rate_per_minute, | ||
) {} | ||
} | ||
|
||
function playground_cors_proxy_get_token_bucket_config($remote_proxy_type) { | ||
// @TODO: Support custom HTTP header to select test bucket config. | ||
|
||
// NOTE: 2024-10-09: These are just guesses for reasonable bucket configs. | ||
// They are not based on any real-world data. Please adjust them as needed. | ||
switch ($remote_proxy_type) { | ||
case 'VPN': | ||
case 'TOR': | ||
case 'PUB': | ||
case 'WEB': | ||
return new PlaygroundCorsProxyTokenBucketConfig( | ||
capacity: 1000, | ||
fill_rate_per_minute: 100, | ||
); | ||
case 'SES': | ||
// If it's even possible to receive CORS proxy requests from | ||
// search engine bots, let's refuse them because they don't | ||
// need this service. | ||
return new PlaygroundCorsProxyTokenBucketConfig( | ||
capacity: 0, | ||
fill_rate_per_minute: 0, | ||
); | ||
default: | ||
// NOTE: 2024-10-14: Initially, let's make the default rate limit | ||
// the same as the limit for proxy IPs because some working on | ||
// Playground are typically proxied and may not otherwise notice | ||
// a problem with the default rate limit. | ||
return new PlaygroundCorsProxyTokenBucketConfig( | ||
capacity: 1000, | ||
fill_rate_per_minute: 100, | ||
); | ||
} | ||
} | ||
|
||
class PlaygroundCorsProxyTokenBucket { | ||
private $dbh = null; | ||
private $connected_to_db = false; | ||
|
||
public function __construct() { | ||
$this->dbh = mysqli_init(); | ||
if ($this->dbh) { | ||
$this->connected_to_db = mysqli_real_connect( | ||
$this->dbh, | ||
DB_HOST, | ||
DB_USER, | ||
DB_PASSWORD, | ||
DB_NAME | ||
); | ||
} | ||
} | ||
|
||
public function dispose() { | ||
if ($this->dbh && $this->connected_to_db) { | ||
mysqli_close($this->dbh); | ||
} | ||
} | ||
|
||
public function obtain_token($remote_ip, $bucket_config) { | ||
if (!$this->connected_to_db) { | ||
return false; | ||
} | ||
|
||
$cleanup_query = <<<'SQL' | ||
DELETE FROM cors_proxy_rate_limiting | ||
WHERE updated_at < DATE_SUB(NOW(), INTERVAL 1 DAY) | ||
SQL; | ||
if (mysqli_query($this->dbh, $cleanup_query) === false) { | ||
error_log( | ||
'Failed to clean up old token buckets: ' . | ||
mysqli_error($this->dbh) | ||
); | ||
return false; | ||
} | ||
|
||
// Abort if tokens are never available. | ||
if ($bucket_config->capacity <= 0) { | ||
return false; | ||
} | ||
|
||
/** | ||
* Aggregate addresses into /64 subnets. | ||
* This prevents a person with an entire /64 subnet from | ||
* exhausting the storage or getting more than their fair | ||
* share of the tokens. | ||
*/ | ||
$remote_subnet = playground_ip_to_a_64_subnet($remote_ip); | ||
|
||
$token_query = <<<'SQL' | ||
INSERT INTO cors_proxy_rate_limiting ( | ||
remote_addr, | ||
capacity, | ||
fill_rate_per_minute, | ||
tokens | ||
) | ||
WITH | ||
config AS ( | ||
SELECT | ||
? AS remote_addr, | ||
? AS capacity, | ||
? AS fill_rate_per_minute | ||
), | ||
bucket AS ( | ||
SELECT | ||
remote_addr, | ||
tokens AS previous_tokens, | ||
updated_at AS previous_updated_at, | ||
-- Ensure we don't exceed the capacity. | ||
LEAST( | ||
config.capacity, | ||
tokens + FLOOR( | ||
config.fill_rate_per_minute | ||
* TIMESTAMPDIFF(SECOND, updated_at, NOW()) | ||
/ 60 | ||
) | ||
) AS available_tokens | ||
FROM cors_proxy_rate_limiting INNER JOIN config USING (remote_addr) | ||
) | ||
SELECT | ||
config.remote_addr, | ||
config.capacity, | ||
config.fill_rate_per_minute, | ||
-- Make sure we stay within bounds. | ||
GREATEST( | ||
0, | ||
COALESCE(bucket.available_tokens, config.capacity) - 1 | ||
) AS tokens | ||
FROM config LEFT OUTER JOIN bucket USING (remote_addr) | ||
ON DUPLICATE KEY UPDATE | ||
capacity = VALUES(capacity), | ||
fill_rate_per_minute = VALUES(fill_rate_per_minute), | ||
tokens = VALUES(tokens), | ||
-- Force a row update by updating the timestamp when we've consumed a token, | ||
-- unless the number of available tokens remains at zero. | ||
-- @TODO Set last updated to use the time of the last possible token. Otherwise, the aggregate error may be noticeable. | ||
updated_at = IF( | ||
bucket.available_tokens = 0 AND bucket.previous_tokens = 0, | ||
bucket.previous_updated_at, | ||
NOW() | ||
) | ||
SQL; | ||
|
||
$token_statement = mysqli_prepare($this->dbh, $token_query); | ||
mysqli_stmt_bind_param( | ||
$token_statement, | ||
'sii', | ||
$remote_subnet, | ||
$bucket_config->capacity, | ||
$bucket_config->fill_rate_per_minute | ||
); | ||
|
||
if ( | ||
mysqli_stmt_execute($token_statement) && | ||
mysqli_stmt_affected_rows($token_statement) > 0 | ||
) { | ||
return true; | ||
} | ||
|
||
return false; | ||
} | ||
|
||
} | ||
|
||
function playground_cors_proxy_maybe_rate_limit() { | ||
// IP metadata from WP Cloud. | ||
$remote_proxy_type = $_SERVER['HTTP_X_IP_PROXY_TYPE'] ?? 'UNK'; | ||
$bucket_config = playground_cors_proxy_get_token_bucket_config( | ||
$remote_proxy_type | ||
); | ||
|
||
// We cannot trust the X-Forwarded-For header everywhere, | ||
// but we are trusting WP Cloud to provide an accurate value for it. | ||
$remote_ip = $_SERVER['HTTP_X_FORWARDED_FOR'] ?? $_SERVER['REMOTE_ADDR']; | ||
|
||
if (filter_var($remote_ip, FILTER_VALIDATE_IP) === false) { | ||
// Validate the IP address before using it as a key in the database. | ||
error_log('Invalid IP address: ' . var_export($remote_ip, true)); | ||
return false; | ||
} | ||
|
||
$token_bucket = new PlaygroundCorsProxyTokenBucket(); | ||
try { | ||
if (!$token_bucket->obtain_token($remote_ip, $bucket_config)) { | ||
http_response_code(429); | ||
echo "Rate limit exceeded"; | ||
exit; | ||
} | ||
} finally { | ||
$token_bucket->dispose(); | ||
} | ||
} | ||
|
||
/** | ||
* Converts the IP address to a key used for rate limiting. | ||
* It groups addresses into buckets of size /64. | ||
* | ||
* @return string The encoded IPv6 address or null if the input is not a valid IP address. | ||
*/ | ||
function playground_ip_to_a_64_subnet(string $ip_v4_or_v6): string { | ||
$ipv6_remote_ip = $ip_v4_or_v6; | ||
if (filter_var($ip_v4_or_v6, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { | ||
/** | ||
* Convert IPv4 to IPv6 mapped address for storage. | ||
* Do not group these addresses into buckets since | ||
* the number of IPv4 addresses is less than the number | ||
* of IPv6 addresses. | ||
*/ | ||
$ipv6_remote_ip = '::ffff:' . $ip_v4_or_v6; | ||
} else { | ||
/** | ||
* Zero out the last 64 bits of the IPv6 address for storage. | ||
* This means we're only considering the first 64 bits of the | ||
* address for rate limiting. This way, a person with an entire | ||
* /64 subnet cannot get more than their fair share of the | ||
* tokens. | ||
* | ||
* NOTE: This measure may be a bit heavy-handed to start with, | ||
* but we will not know until we are actually seeing requests | ||
* from IPv6 requests in the wild. If individual users are | ||
* experiencing too much rate-limiting, we can consider | ||
* limiting individual addresses first and only addressing | ||
* entire blocks once an IPv6 block has a number of IPs | ||
* that are hitting rate limits. | ||
*/ | ||
$ipv6_block = playground_get_ipv6_block($ipv6_remote_ip, 64); | ||
if ($ipv6_block === null) { | ||
error_log('Failed to get IPv6 block for ' . $ip_v4_or_v6); | ||
// Use the original IP address as a fallback when the block | ||
// cannot be determined. | ||
return $ip_v4_or_v6; | ||
} | ||
$ipv6_remote_ip = playground_encode_ipv6($ipv6_block); | ||
} | ||
return $ipv6_remote_ip; | ||
} | ||
|
||
/** | ||
* Returns the /64 subnet of the given IPv6 address. | ||
*/ | ||
function playground_get_ipv6_block(string $ipv6_remote_ip, int $block_size=64): ?string { | ||
$ip = inet_pton($ipv6_remote_ip); | ||
// $ip is a binary string of length 16, each bit represents a bit | ||
// of the ipv6 address. | ||
if ($ip === false) { | ||
return null; | ||
} | ||
|
||
if ($block_size % 8 !== 0) { | ||
// We're using a naive substr-based approach that reasons about | ||
// groups of 8 bits (characters) and not separately about each bit. | ||
// This approach can only support block sizes that are multiples | ||
// of 8. | ||
throw new Exception('Block size must be a multiple of 8.'); | ||
} | ||
|
||
$max_block_size = 128; | ||
if ($block_size > $max_block_size) { | ||
throw new Exception('Block size must be less than or equal to 128.'); | ||
} | ||
|
||
$subnet_length = $block_size / 8; | ||
$requested_block = substr($ip, 0, $subnet_length); | ||
$backfill_zeros = str_repeat(chr(0), 16 - $subnet_length); | ||
return $requested_block . $backfill_zeros; | ||
} | ||
|
||
function playground_encode_ipv6(string $ipv6): string { | ||
$hex_string = bin2hex($ipv6); | ||
$hex_string_length = strlen($hex_string); | ||
|
||
// Split the hex string into 4 groups of 4 characters. | ||
$groups = []; | ||
for ($i = 0; $i < $hex_string_length; $i += 4) { | ||
$groups[] = substr($hex_string, $i, 4); | ||
} | ||
return strtoupper(implode(':', $groups)); | ||
} |
11 changes: 11 additions & 0 deletions
11
packages/playground/php-cors-proxy-deployment/cors-proxy-rate-limiting-table.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
DROP TABLE IF EXISTS cors_proxy_rate_limiting; | ||
CREATE TABLE cors_proxy_rate_limiting ( | ||
remote_addr INET6 PRIMARY KEY, | ||
capacity SMALLINT UNSIGNED NOT NULL, | ||
fill_rate_per_minute SMALLINT UNSIGNED NOT NULL, | ||
tokens SMALLINT UNSIGNED NOT NULL, | ||
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, | ||
-- @TODO: Make this per-domain | ||
CHECK (capacity > 0 AND tokens < capacity), | ||
INDEX (updated_at) | ||
); |