diff --git a/analysis/ipversion_transport.ipynb b/analysis/ipversion_transport.ipynb new file mode 100644 index 0000000..2a09d82 --- /dev/null +++ b/analysis/ipversion_transport.ipynb @@ -0,0 +1,472 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 258, + "id": "20ac4ad3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import sqlalchemy as sa" + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "id": "335afb50", + "metadata": {}, + "outputs": [], + "source": [ + "conn = sa.create_engine(\"postgresql://user:pwd@localhost:5432/db\")" + ] + }, + { + "cell_type": "code", + "execution_count": 260, + "id": "eba0ff4a", + "metadata": {}, + "outputs": [], + "source": [ + "temp_table_q = \"\"\"\n", + " CREATE TEMP TABLE temp_multiaddresses\n", + " AS\n", + " SELECT * from multi_addresses\n", + "\"\"\"\n", + "res = conn.execute(temp_table_q)\n" + ] + }, + { + "cell_type": "markdown", + "id": "43e79dd2", + "metadata": {}, + "source": [ + "Insert fields to indicate the type of given multiaddress: \n", + "- ip4_tcp\n", + "- ip4_quic\n", + "- ip6_tcp\n", + "- ip6_quic" + ] + }, + { + "cell_type": "code", + "execution_count": 261, + "id": "f80286fb", + "metadata": {}, + "outputs": [], + "source": [ + "alter_table_addcols_q = \"\"\"\n", + "ALTER TABLE temp_multiaddresses ADD COLUMN is_ip4_tcp int DEFAULT 0;\n", + "ALTER TABLE temp_multiaddresses ADD COLUMN is_ip4_quic int DEFAULT 0;\n", + "ALTER TABLE temp_multiaddresses ADD COLUMN is_ip6_tcp int DEFAULT 0;\n", + "ALTER TABLE temp_multiaddresses ADD COLUMN is_ip6_quic int DEFAULT 0;\n", + "\"\"\"\n", + "res = conn.execute(alter_table_addcols_q)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 262, + "id": "fdb341d9", + "metadata": {}, + "outputs": [], + "source": [ + "update_ip4_tcp_q = \"\"\"\n", + " UPDATE temp_multiaddresses SET\n", + " is_ip4_tcp = CASE\n", + " WHEN family(addr)=4 AND maddr LIKE '%%/tcp/%%' THEN \n", + " 1\n", + " ELSE \n", + " 0\n", + " 
END;\n", + "\"\"\"\n", + "res = conn.execute(update_ip4_tcp_q)" + ] + }, + { + "cell_type": "code", + "execution_count": 263, + "id": "92ce35e8", + "metadata": {}, + "outputs": [], + "source": [ + "update_ip6_tcp_q = \"\"\"\n", + " UPDATE temp_multiaddresses SET\n", + " is_ip6_tcp = CASE\n", + " WHEN family(addr)=6 AND maddr LIKE '%%/tcp/%%' THEN \n", + " 1\n", + " ELSE \n", + " 0\n", + " END;\n", + "\"\"\"\n", + "res = conn.execute(update_ip6_tcp_q)" + ] + }, + { + "cell_type": "code", + "execution_count": 264, + "id": "72285405", + "metadata": {}, + "outputs": [], + "source": [ + "update_ip4_quic_q = \"\"\"\n", + "UPDATE temp_multiaddresses SET\n", + " is_ip4_quic = CASE\n", + " WHEN family(addr)=4 AND maddr LIKE '%%/quic%%' THEN\n", + " 1\n", + " ELSE\n", + " 0\n", + " END;\n", + "\"\"\"\n", + "res = conn.execute(update_ip4_quic_q)" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "16966d31", + "metadata": {}, + "outputs": [], + "source": [ + "update_ip6_quic_q = \"\"\"\n", + "UPDATE temp_multiaddresses SET\n", + " is_ip6_quic = CASE\n", + " WHEN family(addr)=6 AND maddr LIKE '%%/quic%%' THEN\n", + " 1\n", + " ELSE\n", + " 0\n", + " END;\n", + "\"\"\"\n", + "res = conn.execute(update_ip6_quic_q)" + ] + }, + { + "cell_type": "markdown", + "id": "bb87f705", + "metadata": {}, + "source": [ + "We are only interested in hole punch results where both sides had an address relevant to the given protocol filter.\n", + "\n", + "For example, if a filter of \\[ip6, quic\\] was applied, we only want to look at results where both sides had at least one ip6_quic address.\n", + "\n", + "In the query:\n", + "- The first subquery counts the addresses of each type (ip4_tcp, ip6_tcp, ip4_quic, ip6_quic) for the local side\n", + "- The second subquery counts the addresses of each type for the remote side" + ] + }, + { + "cell_type": "code", + "execution_count": 288, + "id": "b2018df8", + "metadata": {}, + "outputs": [], + "source": [ + "final_results_q = \"\"\"\n", + "SELECT \n", +
" hpr.outcome, \n", + " hpr.protocol_filters, \n", + " hpr.local_ip4_tcp_cnt,\n", + " hpr.local_ip6_tcp_cnt,\n", + " hpr.local_ip4_quic_cnt,\n", + " hpr.local_ip6_quic_cnt,\n", + " remote_addrs.ip4_tcp_cnt as remote_ip4_tcp_cnt,\n", + " remote_addrs.ip6_tcp_cnt as remote_ip6_tcp_cnt,\n", + " remote_addrs.ip4_quic_cnt as remote_ip4_quic_cnt,\n", + " remote_addrs.ip6_quic_cnt as remote_ip6_quic_cnt,\n", + " COUNT(DISTINCT hpr.id) as count\n", + " FROM (\n", + " (SELECT\n", + " hpr.id, \n", + " hpr.outcome,\n", + " hpr.protocol_filters,\n", + " sum(ma.is_ip4_tcp) as local_ip4_tcp_cnt, \n", + " sum(ma.is_ip6_tcp) as local_ip6_tcp_cnt, \n", + " sum(ma.is_ip4_quic) as local_ip4_quic_cnt,\n", + " sum(ma.is_ip6_quic) as local_ip6_quic_cnt\n", + " FROM\n", + " hole_punch_results hpr\n", + " INNER JOIN multi_addresses_sets mas ON mas.id=hpr.listen_multi_addresses_set_id\n", + " CROSS JOIN unnest(mas.multi_addresses_ids) lmids(mid)\n", + " INNER JOIN temp_multiaddresses ma ON ma.id=lmids.mid\n", + " WHERE \n", + " NOT ma.is_relay \n", + " AND ma.is_public\n", + " AND NOT EXISTS (SELECT FROM port_mappings pm WHERE pm.hole_punch_result_id = hpr.id)\n", + " GROUP BY hpr.id, hpr.outcome, hpr.protocol_filters\n", + " ) hpr\n", + " INNER JOIN (\n", + " SELECT \n", + " result_id,\n", + " sum(is_ip4_tcp) as ip4_tcp_cnt,\n", + " sum(is_ip6_tcp) as ip6_tcp_cnt,\n", + " sum(is_ip4_quic) as ip4_quic_cnt,\n", + " sum(is_ip6_quic) as ip6_quic_cnt\n", + " FROM (\n", + " SELECT DISTINCT\n", + " hpa.hole_punch_result_id as result_id, \n", + " ma.id as aid,\n", + " ma.is_ip4_tcp as is_ip4_tcp,\n", + " ma.is_ip6_tcp as is_ip6_tcp,\n", + " ma.is_ip4_quic as is_ip4_quic,\n", + " ma.is_ip6_quic as is_ip6_quic\n", + " FROM hole_punch_attempt hpa\n", + " INNER JOIN hole_punch_attempt_x_multi_addresses hpaxma on hpa.id = hpaxma.hole_punch_attempt\n", + " INNER JOIN temp_multiaddresses ma on hpaxma.multi_address_id = ma.id\n", + " WHERE ma.is_public AND NOT ma.is_relay\n", + " ) remote\n", + " GROUP 
def pf_to_string(x):
    """Render a row's ``protocol_filters`` entry as a short grouping label.

    Args:
        x: Mapping-like row (a dict or a DataFrame row) that can be indexed
            with ``'protocol_filters'``.

    Returns:
        ``"No Filter"`` when the entry is falsy (``None`` or empty);
        otherwise ``"ip:<4|6>|transport:<tcp|quic>"`` derived from the first
        two elements of the entry.
    """
    filters = x['protocol_filters']
    if filters:
        ip_code, transport_code = filters[0], filters[1]

        # Only code 4 in the first slot and code 6 in the second slot are
        # recognised explicitly; every other value falls back to 6 / "quic".
        # NOTE(review): presumably these are multiaddr protocol codes
        # (4 = ip4, 6 = tcp) -- confirm against the schema.
        ip_filter = 6
        if ip_code == 4:
            ip_filter = 4

        transport_filter = "quic"
        if transport_code == 6:
            transport_filter = "tcp"

        return f"ip:{ip_filter}|transport:{transport_filter}"

    return "No Filter"
" + ] + }, + { + "cell_type": "code", + "execution_count": 300, + "id": "20d496ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:4|transport:tcp CONNECTION_REVERSED 289\n", + " FAILED 49471\n", + " NO_STREAM 930\n", + " SUCCESS 34651\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 300, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:4|transport:tcp\" & local_ip4_tcp_cnt==1 & remote_ip4_tcp_cnt>1').groupby(['pf', 'outcome'])['count'].sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 301, + "id": "9b03ad8e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:4|transport:tcp CONNECTION_REVERSED 1749\n", + " FAILED 57277\n", + " NO_STREAM 1673\n", + " SUCCESS 277045\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 301, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:4|transport:tcp\" & local_ip4_tcp_cnt==1 & remote_ip4_tcp_cnt==1').groupby(['pf', 'outcome'])['count'].sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 302, + "id": "ff1c607a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:4|transport:quic CONNECTION_REVERSED 1137\n", + " FAILED 61653\n", + " NO_STREAM 1399\n", + " SUCCESS 66183\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 302, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:4|transport:quic\" & local_ip4_quic_cnt==1 & remote_ip4_quic_cnt>1').groupby(['pf', 'outcome'])['count'].sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 303, + "id": "dd5c5fa3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:4|transport:quic CONNECTION_REVERSED 1571\n", + " FAILED 53468\n", + " NO_STREAM 1270\n", + " SUCCESS 192483\n", + "Name: count, dtype: 
int64" + ] + }, + "execution_count": 303, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:4|transport:quic\" & local_ip4_quic_cnt==1 & remote_ip4_quic_cnt==1').groupby(['pf', 'outcome'])['count'].sum()\n" + ] + }, + { + "cell_type": "markdown", + "id": "087e9f48", + "metadata": {}, + "source": [ + "For ip6 tcp most of the errors happen when both sides had 1 address" + ] + }, + { + "cell_type": "code", + "execution_count": 304, + "id": "9b31b657", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:6|transport:tcp CONNECTION_REVERSED 364\n", + " FAILED 20830\n", + " NO_STREAM 333\n", + " SUCCESS 8732\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 304, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:6|transport:tcp\" & local_ip6_tcp_cnt==1 & remote_ip6_tcp_cnt==1').groupby(['pf', 'outcome'])['count'].sum()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 305, + "id": "e8232b36", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pf outcome \n", + "ip:6|transport:tcp CONNECTION_REVERSED 24\n", + " FAILED 580\n", + " NO_STREAM 9\n", + " SUCCESS 751\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 305, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dff.query('pf == \"ip:6|transport:tcp\" & local_ip6_tcp_cnt==1 & remote_ip6_tcp_cnt>1').groupby(['pf', 'outcome'])['count'].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cd05dfa", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + 
"pygments_lexer": "ipython3", + "version": "3.9.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}