Skip to content

Commit

Permalink
Add an Integration Test to scan the top 1k domains and validate that Z…
Browse files Browse the repository at this point in the history
…DNS's result is correct (#370)

* testing if github runner can scan 1k domains without crazy timeouts

* seeing results

* able to request X domain to Y IP

* working a and alookup tests

* working validators

* cleaning up unneeded files and added some logs

* cleanup and add as github action

* updated zdns exe path

* updated test name in ci.yml

* made python test a script

* updated script permissions

* removed dead import

* updated file name

* made error msg less scary

* only run CI on commits to the main branch and on all PRs; otherwise we're doubling the effort on all PRs

* remove limit to top 100, run against all 1k domains

* moved to only scanning 100 domains

* cleanup

* fixed name of CI action

* added rest of domains

* moved to 500 domains

* moved back to 100 domains

* add comments explaining integration tests
  • Loading branch information
phillip-stephens authored May 29, 2024
1 parent af72987 commit 736b1ca
Show file tree
Hide file tree
Showing 4 changed files with 294 additions and 3 deletions.
28 changes: 26 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
name: CI
on: [ push, pull_request ]
on:
push:
branches:
- main
pull_request:
jobs:
check-license:
runs-on: ubuntu-latest
Expand All @@ -16,6 +20,8 @@ jobs:
else
echo 'All .go files in repo have an appropriate header'
fi
# This test runs both unit tests and integration tests that validate that ZDNS can pull many DNS record types against
# our controlled domain
build-and-test:
runs-on: ubuntu-latest
steps:
Expand All @@ -36,4 +42,22 @@ jobs:
sudo rm /etc/resolv.conf
sudo ln -sf /run/systemd/resolve/resolv.conf /etc/resolv.conf
python --version
./integration_tests.py
./testing/integration_tests.py
# This test runs an integration test for a ZDNS scan on 100 domains to validate that the A records are correct
build-and-test-large-scale:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v4
- name: Setup Go
uses: actions/setup-go@v5
with:
go-version: '1.20'
- name: Build
run: |
go version
make
- name: Large-Scale Integration Test
run: |
python --version
./testing/top_domains_integration_tests.py
100 changes: 100 additions & 0 deletions testing/domains.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
m.fabswingers.com
www.dmm.co.jp
anoboy.baby
www.cmoa.jp
visortmo.com
shahvani.com
sattaamatka.co.com
www.asos.com
arca.live
letterboxd.com
www.ebay.com
www.bfmtv.com
www.rat.xxx
my.xhtab3.com
www.flashscore.com
cz.pornhub.com
studio.code.org
www.t-online.de
accounts.google.com
asuratoon.com
glife.bingoplus.com
www.tiktok.com
aniwave.to
schools.madrasati.sa
gall.dcinside.com
m.fmkorea.com
mangabuddy.com
thisvid.com
www.flipkart.com
sp.oddspark.com
www.fmkorea.com
www.porn300.com
context.reverso.net
ijavhd.com
www.mobile.de
www.sexindrag.com
de.m.wikipedia.org
exhentai.org
onlyindianx.cc
secure.bankofamerica.com
www.mercadolibre.com.ar
www.linkedin.com
detail.chiebukuro.yahoo.co.jp
fr.pornhub.com
m.indiamart.com
www.ebay.de
www.facebook.com
www.researchgate.net
battwo.com
web.whatsapp.com
pikabu.ru
myreadingmanga.info
www.netflix.com
www.samsung.com
www.tokopedia.com
medium.com
ge.xhamster.com
www.bbc.com
www.publi24.ro
olimpbet.kz
www.polovniautomobili.com
m.yahoo.co.jp
www.coco.gg
www.69shu.pro
yako.red
escortbabylon.net
poczta.wp.pl
www.dagbladet.no
m.ss.com
m.bild.de
ria.ru
www.dailymotion.com
retail.onlinesbi.sbi
myactivity.google.com
nregastrep.nic.in
www.aznude.com
coinmarketcap.com
www.gazzetta.it
tv.simontokx.online
character.ai
pmkisan.gov.in
mobile-tracker-free.com
quizizz.com
mov.ibomma.rs
www.masrawy.com
www.google.es
as.com
m.youtube.com
bpexch.com
betinexchange.com
ncz3u7cj2.com
www.jusbrasil.com.br
auto.drom.ru
www.subito.it
poki.com
newtoki330.com
giris.turkiye.gov.tr
www.protothema.gr
18comic.vip
m.olx.pl
2 changes: 1 addition & 1 deletion integration_tests.py → testing/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def dictSort(d):
class Tests(unittest.TestCase):

maxDiff = None
ZDNS_EXECUTABLE = "./zdns"
ZDNS_EXECUTABLE = "./zdns"

def run_zdns_check_failure(self, flags, name, expected_err, executable=ZDNS_EXECUTABLE):
flags = flags + " --threads=10"
Expand Down
167 changes: 167 additions & 0 deletions testing/top_domains_integration_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python3

from typing import List

import requests
import socket
import concurrent.futures
import select
import subprocess
import unittest
import json

# Relative path of the ZDNS binary under test; resolved against the current
# working directory when the subprocess is spawned.
# NOTE(review): presumably the binary produced by `make` at the repository
# root, which is where CI launches this script from -- confirm.
ZDNS_EXECUTABLE = "./zdns"
# Input list of domains to scan, read one domain per line (despite the .csv
# extension there are no columns or header row).
TOP_DOMAINS_FILE = "./testing/domains.csv"

def can_request_successfully(domain: str, ip: str) -> bool:
    """Report whether an HTTPS GET aimed at ``ip`` on behalf of ``domain`` succeeds.

    The request is sent to ``https://<ip>`` with the ``Host`` header set to
    ``domain`` and a browser-like ``User-Agent``. Certificate verification is
    turned off (the URL carries an IP address, not the hostname) and the
    request times out after 5 seconds.

    Returns:
        True when a response arrives with a status code below 400; False when
        the status code is 400 or above, or when ``requests`` raises a
        ``RequestException``. Both failure cases print a diagnostic line.
    """
    request_headers = {
        "Host": domain,
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
    }
    url = "https://" + ip
    try:
        reply = requests.get(url, headers=request_headers, verify=False, timeout=5)
    except requests.exceptions.RequestException as e:
        print(f"Failed to request {domain} with requests with error {e}")
        return False

    if reply.status_code >= 400:
        print(f"Failed to request {domain} with requests with status code {reply.status_code}")
        return False
    return True


def can_automatically_visit(domain: str, ip: str):
    """Probe ``domain`` at ``ip`` and return the outcome as a tuple.

    Returns:
        ``(domain, ip, status)`` where ``status`` is 0 when
        ``can_request_successfully`` reports success and -1 otherwise.
    """
    status = 0 if can_request_successfully(domain, ip) else -1
    return (domain, ip, status)


def get_ip_address(domain: str):
    """Resolve ``domain`` to an IPv4 address with the system resolver.

    Returns:
        ``(domain, ip)`` on success, or ``(domain, "")`` when the name cannot
        be resolved.
    """
    try:
        ip = socket.gethostbyname(domain)
    except (socket.gaierror, UnicodeError):
        # gaierror: the resolver could not find the name.
        # UnicodeError: the name could not be IDNA-encoded (for example an
        # empty label or a label longer than 63 characters). Such a name is
        # just as unresolvable, and must not abort the whole batch.
        return (domain, "")
    print(f"Resolved {domain} to {ip}")
    return (domain, ip)


def run_zdns(input_domains, flags, executable=ZDNS_EXECUTABLE):
    """Run ZDNS over ``input_domains`` and return its stdout as a list of lines.

    Args:
        input_domains: iterable of domain names; they are joined with newlines
            and piped to the subprocess on stdin.
        flags: list of command-line arguments appended after ``executable``.
        executable: path of the binary to launch.

    Returns:
        The decoded stdout of the subprocess, one list entry per line, each
        entry keeping its trailing newline.

    Anything the subprocess writes to stderr is echoed to stdout with a
    ``stderr: `` prefix once the subprocess has finished.
    """
    # One domain per line on the subprocess's stdin.
    input_data = "\n".join(input_domains)

    print(flags)
    # subprocess.run() feeds stdin and drains stdout/stderr concurrently and
    # only returns after the process has exited and both pipes hit EOF. A
    # hand-rolled select()/readline() loop that stops as soon as poll()
    # reports exit can drop output still sitting in the pipe, and writing all
    # of stdin before reading anything can deadlock on large inputs.
    completed = subprocess.run(
        [executable] + flags,
        input=input_data.encode(),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        check=False,
    )

    for raw_line in completed.stderr.splitlines(keepends=True):
        print("stderr: " + raw_line.decode())

    return [raw_line.decode() for raw_line in completed.stdout.splitlines(keepends=True)]


def get_requestable_domains(domains: List[str]) -> List[str]:
    """Return the subset of ``domains`` that answer a direct-to-IP HTTPS request.

    Works in two concurrent stages: every domain is first resolved with
    ``get_ip_address``, then every (domain, ip) pair is probed with
    ``can_automatically_visit``. Domains whose probe status is 0 are kept, in
    the order their probes complete. A summary of successes and failures is
    printed before returning.
    """
    # Stage 1: resolve all domains in parallel.
    with concurrent.futures.ThreadPoolExecutor() as resolver_pool:
        lookups = [resolver_pool.submit(get_ip_address, name) for name in domains]
        resolved = [done.result() for done in concurrent.futures.as_completed(lookups)]

    # Stage 2: probe each resolved address in parallel.
    reachable = []
    with concurrent.futures.ThreadPoolExecutor() as probe_pool:
        probes = [probe_pool.submit(can_automatically_visit, name, addr) for name, addr in resolved]
        for done in concurrent.futures.as_completed(probes):
            name, _addr, status = done.result()
            if status != 0:
                continue
            # status 0 means the domain is reachable
            reachable.append(name)

    print(f"Successfully requested {len(reachable)} out of {len(domains)} domains with requests")
    print("Requests successes:", reachable)
    print("Failures:", set(domains) - set(reachable))
    return reachable


def get_zdns_results_a(domains: List[str]) -> List[tuple[str, str]]:
    """Run a ZDNS ``A`` scan (iterative mode) and collect (domain, ip) pairs.

    Returns a list of tuples, each holding a domain and one IP address ZDNS
    returned for it. A domain can have several A records, so it can appear in
    several tuples.

    Output lines that mention TIMEOUT or CNAME, that contain no answer, or
    that are not valid JSON are reported on stdout and skipped.
    """
    zdns_output = run_zdns(domains, ["A", "--iterative"])
    domain_ip_pairs = []
    for line in zdns_output:
        # Cheap textual pre-filter: only plain, non-timed-out A answers are
        # of interest here.
        if "answer" not in line or "TIMEOUT" in line or "CNAME" in line:
            print(f"Failed to parse ZDNS output: {line}")
            continue

        try:
            result = json.loads(line)
        except json.JSONDecodeError:
            print(f"Failed to parse ZDNS output: {line}")
            continue
        if not isinstance(result, dict):
            print(f"Failed to parse ZDNS output: {line}")
            continue

        # Parse the JSON rather than splitting the raw text, so that every A
        # record in the answer section is returned and records from other
        # sections or of other types are never mistaken for an address.
        data = result.get("data") or {}
        answers = data.get("answers") or []
        pairs = [
            (result.get("name") or record.get("name"), record["answer"])
            for record in answers
            if isinstance(record, dict) and record.get("type") == "A" and record.get("answer")
        ]
        if not pairs:
            print(f"Failed to parse ZDNS output: {line}")
            continue
        domain_ip_pairs.extend(pairs)
    return domain_ip_pairs


def get_zdns_results_a_lookup(domains: List[str]) -> List[tuple[str, str]]:
    """Run a ZDNS ``ALOOKUP`` scan (iterative mode) and collect (domain, ip) pairs.

    Each output line that mentions ``ipv4_addresses`` and not ``TIMEOUT`` is
    decoded as JSON; one tuple is produced per entry of
    ``data.ipv4_addresses``, paired with the line's top-level ``name``. Any
    other line is reported on stdout and skipped.
    """
    pairs = []
    for raw in run_zdns(domains, ["ALOOKUP", "--iterative"]):
        if "ipv4_addresses" not in raw or "TIMEOUT" in raw:
            print(f"Failed to parse ZDNS output: {raw}")
            continue

        record = json.loads(raw)
        payload = record.get('data')
        addresses = payload.get('ipv4_addresses') if payload else None
        if addresses:
            pairs.extend((record.get('name'), addr) for addr in addresses)
    return pairs


class TestZDNS(unittest.TestCase):
    """Checks that the IPs ZDNS returns for popular domains really serve them.

    The domain list is first narrowed to the domains that can be requested
    through the system resolver; ZDNS is then run on that subset and every
    address it returns is probed over HTTPS.
    """

    @classmethod
    def setUpClass(cls):
        """Load the domain list once and keep only the request-able domains."""
        with open(TOP_DOMAINS_FILE, "r", encoding="utf-8") as f:
            # Skip blank lines so a stray empty line is never scanned as a domain.
            domains = [line.strip() for line in f if line.strip()]
        # These are those domains which we can successfully request using the requests library directed at an IP address
        # This excludes domains that use some form of DDoS mitigation, such as Cloudflare, which have more sophisticated
        # bot detection
        cls.known_reachable_domains = get_requestable_domains(domains)

    def _assert_ips_serve_domains(self, domain_ip_pairs):
        """Probe all (domain, ip) pairs in parallel and fail listing every pair that did not respond."""
        with concurrent.futures.ThreadPoolExecutor() as executor:
            pending = {
                executor.submit(can_request_successfully, domain, ip): (domain, ip)
                for domain, ip in domain_ip_pairs
            }
            failures = [
                pending[future]
                for future in concurrent.futures.as_completed(pending)
                if not future.result()
            ]
        self.assertFalse(
            failures,
            "ZDNS resolved a domain to an IP address that will not respond to requests for the given domain"
            f": {failures}",
        )

    def test_zdns_a(self):
        """Addresses from the ZDNS A module must serve the domains they were returned for."""
        zdns = get_zdns_results_a(self.known_reachable_domains)
        print(f"ZDNS resolved {len(zdns)} domains to IP addresses from request-able domains")
        self._assert_ips_serve_domains(zdns)

    def test_zdns_a_lookup(self):
        """Addresses from the ZDNS ALOOKUP module must serve the domains they were returned for."""
        zdns = get_zdns_results_a_lookup(self.known_reachable_domains)
        print(f"ZDNS resolved {len(zdns)} domains to IP addresses from request-able domains")
        self._assert_ips_serve_domains(zdns)


# Run the unittest runner only when this file is executed as a script
# (CI invokes it directly as ./testing/top_domains_integration_tests.py).
if __name__ == '__main__':
    unittest.main()

0 comments on commit 736b1ca

Please sign in to comment.