Optimizing workflows version.x.n
spirillen committed Jan 18, 2025
1 parent 12b9c69 commit 4000b8c
Showing 1 changed file with 52 additions and 18 deletions.
70 changes: 52 additions & 18 deletions tools/sort_lists.py
@@ -7,10 +7,10 @@
import webbrowser
import re
import ipaddress
from subprocess import check_output
import requests
from subprocess import check_output

VERSION = "0.2b1" # PEP 440 versioning format for beta release
VERSION = "0.2b2" # PEP 440 versioning format for beta release

def find_files_by_name(directory, filenames):
matches = []
@@ -24,17 +24,43 @@ def get_modified_files_in_last_commit():
output = check_output(["git", "diff", "--name-only", "HEAD~1", "HEAD"]).decode().splitlines()
return output

def fetch_valid_tlds():
response = requests.get("https://data.iana.org/TLD/tlds-alpha-by-domain.txt")
tlds = response.text.splitlines()
return set(tld.lower() for tld in tlds if not tld.startswith("#"))

VALID_TLDS = fetch_valid_tlds()

def is_valid_domain(domain):
def fetch_valid_tlds(proxy):
tlds_file = 'tools/tlds-alpha-by-domain.txt'
url = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt"

# Check if the local file exists and read its version
local_version = None
if os.path.exists(tlds_file):
with open(tlds_file, 'r') as file:
for line in file:
if line.startswith("# Version:"):
local_version = line.split()[2]
break

# Fetch the remote file
headers = {"User-Agent": "Mozilla/5.0"}
proxies = {"http": proxy, "https": proxy} if proxy else None
response = requests.get(url, headers=headers, proxies=proxies)
remote_lines = response.text.splitlines()

# Get the remote version
remote_version = None
for line in remote_lines:
if line.startswith("# Version:"):
remote_version = line.split()[2]
break

# If the versions differ or no local version, update the local file
if local_version != remote_version:
with open(tlds_file, 'w') as file:
file.write(response.text)

return set(tld.lower() for tld in remote_lines if not tld.startswith("#"))

def is_valid_domain(domain, valid_tlds):
if "." in domain:
tld = domain.split(".")[-1].lower()
if tld not in VALID_TLDS:
if tld not in valid_tlds:
return False
regex = re.compile(
r'^(?:[a-zA-Z0-9]' # First character of the domain
@@ -51,7 +77,7 @@ def is_valid_ip_arpa(ip_arpa):
except ValueError:
return False

def sort_file_alphanum(file_path):
def sort_file_alphanum(file_path, valid_tlds):
with open(file_path, 'r') as file:
lines = file.readlines()

@@ -63,7 +89,7 @@ def sort_file_alphanum(file_path):
lines = sorted(lines[1:], key=lambda x: x.strip().split(',')[0] if ',' in x else '') # Sort FQDNs
lines.insert(0, header + "\n")

invalid_entries = [line for line in lines if not is_valid_domain(line.strip().split(',')[0])]
invalid_entries = [line for line in lines if not is_valid_domain(line.strip().split(',')[0], valid_tlds)]
if invalid_entries:
print(f"Invalid DNS entries in {file_path}:")
for entry in invalid_entries:
@@ -73,7 +99,7 @@ def sort_file_hierarchical(file_path):
file.writelines(lines)
file.write("") # Ensure no additional newline

def sort_file_hierarchical(file_path):
def sort_file_hierarchical(file_path, valid_tlds):
with open(file_path, 'r') as file:
lines = file.readlines()

@@ -90,11 +116,11 @@ def sort_file_hierarchical(file_path):
parts = line.strip().split(',')
if len(parts) > 1:
domain, ip_arpa = parts[0], parts[1]
if not is_valid_domain(domain) or not is_valid_ip_arpa(ip_arpa):
if not is_valid_domain(domain, valid_tlds) or not is_valid_ip_arpa(ip_arpa):
invalid_entries.append(line)
else:
domain = parts[0]
if not is_valid_domain(domain):
if not is_valid_domain(domain, valid_tlds):
invalid_entries.append(line)

if invalid_entries:
@@ -122,13 +148,21 @@ def main():
parser = argparse.ArgumentParser(description="Sort and clean CSV files.")
parser.add_argument('-v', '--version', action='version', version=f"%(prog)s {VERSION}")
parser.add_argument('-f', '--force', action='store_true', help="Force run on all files, altered or not")
parser.add_argument('--proxy', type=str, default=None, help="Specify a proxy to use for downloading external files")
parser.add_argument('--no-proxy', action='store_true', help="Disable the default proxy setting")
parser.add_argument('-d', '-s', '--donate', '--sponsor', action='store_true', help="Open the donate link in default browser")
args = parser.parse_args()

proxy = args.proxy
if not args.no_proxy and not proxy and "GITHUB_ACTIONS" not in os.environ:
proxy = "socks5h://localhost:9050"

if args.donate:
handle_donate()
sys.exit(0)

valid_tlds = fetch_valid_tlds(proxy)

alphanum_filenames = ["tld.csv", "wildcard.csv", "wildcard.rpz-nsdname.csv", "domains.rpz-nsdname.csv", "mobile.csv", "snuff.csv"]
hierarchical_filenames = ["domains.csv", "onions.csv", "rpz-ip.csv", "ip4.csv", "ip6.csv", "rpz-client-ip.csv", "rpz-drop.csv", "rpz-ip.csv", "hosts.csv"]

@@ -138,11 +172,11 @@ def main():

for file in target_files_alphanum:
if args.force or any(file.endswith(modified) for modified in modified_files):
sort_file_alphanum(file)
sort_file_alphanum(file, valid_tlds)

for file in target_files_hierarchical:
if args.force or any(file.endswith(modified) for modified in modified_files):
sort_file_hierarchical(file)
sort_file_hierarchical(file, valid_tlds)

print("Please consider sponsoring My Privacy DNS at https://www.mypdns.org/donate")

