Skip to content

Commit

Permalink
Testing rust, c++ vs python3.12
Browse files Browse the repository at this point in the history
  • Loading branch information
spirillen committed Jan 18, 2025
1 parent c0834f8 commit 272352c
Show file tree
Hide file tree
Showing 2 changed files with 232 additions and 0 deletions.
99 changes: 99 additions & 0 deletions tools/sort_lists.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
use std::collections::HashSet;
use std::env;
use std::fs::File;
use std::io::{self, BufRead, BufReader};
use std::path::Path;
use std::process::Command;
use std::time::Instant;

/// Recursively walks `directory` and returns the display path of every
/// regular file whose file name equals one of `filenames`.
///
/// # Panics
///
/// Panics if any directory entry cannot be read during the walk.
fn find_files_by_name(directory: &str, filenames: &[&str]) -> Vec<String> {
    walkdir::WalkDir::new(directory)
        .into_iter()
        // A failed entry aborts the whole walk, as the result is unwrapped.
        .map(|item| item.unwrap())
        .filter(|item| item.file_type().is_file())
        .filter(|item| {
            let name = item.file_name().to_string_lossy();
            filenames.iter().any(|wanted| *wanted == &*name)
        })
        .map(|item| item.path().display().to_string())
        .collect()
}

/// Runs `git diff --name-only HEAD~1 HEAD` and returns each line of its
/// standard output as an owned string.
///
/// Invalid UTF-8 in the output is replaced lossily. The exit status of git is
/// not inspected; only a failure to launch the process is treated as fatal.
///
/// # Panics
///
/// Panics if the `git` command cannot be executed.
fn get_modified_files_in_last_commit() -> Vec<String> {
    let mut git = Command::new("git");
    git.args(&["diff", "--name-only", "HEAD~1", "HEAD"]);
    let result = git.output().expect("Failed to execute git command");

    let listing = String::from_utf8_lossy(&result.stdout);
    let mut changed = Vec::new();
    for path in listing.lines() {
        changed.push(path.to_string());
    }
    changed
}

/// Downloads the IANA top-level-domain list and returns it as a set of
/// lowercase TLD strings.
///
/// Comment lines (starting with `#`) and blank lines are skipped, and
/// surrounding whitespace is trimmed from every entry.
///
/// # Panics
///
/// Panics if the request fails, if the server answers with an HTTP error
/// status, or if the response body cannot be read as text.
fn fetch_valid_tlds() -> HashSet<String> {
    let url = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt";
    let response = reqwest::blocking::get(url)
        // A 4xx/5xx answer is still `Ok` at this point; turn it into an error
        // so an error page is never parsed as if it were the TLD list.
        .and_then(|resp| resp.error_for_status())
        .expect("Failed to fetch TLDs");
    let content = response.text().expect("Failed to read response text");

    content
        .lines()
        .map(str::trim)
        // An empty entry in the set would let a domain with a trailing dot
        // pass the TLD membership check.
        .filter(|line| !line.is_empty() && !line.starts_with('#'))
        .map(str::to_lowercase)
        .collect()
}

fn is_valid_domain(domain: &str, valid_tlds: &HashSet<String>) -> bool {
if let Some(tld) = domain.split('.').last() {
if !valid_tlds.contains(tld) {
return false;
}
}
let re = regex::Regex::new(r"^(?:[a-zA-Z0-9_](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9_])?\.)+[a-zA-Z]{2,63}$").unwrap();
re.is_match(domain)
}

/// Drops repeated lines, keeping the first occurrence of each one and
/// preserving the original relative order.
fn remove_duplicates(lines: Vec<String>) -> Vec<String> {
    let mut already_seen: HashSet<String> = HashSet::new();
    lines
        .into_iter()
        // `insert` returns true only the first time a value is added.
        .filter(|candidate| already_seen.insert(candidate.clone()))
        .collect()
}

/// Reads `file_path`, removes duplicate lines, sorts the remainder by the
/// first comma-separated field and prints every line whose first field is
/// not a valid domain.
///
/// The sorted lines are only held in memory; nothing is written back to the
/// file. Lines that cannot be read (for example invalid UTF-8) are skipped.
///
/// # Panics
///
/// Panics if the file cannot be opened.
fn sort_file_alphanum(file_path: &str, valid_tlds: &HashSet<String>) {
    /// Returns the text before the first comma, or the whole line if none.
    fn first_field(line: &str) -> &str {
        line.split(',').next().unwrap_or(line)
    }

    let file = File::open(file_path).expect("Failed to open file");
    let reader = BufReader::new(file);
    let mut lines: Vec<String> = reader.lines().filter_map(Result::ok).collect();

    lines = remove_duplicates(lines);

    lines.sort_by(|a, b| first_field(a).cmp(first_field(b)));

    // Add your domain validation and connectivity test logic here

    // Print invalid entries (example)
    for line in &lines {
        // Validate the same field the sort key uses. Passing the whole line
        // would reject every row that has more than one CSV column.
        if !is_valid_domain(first_field(line), valid_tlds) {
            println!("Invalid DNS entry: {}", line);
        }
    }
}

/// Entry point: fetches the TLD list, finds the target CSV files under
/// `source`, processes those touched by the last commit and prints the total
/// elapsed time.
fn main() {
    let timer = Instant::now();

    let tlds = fetch_valid_tlds();
    let wanted_names = ["wildcard.csv", "mobile.csv", "snuff.csv"];
    let changed_in_commit = get_modified_files_in_last_commit();

    find_files_by_name("source", &wanted_names)
        .iter()
        // Keep a file only when its path ends with one of the changed paths.
        .filter(|candidate| {
            changed_in_commit
                .iter()
                .any(|changed| candidate.ends_with(changed))
        })
        .for_each(|candidate| sort_file_alphanum(candidate, &tlds));

    println!("Time elapsed: {:?}", timer.elapsed());
}
133 changes: 133 additions & 0 deletions tools/sort_records.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
#include <algorithm>
#include <array>
#include <cctype>
#include <chrono>
#include <cstdio>
#include <filesystem>
#include <fstream>
#include <iostream>
#include <memory>
#include <regex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_set>
#include <vector>

#include <curl/curl.h>

namespace fs = std::filesystem;

// Recursively walks `directory` and returns the path of every regular file
// whose file name equals one of `filenames`.
std::vector<std::string> find_files_by_name(const std::string& directory, const std::vector<std::string>& filenames) {
    std::vector<std::string> found;

    // True when `candidate` is one of the requested file names.
    const auto is_wanted = [&filenames](const std::string& candidate) {
        return std::any_of(filenames.begin(), filenames.end(),
                           [&candidate](const std::string& wanted) { return wanted == candidate; });
    };

    for (fs::recursive_directory_iterator it(directory), last; it != last; ++it) {
        if (!fs::is_regular_file(*it)) {
            continue;
        }
        const fs::path& current = it->path();
        if (is_wanted(current.filename().string())) {
            found.push_back(current.string());
        }
    }
    return found;
}

// Runs `git diff --name-only HEAD~1 HEAD` and returns the non-empty lines of
// its standard output, one entry per changed path.
//
// Throws std::runtime_error if the pipe to the command cannot be opened.
std::vector<std::string> get_modified_files_in_last_commit() {
    // Closes the pipe when the owning pointer goes out of scope.
    struct PipeCloser {
        void operator()(FILE* stream) const {
            if (stream) pclose(stream);
        }
    };

    const std::string command = "git diff --name-only HEAD~1 HEAD";

    // unique_ptr never invokes its deleter on a null pointer, whereas a
    // shared_ptr built with a custom deleter does. The failure path below
    // must not end up calling pclose(nullptr).
    std::unique_ptr<FILE, PipeCloser> pipe(popen(command.c_str(), "r"));
    if (!pipe) throw std::runtime_error("popen() failed!");

    // Collect the whole output first: a path longer than the buffer arrives
    // in several fgets chunks and must not be split into separate entries.
    std::array<char, 128> buffer;
    std::string result;
    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
        result += buffer.data();
    }

    std::vector<std::string> modified_files;
    std::istringstream iss(result);
    for (std::string line; std::getline(iss, line); ) {
        // An empty entry would be a suffix of every path, so a caller that
        // matches with ends_with() would treat every file as modified.
        if (!line.empty()) modified_files.push_back(line);
    }
    return modified_files;
}

std::unordered_set<std::string> fetch_valid_tlds() {
std::unordered_set<std::string> valid_tlds;
CURL* curl;
CURLcode res;
curl = curl_easy_init();
if (curl) {
curl_easy_setopt(curl, CURLOPT_URL, "https://data.iana.org/TLD/tlds-alpha-by-domain.txt");
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, +[](void* contents, size_t size, size_t nmemb, void* userp) -> size_t {
((std::string*)userp)->append((char*)contents, size * nmemb);
return size * nmemb;
});
std::string response_string;
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &response_string);
res = curl_easy_perform(curl);
if (res == CURLE_OK) {
std::istringstream iss(response_string);
for (std::string line; std::getline(iss, line); ) {
if (line[0] != '#') valid_tlds.insert(line);
}
}
curl_easy_cleanup(curl);
}
return valid_tlds;
}

// Returns true when `domain` has the shape of a dotted host name and its last
// label is a known TLD.
//
// The TLD is looked up in `valid_tlds` first as written and then lowercased,
// so "EXAMPLE.COM" is accepted against a lowercase TLD set.
bool is_valid_domain(const std::string& domain, const std::unordered_set<std::string>& valid_tlds) {
    // Built once and reused: constructing a std::regex is expensive and this
    // function runs for every line of every processed file.
    static const std::regex re(R"((?:[a-zA-Z0-9_](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9_])?\.)+[a-zA-Z]{2,63}$)");
    if (!std::regex_match(domain, re)) return false;

    const auto pos = domain.find_last_of('.');
    if (pos == std::string::npos) return false;

    std::string tld = domain.substr(pos + 1);
    if (valid_tlds.find(tld) != valid_tlds.end()) return true;

    // Retry case-insensitively: the pattern above allows uppercase letters.
    std::transform(tld.begin(), tld.end(), tld.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
    return valid_tlds.find(tld) != valid_tlds.end();
}

// Returns a copy of `lines` without repeated entries, keeping the first
// occurrence of each one in its original position.
std::vector<std::string> remove_duplicates(const std::vector<std::string>& lines) {
    std::vector<std::string> kept;
    std::unordered_set<std::string> encountered;
    for (auto it = lines.cbegin(); it != lines.cend(); ++it) {
        // insert() reports through `.second` whether the value was new.
        const bool first_time = encountered.insert(*it).second;
        if (!first_time) {
            continue;
        }
        kept.push_back(*it);
    }
    return kept;
}

// Reads `file_path`, removes duplicate lines, sorts the remainder by the
// first comma-separated field and prints every line whose first field is not
// a valid domain.
//
// The sorted lines are only held in memory; nothing is written back to the
// file. If the file cannot be opened, a message is written to stderr and the
// function returns without further output.
void sort_file_alphanum(const std::string& file_path, const std::unordered_set<std::string>& valid_tlds) {
    std::ifstream infile(file_path);
    if (!infile) {
        std::cerr << "Failed to open file: " << file_path << std::endl;
        return;
    }

    std::vector<std::string> lines;
    for (std::string line; std::getline(infile, line); ) {
        lines.push_back(line);
    }

    lines = remove_duplicates(lines);
    std::sort(lines.begin(), lines.end(), [](const std::string& a, const std::string& b) {
        // Compare the text before the first comma in place. When find()
        // returns npos the count is clamped to the rest of the string, so no
        // temporary substrings are needed.
        return a.compare(0, a.find(','), b, 0, b.find(',')) < 0;
    });

    // Add your domain validation and connectivity test logic here

    // Print invalid entries (example)
    for (const auto& line : lines) {
        // Validate the same field the sort key uses. Passing the whole line
        // would reject every row that has more than one CSV column.
        if (!is_valid_domain(line.substr(0, line.find(',')), valid_tlds)) {
            std::cout << "Invalid DNS entry: " << line << std::endl;
        }
    }
}

int main() {
auto start = std::chrono::high_resolution_clock::now();

auto valid_tlds = fetch_valid_tlds();
std::vector<std::string> alphanum_filenames = {"wildcard.csv", "mobile.csv", "snuff.csv"};
auto modified_files = get_modified_files_in_last_commit();
auto target_files_alphanum = find_files_by_name("source", alphanum_filenames);

for (const auto& file : target_files_alphanum) {
if (std::any_of(modified_files.begin(), modified_files.end(), [&file](const std::string& mf) { return file.ends_with(mf); })) {
sort_file_alphanum(file, valid_tlds);
}
}

auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> duration = end - start;
std::cout << "Time elapsed: " << duration.count() << " seconds" << std::endl;

return 0;
}

0 comments on commit 272352c

Please sign in to comment.