Skip to content

Commit

Permalink
make C extension work on newest ruby
Browse files Browse the repository at this point in the history
The BDIGIT API is an internal Ruby C API; it was removed from the public API after
Bignum and Fixnum were merged into Integer.
This commit switches the extension to the public Ruby C API.

It is slower than the previous BDIGIT-based version, but still much faster than the pure-Ruby version:

                                       user     system      total       real
distance3_bdigit                   0.198673   0.000000   0.198673 (0.198672)
distance3_public                   0.373779   0.000000   0.373779 (0.373777)
distance3_ruby                     1.824285   0.000000   1.824285 (1.824315)
  • Loading branch information
haukot committed Jun 29, 2024
1 parent b3a7aad commit f1b8e0b
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 33 deletions.
27 changes: 10 additions & 17 deletions extconf.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,16 @@
# Write a placeholder Makefile first (dummy_makefile is defined outside this excerpt).
File.write "Makefile", dummy_makefile(?.).join

# Everything below is skipped on Ruby older than 2.3.8, which leaves only the placeholder Makefile.
unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.3.8")
# Pick a Ruby source directory to add to the include path: /ruby if that directory exists,
# otherwise the rbenv sources directory that contains bignum.c; no match yields nil and skips the branch.
if ruby_source_dir = if File.directory? "/ruby"
"-I/ruby" # for Github Actions: docker (currently disabled) and benchmark
elsif ENV["RBENV_ROOT"] && ENV["RBENV_VERSION"] && File.exist?(t = "#{ENV["RBENV_ROOT"]}/sources/#{ENV["RBENV_VERSION"]}/ruby-#{ENV["RBENV_VERSION"]}/bignum.c") # https://github.com/rbenv/rbenv/issues/1199
"-I#{File.dirname t}"
end
append_cppflags ruby_source_dir
# -DRUBY_EXPORT is only added on Ruby 2.4 and newer.
append_cppflags "-DRUBY_EXPORT" unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
create_makefile "idhash"
# Why this hack?
# 1. Because I want to use Ruby and ./idhash.bundle for tests, not C.
# 2. Because I don't want to bother users with two gems instead of one.
# Prepend a `test` target to the Makefile that create_makefile just generated.
File.write "Makefile", <<~HEREDOC + File.read("Makefile")
.PHONY: test
test: all
\t$(RUBY) -r./lib/dhash-vips.rb ./lib/dhash-vips-post-install-test.rb
HEREDOC
end
# -DRUBY_EXPORT is only added on Ruby 2.4 and newer.
append_cppflags "-DRUBY_EXPORT" unless Gem::Version.new(RUBY_VERSION) < Gem::Version.new("2.4")
create_makefile "idhash"
# Why this hack?
# 1. Because I want to use Ruby and ./idhash.bundle for tests, not C.
# 2. Because I don't want to bother users with two gems instead of one.
# Prepend a `test` target to the Makefile that create_makefile just generated.
File.write "Makefile", <<~HEREDOC + File.read("Makefile")
.PHONY: test
test: all
\t$(RUBY) -r./lib/dhash-vips.rb ./lib/dhash-vips-post-install-test.rb
HEREDOC
end

__END__
Expand Down
61 changes: 45 additions & 16 deletions idhash.c
Original file line number Diff line number Diff line change
@@ -1,29 +1,58 @@
#include <bignum.c>
#include <ruby.h>

// Extract the integer `a` into a newly allocated array of unsigned ints.
//
//   a   - the Ruby integer to export
//   num - out parameter; receives the number of words in the returned buffer
//
// Words are stored least significant word first, in native byte order, using
// two's complement representation (see the flags passed to rb_integer_pack).
//
// Returns a buffer obtained from ALLOC_N; the caller owns it and must release
// it with xfree().
// Raises RuntimeError when rb_absint_numwords reports that the word count
// cannot be represented (it returns (size_t)-1 in that case).
static unsigned int * idhash_bignum_to_buf(VALUE a, size_t *num) {
    const size_t word_size = sizeof(unsigned int);
    const size_t word_numbits = word_size * CHAR_BIT;
    size_t nlz_bits = 0;

    *num = rb_absint_numwords(a, word_numbits, &nlz_bits);

    if (*num == (size_t)-1) {
        rb_raise(rb_eRuntimeError, "Number too large to represent and overflow occurred");
    }

    unsigned int *buf = ALLOC_N(unsigned int, *num);

    // Same word size as used for the count above, so exactly *num words are filled.
    rb_integer_pack(a, buf, *num, word_size, 0,
                    INTEGER_PACK_LSWORD_FIRST|INTEGER_PACK_NATIVE_BYTE_ORDER|
                    INTEGER_PACK_2COMP);

    return buf;
}

// does ((a ^ b) & (a | b) >> 128)
// That is: counts the set bits of ((a | b) >> 128) & (a ^ b), one word at a time,
// and returns the count via INT2FIX. `self` is not used.
// NOTE(review): idhash_bignum_to_buf can rb_raise; if that happens while converting `b`,
// the buffer already allocated for `a` is presumably never xfree'd -- confirm.
static VALUE idhash_distance(VALUE self, VALUE a, VALUE b){
BDIGIT* tempd;
long i, an = BIGNUM_LEN(a), bn = BIGNUM_LEN(b), templ, acc = 0;
BDIGIT* as = BDIGITS(a);
BDIGIT* bs = BDIGITS(b);
while (0 < an && as[an-1] == 0) an--; // for (i = an; --i;) printf("%u\n", as[i]);
while (0 < bn && bs[bn-1] == 0) bn--; // for (i = bn; --i;) printf("%u\n", bs[i]);
// printf("%lu %lu\n", an, bn);
size_t an, bn;
// Copy both integers into freshly allocated word buffers; they are released with xfree below.
unsigned int *as = idhash_bignum_to_buf(a, &an);
unsigned int *bs = idhash_bignum_to_buf(b, &bn);

// Drop zero words from the high end so an/bn are the effective lengths.
while (an > 0 && as[an-1] == 0) an--;
while (bn > 0 && bs[bn-1] == 0) bn--;

// Swap so that `as`/`an` always refer to the longer operand; the loops below are bounded by `an`.
if (an < bn) {
unsigned int *tempd; size_t templ;
tempd = as; as = bs; bs = tempd;
templ = an; an = bn; bn = templ;
}
// Fixed offset of 4 words. NOTE(review): bs[i-4] is read here without comparing i-4 against bn.
for (i = an; i-- > 4;) {
// printf("%ld : (%u | %u) & (%u ^ %u)\n", i, as[i], (i >= bn ? 0 : bs[i]), as[i-4], bs[i-4]);
acc += __builtin_popcountl((as[i] | (i >= bn ? 0 : bs[i])) & (as[i-4] ^ bs[i-4]));
// printf("%ld : %ld\n", i, acc);

size_t i;
long acc = 0;
// to count >> 128
// `cycles` is the number of unsigned-int words that make up 128 bits; pairing word i with
// word i-cycles is what implements the shift.
size_t cycles = 128 / (sizeof(unsigned int) * CHAR_BIT);

// Walk from the most significant word down to index `cycles`; any index at or beyond bn
// is treated as a zero word of the shorter operand.
for (i = an; i-- > cycles;) {
acc += __builtin_popcountl((as[i] | (i >= bn ? 0 : bs[i])) & (as[i-cycles] ^ (i-cycles >= bn ? 0 : bs[i-cycles])));
}

// presumably keeps a and b referenced up to this point -- confirm against the Ruby C API docs
RB_GC_GUARD(a);
RB_GC_GUARD(b);
// Release the buffers returned by idhash_bignum_to_buf.
xfree(as);
xfree(bs);

return INT2FIX(acc);
}

// Defines module DHashVips and, under it, module IDHash, then registers idhash_distance
// as the module function "distance3_c" taking 2 arguments.
void Init_idhash() {
VALUE m = rb_define_module("DHashVips");
VALUE mm = rb_define_module_under(m, "IDHash");
rb_define_module_function(mm, "distance3_c", idhash_distance, 2);
// Same definition with an explicit (void) parameter list.
void Init_idhash(void) {
VALUE m = rb_define_module("DHashVips");
VALUE mm = rb_define_module_under(m, "IDHash");
rb_define_module_function(mm, "distance3_c", idhash_distance, 2);
}

0 comments on commit f1b8e0b

Please sign in to comment.