From 3ac6c55b1d45a20316d384b2134314bd41d677db Mon Sep 17 00:00:00 2001
From: Vyacheslav Egorov
Date: Tue, 3 Oct 2023 21:02:50 +0200
Subject: [PATCH] Optimize Dart version

---
 dart/related.dart | 81 ++++++++++++++++++++++++++++-------------------
 1 file changed, 49 insertions(+), 32 deletions(-)

// Reviewer notes (text between the `---` marker and the diff is ignored
// by `git am`; it is documentation only).
//
// * Purpose: rewrites main() in dart/related.dart. The tag index now maps
//   each tag to a TaggedPosts wrapper, the per-post counters live in a
//   reused Int32List that is zeroed by an explicit loop, and the top-5
//   selection keeps (count, id) pairs in one flat Int32List instead of a
//   growable list of records.
// * top5 layout: even slots hold counts, odd slots hold post indices. The
//   insertion loop keeps the pairs ordered by descending count, and
//   minTags mirrors the count stored in the last pair.
// * NOTE(review): the "related" comprehension hard-codes 10, which is
//   topN * 2 written as a literal; keep the two in sync if topN changes.
// * NOTE(review): pairs that are never filled keep the Int32List's initial
//   zeros, so a post with fewer than five matches is padded with posts[0],
//   the same as with the removed List.filled(5, (idx: 0, count: 0)).
// * NOTE(review): this copy of the patch looks like it lost every `<...>`
//   span (author e-mail, generic type arguments - see
//   `final tagMap = >{};`). Only the TaggedPosts comment was repaired; the
//   `String` key type written there is presumed - confirm against
//   models.dart. Code lines are untouched and may not apply verbatim.

diff --git a/dart/related.dart b/dart/related.dart
index 5dd1c9c5..9c72708b 100644
--- a/dart/related.dart
+++ b/dart/related.dart
@@ -1,73 +1,90 @@
 import 'dart:convert';
 import 'dart:io';
+import 'dart:typed_data';
 
 import 'models.dart';
 
+// When you have type `Map<String, List<int>>` Dart AOT compiler can't figure
+// out that this container can contain only specific type of lists as
+// values. However if you create an indirection (like
+// `Map<String, TaggedPosts>`) that allows AOT compiler to figure out the type
+// of `map[k].posts` because `TaggedPosts` is a concrete class with a single
+// implementation - so its static type is enough.
+class TaggedPosts {
+  final List posts = [];
+}
+
 void main() {
-  final postsJson = jsonDecode(File('../posts.json').readAsStringSync()) as List;
+  final postsJson =
+      jsonDecode(File('../posts.json').readAsStringSync()) as List;
   final posts = postsJson.map(Post.fromJson).toList();
 
   final sw = Stopwatch()..start();
 
-  final tagMap = >{};
+  final tagMap = {};
 
   for (var i = 0; i < posts.length; i++) {
-    posts[i].tags.forEach((tag) {
-      tagMap.update(
-        tag,
-        (list) => list..add(i),
-        ifAbsent: () => [i],
-      );
-    });
+    for (var tag in posts[i].tags) {
+      (tagMap[tag] ??= TaggedPosts()).posts.add(i);
+    }
   }
 
   // preallocate and reuse
-  final taggedPostCount = List.filled(posts.length, 0);
+  final taggedPostCount = Int32List(posts.length);
 
   final allRelatedPosts = List.generate(posts.length, (i) {
     final post = posts[i];
 
-    taggedPostCount.fillRange(0, posts.length, 0);
-    // faster than for in
-    post.tags.forEach((tag) {
-      tagMap[tag]!.forEach((otherPostIdx) {
+    // Zero the reused counters by hand: fillRange is unfortunately too slow for now.
+    for (var i = 0; i < posts.length; i++) {
+      taggedPostCount[i] = 0;
+    }
+
+    for (var tag in post.tags) {
+      for (var otherPostIdx in tagMap[tag]!.posts) {
         taggedPostCount[otherPostIdx] += 1;
-      });
-    });
+      }
+    }
 
     taggedPostCount[i] = 0; // don't include self
 
-    final top5 = List.filled(5, (idx: 0, count: 0), growable: true);
+    // Compute max 5 without doing any unnecessary allocations.
+    const topN = 5;
+    final top5 = Int32List(topN * 2); // flat (count, id) pairs, highest count first
     var minTags = 0;
 
-    // priority queue to keep track of top 5
     for (var i = 0; i < taggedPostCount.length; i++) {
       final count = taggedPostCount[i];
 
-      if (count > minTags) {
-        var pos = 4;
-
-        while (pos >= 0 && count > top5[pos].count) {
-          pos -= 1;
-        }
-
-        pos += 1;
-
-        top5.insert(pos, (idx: i, count: count));
-        top5.removeLast();
+      if (count <= minTags) {
+        continue;
+      }
 
-        minTags = top5.last.count;
+      // Shift pairs with a smaller count one slot down; stop at the first that is not smaller.
+      var upperBound = (topN - 2) * 2;
+      while (upperBound >= 0 && count > top5[upperBound]) {
+        top5[upperBound + 2] = top5[upperBound];
+        top5[upperBound + 3] = top5[upperBound + 1];
+        upperBound -= 2;
       }
+      final insertionPos = upperBound + 2;
+      top5[insertionPos] = count;
+      top5[insertionPos + 1] = i;
+
+      minTags = top5[topN * 2 - 2];
     }
 
     return {
       "_id": post.iD,
       "tags": post.tags,
-      "related": top5.map((v) => posts[v.idx]).toList(),
+      "related": [
+        for (var i = 1; i < 10; i += 2) posts[top5[i]],
+      ],
     };
   });
 
   print('Processing time (w/o IO): ${sw.elapsedMilliseconds}ms');
 
-  File('../related_posts_dart.json').writeAsStringSync(jsonEncode(allRelatedPosts));
+  File('../related_posts_dart.json')
+      .writeAsStringSync(jsonEncode(allRelatedPosts));
 }