Skip to content

Commit

Permalink
Merge pull request #10 from dave-andersen/main
Browse files Browse the repository at this point in the history
Use indexes, heap-based top k for python as the other languages do
  • Loading branch information
jinyus authored Sep 24, 2023
2 parents 1b6fa28 + b1d776a commit a9c420b
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 49 deletions.
58 changes: 12 additions & 46 deletions python/related.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,28 @@
import heapq
import json
from typing import Dict


class Post:
    """A post record whose identity is determined solely by its ``_id``.

    Two ``Post`` objects compare equal, and hash identically, whenever their
    ``_id`` values match; ``title`` and ``tags`` do not take part in
    equality or hashing.
    """

    def __init__(self, _id, title, tags):
        """Store the identifier, title and tags exactly as given."""
        self._id = _id
        self.title = title
        self.tags = tags

    def __hash__(self):
        """Hash on ``_id`` only, consistent with ``__eq__``."""
        return hash(self._id)

    def __eq__(self, other):
        """Compare by ``_id``; defer to the other operand for foreign types."""
        if isinstance(other, Post):
            return self._id == other._id
        # Returning NotImplemented (instead of False) lets Python try the
        # reflected comparison on ``other``; ``post == unrelated`` still
        # evaluates to False when neither side knows how to compare.
        return NotImplemented

    def __repr__(self):
        """Return a debugging representation showing all three fields."""
        return (
            f"{type(self).__name__}"
            f"({self._id!r}, {self.title!r}, {self.tags!r})"
        )

    def to_dict(self):
        """Return the post as a plain dict with ``_id``, ``title``, ``tags``.

        The ``tags`` value is the same object held by the instance, not a
        copy.
        """
        return {
            "_id": self._id,
            "title": self.title,
            "tags": self.tags,
        }


class PostWithSharedTags:
    """Pair a post with a ``shared_tags`` value.

    Both constructor arguments are stored unchanged as attributes of the
    same name.
    """

    def __init__(self, post, shared_tags):
        """Store ``post`` and ``shared_tags`` exactly as given."""
        self.post = post
        self.shared_tags = shared_tags
from collections import Counter


def main():
with open("../posts.json") as f:
posts = json.load(f)

tag_map = {}
for post in posts:
for idx, post in enumerate(posts):
for tag in post["tags"]:
if tag not in tag_map:
tag_map[tag] = []
tag_map[tag].append(Post(post["_id"], post["title"], post["tags"]))
tag_map[tag].append(idx)

all_related_posts = []
for post in posts:
related_posts_dict: Dict[Post, int] = {}
for tag in post["tags"]:
for related_post in tag_map[tag]:
if related_post._id != post["_id"]:
if related_post not in related_posts_dict:
related_posts_dict[related_post] = 0
related_posts_dict[related_post] += 1

sorted_posts = sorted(
related_posts_dict.items(), key=lambda x: x[1], reverse=True
)

num = min(5, len(sorted_posts))
top_posts = [p[0].to_dict() for p in sorted_posts[:num]]
for this_post_idx, post in enumerate(posts):
related_posts_list = Counter((p for tag in post["tags"] for p in tag_map[tag]))
related_posts_list[this_post_idx] = 0

top_posts = [
{k: posts[p][k] for k in ("_id", "title", "tags")}
for p in heapq.nlargest(5, related_posts_list, key=related_posts_list.get)
]

all_related_posts.append(
{
Expand Down
5 changes: 2 additions & 3 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ run_go() {
echo "Running Go" &&
cd ./go &&
go build &&
#command time -f '%es %Mk' ./related
if [ $HYPER == 1 ]; then
command hyperfine -r 10 --show-output "./related"
else
Expand All @@ -25,7 +24,7 @@ run_rust() {
cd ./rust &&
cargo build --release &&
if [ $HYPER == 1 ]; then
command hyperfine -r 10 --show-output "./target/release/rust"
command hyperfine -r 20 --show-output "./target/release/rust"
else
command time -f '%es %Mk' ./target/release/rust
fi
Expand All @@ -46,7 +45,7 @@ run_python() {
echo "Running Python" &&
cd ./python &&
if [ $HYPER == 1 ]; then
command hyperfine -r 1 "python3 ./related.py"
command hyperfine -r 2 --show-output "python3 ./related.py"
else
command time -f '%es %Mk' python3 ./related.py
fi
Expand Down

0 comments on commit a9c420b

Please sign in to comment.