Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use indexes, heap-based top k for python as the other languages do #10

Merged
merged 10 commits into from
Sep 24, 2023
58 changes: 12 additions & 46 deletions python/related.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,28 @@
import heapq
import json
from typing import Dict


class Post:
    """A post record holding an id, a title and its tags.

    Equality and hashing are based solely on ``_id``, so two instances with
    the same id are interchangeable as dict keys or set members even when
    their titles or tags differ.
    """

    def __init__(self, _id, title, tags):
        """Store the post's identifier, title and tags unchanged."""
        self._id = _id
        self.title = title
        self.tags = tags

    def __hash__(self):
        """Hash on ``_id`` only, consistent with ``__eq__``."""
        return hash(self._id)

    def __eq__(self, other):
        """Return True only when ``other`` is a ``Post`` with the same ``_id``."""
        if isinstance(other, Post):
            return self._id == other._id
        return False

    def to_dict(self):
        """Return the post as a plain dict with keys ``_id``, ``title``, ``tags``."""
        return {
            "_id": self._id,
            "title": self.title,
            "tags": self.tags,
        }


class PostWithSharedTags:
    """Pair a post with a ``shared_tags`` value.

    NOTE(review): ``shared_tags`` is presumably the number of tags the post
    has in common with another post; this class only stores it, so confirm
    the meaning against the caller.
    """

    def __init__(self, post, shared_tags):
        """Store ``post`` and ``shared_tags`` unchanged."""
        self.post = post
        self.shared_tags = shared_tags
from collections import Counter


def main():
with open("../posts.json") as f:
posts = json.load(f)

tag_map = {}
for post in posts:
for idx, post in enumerate(posts):
for tag in post["tags"]:
if tag not in tag_map:
tag_map[tag] = []
tag_map[tag].append(Post(post["_id"], post["title"], post["tags"]))
tag_map[tag].append(idx)

all_related_posts = []
for post in posts:
related_posts_dict: Dict[Post, int] = {}
for tag in post["tags"]:
for related_post in tag_map[tag]:
if related_post._id != post["_id"]:
if related_post not in related_posts_dict:
related_posts_dict[related_post] = 0
related_posts_dict[related_post] += 1

sorted_posts = sorted(
related_posts_dict.items(), key=lambda x: x[1], reverse=True
)

num = min(5, len(sorted_posts))
top_posts = [p[0].to_dict() for p in sorted_posts[:num]]
for this_post_idx, post in enumerate(posts):
related_posts_list = Counter((p for tag in post["tags"] for p in tag_map[tag]))
related_posts_list[this_post_idx] = 0

top_posts = [
{k: posts[p][k] for k in ("_id", "title", "tags")}
for p in heapq.nlargest(5, related_posts_list, key=related_posts_list.get)
]

all_related_posts.append(
{
Expand Down
5 changes: 2 additions & 3 deletions run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ run_go() {
echo "Running Go" &&
cd ./go &&
go build &&
#command time -f '%es %Mk' ./related
if [ $HYPER == 1 ]; then
command hyperfine -r 10 --show-output "./related"
else
Expand All @@ -25,7 +24,7 @@ run_rust() {
cd ./rust &&
cargo build --release &&
if [ $HYPER == 1 ]; then
command hyperfine -r 10 --show-output "./target/release/rust"
command hyperfine -r 20 --show-output "./target/release/rust"
else
command time -f '%es %Mk' ./target/release/rust
fi
Expand All @@ -46,7 +45,7 @@ run_python() {
echo "Running Python" &&
cd ./python &&
if [ $HYPER == 1 ]; then
command hyperfine -r 1 "python3 ./related.py"
command hyperfine -r 2 --show-output "python3 ./related.py"
else
command time -f '%es %Mk' python3 ./related.py
fi
Expand Down