Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add permalink metadata to message object #205

Merged
merged 2 commits into from
Jan 1, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion slackviewer/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class SlackFormatter(object):
# Class-level constants for precompilation of frequently-reused regular expressions
# URL detection relies on http://stackoverflow.com/a/1547940/1798683
_LINK_PAT = re.compile(r"<(https|http|mailto):[A-Za-z0-9_\.\-\/\?\,\=\#\:\@]+\|[^>]+>")
_MENTION_PAT = re.compile(r"<((?:#C|@[UB])\w+)(?:\|([A-Za-z0-9.-_]+))?>")
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bugfix for slackdump. Slackdump channel mentions look like "<#C....|>", without any character after the pipe.

If nothing follows the pipe symbol, the existing logic falls back to looking up the proper channel name.

_MENTION_PAT = re.compile(r"<((?:#C|@[UB])\w+)(?:\|([A-Za-z0-9.-_]*))?>")
_HASHTAG_PAT = re.compile(r"(^| )#[A-Za-z][\w\.\-\_]+( |$)")

def __init__(self, USER_DATA, CHANNEL_DATA):
Expand Down
13 changes: 12 additions & 1 deletion slackviewer/message.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,17 @@ class Message(object):

_DEFAULT_USER_ICON_SIZE = 72

def __init__(self, formatter, message):
def __init__(self, formatter, message, channel_id, slack_name):
    """Wrap a raw message record together with the place it was posted.

    formatter  -- stored unchanged on the instance
    message    -- raw message mapping, stored unchanged
    channel_id -- id of the channel this message belongs to
    slack_name -- the <slackname> part of https://<slackname>.slack.com
    """
    self._message = message
    self._formatter = formatter
    # The raw message does not carry its channel id (at least not with
    # slackdump), so it is handed in separately.
    self.channel_id = channel_id
    # Slack name as it appears in the url https://<slackname>.slack.com
    self.slack_name = slack_name
    # Starts out False; updated later if it's a thread message.
    self.is_thread_msg = False
    # Only used with the --since flag. Starts out True and is updated later.
    self.is_recent_msg = True

def __repr__(self):
message = self._message.get("text")
Expand Down Expand Up @@ -127,6 +131,13 @@ def id(self):
def subtype(self):
return self._message.get("subtype")

@property
def permalink(self):
    """Build the slack.com permalink URL for this message.

    The URL has the form
    https://<slack_name>.slack.com/archives/<channel_id>/p<ts>, where <ts>
    is the message's "ts" value with the "." removed. When the raw message
    has a "thread_ts" key, a "?thread_ts=...&cid=<channel_id>" query string
    is appended.
    """
    raw = self._message
    compact_ts = raw['ts'].replace('.', '')
    url = f"https://{self.slack_name}.slack.com/archives/{self.channel_id}/p{compact_ts}"
    if "thread_ts" not in raw:
        return url
    return url + f"?thread_ts={raw['thread_ts']}&cid={self.channel_id}"


class LinkAttachment():
"""
Expand Down
24 changes: 22 additions & 2 deletions slackviewer/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import json
import os
import datetime
import pathlib

from slackviewer.formatter import SlackFormatter
from slackviewer.message import Message
Expand All @@ -20,6 +21,8 @@ def __init__(self, PATH, debug, since):
self._PATH = PATH
self._debug = debug
self._since = since
# slack name that is in the url https://<slackname>.slack.com
self._slack_name = self._get_slack_name()
# TODO: Make sure this works
with io.open(os.path.join(self._PATH, "users.json"), encoding="utf8") as f:
self.__USER_DATA = {u["id"]: User(u) for u in json.load(f)}
Expand Down Expand Up @@ -164,6 +167,10 @@ def _create_messages(self, names, data, isDms=False):
empty_dms = []
formatter = SlackFormatter(self.__USER_DATA, data)

# Channel name to channel id mapping. Needed to create a message's
# permalink when using slackdump
channel_name_to_id = {c["name"]: c["id"] for c in data.values()}

for name in names:

# gets path to dm directory that holds the json archive
Expand All @@ -186,7 +193,8 @@ def _create_messages(self, names, data, isDms=False):
# sorts the messages in the json file
day_messages.sort(key=Reader._extract_time)

messages.extend([Message(formatter, d) for d in day_messages])
c_id = channel_name_to_id[name]
messages.extend([Message(formatter, d, c_id, self._slack_name) for d in day_messages])

chats[name] = messages
chats = self._build_threads(chats)
Expand Down Expand Up @@ -254,7 +262,7 @@ def _build_threads(self, channel_data):
location = grouping[0] + 1
for reply in grouping[1]:
msgtext = reply._message.get("text")
if not msgtext or not msgtext.startswith("**Thread Reply:**"):
if not msgtext or not reply.is_thread_msg:
reply._message["text"] = "**Thread Reply:** {}".format(msgtext)
reply.is_thread_msg = True

Expand Down Expand Up @@ -347,3 +355,15 @@ def _message_in_timeframe(self, msg):
ts_obj = datetime.datetime.fromtimestamp(float(ts))

return self._since < ts_obj

def _get_slack_name(self):
    """
    Return the slack name, i.e. the <slackname> in https://<slackname>.slack.com.

    Slackdump exports do not contain the name, so this function assumes that
    the name of the zip file or directory (the last component of self._PATH
    without its final suffix) is the slack name. This is a weak assumption.
    The name is used for the permalink generation.
    """
    export_path = pathlib.Path(self._PATH)
    return export_path.stem
1 change: 1 addition & 0 deletions slackviewer/templates/example_template_single_export.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
{%if message.user.email%} <span class="print-only user-email">({{message.user.email}})</span>{%endif%}
</div>
<a href="#{{ message.id}}"><div class="time">{{ message.time }}</div></a>
<a href="{{ message.permalink}}"><div class="permalink">permalink</div></a>
<div class="msg">
{{ message.msg|safe }}
{% for attachment in message.attachments -%}
Expand Down
Loading