Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add option to skip messages when a user joins/leaves a channel #212

Merged
merged 3 commits into from
Jan 19, 2025
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 5 additions & 27 deletions slackviewer/archive.py
Original file line number Diff line number Diff line change
@@ -60,8 +60,11 @@ def extract_archive(filepath):
# if there are new features added
extra=to_bytes(slackviewer.__version__)
)
# use the zip file name as full path. This allows the slack name to be
# extracted from the path later in reader.py when creating direct slack URLs
slack_name = splitext(basename(filepath))[0]

extracted_path = os.path.join(SLACKVIEWER_TEMP_PATH, archive_sha)
extracted_path = os.path.join(SLACKVIEWER_TEMP_PATH, archive_sha, slack_name)

if os.path.exists(extracted_path):
print("{} already exists".format(extracted_path))
@@ -73,8 +76,7 @@ def extract_archive(filepath):
print(info.filename)
info.filename = info.filename.encode("cp437").decode("utf-8")
print(info.filename)
zip.extract(info,path=extracted_path)

zip.extract(info, path=extracted_path)

print("{} extracted to {}".format(filepath, extracted_path))

@@ -112,27 +114,3 @@ def create_archive_info(filepath, extracted_path, archive_sha=None):
s = json.dumps(archive_info, ensure_ascii=False)
s = to_unicode(s)
f.write(s)


def get_export_info(archive_name):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was only used by cli.py, and the workspace name is also computed in Reader().

"""
Given a file or directory, extract it and return information that will be used in
an export printout: the basename of the file, the name stripped of its extension, and
our best guess (based on Slack's current naming convention) of the name of the
workspace that this is an export of.
"""
extracted_path = extract_archive(archive_name)
base_filename = basename(archive_name)
(noext_filename, _) = splitext(base_filename)
workspace_name = base_filename
# In case the archive is a zip file
if not os.path.isdir(extracted_path):
# Typical extract name: "My Friends and Family Slack export Jul 21 2018 - Sep 06 2018"
# If that's not the format, we will just fall back to the extension-free filename.
(workspace_name, _) = noext_filename.split(" Slack export ", 1)
return {
"readable_path": extracted_path,
"basename": base_filename,
"stripped_name": noext_filename,
"workspace_name": workspace_name,
}
31 changes: 14 additions & 17 deletions slackviewer/cli.py
Original file line number Diff line number Diff line change
@@ -8,7 +8,6 @@
from slackviewer.constants import SLACKVIEWER_TEMP_PATH
from slackviewer.utils.click import envvar, flag_ennvar
from slackviewer.reader import Reader
from slackviewer.archive import get_export_info
from jinja2 import Environment, PackageLoader


@@ -37,32 +36,29 @@ def clean(wet):
@click.option('--show-dms', is_flag=True, default=False, help="Show direct messages")
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")
@click.option('--skip-channel-member-change', is_flag=True, default=False, envvar='SKIP_CHANNEL_MEMBER_CHANGE', help="Hide channel join/leave messages")
@click.option("--template", default=None, type=click.File('r'), help="Custom single file export template")
@click.argument('archive_dir')
@click.argument('archive')
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This rename gives the argument the same config['archive'] name as in main.py, so the same config variable can be used in the Reader class.

def export(**kwargs):
config = kwargs

def export(archive_dir, debug, since, template, show_dms):
css = pkgutil.get_data('slackviewer', 'static/viewer.css').decode('utf-8')

tmpl = Environment(loader=PackageLoader('slackviewer')).get_template("export_single.html")
if template:
tmpl = Environment(loader=PackageLoader('slackviewer')).from_string(template.read())
export_file_info = get_export_info(archive_dir)
config = {
"debug": debug,
"since": since,
}
r = Reader(export_file_info["readable_path"], config)
if config["template"]:
tmpl = Environment(loader=PackageLoader('slackviewer')).from_string(config["template"].read())
r = Reader(config)
channel_list = sorted(
[{"channel_name": k, "messages": v} for (k, v) in r.compile_channels().items()],
key=lambda d: d["channel_name"]
)

dm_list = []
mpims = []
if show_dms:
if config["show_dms"]:
#
# Direct DMs
dm_list = r.compile_dm_messages()
dm_list = r.compile_dm_messages()
dm_users = r.compile_dm_users()

# make the list easier to look up. Also hide own user in 1:1 DMs
@@ -89,13 +85,14 @@ def export(archive_dir, debug, since, template, show_dms):
html = tmpl.render(
css=css,
generated_on=datetime.now(),
workspace_name=export_file_info["workspace_name"],
source_file=export_file_info["basename"],
workspace_name=r.slack_name(),
source_file=os.path.basename(config["archive"]),
channels=channel_list,
dms=dm_list,
mpims=mpims,
)
with open(export_file_info['stripped_name'] + '.html', 'wb') as outfile:
filename = f"{r.slack_name()}.html"
with open(filename, 'wb') as outfile:
outfile.write(html.encode('utf-8'))

print("Exported to {}.html".format(export_file_info['stripped_name']))
print(f"Exported to {filename}")
67 changes: 22 additions & 45 deletions slackviewer/main.py
Original file line number Diff line number Diff line change
@@ -5,38 +5,35 @@
import flask

from slackviewer.app import app
from slackviewer.archive import extract_archive
from slackviewer.reader import Reader
from slackviewer.freezer import CustomFreezer
from slackviewer.utils.click import envvar, flag_ennvar


def configure_app(app, archive, channels, config):
app.debug = config.get("debug", False)
app.no_sidebar = config.get("no_sidebar", False)
app.no_external_references = config.get("no_external_references", False)
def configure_app(app, config):
app.debug = config["debug"]
app.no_sidebar = config["no_sidebar"]
app.no_external_references = config["no_external_references"]
if app.debug:
print("WARNING: DEBUG MODE IS ENABLED!")
app.config["PROPAGATE_EXCEPTIONS"] = True

path = extract_archive(archive)
reader = Reader(path, config)
reader = Reader(config)

top = flask._app_ctx_stack
top.path = path
top.channels = reader.compile_channels(channels)
top.path = reader.archive_path()
top.channels = reader.compile_channels(config["channels"])
top.groups = reader.compile_groups()
top.dms = {}
top.dm_users = []
top.mpims = {}
top.mpim_users = []
if not config.get("skip_dms", False):
if not config["skip_dms"]:
top.dms = reader.compile_dm_messages()
top.dm_users = reader.compile_dm_users()
top.mpims = reader.compile_mpim_messages()
top.mpim_users = reader.compile_mpim_users()


# remove any empty channels & groups. DM's are needed for now
# since the application loads the first
top.channels = {k: v for k, v in top.channels.items() if v}
@@ -75,41 +72,21 @@ def configure_app(app, archive, channels, config):
@click.option("--since", default=None, type=click.DateTime(formats=["%Y-%m-%d"]),
help="Only show messages since this date.")
@click.option('--skip-dms', is_flag=True, default=False, help="Hide direct messages")

def main(
port,
archive,
ip,
no_browser,
channels,
no_sidebar,
no_external_references,
test,
debug,
output_dir,
html_only,
since,
skip_dms,
):
if not archive:
@click.option('--skip-channel-member-change', is_flag=True, default=False, envvar='SKIP_CHANNEL_MEMBER_CHANGE', help="Hide channel join/leave messages")
def main(**kwargs):
hfaran marked this conversation as resolved.
Show resolved Hide resolved
config = kwargs
if not config["archive"]:
raise ValueError("Empty path provided for archive")

config = {
"debug": debug,
"since": since,
"skip_dms": skip_dms,
"no_sidebar": no_sidebar,
"no_external_references": no_external_references,
}
configure_app(app, archive, channels, config)
configure_app(app, config)

if html_only:
if config["html_only"]:
# We need relative URLs, otherwise channel refs do not work
app.config["FREEZER_RELATIVE_URLS"] = True

# Custom subclass of Freezer allows overwriting the output directory
freezer = CustomFreezer(app)
freezer.cf_output_dir = output_dir
freezer.cf_output_dir = config["output_dir"]

# This tells freezer about the channel URLs
@freezer.register_generator
@@ -119,14 +96,14 @@ def channel_name():

freezer.freeze()

if not no_browser:
if not config["no_browser"]:
webbrowser.open("file:///{}/index.html"
.format(os.path.abspath(output_dir)))
.format(os.path.abspath(config["output_dir"])))

elif not test:
if not no_browser:
webbrowser.open("http://{}:{}".format(ip, port))
elif not config["test"]:
if not config["no_browser"]:
webbrowser.open("http://{}:{}".format(config["ip"], config["port"]))
app.run(
host=ip,
port=port
host=config["ip"],
port=config["port"]
)
23 changes: 19 additions & 4 deletions slackviewer/reader.py
Original file line number Diff line number Diff line change
@@ -10,17 +10,18 @@
from slackviewer.formatter import SlackFormatter
from slackviewer.message import Message
from slackviewer.user import User, deleted_user
from slackviewer.archive import extract_archive


class Reader(object):
"""
Reader object will read all of the archives' data from the json files
"""

def __init__(self, PATH, config):
self._PATH = PATH
self._debug = config.get("debug", False)
self._since = config.get("since", None)
def __init__(self, config):
self._config = config
self._PATH = extract_archive(config["archive"])
self._since = config["since"]
# slack name that is in the url https://<slackname>.slack.com
self._slack_name = self._get_slack_name()
# TODO: Make sure this works
@@ -144,6 +145,14 @@ def _extract_time(json):
except KeyError:
return 0

def slack_name(self):
"""Returns the (guessed) slack name"""
return self._slack_name

def archive_path(self):
"""Returns the archive path"""
return self._PATH

###################
# Private Methods #
###################
@@ -238,6 +247,11 @@ def _build_threads(self, channel_data):
user_ts_lookup[k].append((i, m))

for location, message in enumerate(channel_data[channel_name]):
# remove "<user> joined/left <channel>" message
if self._config['skip_channel_member_change'] and message._message.get('subtype') in ['channel_join', 'channel_leave']:
items_to_remove.append(location)
continue

# If there's a "reply_count" key, generate a list of user and timestamp dictionaries
if 'reply_count' in message._message or 'replies' in message._message:
# Identify and save where we are
@@ -258,6 +272,7 @@ def _build_threads(self, channel_data):
for reply_obj_tuple in sorted_reply_objects:
items_to_remove.append(reply_obj_tuple[0])
replies[location] = [tup[1] for tup in sorted_reply_objects]

# Create an OrderedDict of thread locations and replies in reverse numerical order
sorted_threads = OrderedDict(sorted(replies.items(), reverse=True))