articles

#!/usr/bin/env python3
"""
Blog maintenance script for
    entbehrlich.es
to post to twitter and release new posts
"""
# -*- coding: utf-8 -*-

import sys
import os
import logging
import random as rnd
import time
import pytz
import html
import textwrap
import datetime
import warnings
import subprocess
from urllib.parse import urlparse, unquote
import urllib.request
import re
import fileinput
import frontmatter
import codecs

import PIL
from PIL import Image
import click
import git
import requests
import wikipedia
import twitter
import feedparser
import xmltodict
from dateutil.parser import parse

# Silence warnings raised inside third-party code (wikipedia lib) that cannot be fixed here
warnings.filterwarnings("ignore")

# Filesystem layout, all derived from the directory the script is started in
project_home = os.getcwd()
posts_home = "{}/content/post/".format(project_home)
images_home = "{}/static/images/".format(project_home)
feedurl = "{}/public/index.xml".format(project_home)

# Editor command used to open posts, taken from the EDITOR environment variable
editor = os.environ.get("EDITOR")

# Base URL of the Commons API; the image file name gets appended to it
commonsapi = "https://tools.wmflabs.org/magnus-toolserver/commonsapi.php?image="

# Configure the stdlib root logger to only let ERROR and above through
logging.basicConfig(level=logging.ERROR)


# Logging
def log(msg, serv="debug"):
    """
    Print a timestamped, colored message to the terminal.

    :msg: message to print; non-string values (e.g. exception objects)
          are converted with str()
    :serv: str (severity), one of "debug", "info", "warning", "error";
           any other value is printed in the "debug" color
    :returns: None
    """

    colors = {
        "debug": "white",
        "info": "green",
        "warning": "yellow",
        "error": "red",
    }

    stamp = datetime.datetime.now().strftime('%H:%M:%S ')
    # str() so that callers may hand over exceptions without a TypeError
    click.secho(stamp + str(msg), fg=colors.get(serv, colors["debug"]))

# Abort early when no editor is configured, because commands in this script
# launch it via subprocess. An empty EDITOR value is as unusable as an unset
# one, so both are rejected.
if not editor:
    log("EDITOR is not set in your OS environment.", "error")
    sys.exit(1)

def gen_article_image(images, debug):
    """
    Pick the first usable Wikimedia Commons picture out of a list of urls.

    A candidate is used when its url contains "/commons/", it is not an
    svg, the Commons API reports a width of at least 540, its license name
    contains CC-BY or CC-PD and an author is given. Candidates that fail
    any check, or whose meta data cannot be fetched, are skipped.

    :images: list (image urls)
    :debug: bool (dry run: handed to download_image, and the picture is
            neither resized nor committed)
    :returns: tuple (value returned by download_image, author with html
              tags stripped), or (None, None) if no candidate qualifies
    """

    for image in images:

        # skip if not part of commons
        if "/commons/" not in image:
            continue

        # skip if its an svg
        if ".svg" in image:
            continue

        # build api url
        url = urlparse(image)
        apiurl = commonsapi + os.path.basename(url.path) + "&meta"
        log("Fetching image meta informations from %s" % apiurl, "debug")

        # query api and get dict of api items; any failure here only
        # disqualifies this candidate. Exception instead of a bare except
        # keeps Ctrl-C working, the timeout keeps the request from hanging.
        try:
            xml = requests.get(apiurl, timeout=30).text
            commons = xmltodict.parse(xml)
            c = commons["response"]["file"]
        except Exception:
            continue

        # filter pictures that are too small or report no usable width
        try:
            if float(c["width"]) < 540:
                continue
        except (KeyError, TypeError, ValueError):
            continue

        # the license entry is either a single item or a list of items
        try:
            entry = commons["response"]["licenses"]["license"]
            if isinstance(entry, list):
                entry = entry[0]
            license_name = entry["name"]
        except (TypeError, KeyError, IndexError):
            continue

        # check if its a friendly license
        if not isinstance(license_name, str):
            continue
        if "CC-BY" not in license_name and "CC-PD" not in license_name:
            continue

        # fetch the author of the picture, dropping html tags
        raw_author = c.get("author")
        if not raw_author or not isinstance(raw_author, str):
            continue
        author = re.sub('<[^<]+?>', '', raw_author)

        title = c["title"].replace('File:', '')
        log("Found CC-BY Picture \"%s\" from %s" % (title, c["uploader"]), "debug")
        final_image = download_image(url=c["urls"]["file"], fname=title, debug=debug)

        # a dry run downloads nothing, so there is no file to resize/commit
        if not debug:
            resize_image(title)

        return final_image, author

    return None, None

# Article generators
def gen_article_meta(title, date, tags, image, imageauthor, thanks=None, author=None):
    """
    Make a blog post header like
    ---
    title: foo
    date: bar
    draft: true
    image: /images/foo.jpg
    imageauthor: h0nk
    author: me
    thanks: user
    tags:
    - foo
    - bar
    ---

    The header always carries "draft: true". The image, imageauthor,
    author and thanks lines are only written when a value is given.
    A leading "Kategorie:" is removed from each tag; tags that still
    contain a ":" afterwards are dropped.

    :title: str
    :date: str
    :tags: list
    :image: str or None
    :imageauthor: str or None (only used when image is given)
    :thanks: str or None
    :author: str or None
    :returns: str (header followed by one empty line)
    """
    prefix = "Kategorie:"

    lines = [
        "---",
        "title: " + title,
        "date: " + date,
        "draft: true",
    ]

    if image is not None:
        lines.append("image: " + image)
        # an image without a known author must not break the header
        if imageauthor is not None:
            lines.append("imageauthor: " + imageauthor)

    if author is not None:
        lines.append("author: " + author)

    if thanks is not None:
        lines.append("thanks: " + thanks)

    # the blank after "tags:" is part of the established header format
    lines.append("tags: ")
    for tag in tags:
        # strip the prefix BEFORE filtering, otherwise every prefixed
        # category would be thrown away by the ":" check
        if tag.startswith(prefix):
            tag = tag[len(prefix):]
        if ":" not in tag:
            lines.append("- " + tag)

    lines.append("---")
    return "\n".join(lines) + "\n\n"

def gen_article_quote(summary, title, url):
    """
    Turn a summary into a markdown block quote with a source line:

    > lorem ipsum
    >
    > Quelle: [Lorem Impsum](https://...)

    The summary is wrapped at 78 characters before it is quoted.

    :summary: str (text to quote)
    :title: str (link text of the source link)
    :url: str (target of the source link)
    :returns: str (full multiline quote)
    """

    # every wrapped line becomes one markdown quote line
    quoted_lines = [
        textwrap.indent(line, '> ') + '\n'
        for line in textwrap.wrap(summary, 78)
    ]

    # empty quote line, then the source with a proper link
    source = '> Quelle: ' + gen_markdown_link(url=url, txt=title)

    return ''.join(quoted_lines) + '>' + '\n' + source

def gen_article_thanks(thanks):
    """
    Build a "Danke an ...!" sentence that links to a twitter profile.

    :thanks: str (twitter username) or None
    :returns: str (empty string when no username is given)
    """
    if thanks is None:
        return ""

    profile = gen_markdown_link(url="https://twitter.com/" + thanks, txt=thanks)
    return "Danke an %s!" % profile

def gen_markdown_link(url, txt=None):
    """
    Build a markdown link like [foo.bar/baz](http://foo.bar/baz)

    Without an explicit link text the url itself is used as text, with a
    leading "https://" or "http://" removed.

    :url: str (link target, used as given)
    :txt: str (link text) or None
    :returns: str
    """

    if txt is None:
        txt = url
        for scheme in ("https://", "http://"):
            if txt.startswith(scheme):
                txt = txt[len(scheme):]
                break

    return '[' + txt + '](' + url + ')'


def publish(post):
    """
    Release a post: switch "draft: true" to "draft: false" and set its
    date to the current time in Europe/Berlin.

    Only lines that START with "draft: true" / "date: " are rewritten, so
    other lines that merely contain such text (e.g. "update: ...") stay
    untouched. A post that is not a draft only gets its date updated.

    :post: str (filename of the post, relative to posts_home)
    :returns: bool (False when the file cannot be opened)
    """

    now = datetime.datetime.now(pytz.timezone('Europe/Berlin'))
    t = now.replace(microsecond=0).isoformat()
    path = posts_home + post

    try:
        with open(path, "r") as f:
            lines = f.readlines()
    except IOError:
        log("No such file %s" % path, "error")
        return False

    if not any(line.startswith('draft: true') for line in lines):
        log("Article %s was already release. Updating publish date" % post, "debug")

    updated = []
    for line in lines:
        if line.startswith('draft: true'):
            line = line.replace('draft: true', 'draft: false', 1)
        # "." does not match the newline, so the line ending is preserved;
        # the function replacement inserts the timestamp literally
        line = re.sub(r'^date: .*', lambda _match: 'date: %s' % t, line)
        updated.append(line)

    with open(path, "w") as f:
        f.writelines(updated)

    log("Modified %s to draft=false and date %s" % (post, t), "debug")

    return True

def commit(filename, path=posts_home):
    """
    Stage a single file and commit it, using the filename as message.

    :filename: str (filename of the post)
    :path: str (directory prefix that is put in front of the filename)
    :returns: bool (True when the commit succeeded)
    """

    try:
        repo = git.Repo(project_home)
        repo.git.add(path + filename)
        repo.git.commit(m=filename)
    except Exception as err:
        # Exception instead of a bare except: Ctrl-C and sys.exit still
        # abort the script, and the actual reason is reported
        log("Post %s could not be committed" % filename, "error")
        log(str(err), "error")
        return False

    log("%s committed in git" % filename, "info")
    return True

def push():
    """
    Run "git push" in the repository at project_home.

    :returns: bool (True when the push succeeded)
    """

    try:
        repo = git.Repo(project_home)
        repo.git.push()
    except Exception as err:
        # Exception instead of a bare except: Ctrl-C and sys.exit still
        # abort the script, and the actual reason is reported
        log("Failed to push current state to repository", "error")
        log(str(err), "error")
        return False

    log("Pushed current state to repository", "info")
    return True

def pull():
    """
    Run "git pull" in the repository at project_home.

    :returns: bool (True when the pull succeeded)
    """

    try:
        repo = git.Repo(project_home)
        repo.git.pull()
    except Exception as err:
        # Exception instead of a bare except: Ctrl-C and sys.exit still
        # abort the script, and the actual reason is reported
        log("Failed to pull state from remote repository", "error")
        log(str(err), "error")
        return False

    log("Pulled content from remote repository", "info")
    return True


def download_image(url, fname, debug):
    """
    Fetch a picture into images_home, unless running in debug mode.

    :url: str (where to download the picture from)
    :fname: str (file name to store the picture under)
    :debug: bool (when set, nothing is downloaded)
    :returns: str ("/images/" followed by fname, in both modes)
    """
    log("Downloading %s from %s" % (fname, url), "debug")

    # dry run: report the path the picture would get, but fetch nothing
    if debug:
        return "/images/" + fname

    urllib.request.urlretrieve(url, images_home + fname)
    log("Downloaded %s from %s" % (fname, url), "info")
    return "/images/" + fname


def resize_image(fname):
    """
    Scale a picture in images_home to a width of 540 pixels, keeping its
    aspect ratio, overwrite the file and commit it.

    :fname: str (file name inside images_home)
    :returns: bool (always True)
    """

    basewidth = 540
    fpath = images_home + fname

    # the context manager closes the source file again after resizing
    with Image.open(fpath) as img:
        width, height = img.size
        wpercent = basewidth / float(width)
        # never go below 1 pixel, a zero height is not a valid image size
        hsize = max(1, int(float(height) * wpercent))
        log("Resizing image %s to %sx%s" % (fname, basewidth, hsize), "debug")
        # LANCZOS is the filter formerly also reachable as ANTIALIAS, an
        # alias that newer Pillow releases no longer provide
        resized = img.resize((basewidth, hsize), Image.LANCZOS)

    resized.save(fpath)
    commit(fname, images_home)
    return True


def list_posts():
    """
    Count the posts in posts_home by state.

    A post counts as unreleased when it contains "draft: true", as
    released otherwise, and additionally as podcasted when it contains
    "podcast: ".

    :returns: tuple (total, released, unreleased, podcasted)
    """

    posts = os.listdir(posts_home)

    unreleased = 0
    podcasted = 0
    for post in posts:
        # read every file once and close it again right away
        with open(posts_home + post) as f:
            content = f.read()

        if 'draft: true' in content:
            unreleased += 1

        if 'podcast: ' in content:
            podcasted += 1

    released = len(posts) - unreleased
    return len(posts), released, unreleased, podcasted


# CLI Commands

# Root command group; the commands below attach themselves via @cli.command
@click.group()
def cli():
    """ cli command group """


@cli.command(short_help="Tweet latest blogpost from rss feed")
@click.option('oldfeed', '-o', '--oldfeed', default='-f /tmp/index.xml')
@click.option('newfeed', '-n', '--newfeed', default='https://entbehrlich.es/index.xml')
def tweet(oldfeed, newfeed):
    """
    Tweet latest blogpost from rss feed
    :feedurl: str (path)
    :returns: bool
    """
    # Compares the first entry of both feeds and tweets the first entry of
    # newfeed when its link differs from the one in oldfeed.
    # Returns True when the tweet was sent, False in every other case.
    # NOTE(review): the default of --oldfeed starts with "-f " and does not
    # look like a path or url; confirm whether "/tmp/index.xml" was meant.

    try:
        oldparsed = feedparser.parse(oldfeed)
        newparsed = feedparser.parse(newfeed)
    except IOError:
        log("Feed file {} or {} not found.".format(oldfeed, newfeed), "error")
        return False

    # without entries there is nothing to compare, and indexing would crash
    if not oldparsed['entries'] or not newparsed['entries']:
        log("Feed {} or {} contains no entries.".format(oldfeed, newfeed), "error")
        return False

    oldfeed_link = oldparsed['entries'][0]['link']
    newfeed_link = newparsed['entries'][0]['link']

    if oldfeed_link == newfeed_link:
        log("Link {} already tweeted".format(newfeed_link), "error")
        return False

    log("Detected new article {}, trying to tweet".format(newfeed_link), "info")

    # get description from feed url
    # the goal is to find the html <p> that contains the description
    # added by me. not the wikipedia quote.
    desc = newparsed['entries'][0]['summary']
    desc = desc.split("</p>")[0]
    desc = desc.replace('\n', ' ')
    desc = html.unescape(desc)
    desc = re.sub('<[^<]+?>', '', desc)

    # if desc too long, cut it for twitter
    if len(desc) > 240:
        desc = desc[:237] + '...'

    message = desc + " #wikipedia " + newfeed_link

    # check the credentials explicitly, missing ones would otherwise only
    # surface as a failed api call
    credentials = {
        'consumer_key': os.getenv('tw_con_key'),
        'consumer_secret': os.getenv('tw_con_secret'),
        'token': os.getenv('tw_access_token'),
        'token_secret': os.getenv('tw_access_token_secret'),
    }
    if not all(credentials.values()):
        log("Could not find global variables for twitter auth", "error")
        return False

    try:
        auth = twitter.OAuth(**credentials)
        t = twitter.Twitter(auth=auth)
        t.statuses.update(status=message)
    except Exception as err:
        # Exception instead of a bare except so Ctrl-C still aborts
        log("Could not send tweet. Check debugging", "error")
        log(str(err), "error")
        return False

    log("Tweeted \"%s\"" % message, "info")
    return True


@cli.command(short_help="Post latest blogpost from rss feed to telegram")
@click.option('oldfeed', '-o', '--oldfeed', default='-f /tmp/index.xml')
@click.option('newfeed', '-n', '--newfeed', default='https://entbehrlich.es/index.xml')
def telegram(oldfeed, newfeed):
    """
    telegram latest blogpost from rss feed
    :feedurl: str (path)
    :returns: bool
    """
    # Compares the first entry of both feeds and posts the first entry of
    # newfeed to the telegram chat when its link differs from oldfeed's.
    # Returns True when telegram accepted the message, False otherwise.
    # NOTE(review): the default of --oldfeed starts with "-f " and does not
    # look like a path or url; confirm whether "/tmp/index.xml" was meant.

    try:
        oldparsed = feedparser.parse(oldfeed)
        newparsed = feedparser.parse(newfeed)
    except IOError:
        log("Feed file {} or {} not found.".format(oldfeed, newfeed), "error")
        return False

    # without entries there is nothing to compare, and indexing would crash
    if not oldparsed['entries'] or not newparsed['entries']:
        log("Feed {} or {} contains no entries.".format(oldfeed, newfeed), "error")
        return False

    oldfeed_link = oldparsed['entries'][0]['link']
    newfeed_link = newparsed['entries'][0]['link']

    if oldfeed_link == newfeed_link:
        log("Link {} already telegramed".format(newfeed_link), "error")
        return False

    log("Detected new article {}, trying to telegram".format(newfeed_link), "info")

    # get description from feed url
    # the goal is to find the html <p> that contains the description
    # added by me. not the wikipedia quote.
    desc = newparsed['entries'][0]['summary']
    desc = desc.split("</p>")[0]
    desc = desc.replace('\n', ' ')
    desc = html.unescape(desc)
    desc = re.sub('<[^<]+?>', '', desc)

    message = desc + "\n\n" + newfeed_link

    # check the settings explicitly, os.getenv returns None when unset
    chat_id = os.getenv('entbehrliches_telegram_chat_id')
    bot_token = os.getenv('entbehrliches_bot_token')
    if not chat_id or not bot_token:
        log("Could not find global variables for telegram bot", "error")
        return False

    post_data = {
        "chat_id": chat_id,
        "text": message,
    }

    try:
        r = requests.post(
            "https://api.telegram.org/bot%s/sendMessage" % bot_token,
            post_data,
            timeout=30,
        )
    except requests.RequestException as err:
        log("Could not send telegram message: %s" % err, "error")
        return False

    log(r.text)

    # a 4xx/5xx answer means the message was not accepted
    if not r.ok:
        log("Telegram answered with status %s" % r.status_code, "error")
        return False

    return True


@cli.command(short_help="Create a new post")
@click.argument('url')
@click.option('date', '-d', '--date', default=str(datetime.datetime.now()))
@click.option('thanks', '-t', '--thanks', default=None)
@click.option('debug', '--debug', is_flag=True, default=False)
@click.option('author', '-a', '--author', default=os.getenv("USER"))
def new(url, date, thanks, debug, author):
    """
    \b
    Create new post.
    Fetches content from wikipedia url
    Creates article header
    Creates quote from article with reference
    Adds thank you at the end of the article
    """
    # Returns True when the post was created (or printed with --debug),
    # False when the url is unusable, the post already exists or the
    # wikipedia article could not be fetched.

    # pull from repo
    pull()

    # filter out mobile pages
    url = url.replace("https://de.m.", "https://de.")
    url = url.replace("https://en.m.", "https://en.")

    # needed in further building of article
    origurl = url

    # newline shortcut
    n = "\n"

    # find wikipedia link: language is the first host label, the article
    # name the second path segment (/wiki/<name>)
    parsed = urlparse(url)
    lang = parsed.netloc.split('.')[0]
    segments = parsed.path.split('/')
    if len(segments) < 3 or not segments[2]:
        log("Not a wikipedia article url: %s" % origurl, "error")
        return False

    title = unquote(segments[2].replace('-', ' ').replace('_', ' '))
    filename = title.replace(' ', '-').replace('---', '-').replace(')', '').replace('(', '').replace('/','') + '.md'
    path = posts_home + filename

    # check if article was written before or naming conflict
    if os.path.isfile(path) and not debug:
        log("Already exists. Duplicate article %s" % filename, "error")
        return False

    # set lang and title, fetch wikipedia article
    try:
        wikipedia.set_lang(lang)
        wiki = wikipedia.page(title)
        tags = wiki.categories
        summary = wiki.summary
    except wikipedia.exceptions.DisambiguationError as e:
        # str(): log() is handed text, not the exception object
        log(str(e), "error")
        return False
    except Exception as e:
        # Exception instead of a bare except so Ctrl-C still aborts
        log("Fetching Wikipedia article was not possible due to connection issues", "error")
        log(str(e), "error")
        return False

    log("Fetched article \"%s\" from wikipedia (%s)" % (title, lang), "info")

    # posts are always dated to 14:00 of the given day
    t = parse(date)
    t = t.replace(hour=14, minute=0, microsecond=0)
    t = t.isoformat()

    # fetch images; a post without a picture is fine, so no failure in
    # here may abort the command
    try:
        images = wiki.images
        image, imageauthor = gen_article_image(images, debug)
    except Exception:
        image = None
        imageauthor = None
        log("Images could not be fetched, but continuing", "error")

    # small fix for stupid macos
    if author == "fnbaum":
        author = "noqqe"

    # generate header for article
    header = gen_article_meta(title=title, date=t, tags=tags, image=image, imageauthor=imageauthor, thanks=thanks, author=author)

    # generate quote for article
    quote = gen_article_quote(summary=summary, title=title, url=origurl)

    # put all the content together
    text = header + n + n + quote

    # if debug is on, just print and quit the function.
    # no edit, no commit, no file written
    if debug:
        log("Only showing generated post on stdout", "debug")
        log(n + text, "debug")
        return True

    # write markdown to the document
    with open(path, "w") as f:
        f.write(text)

    log("Created post in path " + path, "info")

    # edit file
    subprocess.call([editor, path])

    # commit to local repo
    commit(filename, posts_home)

    # push to repo
    push()

    total, released, unreleased, podcasted = list_posts()
    log("Posts: Unreleased {}, released {}, total {}, podcasted {}".format(unreleased, released, total, podcasted))

    return True

@cli.command(short_help="Show current drafts")
@click.option('allposts', '--all', '-a',  is_flag=True)
def queue(allposts):
    """
    \b
    Shows all unpublished posts. Can also show all posts ever using -a
    """
    # Prints the post statistics first, then one line per listed post.
    # A post is unpublished when its file contains "draft: true".

    total, released, unreleased, podcasted = list_posts()
    log("Posts: Unreleased {}, released {}, total {}, podcasted {}".format(unreleased, released, total, podcasted))

    posts = os.listdir(posts_home)

    if allposts:
        log("All posts:", "debug")
        for post in posts:
            log(" * " + post, "debug")
        return True

    log("All unpublished posts:", "debug")
    for post in posts:
        # close every file again instead of leaking the handle
        with open(posts_home + post) as f:
            is_draft = 'draft: true' in f.read()
        if is_draft:
            log(" * " + post, "debug")

    return True

@cli.command(short_help="Publish a post")
@click.argument('post', required=False, default=None)
def release(post):
    """
    \b
    Releases a post to the website
    * removes draft status from post
    * commit post in git
    * tweet about it

    """
    # Steps: pull, pick a post (random draft unless one is named), publish,
    # commit, push. Returns False as soon as one step fails, else True.

    # pull from remote repo
    if not pull():
        return False

    # if no specific post given, use a random one
    if post is None:
        log("Picking random post...", "debug")

        # collect the drafts first and choose among them, so an existing
        # draft can never be missed by unlucky random picks
        drafts = []
        for candidate in os.listdir(posts_home):
            with open(posts_home + candidate) as f:
                if 'draft: true' in f.read():
                    drafts.append(candidate)

        if not drafts:
            log("No post left in queue. Write new ones!", "error")
            return False

        post = rnd.choice(drafts)
        log("Picked %s" % post, "info")

    # remove draft
    if not publish(post):
        return False

    # commit git changes
    if not commit(post, posts_home):
        return False

    # push git changes
    if not push():
        return False

    total, released, unreleased, podcasted = list_posts()
    log("Posts: Unreleased {}, released {}, total {}, podcasted {}".format(unreleased, released, total, podcasted))

    return True

@cli.command(short_help="Edit an already written article")
@click.argument('post', required=True)
def reedit(post):
    """
    Things happen. Sometimes I need to reedit an article.
    """
    # fetch remote changes before touching the post
    pull()

    # open the post (file name relative to posts_home) in the editor
    subprocess.call([editor, posts_home + post])

    # commit the post locally, then push
    commit(post, posts_home)
    push()

    return True

@cli.command(short_help="Live recently update articles from Wikipedia")
def inspiration():
    """
    Lookup some new/recently edited articles as inspirations.
    """
    # Polls the most recent change of de.wikipedia.org every two seconds
    # and prints each newly seen page until interrupted with Ctrl-C.
    wiki_changes_api = 'https://de.wikipedia.org/w/api.php?action=query&list=recentchanges&format=json&rctype=edit|new&rclimit=1'
    ignored = ("Benutzer:", "Diskussion:", "Wikipedia:", "Kategorie:")
    oldid = ""

    try:
        while True:

            # query edits from wikipedia
            r = requests.get(wiki_changes_api).json()
            edit = r["query"]["recentchanges"][0]
            title = edit["title"]

            # some filters
            skip = any(marker in title for marker in ignored)

            # if not filtered and not seen before, show to user
            if not skip and oldid != edit["pageid"]:
                log(title + " -- https://de.wikipedia.org/wiki/?curid=" + str(edit["pageid"]), "debug")
                oldid = edit["pageid"]

            # pause after EVERY request, also after a filtered title;
            # otherwise the loop re-queries the api without any delay
            time.sleep(2)
    except KeyboardInterrupt:
        log("End...")
        return True


@cli.command(short_help="Get 20 random articles from Wikipedia")
def random():
    """
    Get some random articles from wikipedia
    """
    # lookups that fail with one of these are skipped silently
    skippable = (
        wikipedia.exceptions.DisambiguationError,
        wikipedia.exceptions.PageError,
    )

    wikipedia.set_lang("de")
    for candidate in wikipedia.random(20):
        try:
            article = wikipedia.page(candidate)
            log(article.title + " -- " + article.url, "debug")
        except skippable:
            continue

    return True


@cli.command(short_help="Add link to podcast to article")
@click.argument('post', required=True)
@click.argument('episode', required=True)
def podcast(post, episode):
    """
    Add link to podcast to article
    """
    # Stores EPISODE under the "podcast" key of the post's front matter,
    # then commits and pushes the post.

    # get changes
    pull()

    # accept the post with or without its content/post/ prefix
    post = post.replace('content/post/', '')
    ppath = posts_home + post

    # read meta data
    with open(ppath, "r") as src:
        fm = frontmatter.loads(src.read())

    # write added meta data
    fm.metadata["podcast"] = episode
    with codecs.open(ppath, "wb+", 'utf8') as dst:
        dst.write(frontmatter.dumps(fm))

    # commit to local repo, then push changes
    commit(post, posts_home)
    push()

    total, released, unreleased, podcasted = list_posts()
    log("Posts: Unreleased {}, released {}, total {}, podcasted {}".format(unreleased, released, total, podcasted))

    return True

@cli.command(short_help="Show all posts that are not podcasted, yet!")
def unpodcasted():
    """
    Show all posts that are not podcasted, yet!
    """
    # A post is listed when its file does not contain "podcast: https://".

    posts = os.listdir(posts_home)

    total, released, unreleased, podcasted = list_posts()
    log("Posts: Unreleased {}, released {}, total {}, podcasted {}".format(unreleased, released, total, podcasted))
    log("All posts that are not podcasted, yet: ")
    for post in posts:
        # close every file again instead of leaking the handle
        with open(posts_home + post) as f:
            has_podcast = 'podcast: https://' in f.read()
        if not has_podcast:
            log(" * " + post, "debug")

# Hand over to the click command group when run as a script (not on import)
if __name__ == "__main__":
    cli()