diff --git a/README.md b/README.md
index cc2521d..f2c2422 100644
--- a/README.md
+++ b/README.md
@@ -54,6 +54,11 @@ You can use `unfluff` from node or right on the command line!
 
 This is what `unfluff` will try to grab from a web page:
 - `title` - The document's title (from the &lt;title&gt; tag)
+- `softTitle` - A version of `title` that is truncated less aggressively (a colon is not treated as a title separator)
+- `date` - The document's publication date
+- `copyright` - The document's copyright line, if present
+- `author` - The document's author(s), as an array of names
+- `publisher` - The document's publisher (website name)
 - `text` - The main text of the document with all the junk thrown away
 - `image` - The main image for the document (what's used by facebook, etc.)
 - `videos` - An array of videos that were embedded in the article. Each video has src, width and height.
@@ -127,7 +132,14 @@ data = extractor(my_html_data, 'en');
 
 ```json
 {
-  "title": "Shovel Knight review: rewrite history",
+  "title": "Shovel Knight review",
+  "softTitle": "Shovel Knight review: rewrite history",
+  "date": "2014-06-26T13:00:03Z",
+  "copyright": "2016 Vox Media Inc Designed in house",
+  "author": [
+    "Griffin McElroy"
+  ],
+  "publisher": "Polygon",
   "text": "Shovel Knight is inspired by the past in all the right ways — but it's far from stuck in it. [.. snip ..]",
   "image": "http://cdn2.vox-cdn.com/uploads/chorus_image/image/34834129/jellyfish_hero.0_cinema_1280.0.png",  
   "tags": [],
@@ -159,6 +171,11 @@ data = extractor.lazy(my_html_data, 'en');
 
 // Access whichever data elements you need directly.
 console.log(data.title());
+console.log(data.softTitle());
+console.log(data.date());
+console.log(data.copyright());
+console.log(data.author());
+console.log(data.publisher());
 console.log(data.text());
 console.log(data.image());
 console.log(data.tags());
diff --git a/lib/extractor.js b/lib/extractor.js
index 132b61b..f51049e 100644
--- a/lib/extractor.js
+++ b/lib/extractor.js
@@ -1,34 +1,68 @@
 // Generated by CoffeeScript 2.0.0-beta7
 void function () {
-  var _, addSiblings, biggestTitleChunk, formatter, getObjectTag, getScore, getSiblingsContent, getSiblingsScore, getVideoAttrs, isBoostable, isHighlinkDensity, isNodescoreThresholdMet, isTableAndNoParaExist, postCleanup, stopwords, updateNodeCount, updateScore;
+  var _, addSiblings, biggestTitleChunk, cleanText, cleanTitle, formatter, getObjectTag, getScore, getSiblingsContent, getSiblingsScore, getVideoAttrs, isBoostable, isHighlinkDensity, isNodescoreThresholdMet, isTableAndNoParaExist, postCleanup, rawTitle, stopwords, updateNodeCount, updateScore;
   _ = require('lodash');
   stopwords = require('./stopwords');
   formatter = require('./formatter');
   module.exports = {
-    title: function (doc) {
-      var titleElement, titleText, usedDelimeter;
-      titleElement = doc("meta[property='og:title']");
-      if (titleElement)
-        titleText = titleElement.attr('content');
-      if (!titleText) {
-        titleElement = doc('title').first();
-        titleText = titleElement.text();
+    date: function (doc) {
+      var cache$, cache$1, cache$2, cache$3, cache$4, dateCandidates;
+      dateCandidates = doc("meta[property='article:published_time'],     meta[itemprop*='datePublished'], meta[name='dcterms.modified'],     meta[name='dcterms.date'],     meta[name='DC.date.issued'],  meta[name='dc.date.issued'],     meta[name='dc.date.modified'], meta[name='dc.date.created'],     meta[name='DC.date'],     meta[name='DC.Date'],     meta[name='dc.date'],     meta[name='date'],     time[itemprop*='pubDate'],     time[itemprop*='pubdate'],     span[itemprop*='datePublished'],     span[property*='datePublished'],     p[itemprop*='datePublished'],     p[property*='datePublished'],     div[itemprop*='datePublished'],     div[property*='datePublished'],     li[itemprop*='datePublished'],     li[property*='datePublished'],     time,     span[class*='date'],     p[class*='date'],     div[class*='date']");
+      return (null != dateCandidates && null != (cache$ = dateCandidates.first()) && null != (cache$1 = cache$.attr('content')) ? cache$1.trim() : void 0) || (null != dateCandidates && null != (cache$2 = dateCandidates.first()) && null != (cache$3 = cache$2.attr('datetime')) ? cache$3.trim() : void 0) || cleanText(null != dateCandidates && null != (cache$4 = dateCandidates.first()) ? cache$4.text() : void 0) || null;
+    },
+    copyright: function (doc) { // Returns the cleaned text that follows "©"; null when no copyright element has text and the body text has no "©"
+      var cache$, copyright, copyrightCandidates, text;
+      copyrightCandidates = doc("p[class*='copyright'], div[class*='copyright'], span[class*='copyright'], li[class*='copyright'],     p[id*='copyright'], div[id*='copyright'], span[id*='copyright'], li[id*='copyright']");
+      text = null != copyrightCandidates && null != (cache$ = copyrightCandidates.first()) ? cache$.text() : void 0;
+      if (!text) { // no dedicated copyright element with text: search the whole body text instead
+        text = doc('body').text().replace(/\s*[\r\n]+\s*/g, '. ');
+        if (!(text.indexOf('\xa9') >= 0)) // index 0 is a hit too: the sign may be the very first character
+          return null;
       }
-      if (!titleElement)
-        return null;
-      usedDelimeter = false;
-      _.each([
+      copyright = text.replace(/.*?©(\s*copyright)?([^,;:.|\r\n]+).*/gi, '$2').trim();
+      return cleanText(copyright);
+    },
+    author: function (doc) {
+      var authorCandidates, authorList, cache$, cache$1, cache$2, cache$3, cache$4, cache$5, fallbackAuthor;
+      authorCandidates = doc("meta[property='article:author'],     meta[property='og:article:author'], meta[name='author'],     meta[name='dcterms.creator'],     meta[name='DC.creator'],     meta[name='DC.Creator'],     meta[name='dc.creator'],     meta[name='creator']");
+      authorList = [];
+      authorCandidates.each(function () {
+        var author, cache$, cache$1;
+        author = null != (cache$ = doc(this)) && null != (cache$1 = cache$.attr('content')) ? cache$1.trim() : void 0;
+        if (author)
+          return authorList.push(author);
+      });
+      if (authorList.length === 0) {
+        fallbackAuthor = (null != (cache$ = doc("span[class*='author']").first()) ? cache$.text() : void 0) || (null != (cache$1 = doc("p[class*='author']").first()) ? cache$1.text() : void 0) || (null != (cache$2 = doc("div[class*='author']").first()) ? cache$2.text() : void 0) || (null != (cache$3 = doc("span[class*='byline']").first()) ? cache$3.text() : void 0) || (null != (cache$4 = doc("p[class*='byline']").first()) ? cache$4.text() : void 0) || (null != (cache$5 = doc("div[class*='byline']").first()) ? cache$5.text() : void 0);
+        if (fallbackAuthor)
+          authorList.push(cleanText(fallbackAuthor));
+      }
+      return authorList;
+    },
+    publisher: function (doc) {
+      var cache$, cache$1, publisherCandidates;
+      publisherCandidates = doc("meta[property='og:site_name'],     meta[name='dc.publisher'],     meta[name='DC.publisher'],     meta[name='DC.Publisher']");
+      if (null != publisherCandidates && null != (cache$ = publisherCandidates.first()) && null != (cache$1 = cache$.attr('content')))
+        return cache$1.trim();
+    },
+    title: function (doc) {
+      var titleText;
+      titleText = rawTitle(doc);
+      return cleanTitle(titleText, [
         '|',
         ' - ',
         '\xbb',
         ':'
-      ], function (c) {
-        if (titleText.indexOf(c) >= 0 && !usedDelimeter) {
-          titleText = biggestTitleChunk(titleText, c);
-          return usedDelimeter = true;
-        }
-      });
-      return titleText.replace(/�/g, '').trim();
+      ]);
+    },
+    softTitle: function (doc) {
+      var titleText;
+      titleText = rawTitle(doc);
+      return cleanTitle(titleText, [
+        '|',
+        ' - ',
+        '\xbb'
+      ]);
     },
     text: function (doc, topNode, lang) {
       if (topNode) {
@@ -414,4 +448,37 @@ void function () {
     });
     return node;
   };
+  cleanText = function (text) { // Flattens whitespace and strips HTML comment markup and "�"; null/undefined is treated as ''
+    return (text || '').replace(/[\r\n\t]/g, ' ').replace(/<!--[\s\S]*?-->/g, '').replace(/\s\s+/g, ' ').replace(/�/g, '').trim();
+  };
+  cleanTitle = function (title, delimiters) {
+    var titleText, usedDelimeter;
+    titleText = title || '';
+    usedDelimeter = false;
+    _.each(delimiters, function (c) {
+      if (titleText.indexOf(c) >= 0 && !usedDelimeter) {
+        titleText = biggestTitleChunk(titleText, c);
+        return usedDelimeter = true;
+      }
+    });
+    return cleanText(titleText);
+  };
+  rawTitle = function (doc) {
+    var cache$, cache$1, cache$2, cache$3, cache$4, cache$5, cache$6, cache$7, cache$8, cache$9, gotTitle, titleText;
+    gotTitle = false;
+    titleText = '';
+    _.each([
+      null != (cache$ = doc("meta[property='og:title']")) && null != (cache$1 = cache$.first()) ? cache$1.attr('content') : void 0,
+      null != (cache$2 = doc("h1[class*='title']")) && null != (cache$3 = cache$2.first()) ? cache$3.text() : void 0,
+      null != (cache$4 = doc('title')) && null != (cache$5 = cache$4.first()) ? cache$5.text() : void 0,
+      null != (cache$6 = doc('h1')) && null != (cache$7 = cache$6.first()) ? cache$7.text() : void 0,
+      null != (cache$8 = doc('h2')) && null != (cache$9 = cache$8.first()) ? cache$9.text() : void 0
+    ], function (candidate) {
+      if (candidate && candidate.trim() && !gotTitle) {
+        titleText = candidate.trim();
+        return gotTitle = true;
+      }
+    });
+    return titleText;
+  };
 }.call(this);
diff --git a/lib/unfluff.js b/lib/unfluff.js
index a724ad5..81ad385 100644
--- a/lib/unfluff.js
+++ b/lib/unfluff.js
@@ -10,6 +10,11 @@ void function () {
     lng = language || extractor.lang(doc);
     pageData = {
       title: extractor.title(doc),
+      softTitle: extractor.softTitle(doc),
+      date: extractor.date(doc),
+      author: extractor.author(doc),
+      publisher: extractor.publisher(doc),
+      copyright: extractor.copyright(doc),
       favicon: extractor.favicon(doc),
       description: extractor.description(doc),
       keywords: extractor.keywords(doc),
@@ -31,6 +36,31 @@ void function () {
         doc = getParsedDoc.call(this, html);
         return null != this.title_ ? this.title_ : this.title_ = extractor.title(doc);
       },
+      softTitle: function () {
+        var doc;
+        doc = getParsedDoc.call(this, html);
+        return null != this.softTitle_ ? this.softTitle_ : this.softTitle_ = extractor.softTitle(doc);
+      },
+      date: function () {
+        var doc;
+        doc = getParsedDoc.call(this, html);
+        return null != this.date_ ? this.date_ : this.date_ = extractor.date(doc);
+      },
+      copyright: function () {
+        var doc;
+        doc = getParsedDoc.call(this, html);
+        return null != this.copyright_ ? this.copyright_ : this.copyright_ = extractor.copyright(doc);
+      },
+      author: function () {
+        var doc;
+        doc = getParsedDoc.call(this, html);
+        return null != this.author_ ? this.author_ : this.author_ = extractor.author(doc);
+      },
+      publisher: function () {
+        var doc;
+        doc = getParsedDoc.call(this, html);
+        return null != this.publisher_ ? this.publisher_ : this.publisher_ = extractor.publisher(doc);
+      },
       favicon: function () {
         var doc;
         doc = getParsedDoc.call(this, html);
diff --git a/src/extractor.coffee b/src/extractor.coffee
index 6dac1a6..b166559 100644
--- a/src/extractor.coffee
+++ b/src/extractor.coffee
@@ -3,24 +3,91 @@ stopwords = require("./stopwords")
 formatter = require("./formatter")
 
 module.exports =
-  # Grab the title of an html doc (excluding junk)
-  title: (doc) ->
-    titleElement = doc("meta[property='og:title']")
-    titleText = titleElement.attr("content") if titleElement
+  # Grab the date of an html doc: the first candidate's "content" attribute, else its "datetime" attribute, else its cleaned text (null if all are empty)
+  date: (doc) ->
+    dateCandidates = doc("meta[property='article:published_time'], \
+    meta[itemprop*='datePublished'], meta[name='dcterms.modified'], \
+    meta[name='dcterms.date'], \
+    meta[name='DC.date.issued'],  meta[name='dc.date.issued'], \
+    meta[name='dc.date.modified'], meta[name='dc.date.created'], \
+    meta[name='DC.date'], \
+    meta[name='DC.Date'], \
+    meta[name='dc.date'], \
+    meta[name='date'], \
+    time[itemprop*='pubDate'], \
+    time[itemprop*='pubdate'], \
+    span[itemprop*='datePublished'], \
+    span[property*='datePublished'], \
+    p[itemprop*='datePublished'], \
+    p[property*='datePublished'], \
+    div[itemprop*='datePublished'], \
+    div[property*='datePublished'], \
+    li[itemprop*='datePublished'], \
+    li[property*='datePublished'], \
+    time, \
+    span[class*='date'], \
+    p[class*='date'], \
+    div[class*='date']")
+    dateCandidates?.first()?.attr("content")?.trim() || dateCandidates?.first()?.attr("datetime")?.trim() || cleanText(dateCandidates?.first()?.text()) || null
+
+
+  # Grab the copyright line: the cleaned text that follows "©" (and an optional "copyright" word), cut at the next , ; : . | or line break
+  copyright: (doc) ->
+    copyrightCandidates = doc("p[class*='copyright'], div[class*='copyright'], span[class*='copyright'], li[class*='copyright'], \
+    p[id*='copyright'], div[id*='copyright'], span[id*='copyright'], li[id*='copyright']")
+    text = copyrightCandidates?.first()?.text()
+    if !text
+      # No copyright element with text: search the body text; "©" may be its very first character, so index 0 counts as found (null only when absent)
+      text = doc("body").text().replace(/\s*[\r\n]+\s*/g, ". ")
+      return null unless text.indexOf("©") >= 0
+    copyright = text.replace(/.*?©(\s*copyright)?([^,;:.|\r\n]+).*/gi, "$2").trim()
+    cleanText(copyright)
+
+
+  # Grab the author(s) of an html doc as an array: one entry per author meta tag with non-empty content (empty array if nothing is found)
+  author: (doc) ->
+    authorCandidates = doc("meta[property='article:author'], \
+    meta[property='og:article:author'], meta[name='author'], \
+    meta[name='dcterms.creator'], \
+    meta[name='DC.creator'], \
+    meta[name='DC.Creator'], \
+    meta[name='dc.creator'], \
+    meta[name='creator']")
+    authorList = []
+    authorCandidates.each () ->
+      author = doc(this)?.attr("content")?.trim()
+      if author
+        authorList.push(author)
+    # No meta authors: fall back to the first non-empty text among the first span/p/div whose class contains "author", then "byline"
+    if authorList.length == 0
+      fallbackAuthor = doc("span[class*='author']").first()?.text() || doc("p[class*='author']").first()?.text() || doc("div[class*='author']").first()?.text() || \
+      doc("span[class*='byline']").first()?.text() || doc("p[class*='byline']").first()?.text() || doc("div[class*='byline']").first()?.text()
+      if fallbackAuthor
+        authorList.push(cleanText(fallbackAuthor))
+
+    authorList
+
+
+  # Grab the publisher of the page/site: trimmed "content" of the first og:site_name / dc.publisher meta tag (undefined, not null, when it has none)
+  publisher: (doc) ->
+    publisherCandidates = doc("meta[property='og:site_name'], \
+    meta[name='dc.publisher'], \
+    meta[name='DC.publisher'], \
+    meta[name='DC.Publisher']")
+    publisherCandidates?.first()?.attr("content")?.trim()
 
-    if !titleText
-      titleElement = doc("title").first()
-      titleText = titleElement.text()
 
-    return null unless titleElement
+  # Grab the title of an html doc (excluding junk)
+  # Hard-truncates titles containing colon or spaced dash
+  title: (doc) ->
+    titleText = rawTitle(doc)
+    return cleanTitle(titleText, ["|", " - ", "»", ":"])
 
-    usedDelimeter = false
-    _.each ["|", " - ", "»", ":"], (c) ->
-      if titleText.indexOf(c) >= 0 && !usedDelimeter
-        titleText = biggestTitleChunk(titleText, c)
-        usedDelimeter = true
+  # Grab the title with soft truncation: same as title, except that ":" is not treated as a delimiter
+  softTitle: (doc) ->
+    titleText = rawTitle(doc)
+    return cleanTitle(titleText, ["|", " - ", "»"])
 
-    titleText.replace(/�/g, "").trim()
 
   # Grab the 'main' text chunk
   text: (doc, topNode, lang) ->
@@ -434,3 +501,33 @@ postCleanup = (doc, targetNode, lang) ->
         doc(e).remove()
 
   return node
+
+
+cleanText = (text) ->  # Flattens whitespace and strips HTML comment markup and "�"; null/undefined is treated as ""
+  return (text || "").replace(/[\r\n\t]/g, " ").replace(/<!--[\s\S]*?-->/g, "").replace(/\s\s+/g, " ").replace(/�/g, "").trim()
+
+
+cleanTitle = (title, delimiters) ->
+  titleText = title || ""
+  usedDelimeter = false
+  _.each delimiters, (c) ->
+    if titleText.indexOf(c) >= 0 && !usedDelimeter
+      titleText = biggestTitleChunk(titleText, c)
+      usedDelimeter = true
+  return cleanText(titleText)
+
+
+rawTitle = (doc) ->  # Returns the trimmed text of the winning title candidate, or "" when every candidate is blank
+  gotTitle = false
+  titleText = ""
+  # Candidates in priority order; the first non-blank one wins. The first h1 or h2 is a useful fallback
+  _.each [doc("meta[property='og:title']")?.first()?.attr("content"), \
+  doc("h1[class*='title']")?.first()?.text(), \
+  doc("title")?.first()?.text(), \
+  doc("h1")?.first()?.text(), \
+  doc("h2")?.first()?.text()], (candidate) ->
+    if candidate && candidate.trim() && !gotTitle
+      titleText = candidate.trim()
+      gotTitle = true
+
+  return titleText
\ No newline at end of file
diff --git a/src/unfluff.coffee b/src/unfluff.coffee
index 9d5a20e..52e7da0 100644
--- a/src/unfluff.coffee
+++ b/src/unfluff.coffee
@@ -8,6 +8,11 @@ module.exports = unfluff = (html, language) ->
 
   pageData =
     title: extractor.title(doc)
+    softTitle: extractor.softTitle(doc)
+    date: extractor.date(doc)
+    author: extractor.author(doc)
+    publisher: extractor.publisher(doc)
+    copyright: extractor.copyright(doc)
     favicon: extractor.favicon(doc)
     description: extractor.description(doc)
     keywords: extractor.keywords(doc)
@@ -34,6 +39,26 @@ unfluff.lazy = (html, language) ->
     doc = getParsedDoc.call(this, html)
     @title_ ?= extractor.title(doc)
 
+  softTitle: () ->
+    doc = getParsedDoc.call(this, html)
+    @softTitle_ ?= extractor.softTitle(doc)
+
+  date: () ->
+    doc = getParsedDoc.call(this, html)
+    @date_ ?= extractor.date(doc)
+
+  copyright: () ->
+    doc = getParsedDoc.call(this, html)
+    @copyright_ ?= extractor.copyright(doc)
+
+  author: () ->
+    doc = getParsedDoc.call(this, html)
+    @author_ ?= extractor.author(doc)
+
+  publisher: () ->
+    doc = getParsedDoc.call(this, html)
+    @publisher_ ?= extractor.publisher(doc)
+
   favicon: () ->
     doc = getParsedDoc.call(this, html)
     @favicon_ ?= extractor.favicon(doc)
diff --git a/test/extractor.coffee b/test/extractor.coffee
index 51cc86b..dded7c0 100644
--- a/test/extractor.coffee
+++ b/test/extractor.coffee
@@ -20,6 +20,11 @@ suite 'Extractor', ->
     title = extractor.title(doc)
     eq title, "This is my page"
 
+  test 'returns a soft title chunk without truncation', ->
+    doc = cheerio.load("<html><head><title>University Budgets: Where Your Fees Go | Top Universities</title></head></html>")
+    title = extractor.softTitle(doc)
+    eq title, "University Budgets: Where Your Fees Go"
+
   test 'prefers the meta tag title', ->
     doc = cheerio.load("<html><head><title>This is my page - mysite</title><meta property=\"og:title\" content=\"Open graph title\"></head></html>")
     title = extractor.title(doc)
@@ -49,3 +54,59 @@ suite 'Extractor', ->
     doc = cheerio.load("<html><head><title></title></head></html>")
     favicon = extractor.favicon(doc)
     eq undefined, favicon
+
+  test 'returns the article published meta date', ->
+    doc = cheerio.load("<html><head><meta property=\"article:published_time\" content=\"2014-10-15T00:01:03+00:00\" /></head></html>")
+    date = extractor.date(doc)
+    eq date, "2014-10-15T00:01:03+00:00"
+
+  test 'returns the article dublin core meta date', ->
+    doc = cheerio.load("<html><head><meta name=\"DC.date.issued\" content=\"2014-10-15T00:01:03+00:00\" /></head></html>")
+    date = extractor.date(doc)
+    eq date, "2014-10-15T00:01:03+00:00"
+
+  test 'returns the date in the <time> element', ->
+    doc = cheerio.load("<html><head></head><body><time>24 May, 2010</time></body></html>")
+    date = extractor.date(doc)
+    eq date, "24 May, 2010"
+
+  test 'returns the date in the <time> element datetime attribute', ->
+    doc = cheerio.load("<html><head></head><body><time datetime=\"2010-05-24T13:47:52+0000\">24 May, 2010</time></body></html>")
+    date = extractor.date(doc)
+    eq date, "2010-05-24T13:47:52+0000"
+
+  test 'returns the copyright line element', ->
+    doc = cheerio.load("<html><head></head><body><div>Some stuff</div><ul><li class='copyright'><!-- // some garbage -->© 2016 The World Bank Group, All Rights Reserved.</li></ul></body></html>")
+    copyright = extractor.copyright(doc)
+    eq copyright, "2016 The World Bank Group"
+
+  test 'returns the copyright found in the text', ->
+    doc = cheerio.load("<html><head></head><body><div>Some stuff</div><ul>© 2016 The World Bank Group, All Rights Reserved\nSome garbage following</li></ul></body></html>")
+    copyright = extractor.copyright(doc)
+    eq copyright, "2016 The World Bank Group"
+
+  test 'returns nothing if no copyright in the text', ->
+    doc = cheerio.load("<html><head></head><body></body></html>")
+    copyright = extractor.copyright(doc)
+    eq copyright, null
+
+  test 'returns the article published meta author', ->
+    doc = cheerio.load("<html><head><meta property=\"article:author\" content=\"Joe Bloggs\" /></head></html>")
+    author = extractor.author(doc)
+    eq JSON.stringify(author), JSON.stringify(["Joe Bloggs"])
+
+  test 'returns the meta author', ->
+    doc = cheerio.load("<html><head><meta property=\"article:author\" content=\"Sarah Smith\" /><meta name=\"author\" content=\"Joe Bloggs\" /></head></html>")
+    author = extractor.author(doc)
+    eq JSON.stringify(author), JSON.stringify(["Sarah Smith", "Joe Bloggs"])
+
+  test 'returns the named author in the text as fallback', ->
+    doc = cheerio.load("<html><head></head><body><span class=\"author\"><a href=\"/author/gary-trust-6318\" class=\"article__author-link\">Gary Trust</a></span></body></html>")
+    author = extractor.author(doc)
+    eq JSON.stringify(author), JSON.stringify(["Gary Trust"])
+
+  test 'returns the meta publisher', ->
+    doc = cheerio.load("<html><head><meta property=\"og:site_name\" content=\"Polygon\" /><meta name=\"author\" content=\"Griffin McElroy\" /></head></html>")
+    publisher = extractor.publisher(doc)
+    eq publisher, "Polygon"
+