Skip to content

Commit

Permalink
fix golint issues
Browse files Browse the repository at this point in the history
Documentation still needs a lot of work.
  • Loading branch information
dhowden committed Jun 26, 2017
1 parent 5f823f6 commit 3dad0cb
Show file tree
Hide file tree
Showing 14 changed files with 42 additions and 32 deletions.
2 changes: 1 addition & 1 deletion doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"time"
)

// Convert MS Word DOC
// ConvertDoc converts an MS Word .doc to text.
func ConvertDoc(r io.Reader) (string, map[string]string, error) {
f, err := NewLocalFile(r, "/tmp", "sajari-convert-")
if err != nil {
Expand Down
13 changes: 6 additions & 7 deletions docconv.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ type Response struct {
MSecs uint32 `json:"msecs"`
}

// Determine the mime type by the file's extension
// MimeTypeByExtension returns a mimetype for the given extension, or
// application/octet-stream if none can be determined.
func MimeTypeByExtension(filename string) string {
switch strings.ToLower(path.Ext(filename)) {
case ".doc":
Expand Down Expand Up @@ -51,8 +52,7 @@ func MimeTypeByExtension(filename string) string {
return "application/octet-stream"
}

// TODO(dhowden): Refactor this.
// Convert a file to plain text & meta data
// Convert converts a file to plain text.
func Convert(r io.Reader, mimeType string, readability bool) (*Response, error) {
start := time.Now()

Expand Down Expand Up @@ -107,8 +107,7 @@ func Convert(r io.Reader, mimeType string, readability bool) (*Response, error)
}, nil
}

// TODO(dhowden): Refactor this.
// Convert a file given a path
// ConvertPath converts a local path to text.
func ConvertPath(path string) (*Response, error) {
mimeType := MimeTypeByExtension(path)

Expand All @@ -121,8 +120,8 @@ func ConvertPath(path string) (*Response, error) {
return Convert(f, mimeType, true)
}

// TODO(dhowden): Refactor this.
// Convert a file given a path
// ConvertPathReadability converts a local path to text, with the given readability
// option.
func ConvertPathReadability(path string, readability bool) ([]byte, error) {
mimeType := MimeTypeByExtension(path)

Expand Down
20 changes: 10 additions & 10 deletions docd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@ type Response struct {
}

// Command-line flags for docd. Each flag.* call registers the named flag with
// its default value and usage text and returns a pointer to the parsed value.
var (
// NOTE(review): every flag appears twice in this hunk. The first ten
// declarations spell out the pointer type explicitly; they appear to be the
// lines removed by this commit.
inputPath *string = flag.String("input", "", "The file path to convert and exit; no server")
listenAddr *string = flag.String("addr", ":8888", "The address to listen on (e.g. 127.0.0.1:8888)")
logLevel *uint = flag.Uint("log-level", 0, "The verbosity of the log")
readabilityLengthLow *int = flag.Int("readability-length-low", 70, "Sets the readability length low")
readabilityLengthHigh *int = flag.Int("readability-length-high", 200, "Sets the readability length high")
readabilityStopwordsLow *float64 = flag.Float64("readability-stopwords-low", 0.2, "Sets the readability stopwords low")
readabilityStopwordsHigh *float64 = flag.Float64("readability-stopwords-high", 0.3, "Sets the readability stopwords high")
readabilityMaxLinkDensity *float64 = flag.Float64("readability-max-link-density", 0.2, "Sets the readability max link density")
readabilityMaxHeadingDistance *int = flag.Int("readability-max-heading-distance", 200, "Sets the readability max heading distance")
readabilityUseClasses *string = flag.String("readability-use-classes", "good,neargood", "Comma separated list of readability classes to use")
// The next ten declarations let the compiler infer the type from the flag.*
// call and appear to be the replacements added by this commit; names,
// defaults, and usage strings are the same as above.
inputPath = flag.String("input", "", "The file path to convert and exit; no server")
listenAddr = flag.String("addr", ":8888", "The address to listen on (e.g. 127.0.0.1:8888)")
logLevel = flag.Uint("log-level", 0, "The verbosity of the log")
readabilityLengthLow = flag.Int("readability-length-low", 70, "Sets the readability length low")
readabilityLengthHigh = flag.Int("readability-length-high", 200, "Sets the readability length high")
readabilityStopwordsLow = flag.Float64("readability-stopwords-low", 0.2, "Sets the readability stopwords low")
readabilityStopwordsHigh = flag.Float64("readability-stopwords-high", 0.3, "Sets the readability stopwords high")
readabilityMaxLinkDensity = flag.Float64("readability-max-link-density", 0.2, "Sets the readability max link density")
readabilityMaxHeadingDistance = flag.Int("readability-max-heading-distance", 200, "Sets the readability max heading distance")
readabilityUseClasses = flag.String("readability-use-classes", "good,neargood", "Comma separated list of readability classes to use")
)

func main() {
Expand Down
3 changes: 2 additions & 1 deletion docx.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
"time"
)

// Convert DOCX to text
// ConvertDocx converts an MS Word docx file to text.
func ConvertDocx(r io.Reader) (string, map[string]string, error) {
meta := make(map[string]string)
var textHeader, textBody, textFooter string
Expand Down Expand Up @@ -92,6 +92,7 @@ func parseDocxText(f *zip.File) (string, error) {
return text, nil
}

// DocxXMLToText extracts plain text from the Docx XML read from r.
// It delegates to XMLToText in strict mode, passing "br", "p" and "tab" as
// the break elements and "instrText" and "script" as the skipped elements.
func DocxXMLToText(r io.Reader) (string, error) {
	var (
		breakTags = []string{"br", "p", "tab"}
		skipTags  = []string{"instrText", "script"}
	)
	return XMLToText(r, breakTags, skipTags, true)
}
10 changes: 6 additions & 4 deletions html.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
"github.com/JalfResi/justext"
)

// Convert HTML
// ConvertHTML converts HTML into text.
func ConvertHTML(r io.Reader, readability bool) (string, map[string]string, error) {
meta := make(map[string]string)

Expand Down Expand Up @@ -120,10 +120,11 @@ type HTMLReadabilityOptions struct {
ReadabilityUseClasses string
}

// HTMLReadabilityOptionsValues holds the package-wide settings read by
// HTMLReadability; for example, its ReadabilityUseClasses field is split on
// commas and compared against each paragraph's class.
// TODO: Remove this from global state.
var HTMLReadabilityOptionsValues HTMLReadabilityOptions

// Extract the readable text in an HTML document
// HTMLReadability extracts the readable text in an HTML document.
func HTMLReadability(r io.Reader) []byte {
jr := justext.NewReader(r)

Expand All @@ -144,7 +145,7 @@ func HTMLReadability(r io.Reader) []byte {

useClasses := strings.SplitN(HTMLReadabilityOptionsValues.ReadabilityUseClasses, ",", 10)

var output string = ""
output := ""
for _, paragraph := range paragraphSet {
for _, class := range useClasses {
if paragraph.CfClass == class {
Expand All @@ -156,12 +157,13 @@ func HTMLReadability(r io.Reader) []byte {
return []byte(output)
}

// HTMLToText returns the plain text of the HTML read from input.
// It calls XMLToText in non-strict mode with "br", "p" and "h1" through "h4"
// as the break elements and an empty skip list.
func HTMLToText(input io.Reader) string {
	breakTags := []string{"br", "p", "h1", "h2", "h3", "h4"}
	skipTags := []string{}
	// The signature has no error result, so the error from XMLToText is
	// dropped and only the returned text is passed back.
	plain, _ := XMLToText(input, breakTags, skipTags, false)
	return plain
}

var readabilityStopList map[string]bool = map[string]bool{"and": true, "the": true, "a": true, "about": true, "above": true, "across": true, "after": true, "afterwards": true, "again": true, "against": true, "all": true, "almost": true, "alone": true,
var readabilityStopList = map[string]bool{"and": true, "the": true, "a": true, "about": true, "above": true, "across": true, "after": true, "afterwards": true, "again": true, "against": true, "all": true, "almost": true, "alone": true,
"along": true, "already": true, "also": true, "although": true, "always": true, "am": true, "among": true, "amongst": true, "amoungst": true, "amount": true, "an": true, "another": true, "any": true,
"anyhow": true, "anyone": true, "anything": true, "anyway": true, "anywhere": true, "are": true, "around": true, "as": true, "at": true, "back": true, "be": true, "became": true, "because": true,
"become": true, "becomes": true, "becoming": true, "been": true, "before": true, "beforehand": true, "behind": true, "being": true, "below": true, "beside": true, "besides": true, "between": true,
Expand Down
3 changes: 3 additions & 0 deletions image.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,11 @@ import (
"io"
)

// ConvertImage is the placeholder used when docconv is built without the
// `ocr` build tag. It never reads r and always returns an empty string, a nil
// metadata map, and an error stating that the `ocr` build tag was not used.
func ConvertImage(r io.Reader) (string, map[string]string, error) {
	var (
		text string
		meta map[string]string
	)
	err := fmt.Errorf("docconv not built with `ocr` build tag")
	return text, meta, err
}

// SetImageLanguages is a no-op in this file: the languages argument is
// ignored and nothing is configured.
func SetImageLanguages(_ string) {
}
3 changes: 3 additions & 0 deletions image_ocr.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ var langs = struct {
lang string
}{lang: "eng"}

// ConvertImage converts images to text.
// Requires gosseract.
func ConvertImage(r io.Reader) (string, map[string]string, error) {
f, err := NewLocalFile(r, "/tmp", "sajari-convert-")
if err != nil {
Expand All @@ -36,6 +38,7 @@ func ConvertImage(r io.Reader) (string, map[string]string, error) {
return <-out, meta, nil
}

// SetImageLanguages sets the languages parameter passed to gosseract.
func SetImageLanguages(l string) {
langs.Lock()
langs.lang = l
Expand Down
2 changes: 1 addition & 1 deletion odt.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"time"
)

// Convert ODT to text
// ConvertODT converts an ODT file to text.
func ConvertODT(r io.Reader) (string, map[string]string, error) {
meta := make(map[string]string)
var textBody string
Expand Down
2 changes: 1 addition & 1 deletion pages.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
"github.com/sajari/docconv/snappy"
)

// Convert PAGES to text
// ConvertPages converts a Pages file to text.
func ConvertPages(r io.Reader) (string, map[string]string, error) {
meta := make(map[string]string)
var textBody string
Expand Down
2 changes: 1 addition & 1 deletion pdf.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import (
"time"
)

// Convert PDF
// ConvertPDF converts a PDF file to text.
func ConvertPDF(r io.Reader) (string, map[string]string, error) {
f, err := NewLocalFile(r, "/tmp", "sajari-convert-")
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion rtf.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
"time"
)

// Convert RTF
// ConvertRTF converts RTF files to text.
func ConvertRTF(r io.Reader) (string, map[string]string, error) {
f, err := NewLocalFile(r, "/tmp", "sajari-convert-")
if err != nil {
Expand Down
4 changes: 3 additions & 1 deletion tidy.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import (
"os/exec"
)

// Errors & warnings are deliberately suppressed as tidy throws warnings very easily
// Tidy attempts to tidy up XML.
// Errors & warnings are deliberately suppressed as underlying tools
// throw warnings very easily.
func Tidy(r io.Reader, xmlIn bool) ([]byte, error) {
f, err := ioutil.TempFile("/tmp", "sajari-convert-")
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion url.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"github.com/advancedlogic/GoOse"
)

// Convert URL
// ConvertURL fetches the HTML page at the URL given in the io.Reader.
func ConvertURL(input io.Reader, readability bool) (string, map[string]string, error) {
meta := make(map[string]string)

Expand Down
6 changes: 3 additions & 3 deletions xml.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"io"
)

// Convert XML input
// ConvertXML converts an XML file to text.
func ConvertXML(r io.Reader) (string, map[string]string, error) {
meta := make(map[string]string)
cleanXML, err := Tidy(r, true)
Expand All @@ -21,7 +21,7 @@ func ConvertXML(r io.Reader) (string, map[string]string, error) {
return result, meta, nil
}

// Convert XML to plain text given how to treat elements
// XMLToText converts XML to plain text given how to treat elements.
func XMLToText(r io.Reader, breaks []string, skip []string, strict bool) (string, error) {
var result string

Expand Down Expand Up @@ -73,7 +73,7 @@ func XMLToText(r io.Reader, breaks []string, skip []string, strict bool) (string
return result, nil
}

// Convert XML to a nested string map
// XMLToMap converts XML to a nested string map.
func XMLToMap(r io.Reader) (map[string]string, error) {
m := make(map[string]string)
dec := xml.NewDecoder(r)
Expand Down

0 comments on commit 3dad0cb

Please sign in to comment.