-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpretty-pocket.go
140 lines (112 loc) · 2.89 KB
/
pretty-pocket.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
package main
import (
"bufio"
"encoding/json"
"errors"
"fmt"
"os"
"path/filepath"
"strings"
"golang.org/x/net/html"
)
// pocketItem is one saved link read from an <a> element of the export.
// The struct has no JSON tags, so it is encoded under its Go field names.
type pocketItem struct {
	// URL comes from the href attribute, TimeAdded from time_added and
	// Tags from tags; each is stored as the raw attribute string.
	URL, TimeAdded, Tags string
}
// pocketExport is the top-level value that is JSON-encoded into the output
// file; it wraps the list of collected pocket items.
type pocketExport struct {
Items []pocketItem
}
// items is the package-level accumulator: appendItems adds to it and
// writeExport encodes it to JSON.
var items pocketExport
// appendItems stores a copy of item in the package-level items list, but only
// when its URL, TimeAdded and Tags are all non-empty. Incomplete items are
// skipped without any report.
func appendItems(item *pocketItem) {
	incomplete := item.URL == "" || item.TimeAdded == "" || item.Tags == ""
	if incomplete {
		return
	}
	items.Items = append(items.Items, *item)
}
// parseExport walks the HTML tree rooted at n depth-first and passes every
// <a> element it finds to appendItems as a pocketItem.
//
// For each anchor the href, time_added and tags attributes are copied into
// the item. The item is handed to appendItems exactly once, after all of the
// anchor's attributes have been read, so an anchor contributes at most one
// entry regardless of how many attributes it carries or in which order they
// appear. A nil node is ignored.
func parseExport(n *html.Node) {
	if n == nil {
		return
	}

	if n.Type == html.ElementNode && n.Data == "a" {
		var item pocketItem
		for _, attr := range n.Attr {
			switch attr.Key {
			case "href":
				item.URL = attr.Val
			case "time_added":
				item.TimeAdded = attr.Val
			case "tags":
				item.Tags = attr.Val
			}
		}
		// Must stay outside the attribute loop: calling it per attribute
		// would append the same item again for every attribute that follows
		// the point where all three fields are populated.
		appendItems(&item)
	}

	// Anchors may be nested anywhere, so always descend into the children.
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		parseExport(c)
	}
}
// writeExport collects the pocket items found under the HTML node n and
// writes them, JSON-encoded, to the file named by output.
//
// The output file is created (truncating any existing file) before the tree
// is walked. Every failure - creating the file, encoding, writing, flushing
// or closing - panics through check, so a short or failed write is never
// silently left behind.
func writeExport(n *html.Node, output string) {
	f, err := os.Create(output)
	check(err)
	// Safety net for the panic paths below. On the success path the file is
	// closed explicitly at the end, and the error from this second Close is
	// deliberately ignored.
	defer f.Close()

	// Fill the package-level items list from the HTML tree.
	parseExport(n)

	pocketItems, err := json.Marshal(items)
	check(err)

	// Write the encoded bytes as-is; converting them to a string first would
	// only add a copy.
	w := bufio.NewWriter(f)
	_, err = w.Write(pocketItems)
	check(err)
	check(w.Flush())

	// Close reports deferred write errors on some filesystems, so check it.
	check(f.Close())
}
// check panics with e when e is non-nil and does nothing otherwise.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
// validateArgsCount panics unless args holds exactly one element. The panic
// value is a plain string reporting how many arguments were found and what
// they were.
func validateArgsCount(args []string) {
	n := len(args)
	if n == 1 {
		return
	}
	panic(fmt.Sprintf("Expected args: 1, Found %d %s", n, args))
}
// validateArgsFileExtension panics unless the first argument names a file
// whose name ends in ".html".
//
// The check is on the suffix, not on ".html" appearing anywhere in the name,
// so inputs such as "export.html.bak" are rejected. An empty argument list is
// treated as an empty file name and rejected with the same message rather
// than an index-out-of-range panic. The panic value is a plain string.
func validateArgsFileExtension(argsWithoutProg []string) {
	var name string
	if len(argsWithoutProg) > 0 {
		name = argsWithoutProg[0]
	}
	if strings.HasSuffix(name, ".html") {
		return
	}
	panic(fmt.Sprintf("Expected file extension: *.html, Found %s", name))
}
// validateFileExist panics unless os.Stat succeeds on the path given as the
// first argument.
//
// A missing path panics with a "Cannot find ..." message string. Any other
// os.Stat failure panics with the underlying error instead of being silently
// ignored.
func validateFileExist(argsWithoutProg []string) {
	htmlExportFileName := argsWithoutProg[0]

	_, err := os.Stat(htmlExportFileName)
	switch {
	case err == nil:
		return
	case errors.Is(err, os.ErrNotExist):
		panic(fmt.Sprintf("Cannot find %s in current directory", htmlExportFileName))
	default:
		// Stat failed for a reason other than "not found"; surface it here
		// rather than letting the run continue on an unusable path.
		panic(err)
	}
}
// main converts the HTML export named by the single command-line argument
// into a JSON file with the same name but a ".json" extension in place of
// the input's extension.
//
// The argument is validated (count, extension, existence) before the file is
// read; every failure along the way panics.
func main() {
	// Command-line arguments without the program name.
	argsWithoutProg := os.Args[1:]

	validateArgsCount(argsWithoutProg)
	validateArgsFileExtension(argsWithoutProg)
	validateFileExist(argsWithoutProg)

	input := argsWithoutProg[0]
	data, err := os.ReadFile(input)
	check(err)

	doc, err := html.Parse(strings.NewReader(string(data)))
	// A parse error must stop the run: carrying on would hand a possibly nil
	// document to writeExport.
	check(err)

	// Swap the input's extension for ".json" to name the output file.
	output := strings.TrimSuffix(input, filepath.Ext(input)) + ".json"
	writeExport(doc, output)
}