-
-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathhtml2txt
executable file
·58 lines (53 loc) · 1.14 KB
/
html2txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/perl
# abstract: turn html into plain text
# usage:
# html2txt foo.html
# html2txt < foo.html
# html2txt http://japh.se
use strict;
use warnings FATAL => 'all';
#use utf8;
#use open qw(:std :utf8);
use File::Which ();
# Choose which HTML::FormatText::* backend will do the rendering.
#
# Candidate order depends on the terminal:
#   - TERM=linux: html2text alone when its executable is found on PATH,
#     otherwise lynx, w3m, links;
#   - any other terminal: w3m, lynx, links, elinks.
# Names are ucfirst'ed to match the backend module names
# (HTML::FormatText::W3m, HTML::FormatText::Lynx, ...).
#
# TERM may be unset; comparing undef with `eq` would be a fatal error under
# `use warnings FATAL => 'all'`, so default it to the empty string first.
my $term = defined $ENV{TERM} ? $ENV{TERM} : '';

my @candidates
    = $term ne 'linux'                ? qw(w3m lynx links elinks)
    : File::Which::which('html2text') ? qw(html2text)
    :                                   qw(lynx w3m links);

my @browsers = map { ucfirst $_ } @candidates;

# $subclass ends up holding the first backend whose module loads; @err
# collects one "Name: reason" entry for every backend that failed to load.
my($subclass, @err);
for my $browser (@browsers) {
    # String eval is needed because the module name is built at runtime;
    # $browser only ever comes from the fixed lists above.
    if (eval "require HTML::FormatText::$browser; 1") {
        $subclass = $browser;
        last;
    }
    push(@err, "$browser: $@");
}

# Report the failure here, after every candidate has been tried, together
# with the reason each one could not be loaded.
if (!defined $subclass) {
    die "Found no suitable parsers:\n", map { " $_\n" } @err;
}
# Read the whole HTML document into $html_str.
#
# Input source, in order of precedence:
#   1. no arguments            -> slurp STDIN;
#   2. first argument is an http(s) URL and no local file of that exact
#      name exists             -> download it with LWP::Simple;
#   3. otherwise               -> treat the first argument as a file path.
#
# Dies with a message naming the source when the download or open fails.
my $html_str = do {
    my $html;
    if (!@ARGV) {
        local $/;    # undef record separator: read everything at once
        $html = <STDIN>;
    }
    elsif ($ARGV[0] =~ m{\Ahttps?://.+} and !-e $ARGV[0]) {
        require LWP::Simple;
        # LWP::Simple::get returns undef on failure; catch that here rather
        # than handing undef to the formatter.
        # The body is used as returned instead of being re-read through an
        # in-memory filehandle: perl refuses to open such a handle on a
        # string containing code points above 0xFF.
        $html = LWP::Simple::get($ARGV[0]);
        defined $html
            or die "Could not fetch $ARGV[0]\n";
    }
    else {
        open(my $fh, '<', $ARGV[0])
            or die "Could not open $ARGV[0]: $!\n";
        local $/;    # undef record separator: read everything at once
        $html = <$fh>;
        close($fh);
    }
    $html;
};
# Render $html_str with the backend chosen earlier ($subclass) and write
# the resulting plain text to STDOUT.
my $formatter_class = "HTML::FormatText::$subclass";

# Layout options passed to format_string, kept as an ordered list.
my @layout_options = (
    rightmargin => 78,
    leftmargin  => 0,
    #output_charset => 'utf-8',
);

print $formatter_class->format_string($html_str, @layout_options);