forked from HariSekhon/Nagios-Plugins
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_hadoop_replication.pl
executable file
·95 lines (73 loc) · 3.13 KB
/
check_hadoop_replication.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/perl -T
# nagios: -epn
#
# Author: Hari Sekhon
# Date: 2014-03-05 21:45:08 +0000 (Wed, 05 Mar 2014)
#
# http://github.com/harisekhon
#
# License: see accompanying LICENSE file
#
# Plugin description and version, assigned as undeclared package globals.
# These assignments sit above 'use strict' (which is lexically scoped and only
# takes effect from its own line onwards), so no 'our'/'my' declaration is needed.
# $main::VERSION is interpolated into the HTTP User-Agent string further down.
# NOTE(review): $DESCRIPTION is presumably consumed by HariSekhonUtils for the
# usage/help output - confirm against the library.
$DESCRIPTION = "Nagios Plugin to check Hadoop HDFS replication via NameNode JMX
Raises Critical on any missing or corrupt blocks, with configurable thresholds for under-replicated blocks. Also reports excess blocks and blocks pending replication
See also check_hadoop_dfs.pl and check_hadoop_namenode.pl for earlier implementations of replication checking using dfsadmin and the old NameNode JSP respectively
Tested on Hortonworks HDP 2.1 (Hadoop 2.4.0.2.1.1.0-385)";
$VERSION = "0.1";
use strict;
use warnings;
BEGIN {
use File::Basename;
use lib dirname(__FILE__) . "/lib";
}
use HariSekhonUtils;
use Data::Dumper;
use JSON::XS;
use LWP::Simple '$ua';
# Identify this plugin in the User-Agent of the LWP user agent object.
$ua->agent("Hari Sekhon $progname version $main::VERSION");

# Defaults are registered before the command line is parsed by get_options():
# warning/critical thresholds of 0 / 99999 and NameNode port 50070.
set_threshold_defaults(0, 99999);
set_port_default(50070);
env_creds(["HADOOP_NAMENODE", "HADOOP"], "Hadoop NameNode");

# Command line switches offered: the library's host options plus threshold options.
%options = (%hostoptions, %thresholdoptions);
get_options();

# Validate the supplied host, port and thresholds.
$host = validate_host($host);
$port = validate_port($port);
validate_thresholds(1, 1, { simple => "upper", positive => 1, integer => 1 });
vlog2;

set_timeout();

# Initial status; the checks further down may change it.
$status = "OK";
# Fetch the NameNode's /jmx endpoint and decode the response body as JSON.
my $url     = "http://$host:$port/jmx";
my $content = curl($url, "NameNode");

# try/catch are not defined in this file (presumably exported by HariSekhonUtils);
# the catch block quits with a fixed message naming the URL.
try {
    $json = decode_json($content);
};
catch {
    quit("invalid json returned by NameNode at '$url'");
};

# Dump the decoded structure via vlog3 for debugging.
vlog3(Dumper($json));
# Scan the "beans" array of the JMX output for the FSNamesystem mbean, read its
# block counters, and build the status message and perfdata from them.
# Exits via quit(): UNKNOWN if the mbean is not present, otherwise with the
# computed $status and $msg.
my @beans = get_field_array("beans");
my $found_mbean = 0;
# A named loop variable is used instead of $_ because the body makes many
# library calls, any of which could otherwise clobber the implicit topic.
foreach my $bean (@beans){
    next unless get_field2($bean, "name") eq "Hadoop:service=NameNode,name=FSNamesystem";
    $found_mbean = 1;

    my $pending_repl = get_field2_int($bean, "PendingReplicationBlocks");
    my $pending_del  = get_field2_int($bean, "PendingDeletionBlocks");
    my $under_repl   = get_field2_int($bean, "UnderReplicatedBlocks");
    my $sched_repl   = get_field2_int($bean, "ScheduledReplicationBlocks");
    my $corrupt      = get_field2_int($bean, "CorruptBlocks");
    my $excess       = get_field2_int($bean, "ExcessBlocks");
    my $missing      = get_field2_int($bean, "MissingBlocks");
    my $post_misrepl = get_field2_int($bean, "PostponedMisreplicatedBlocks");

    $msg = sprintf("hdfs blocks missing: %d, corrupt: %d, under-replicated: %d", $missing, $corrupt, $under_repl);

    # Any missing or corrupt block is critical; under-replicated blocks are
    # judged against the user-supplied thresholds.
    critical if $missing;
    critical if $corrupt;
    check_thresholds($under_repl);

    # Perfdata labels must be of the form 'label'=value. The under-replicated
    # counter is deliberately emitted last here because msg_perf_thresholds()
    # is called straight after it (presumably appending the warning/critical
    # thresholds to that perfdata item - confirm against HariSekhonUtils).
    $msg .= sprintf(", excess: %d, replication pending: %d, scheduled: %d, deletion pending: %d, postponed misreplicated: %d | 'hdfs blocks missing'=%d 'hdfs blocks corrupt'=%d 'hdfs blocks under-replicated'=%d", $excess, $pending_repl, $sched_repl, $pending_del, $post_misrepl, $missing, $corrupt, $under_repl);
    msg_perf_thresholds();

    # Exactly five values for the five %d conversions; surplus arguments would
    # trigger a "Redundant argument in sprintf" warning on Perl 5.22+.
    $msg .= sprintf(" 'hdfs excess blocks'=%d 'hdfs blocks pending replication'=%d 'hdfs blocks scheduled for replication'=%d 'hdfs blocks pending deletion'=%d 'hdfs blocks postponed misreplicated'=%d", $excess, $pending_repl, $sched_repl, $pending_del, $post_misrepl);

    # Only one FSNamesystem mbean is processed.
    last;
}
quit "UNKNOWN", "failed to find FSNamesystem mbean" unless $found_mbean;
quit $status, $msg;