#!/usr/bin/perl -w
#
# This is a sub-agent built for monitoring Linux NFS client and server
# operations via the net-snmp(d) agent.
#
# To load this into a running agent with embedded perl support turned
# on in net-snmpd, simply put the following line (without the leading
# # mark) in your snmpd.conf file:
#
#   perl do "/path/to/NFSSTATS.pl";
#
# Alternatively, if you do not have embedded perl support turned on
# (eg Fedora Core snmpd is not built with embedded perl support), you can
# use this program separately as a sub-agent. Add this line to your
# snmpd.conf:
#
#   master agentx
#
# Then run and background the script, eg in rc.local:
#   /path/to/NFSSTATS.pl > /var/log/snmpd-nfs.log 2>&1 &
#
# You will require the NetSNMP perl modules to be installed for this
# module to work (eg, in Fedora Core: yum install net-snmp-perl).
#
# This script works by reading the /proc/net/rpc/nfs and /proc/net/rpc/nfsd
# statistics files. Results are cached for $CACHE_TIME seconds.
#
# snmpwalk-ing is supported.
#
# TODO:
# 1.  Implement NFS v1, v2 and v4 support
# 2.  Write an MIB file, or integrate with SUN-nfsstat-mib.
#     See http://www.misplaced.net/~btoneill/sunmib.html
#     (this script has more statistics than SUN-nfsstat-mib)
# 2a. Pick a sensible OID-space rather than random enterprises.6789.6789
# 3.  Percentages for other NFS server statistics
# 4.  Some statistics for threads usage time.
#     See http://marc.theaimsgroup.com/?l=linux-nfs&m=102824853219024&w=2
#
# Craig Macdonald
#

BEGIN {
    print STDERR "starting $0\n";
}

# set to 1 to get extra debugging information
my $debugging = 0;

# where to answer queries from
my $SOURCEOID = '.1.3.6.1.4.1.6789.6789';    # corresponds to enterprises.6789.6789

# how long to cache /proc/net/rpc/nfs and /proc/net/rpc/nfsd results for,
# in seconds
my $CACHE_TIME = 30;

# name that module gives to snmpd
my $NAME = 'NFSSTATS';

# kernel statistics files the counters are read from
my $CLIENT_STATS_FILE = '/proc/net/rpc/nfs';
my $SERVER_STATS_FILE = '/proc/net/rpc/nfsd';

# use strict breaks embedded perl support, so leave out
#use strict;

# NOTE(review): a file-scoped lexical hides any package variable of the same
# name, so the "if (!$agent)" test below only ever sees this variable.
# Confirm that embedded mode still behaves as intended with this declaration.
my $agent;

# Statistics are cached in these hashes; the matching *_cache_time scalar
# holds the time of the last refresh. Entries expire after $CACHE_TIME.
# These must be initialised before the sub-agent main loop below, because
# that loop blocks and nothing after it runs until shutdown.
my $client_cache_time = 0;
my %client_cache;
my $server_cache_time = 0;
my %server_cache;

# error here? you probably haven't installed the perl support for net-snmpd
# eg in fedora core, install the net-snmp-perl package
use NetSNMP::OID (':all');
use NetSNMP::agent (':all');
use NetSNMP::ASN (':all');

# OID layout below $SOURCEOID: .0 = client subtree, .1 = server subtree
my $REGAT  = NetSNMP::OID->new($SOURCEOID);
my $CLIENT = $REGAT . '.0';
my $SERVER = $REGAT . '.1';

my $RPC_CLIENT = $CLIENT . '.0';
my $CLIENT1    = $CLIENT . '.1';
my $CLIENT2    = $CLIENT . '.2';
my $CLIENT3    = $CLIENT . '.3';
my $CLIENT4    = $CLIENT . '.4';

my $RPC_SERVER = $SERVER . '.0';
my $SERVER1    = $SERVER . '.1';
my $SERVER2    = $SERVER . '.2';
my $SERVER3    = $SERVER . '.3';
my $SERVER4    = $SERVER . '.4';
my $SERVER_NET = $SERVER . '.5';
my $SERVER_RC  = $SERVER . '.6';
my $SERVER_FH  = $SERVER . '.7';
my $SERVER_TH  = $SERVER . '.8';

# NFSv3 procedure names, in the order their counters appear on the "proc3"
# line. The array index is also the OID sub-identifier of the procedure.
my @NFS3_PROCS = qw(
    null getattr setattr lookup access readlink read write create mkdir
    symlink mknod remove rmdir rename link readdir readdirplus fsstat
    fsinfo pathconf commit
);

# Maps each exported OID to the name of the counter it publishes.
my %options = (
    $RPC_CLIENT . '.1' => 'client.rpc.count.calls',
    $RPC_CLIENT . '.2' => 'client.rpc.count.retrans',
    $RPC_CLIENT . '.3' => 'client.rpc.count.authrefrsh',

    $RPC_SERVER . '.0' => 'server.rpc.count.calls',
    # no field is stored under this name by update_server_cache, so this
    # OID always answers 0; it is kept so the OID layout stays stable
    $RPC_SERVER . '.1' => 'server.rpc.count.retrans',
    $RPC_SERVER . '.2' => 'server.rpc.count.badcalls',
    $RPC_SERVER . '.3' => 'server.rpc.count.badauth',
    $RPC_SERVER . '.4' => 'server.rpc.count.badclnt',
    $RPC_SERVER . '.5' => 'server.rpc.count.xdrcall',

    $SERVER_NET . '.0' => 'server.net.count.packets',
    $SERVER_NET . '.1' => 'server.net.count.udp',
    $SERVER_NET . '.2' => 'server.net.count.tcp',
    $SERVER_NET . '.3' => 'server.net.count.tcpconn',

    $SERVER_RC . '.0' => 'server.replycache.count.hits',
    $SERVER_RC . '.1' => 'server.replycache.count.misses',
    $SERVER_RC . '.2' => 'server.replycache.count.nocache',

    $SERVER_FH . '.0' => 'server.fhcache.count.lookups',
    $SERVER_FH . '.1' => 'server.fhcache.count.anon',
    $SERVER_FH . '.2' => 'server.fhcache.count.nocache_dir',
    $SERVER_FH . '.3' => 'server.fhcache.count.nocache_nondir',
    $SERVER_FH . '.4' => 'server.fhcache.count.stale',

    $SERVER_TH . '.0' => 'server.threads.count',
    $SERVER_TH . '.1' => 'server.threads.fullcount',
);

# NFSv3 per-procedure entries: <subtree>.<proc>.0 is the raw count and
# <subtree>.<proc>.1 the percentage. The index one past the last procedure
# (22) carries the totals. Generating these from @NFS3_PROCS keeps the OID
# table and the cache keys written by _store_nfs3_stats in step.
for my $side ([$CLIENT3, 'client3'], [$SERVER3, 'server3']) {
    my ($base, $prefix) = @$side;
    for my $i (0 .. $#NFS3_PROCS) {
        $options{ $base . ".$i.0" } = "$prefix.count.$NFS3_PROCS[$i]";
        $options{ $base . ".$i.1" } = "$prefix.percent.$NFS3_PROCS[$i]";
    }
    my $total_index = scalar @NFS3_PROCS;
    $options{ $base . ".$total_index.0" } = "$prefix.count.total";
    $options{ $base . ".$total_index.1" } = "$prefix.percent.total";
}

# All exported OIDs as NetSNMP::OID objects in ascending order; used to
# answer GETNEXT requests.
my @ks = sort { $a <=> $b } map { NetSNMP::OID->new($_) } keys %options;
my $lowestOid  = $ks[0];
my $highestOid = $ks[$#ks];

if ($debugging) {
    foreach my $k (@ks) {
        print STDERR "$k -> " . $options{$k} . "\n";
    }
}

print STDERR "$0 loaded ok\n" if $debugging;

my $running = 0;

# if we're not embedded, this will get auto-set below to 1
my $subagent = 0;

# where we are going to hook onto
my $regoid = NetSNMP::OID->new($REGAT);
print STDERR "registering at " . $regoid . "\n" if ($debugging);

# If we're not running embedded within the agent, then try to start
# our own subagent instead.
if (!$agent) {
    $agent = NetSNMP::agent->new(
        'Name'   => $NAME . '_Agent',
        'AgentX' => 1,                  # make us a subagent
    );
    if (!defined $agent) {
        print STDERR " Failed to connect to master, exiting $0\n";
        exit -1;
    }
    $subagent = 1;
    print STDERR "started us as a subagent ($agent)\n";
}

# we register ourselves with the master agent we're embedded in. The
# global $agent variable is how we do this:
$agent->register($NAME, $regoid, \&my_snmp_handler);

if ($subagent) {
    # We need to perform a loop here waiting for snmp requests. We
    # aren't doing anything else here, but we could.
    $SIG{'INT'}  = \&shut_it_down;
    $SIG{'QUIT'} = \&shut_it_down;
    $running = 1;
    while ($running) {
        $agent->agent_check_and_process(1);    # 1 = block
        print STDERR "mainloop excercised\n" if ($debugging);
    }
    $agent->shutdown();
}

######################################################################
# Handle the incoming requests to our part of the OID tree. This
# subroutine gets called for all requests within the OID space under
# the registration oid made above.
#
# Arguments are the four values the agent passes to a registered handler;
# only $request_info (for the request mode) and $requests (the head of the
# request list, walked with ->next()) are used.
#
# GET requests are answered for the exact OID asked for. GETNEXT requests
# are answered with the first exported OID after the one asked for; other
# modes are ignored. Any exception is caught and logged so that a bad
# request cannot take the agent down.
sub my_snmp_handler {
    my ($handler, $registration_info, $request_info, $requests) = @_;

    eval {
        my $mode = $request_info->getMode();

        if ($debugging) {
            print STDERR "refs: ",
                join(", ",
                    ref($handler), ref($registration_info),
                    ref($request_info), ref($requests)),
                "\n";
            print STDERR "processing a request of type " . $mode . "\n";
        }

        for (my $request = $requests; $request; $request = $request->next()) {
            my $oid = $request->getOID();
            print STDERR " processing request of $oid\n" if ($debugging);

            if ($mode == MODE_GET) {
                # all main get code happens in set_value
                set_value($request, $oid);
            }
            elsif ($mode == MODE_GETNEXT) {
                if ($debugging) {
                    print STDERR " query:$oid low:$lowestOid high:$highestOid "
                        . ref($oid) . " " . ref($lowestOid) . " "
                        . ref($highestOid) . "\n";
                }

                if ($oid < $lowestOid) {
                    # the requested oid is lower than ours, so return ours
                    set_value($request, $lowestOid);
                }
                elsif ($oid < $highestOid) {
                    # request is somewhere in our range, so return the first
                    # exported OID after it (linear search of sorted keys)
                    my $oidToUse;
                    foreach my $candidate (@ks) {
                        if (NetSNMP::OID::compare($oid, $candidate) < 0) {
                            $oidToUse = $candidate;
                            last;
                        }
                    }

                    if (defined $oidToUse) {
                        print STDERR " Next oid to $oid is $oidToUse\n"
                            if ($debugging);
                        set_value($request, $oidToUse);
                    }
                }
                # at or beyond our highest OID: nothing of ours follows
            }
        }
    };
    if ($@) {
        print STDERR " some problem in request processing loop, caught by eval: $@";
    }
    print STDERR " finished processing\n" if ($debugging);
}

# Signal handler: ask the sub-agent main loop to stop after the current
# iteration.
sub shut_it_down {
    $running = 0;
    print STDERR "shutting down $0\n" if ($debugging);
}

# Give a $request a value by $oid.
#
# Looks the counter name for $oid up in %options, fetches its current value
# with get_value and stores OID and value on the request. Problems are
# logged to STDERR and leave the request unanswered.
sub set_value {
    my ($request, $oid) = @_;

    print STDERR "looking up $oid\n" if ($debugging);

    my $counterName = $options{$oid};
    if (!defined $counterName) {
        print STDERR (scalar localtime) . " --> error finding a counter for $oid\n";
        return;
    }

    my $value = get_value($counterName);
    if (!defined $value) {
        print STDERR (scalar localtime)
            . " -> error getting counter called $counterName for $oid\n";
        return;
    }

    if ($debugging) {
        print STDERR " $oid -> $lowestOid\n";
        print STDERR (scalar localtime) . " -> ($counterName) $value\n";
    }

    $request->setOID($oid);
    # the value is passed as a string
    if (!$request->setValue(ASN_COUNTER, '' . $value)) {
        warn "Error setting $counterName for $oid value: $!\n";
    }
}

# Find a value from a string counter name.
#
# Returns the cached value ('0' when the counter is missing or false), or
# undef when the name is undefined or belongs to neither the client nor the
# server family.
#
# We maintain two different caches, because the client and server data come
# from two different sources; there is no need to query both if only one is
# required. A cache is refreshed when it is older than $CACHE_TIME seconds.
sub get_value {
    my $counterName = shift;
    return undef unless defined $counterName;

    my $now = time;

    if ($counterName =~ /^client/) {
        update_client_cache() if ($now - $client_cache_time > $CACHE_TIME);
        return $client_cache{$counterName} || '0';
    }

    if ($counterName =~ /^server/) {
        update_server_cache() if ($now - $server_cache_time > $CACHE_TIME);
        return $server_cache{$counterName} || '0';
    }

    return undef;
}

# Store one set of NFSv3 per-procedure statistics into a cache hash.
#
#   $cache  - reference to %client_cache or %server_cache
#   $prefix - key prefix, 'client3' or 'server3'
#   @fields - per-procedure counts, in @NFS3_PROCS order
#
# Writes "<prefix>.count.<proc>" and "<prefix>.percent.<proc>" for every
# procedure, plus "<prefix>.count.total" and "<prefix>.percent.total".
# Missing fields count as 0, and all percentages are 0 when the total is 0,
# so an idle host does not cause a division by zero.
sub _store_nfs3_stats {
    my ($cache, $prefix, @fields) = @_;

    my @counts = map { defined $fields[$_] ? $fields[$_] : 0 } 0 .. $#NFS3_PROCS;
    my $total = sumof(@counts);

    $cache->{"$prefix.count.total"} = $total;
    # NOTE(review): the total percentage has always been published as 1,
    # not 100 -- kept as is; confirm which value consumers expect.
    $cache->{"$prefix.percent.total"} = 1;

    for my $i (0 .. $#NFS3_PROCS) {
        my $proc = $NFS3_PROCS[$i];
        $cache->{"$prefix.count.$proc"} = $counts[$i];
        $cache->{"$prefix.percent.$proc"} =
            $total > 0 ? int(100 * $counts[$i] / $total) : 0;
    }
}

# Refresh %client_cache from $CLIENT_STATS_FILE.
#
# Returns 1 on success and 0 when the file cannot be opened (eg when the NFS
# client code is not loaded); in that case the previous values are kept. The
# cache time is updated either way so a missing file is not retried on every
# request.
sub update_client_cache {
    $client_cache_time = time;

    open(my $fh, '<', $CLIENT_STATS_FILE) or return 0;

    while (my $line = <$fh>) {
        chomp $line;
        my @parts = split ' ', $line;
        next unless @parts;

        if ($parts[0] eq 'rpc') {
            @client_cache{
                'client.rpc.count.calls',
                'client.rpc.count.retrans',
                'client.rpc.count.authrefrsh'
            } = @parts[1, 2, 3];
        }
        elsif ($parts[0] eq 'proc3') {
            # skip the 'proc3' tag and the field that follows it
            _store_nfs3_stats(\%client_cache, 'client3', @parts[2 .. $#parts]);
        }
        # 'net', 'proc2' and 'proc4' lines are not exported yet (see TODO)
    }

    close $fh;
    return 1;
}

# Refresh %server_cache from $SERVER_STATS_FILE.
#
# Returns 1 on success and 0 when the file cannot be opened (eg when nfsd is
# not running); in that case the previous values are kept.
sub update_server_cache {
    $server_cache_time = time;

    open(my $fh, '<', $SERVER_STATS_FILE) or return 0;

    while (my $line = <$fh>) {
        chomp $line;
        my @parts = split ' ', $line;
        next unless @parts;

        if ($parts[0] eq 'net') {    # network io
            @server_cache{
                'server.net.count.packets',
                'server.net.count.udp',
                'server.net.count.tcp',
                'server.net.count.tcpconn'
            } = @parts[1 .. 4];
        }
        elsif ($parts[0] eq 'rc') {    # reply cache
            @server_cache{
                'server.replycache.count.hits',
                'server.replycache.count.misses',
                'server.replycache.count.nocache'
            } = @parts[1, 2, 3];
        }
        elsif ($parts[0] eq 'th') {    # threads
            # TODO: implement some form of usage time monitoring
            # See http://marc.theaimsgroup.com/?l=linux-nfs&m=102824853219024&w=2
            @server_cache{
                'server.threads.count',
                'server.threads.fullcount'
            } = @parts[1, 2];
        }
        elsif ($parts[0] eq 'fh') {    # server file handle cache
            # Sample recorded by the original author:
            #   /proc line:  fh 0 1691927 0 110 351
            #   nfsstat:     lookup anon ncachedir ncachedir stale
            #                1691881 0 351 110 0
            # NOTE(review): that sample suggests the file lists "stale"
            # first, whereas the fields are assigned here in file order
            # starting with "lookups". Mapping left unchanged -- confirm
            # against the kernel's field order.
            @server_cache{
                'server.fhcache.count.lookups',
                'server.fhcache.count.anon',
                'server.fhcache.count.nocache_dir',
                'server.fhcache.count.nocache_nondir',
                'server.fhcache.count.stale'
            } = @parts[1 .. 5];
        }
        elsif ($parts[0] eq 'rpc') {    # rpc stats
            # five counters follow the tag; all five must be read
            @server_cache{
                'server.rpc.count.calls',
                'server.rpc.count.badcalls',
                'server.rpc.count.badauth',
                'server.rpc.count.badclnt',
                'server.rpc.count.xdrcall'
            } = @parts[1 .. 5];
        }
        elsif ($parts[0] eq 'proc3') {    # NFSv3 stats
            # skip the 'proc3' tag and the field that follows it
            _store_nfs3_stats(\%server_cache, 'server3', @parts[2 .. $#parts]);
        }
        # 'proc2' (NFSv2) and 'proc4' (NFSv4) support: TODO
    }

    close $fh;
    return 1;
}

# Return the numeric sum of the arguments (0 for an empty list).
sub sumof {
    my $total = 0;
    $total += $_ for @_;
    return $total;
}