#!/serveur/dp/bin/perl -w

# 	$Id: CHA_sur.pl,v 2.2 2001/02/15 09:40:16 p004184 Exp $	

# ---------------------------------------------------------------
#
# Cluster monitoring
#
# Checks whether the cluster has failed over
#
# usage : CHA_sur.pl template_name
#
# Return code :
#		0 : ok , 1 : error, 2 : ITO alarm
#
# example : CHA_sur.pl CHA_sur
#
# ---------------------------------------------------------------

use strict;
use File::Basename;
use lib dirname($0);
use dits_def;
require("hostname.pl");
use Fcntl ':flock';

# ---------------------------------------------------------------
# Debug switch - 0: no trace, 1: traces
# ---------------------------------------------------------------
my $debug = 0;

# Report file (removed at the start of every run by Traitement) and
# the host name returned by hostname().
my $log = "/var/tmp/opc_chasur.log";
my $host_physique = hostname();

# ---------------------------------------------------------------
# _Temporary_ monitoring exceptions.
# (Original note: Oracle is not checked as long as not all of the
# databases are "on".)
#
# %service_non_actif : service name => alternation of physical hosts
#                      (used as a regex by VerifServices) on which
#                      the status of that service is ignored
# $host_non_cluster  : alternation of hosts for which Traitement
#                      resets code 3 ("no longer a cluster member")
#                      to 0
# $host_bascule      : alternation of hosts for which VerifBascule
#                      always returns 0
# ---------------------------------------------------------------
my %service_non_actif = (
	'metascmre7' => 'phys-parker1|phys-parker2',
	'metascp'    => 'phys-vorlon|phys-valenn|phys-molene|phys-yeu',
	'tinamolene' => 'phys-molene|phys-yeu',
	'metascmfor' => 'phys-molene|phys-yeu',
	'tinabechet' => 'phys-molene|phys-yeu',
);
my $host_non_cluster = join('|', qw(aucun_host aucun_host));
my $host_bascule     = join('|', qw(aucun_host aucun_host));

# ---------------------------------------------------------------
# Appends the given text to the report file $log and echoes it on
# standard output.
#
# Parameters : list of strings, printed as is (no separator or
#              newline is added)
# Returns    : result of the final print on standard output
#
# The file is opened in append mode and locked exclusively while it
# is written.  A failure to open, lock or close the file is reported
# through Trace; the text is still echoed on standard output, and
# nothing is written to a handle that could not be opened.
# ---------------------------------------------------------------
sub Log {
	my @param = @_;
	my $fh;
	if(open($fh, '>>', $log)) {
		# best effort: the text is still written if the lock fails
		flock($fh, LOCK_EX) || Trace("lock $log : $!");
		print $fh @param;
		# close flushes the buffer and releases the lock
		close($fh) || Trace("close $log : $!");
	} else {
		Trace("open $log : $!");
	}
	print @param;
}

# ---------------------------------------------------------------
# Returns the name of the logical host expected to run on a
# physical host.
#
# Parameter : $host_phys - physical host name
# Returns   : logical host name
#
# Default (HA 1.3 per the original note): the physical name without
# its leading 'phys-'.
# When VerifCommande("SCCONF") is true, the output of
# scconf "<cluster> -p" is scanned: every "Logical Host : name" line
# is remembered and the scan stops at the first "Node List :" line
# whose first node is $host_phys.  A scconf error is logged and the
# default name is returned.
# NOTE(review): if no "Node List" line matches, the last logical
# host seen in the output is returned - confirm this is intended.
# ---------------------------------------------------------------
sub HostLogique {
	my ($host_phys) = @_;
	my $host_logique = $host_phys;
	$host_logique =~ s/^phys-//;
	if(VerifCommande("SCCONF")) {
		my $cluster = NomCluster();
		my ($code,@result) = scconf("$cluster -p");
		if($code!=0) {
			Log("  erreur scconf : @result\n");
		} else {
			for(@result) {
				$host_logique = $1 if(/^Logical Host\s+:\s+(\S+)$/);
				# \Q..\E : the host name is literal text, not a pattern;
				# the look-ahead keeps e.g. phys-a1 from matching phys-a10
				last if(/Node List\s+:\s+\Q$host_phys\E(?![\w-])/);
			}
		}
	}
	return $host_logique;
}

# ---------------------------------------------------------------
# Checks whether this physical machine is playing the role of
# another logical machine (i.e. the cluster has failed over).
#
# Returns :
#	0 : haget reports exactly one mastered logical host and it is
#	    the expected one (or the host is listed in $host_bascule)
#	1 : haget command not available
#	2 : haget error, or unexpected list of mastered logical hosts
#	3 : haget reports that this host is no longer a cluster member
# ---------------------------------------------------------------
sub VerifBascule {
	unless(VerifCommande("HAGET") || VerifCommande("HAGET2")) {
		Log("\nCommande haget non trouve\n\n");
		return 1;
	}
	Log("\nVrification bascule\n\n");
	my $host_logique = HostLogique($host_physique);
	my ($code,@master) = haget("-f mastered 2>&1");
	if($code==0) {
		if(@master==1 && $master[0] =~ /^$host_logique$/) {
			Log("  Cluster non bascul\n");
		} else {
			Log("  Machine logique : $host_logique\n");
			if(@master==0) {
				Log("  Aucune Machine controle\n");
			} else {
				# one line listing every mastered host, each preceded by
				# a space and stripped of its newline
				my $liste;
				foreach my $nom (@master) {
					$liste .= " $nom";
					chomp($liste);
				}
				Log(@master>1 ? "  Machines controles : $liste\n"
							  : "  Machine controle : $liste\n");
			}
			$code = 2;
		}
	} else {
		# haget failed: tell "not a cluster member" (3) from any
		# other error (2)
		foreach my $ligne (@master) {
			next unless($ligne =~ /this host is not currently a SunCluster member/);
			Log("  $host_physique : n'est plus membre du cluster\n");
			$code = 3;
		}
		unless($code==3) {
			Log("  erreur haget : @master\n");
			$code = 2;
		}
	}
	# hosts listed in $host_bascule never raise an alarm here
	if($host_physique =~ /^($host_bascule)$/) {
		$code = 0;
	}
	return $code;
}

# ---------------------------------------------------------------
# Checks that the expected cluster daemons are running as root:
# smad (2 instances), clustd and ccdd when VerifCommande("SCCONF")
# is true, otherwise only clustd (HA 1.3 per the original note).
#
# Returns : 0 when every process has exactly the expected number of
#           instances, 2 when instances are missing or in excess.
# ---------------------------------------------------------------
sub VerifProcess {
	Log("\nVrification des process\n\n");
	# expected instances still to be found, per process name
	my %nb = VerifCommande("SCCONF")
		? ('smad',2,'clustd',1,'ccdd',1)
		: ('clustd',1);
	my $code = 0;
	my @result = ps("-ef");
	foreach my $ligne (@result) {
		next if($ligne =~ /^\s*(\S+)\s+$$\s+/);	# skip this very process
		next if($ligne =~ /^$/);					# skip empty lines
		next if($ligne =~ /UID/);					# skip the header
		next if($ligne =~ /defunct/);
		next unless($ligne =~ /^\s*(\S+)\s+(\d+)\s+(\d+)\s+\S+\s+(\w+\s+\d+|\d+:\d+:\d+)\s+(\S+)\s+(\S+)\s+(.+)$/);
		my ($uid,$commande) = ($1,$7);
		next unless($uid =~ /^\s*root\s*$/);
		# compare on base names: every word of the command line is
		# stripped of its directory part
		my @mots = map { basename($_) } split(/\s+/,$commande);
		foreach my $process (keys %nb) {
			# one ps line counts for at most one instance
			$nb{$process}-- if(grep(/^\s*$process\s*$/,@mots)>0);
		}
	}
	foreach my $test (sort keys %nb) {
		my $ecart = $nb{$test};
		if($ecart>0) {
			Log("  Il manque $ecart instance(s) du process $test\n");
			# show the ps lines mentioning the process, if any
			my @controle = grep(/$test/,@result);
			Log(@controle) if(@controle);
			$code = 2;
		} elsif($ecart<0) {
			Log("  Il y a ",-$ecart," instance(s) en trop pour le process $test\n");
			$code = 2;
		}
	}
	Log("  Process ok\n") if($code==0);
	return $code;
}

# ---------------------------------------------------------------
# Checks the private links from the output of hastat "-m 0".
#
# Returns :
#	0 : at least 2 interconnects and 2 private nets found, all ok
#	1 : hastat command not available
#	2 : hastat error, a node is not a cluster member, a link is in
#	    an unexpected state, or fewer than 2 links were found
# ---------------------------------------------------------------
sub VerifLiensPrives {
	unless(VerifCommande("HASTAT") || VerifCommande("HASTAT2")) {
		Log("\nCommande hastat non trouve\n\n");
		return 1;
	}
	Log("\nVrification des liens privs\n\n");
	my $nb_inter = 0;
	my $nb_private = 0;
	my $host = $host_physique;
	my ($code,@result) = hastat("-m 0 2>&1");
	if($code!=0) {
		Log("  erreur hastat : @result\n");
		return 2;
	}
	foreach my $ligne (@result) {
		if($ligne =~ /^\s+(\S+) is not a cluster member/) {
			Log("  $1 n'est plus membre du cluster\n");
			return 2;
		}
		if($ligne =~ /^\s+Status of (Interconnects|private nets) on (\S+):/) {
			# the following link lines belong to this host
			$host = $2;
			next;
		}
		if($ligne =~ /^\s+interconnect(\d+): (\S+)$/) {
			my ($connect,$status) = ($1,$2);
			$nb_inter++;
			next if($status =~ /^(selected|up)$/);
			# 'unknown' is tolerated from the third interconnect on
			next if($nb_inter>2 && $status eq 'unknown');
			Log("  $host : interconnect $connect $status\n");
			$code = 2;
			next;
		}
		if($ligne =~ /^\s+To (\S+) - (\S+)$/) {
			my ($connect,$status) = ($1,$2);
			$nb_private++;
			$status = 'indtermin' if(!$status);
			next if($status =~ /^(UP)$/);
			# 'Unknown' is tolerated from the third private net on
			next if($nb_private>2 && $status eq 'Unknown');
			Log("  $host : private nets to $connect $status\n");
			$code = 2;
			next;
		}
		if($ligne =~ /Private nets: (.*)$/) {		# HA 1.3
			my $status = $1;
			# single summary line: counts as both pairs of links
			$nb_private = 2;
			$nb_inter = 2;
			unless($status =~ /^(Ok)$/) {
				Log("  $host : private nets $status\n");
				$code = 2;
			}
		}
	}
	if($nb_inter<2) {
		Log("  $host : $nb_inter interconnexion trouve\n");
		$code = 2;
	}
	if($nb_private<2) {
		Log("  $host : $nb_private private net trouv\n");
		$code = 2;
	}
	Log("  Liens privs ok\n") if($code==0);
	return $code;
}

# ---------------------------------------------------------------
# Checks the public links, from pnmstat "-l" when that command is
# available, otherwise (HA 1.3) from the "Public nets" line of
# hastat "-m 0".
#
# Returns : 0 when every link is ok, 2 on a command error or when a
#           link is not ok.  With pnmstat, an adapter switch inside a
#           nafo group is only logged, it does not raise the alarm.
# ---------------------------------------------------------------
sub VerifLiensPublics {
	Log("\nVrification des liens publics\n\n");
	my $pnm = VerifCommande("PNMSTAT") || VerifCommande("PNMSTAT2");
	my $commande = $pnm ? "pnmstat" : "hastat";
	my ($code,@result) = $pnm ? pnmstat("-l") : hastat("-m 0 2>&1");
	if($code!=0) {
		Log("  erreur $commande : @result\n");
		return 2;
	}
	if($pnm) {
		# only the lines describing a nafo group are of interest
		foreach my $ligne (grep(/^nafo\d+/,@result)) {
			my ($groupe,$inter,$status,$time,$util) = split(' ',$ligne);
			my ($nominale) = split(/:/,$inter);
			if($status ne 'OK') {
				Log("  Lien public $groupe : $status\n");
				$code = 2;
			}
			# the two messages below are informational: $code is
			# deliberately left untouched
			if($time ne 'NEVER') {
				Log("  Lien public $groupe : $nominale bascul sur $util depuis $time secondes\n");
			} else {
				unless($inter =~ /^$util:/ || $inter =~ /^$util\s*$/) {
					Log("  Lien public $groupe : $nominale bascul sur $util\n");
				}
			}
		}
	} else {	# HA 1.3
		foreach my $ligne (@result) {
			next unless($ligne =~ /Public nets: (\S+) - (.*); (\S+) - (.*)$/);
			# two "connection - status" pairs on the same line
			foreach my $paire ([$1,$2],[$3,$4]) {
				my ($connect,$status) = @$paire;
				next if($status =~ /^(Ok)$/);
				Log("  Public nets: $connect - $status\n");
				$code = 2;
			}
		}
	}
	Log("  Liens publics ok\n") if($code==0);
	return $code;
}

# ---------------------------------------------------------------
# Checks the services listed by hareg: every service must be 'on'.
#
# A service listed in %service_non_actif is ignored when the local
# physical host matches the host pattern stored for it.
# Lines without any field are skipped, so that an empty line in the
# hareg output neither triggers "uninitialized value" warnings nor
# raises a false alarm; a service without a status is reported with
# an empty status.
#
# Returns :
#	0 : every checked service is 'on'
#	1 : hareg command not available
#	2 : hareg error, or at least one service is not 'on'
# ---------------------------------------------------------------
sub VerifServices {
	if(! VerifCommande("HAREG") && ! VerifCommande("HAREG2")) {
		Log("\nCommande hareg non trouve\n\n");
		return 1;
	}
	Log("\nVrification des services\n\n");
	my ($code,@result) = hareg();
	if($code!=0) {
		Log("  erreur hareg : @result\n");
		return 2;
	}
	foreach my $ligne (@result) {
		my ($service,$status) = split(' ',$ligne);
		next if(!defined($service));		# empty line: nothing to check
		$status = '' if(!defined($status));	# service name without status
		next if($service_non_actif{$service}
				&& $host_physique =~ /^($service_non_actif{$service})$/);
		if($status ne 'on') {
			Log("  Service $service : $status\n");
			$code = 2;
		}
	}
	Log("  Services ok\n") if($code==0);
	return $code;
}

# ---------------------------------------------------------------
# Checks the databases: for each of Oracle, Sybase and Informix
# whose HA command is available, the instances returned by
# "<command> list" are examined.
#
# An Oracle instance raises the alarm only when its status is 'off';
# a Sybase or Informix instance raises it as soon as its status is
# not 'on'.
#
# Returns : 0 when nothing is wrong, 2 on a command error or when an
#           instance is in alarm.
# ---------------------------------------------------------------
sub VerifSGBD {
	Log("\nVrification SGBD\n\n");
	my $code = 0;
	# one entry per database engine:
	#   key for VerifCommande, command name used in the error message,
	#   label used in the alarm message, listing call, alarm predicate
	my @moteurs = (
		['HAORACLE', 'haoracle', 'Oracle',
			sub { haoracle("list") },  sub { $_[0] eq 'off' }],
		['HASYBASE', 'hasybase', 'Sybase',
			sub { hasybase("list") },  sub { $_[0] ne 'on' }],
		['HAINFORMI','hainformi','Informix',
			sub { hainformi("list") }, sub { $_[0] ne 'on' }],
	);
	foreach my $moteur (@moteurs) {
		my ($cle,$commande,$nom,$lister,$en_alarme) = @$moteur;
		next unless(VerifCommande($cle));
		my ($code1,@result) = $lister->();
		if($code1!=0) {
			Log("  erreur $commande : @result\n");
			$code = 2;
			next;
		}
		foreach my $ligne (@result) {
			# fields are separated by blanks, '|' or ':'
			my ($status,$instance) = split(/[\s|:]+/,$ligne);
			next unless($en_alarme->($status));
			Log("  $nom $instance : $status\n");
			$code = 2;
		}
	}
	Log("  SGBD ok\n") if($code==0);
	return $code;
}

# ---------------------------------------------------------------
# Runs the whole cluster check.
#
# The previous report file is removed, then VerifBascule is run.
# Unless it answers 3 (host no longer a cluster member), the
# process, private link, public link, service and database checks
# are all run; any of them returning more than 0 sets the result
# to 2.  With code 3 the other checks are skipped, and the result
# is reset to 0 for the hosts listed in $host_non_cluster.
#
# Returns : the resulting code (0 when everything is ok).
# ---------------------------------------------------------------
sub Traitement {
	Trace("Traitement") if($debug);
	if(-f $log) {
		unlink($log);
	}
	my $code = VerifBascule();
	if($code==3) {
		$code = 0 if($host_physique =~ /^($host_non_cluster)$/);
	} else {
		foreach my $verif (\&VerifProcess,
						   \&VerifLiensPrives,
						   \&VerifLiensPublics,
						   \&VerifServices,
						   \&VerifSGBD) {
			$code = 2 if($verif->()>0);
		}
		Log("\n\n--> Tout est ok\n\n") if($code==0);
	}
	Log("\n") if($code!=0);
	return $code;
}

# ---------------------------------------------------------------
# Reads the parameters: this template takes none.
#
# Returns : always 0.
# ---------------------------------------------------------------
sub LectureParametres {
	if($debug) {
		Trace("LectureParametres");
	}
	return 0;
}

# ---------------------------------------------------------------
# Processing common to all ITO templates
# ---------------------------------------------------------------

if($debug) {
	Trace("param=@ARGV");
}

# the template name is the first argument (required by ITO)
my $NomTemplate = shift(@ARGV);

# the check itself is run only when the parameters were read
# successfully
my $RetourITO = LectureParametres();
if($RetourITO == 0) {
	$RetourITO = Traitement();
}

if($debug) {
	Trace("$NomTemplate=$RetourITO");
}

# with a real template name the code is sent to ITO through opcmon
# and the script exits with 0; without one (or with "bidon") the
# code becomes the exit status
if($NomTemplate && $NomTemplate ne "bidon") {
	opcmon("$NomTemplate=$RetourITO");
	exit 0;
}
exit $RetourITO;

__END__
