[Nagios-checkins] SF.net SVN: nagios:[2106] nagioscore/trunk

ageric at users.sourceforge.net ageric at users.sourceforge.net
Sun Aug 26 20:06:57 UTC 2012


Revision: 2106
          http://nagios.svn.sourceforge.net/nagios/?rev=2106&view=rev
Author:   ageric
Date:     2012-08-26 20:06:57 +0000 (Sun, 26 Aug 2012)
Log Message:
-----------
core: Fix latency calculation and (debug)log it for all events

Since we have microsecond precision it doesn't really make sense to
see how many microseconds after the one-second mark we happened to
run an event. With this patch we get the necessary precision to
actually get 0.0 seconds latency even if more than one event is
running at the same time.

While at it, we add a small gracetime where events can run up to 0.1
seconds early. Latency never goes negative though, so we zero it out
if we should get a negative value there.

Signed-off-by: Andreas Ericsson <ae at op5.se>

Modified Paths:
--------------
    nagioscore/trunk/base/events.c
    nagioscore/trunk/lib/worker.c
    nagioscore/trunk/lib/worker.h

Modified: nagioscore/trunk/base/events.c
===================================================================
--- nagioscore/trunk/base/events.c	2012-08-26 20:06:29 UTC (rev 2105)
+++ nagioscore/trunk/base/events.c	2012-08-26 20:06:57 UTC (rev 2106)
@@ -1002,25 +1002,21 @@
 		last_event = temp_event;
 
 		gettimeofday(&now, NULL);
-		poll_time_ms = tv_delta_msec(&now, event_runtime);
-		if (poll_time_ms <= 0) {
-			log_debug_info(DEBUGL_SCHEDULING, 1, "poll_time_ms is ACTUALLY %d; run_time - time(NULL) = (%lu - %lu) = %d\n",
-						   poll_time_ms, temp_event->run_time, time(NULL), (int)(temp_event->run_time - time(NULL)));
-			log_debug_info(DEBUGL_SCHEDULING, 1, "Daemon runtime: %lu\n",
-						   time(NULL) - program_start);
+		poll_time_ms = tv_delta_msec(&now, event_runtime) - 25;
+		if (poll_time_ms < 0)
 			poll_time_ms = 0;
-		}
-		if (poll_time_ms >= 1500)
+		else if(poll_time_ms >= 1500)
 			poll_time_ms = 1500;
-		/* turn this to DEBUGL_IPC later */
+
 		log_debug_info(DEBUGL_SCHEDULING | DEBUGL_IPC, 1, "## Polling %dms; sockets=%d; events=%u; iobs=%p\n",
-					   poll_time_ms, iobroker_get_num_fds(nagios_iobs),
-					   squeue_size(nagios_squeue), nagios_iobs);
+		               poll_time_ms, iobroker_get_num_fds(nagios_iobs),
+		               squeue_size(nagios_squeue), nagios_iobs);
 		inputs = iobroker_poll(nagios_iobs, poll_time_ms);
 		log_debug_info(DEBUGL_IPC, 2, "## %d descriptors had input\n", inputs);
 
-		/* if it's not time to run this event yet, we go back to listening */
-		if (temp_event->run_time > time(NULL))
+		/* 100 milliseconds allowance for firing off events early */
+		gettimeofday(&now, NULL);
+		if (tv_delta_msec(&now, event_runtime) > 100)
 			continue;
 
 		/*
@@ -1058,7 +1054,8 @@
 	service *temp_service = NULL;
 	void (*userfunc)(void *);
 	struct timeval tv;
-	double latency = 0.0;
+	const struct timeval *event_runtime;
+	double latency;
 
 
 	log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_timed_event() start\n");
@@ -1070,6 +1067,13 @@
 
 	log_debug_info(DEBUGL_EVENTS, 0, "** Timed Event ** Type: %d, Run Time: %s", event->event_type, ctime(&event->run_time));
 
+	/* get event latency */
+	gettimeofday(&tv, NULL);
+	event_runtime = squeue_event_runtime(event->sq_event);
+	latency = (double)(tv_delta_f(&tv, event_runtime));
+	if (latency < 0.0) /* events may run up to 0.1 seconds early */
+		latency = 0.0;
+
 	/* how should we handle the event? */
 	switch(event->event_type) {
 
@@ -1077,10 +1081,6 @@
 
 			temp_service = (service *)event->event_data;
 
-			/* get check latency */
-			gettimeofday(&tv, NULL);
-			latency = (double)((double)(tv.tv_sec - event->run_time) + (double)(tv.tv_usec / 1000) / 1000.0);
-
 			log_debug_info(DEBUGL_EVENTS, 0, "** Service Check Event ==> Host: '%s', Service: '%s', Options: %d, Latency: %f sec\n", temp_service->host_name, temp_service->description, event->event_options, latency);
 
 			/* run the service check */
@@ -1092,10 +1092,6 @@
 
 			temp_host = (host *)event->event_data;
 
-			/* get check latency */
-			gettimeofday(&tv, NULL);
-			latency = (double)((double)(tv.tv_sec - event->run_time) + (double)(tv.tv_usec / 1000) / 1000.0);
-
 			log_debug_info(DEBUGL_EVENTS, 0, "** Host Check Event ==> Host: '%s', Options: %d, Latency: %f sec\n", temp_host->name, event->event_options, latency);
 
 			/* run the host check */
@@ -1105,7 +1101,7 @@
 
 		case EVENT_LOG_ROTATION:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Log File Rotation Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Log File Rotation Event. Latency: %.3fs\n", latency);
 
 			/* rotate the log file */
 			rotate_log_file(event->run_time);
@@ -1113,7 +1109,7 @@
 
 		case EVENT_PROGRAM_SHUTDOWN:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Program Shutdown Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Program Shutdown Event. Latency: %.3fs\n", latency);
 
 			/* set the shutdown flag */
 			sigshutdown = TRUE;
@@ -1124,7 +1120,7 @@
 
 		case EVENT_PROGRAM_RESTART:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Program Restart Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Program Restart Event. Latency: %.3fs\n", latency);
 
 			/* set the restart flag */
 			sigrestart = TRUE;
@@ -1135,7 +1131,7 @@
 
 		case EVENT_CHECK_REAPER:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Check Result Reaper\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Check Result Reaper. Latency: %.3fs\n", latency);
 
 			/* reap host and service check results */
 			reap_check_results();
@@ -1143,7 +1139,7 @@
 
 		case EVENT_ORPHAN_CHECK:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Orphaned Host and Service Check Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Orphaned Host and Service Check Event. Latency: %.3fs\n", latency);
 
 			/* check for orphaned hosts and services */
 			if(check_orphaned_hosts == TRUE)
@@ -1154,7 +1150,7 @@
 
 		case EVENT_RETENTION_SAVE:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Retention Data Save Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Retention Data Save Event. Latency: %.3fs\n", latency);
 
 			/* save state retention data */
 			save_state_information(TRUE);
@@ -1162,7 +1158,7 @@
 
 		case EVENT_STATUS_SAVE:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Status Data Save Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Status Data Save Event. Latency: %.3fs\n", latency);
 
 			/* save all status data (program, host, and service) */
 			update_all_status_data();
@@ -1170,7 +1166,7 @@
 
 		case EVENT_SCHEDULED_DOWNTIME:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Scheduled Downtime Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Scheduled Downtime Event. Latency: %.3fs\n", latency);
 
 			/* process scheduled downtime info */
 			if(event->event_data) {
@@ -1182,7 +1178,7 @@
 
 		case EVENT_SFRESHNESS_CHECK:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Service Result Freshness Check Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Service Result Freshness Check Event. Latency: %.3fs\n", latency);
 
 			/* check service result freshness */
 			check_service_result_freshness();
@@ -1190,7 +1186,7 @@
 
 		case EVENT_HFRESHNESS_CHECK:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Host Result Freshness Check Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Host Result Freshness Check Event. Latency: %.3fs\n", latency);
 
 			/* check host result freshness */
 			check_host_result_freshness();
@@ -1198,7 +1194,7 @@
 
 		case EVENT_EXPIRE_DOWNTIME:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Expire Downtime Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Expire Downtime Event. Latency: %.3fs\n", latency);
 
 			/* check for expired scheduled downtime entries */
 			check_for_expired_downtime();
@@ -1207,14 +1203,14 @@
 		case EVENT_RESCHEDULE_CHECKS:
 
 			/* adjust scheduling of host and service checks */
-			log_debug_info(DEBUGL_EVENTS, 0, "** Reschedule Checks Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Reschedule Checks Event. Latency: %.3fs\n", latency);
 
 			adjust_check_scheduling();
 			break;
 
 		case EVENT_EXPIRE_COMMENT:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Expire Comment Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Expire Comment Event. Latency: %.3fs\n", latency);
 
 			/* check for expired comment */
 			check_for_expired_comment((unsigned long)event->event_data);
@@ -1222,7 +1218,7 @@
 
 		case EVENT_CHECK_PROGRAM_UPDATE:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** Check For Program Update\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** Check For Program Update. Latency: %.3fs\n", latency);
 
 			/* check for new versions of Nagios */
 			check_for_nagios_updates(FALSE, TRUE);
@@ -1230,7 +1226,7 @@
 
 		case EVENT_USER_FUNCTION:
 
-			log_debug_info(DEBUGL_EVENTS, 0, "** User Function Event\n");
+			log_debug_info(DEBUGL_EVENTS, 0, "** User Function Event. Latency: %.3fs\n", latency);
 
 			/* run a user-defined function */
 			if(event->event_data != NULL) {

Modified: nagioscore/trunk/lib/worker.c
===================================================================
--- nagioscore/trunk/lib/worker.c	2012-08-26 20:06:29 UTC (rev 2105)
+++ nagioscore/trunk/lib/worker.c	2012-08-26 20:06:57 UTC (rev 2106)
@@ -127,7 +127,7 @@
 	return msecs;
 }
 
-static float tv_delta_f(const struct timeval *start, const struct timeval *stop)
+float tv_delta_f(const struct timeval *start, const struct timeval *stop)
 {
 #define DIVIDER 1000000
 	float ret;

Modified: nagioscore/trunk/lib/worker.h
===================================================================
--- nagioscore/trunk/lib/worker.h	2012-08-26 20:06:29 UTC (rev 2105)
+++ nagioscore/trunk/lib/worker.h	2012-08-26 20:06:57 UTC (rev 2106)
@@ -93,6 +93,14 @@
 extern int tv_delta_msec(const struct timeval *start, const struct timeval *stop);
 
 /**
+ * Get timeval delta as seconds
+ * @param start The start time
+ * @param stop The stop time
+ * @return time difference in fractions of seconds
+ */
+extern float tv_delta_f(const struct timeval *start, const struct timeval *stop);
+
+/**
  * Set some common socket options
  * @param[in] sd The socket to set options for
  * @param[in] bufsize Size to set send and receive buffers to

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the Nagios-commits mailing list