[Nagios-checkins] SF.net SVN: nagios:[2036] nagioscore/trunk

ageric at users.sourceforge.net ageric at users.sourceforge.net
Thu Aug 2 00:45:14 UTC 2012


Revision: 2036
          http://nagios.svn.sourceforge.net/nagios/?rev=2036&view=rev
Author:   ageric
Date:     2012-08-02 00:45:14 +0000 (Thu, 02 Aug 2012)
Log Message:
-----------
core: Process check results ad-hoc

First relegating them to an in-memory list is just an unnecessary
extra step that we can avoid entirely by just passing the parsed
check results to their respective handlers as soon as we encounter
them. While we might encounter occasional long timeouts while reaping
checks (in case they cause notifications or other helper programs to
be spawned), we will save some cpu time overall on this change, and
some extra memory as well.

Also, we avoid the whole problem of potentially reaping more checks
than we have time to process later, which could cause a buildup of
check result data in memory which was possible (although very unlikely)
with the old code.

Signed-off-by: Andreas Ericsson <ae at op5.se>

Modified Paths:
--------------
    nagioscore/trunk/base/checks.c
    nagioscore/trunk/base/nagios.c
    nagioscore/trunk/base/utils.c
    nagioscore/trunk/include/nagios.h

Modified: nagioscore/trunk/base/checks.c
===================================================================
--- nagioscore/trunk/base/checks.c	2012-08-02 00:44:52 UTC (rev 2035)
+++ nagioscore/trunk/base/checks.c	2012-08-02 00:45:14 UTC (rev 2036)
@@ -109,8 +109,6 @@
 extern unsigned long   next_event_id;
 extern unsigned long   next_problem_id;
 
-extern check_result    check_result_info;
-extern check_result    *check_result_list;
 
 extern pthread_t       worker_threads[TOTAL_WORKER_THREADS];
 
@@ -125,94 +123,14 @@
 
 /* reaps host and service check results */
 int reap_check_results(void) {
-	check_result *queued_check_result = NULL;
-	service *temp_service = NULL;
-	host *temp_host = NULL;
-	time_t current_time = 0L;
-	time_t reaper_start_time = 0L;
 	int reaped_checks = 0;
 
 	log_debug_info(DEBUGL_FUNCTIONS, 0, "reap_check_results() start\n");
 	log_debug_info(DEBUGL_CHECKS, 0, "Starting to reap check results.\n");
 
-	/* get the start time */
-	time(&reaper_start_time);
-
 	/* process files in the check result queue */
-	process_check_result_queue(check_result_path);
+	reaped_checks = process_check_result_queue(check_result_path);
 
-	/* read all check results that have come in... */
-	while((queued_check_result = read_check_result())) {
-
-		reaped_checks++;
-
-		log_debug_info(DEBUGL_CHECKS, 2, "Found a check result (#%d) to handle...\n", reaped_checks);
-
-		/* service check */
-		if(queued_check_result->object_check_type == SERVICE_CHECK) {
-
-			/* make sure the service exists */
-			if((temp_service = find_service(queued_check_result->host_name, queued_check_result->service_description)) == NULL) {
-
-				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check result queue contained results for service '%s' on host '%s', but the service could not be found!  Perhaps you forgot to define the service in your config files?\n", queued_check_result->service_description, queued_check_result->host_name);
-
-				/* free memory */
-				free_check_result(queued_check_result);
-				my_free(queued_check_result);
-
-				/* TODO - add new service definition automatically */
-
-				continue;
-				}
-
-			log_debug_info(DEBUGL_CHECKS, 1, "Handling check result for service '%s' on host '%s'...\n", temp_service->description, temp_service->host_name);
-
-			/* process the check result */
-			handle_async_service_check_result(temp_service, queued_check_result);
-			}
-
-		/* host check */
-		else {
-			if((temp_host = find_host(queued_check_result->host_name)) == NULL) {
-
-				/* make sure the host exists */
-				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check result queue contained results for host '%s', but the host could not be found!  Perhaps you forgot to define the host in your config files?\n", queued_check_result->host_name);
-
-				/* free memory */
-				free_check_result(queued_check_result);
-				my_free(queued_check_result);
-
-				/* TODO - add new host definition automatically */
-
-				continue;
-				}
-
-			log_debug_info(DEBUGL_CHECKS, 1, "Handling check result for host '%s'...\n", temp_host->name);
-
-			/* process the check result */
-			handle_async_host_check_result_3x(temp_host, queued_check_result);
-			}
-
-		log_debug_info(DEBUGL_CHECKS | DEBUGL_IPC, 1, "Deleted check result file '%s'\n", queued_check_result->output_file);
-
-		/* free allocated memory */
-		free_check_result(queued_check_result);
-		my_free(queued_check_result);
-
-		/* break out if we've been here too long (max_check_reaper_time seconds) */
-		time(&current_time);
-		if((int)(current_time - reaper_start_time) > max_check_reaper_time) {
-			log_debug_info(DEBUGL_CHECKS, 0, "Breaking out of check result reaper: max reaper time exceeded\n");
-			break;
-			}
-
-		/* bail out if we encountered a signal */
-		if(sigshutdown == TRUE || sigrestart == TRUE) {
-			log_debug_info(DEBUGL_CHECKS, 0, "Breaking out of check result reaper: signal encountered\n");
-			break;
-			}
-		}
-
 	log_debug_info(DEBUGL_CHECKS, 0, "Finished reaping %d check results\n", reaped_checks);
 	log_debug_info(DEBUGL_FUNCTIONS, 0, "reap_check_results() end\n");
 

Modified: nagioscore/trunk/base/nagios.c
===================================================================
--- nagioscore/trunk/base/nagios.c	2012-08-02 00:44:52 UTC (rev 2035)
+++ nagioscore/trunk/base/nagios.c	2012-08-02 00:45:14 UTC (rev 2036)
@@ -251,7 +251,6 @@
 notification    *notification_list;
 
 check_result    check_result_info;
-check_result    *check_result_list = NULL;
 unsigned long	max_check_result_file_age = DEFAULT_MAX_CHECK_RESULT_AGE;
 
 dbuf            check_result_dbuf;

Modified: nagioscore/trunk/base/utils.c
===================================================================
--- nagioscore/trunk/base/utils.c	2012-08-02 00:44:52 UTC (rev 2035)
+++ nagioscore/trunk/base/utils.c	2012-08-02 00:45:14 UTC (rev 2036)
@@ -2019,7 +2019,8 @@
 	struct stat stat_buf;
 	struct stat ok_stat_buf;
 	char *temp_buffer = NULL;
-	int result = OK;
+	int result = OK, check_result_files = 0;
+	time_t start;
 
 	/* make sure we have what we need */
 	if(dirname == NULL) {
@@ -2035,9 +2036,22 @@
 
 	log_debug_info(DEBUGL_CHECKS, 1, "Starting to read check result queue '%s'...\n", dirname);
 
+	start = time(NULL);
+
 	/* process all files in the directory... */
 	while((dirfile = readdir(dirp)) != NULL) {
+		/* bail out if we encountered a signal */
+		if (sigshutdown == TRUE || sigrestart == TRUE) {
+			log_debug_info(DEBUGL_CHECKS, 0, "Breaking out of check result reaper: signal encountered\n");
+			break;
+			}
 
+		/* break out if we've been here too long */
+		if ((int)(time(NULL) - start) < max_check_reaper_time) {
+			log_debug_info(DEBUGL_CHECKS, 0, "Breaking out of check result reaper: max time exceeded\n");
+			break;
+			}
+
 		/* create /path/to/file */
 		snprintf(file, sizeof(file), "%s/%s", dirname, dirfile->d_name);
 		file[sizeof(file) - 1] = '\x0';
@@ -2076,18 +2090,39 @@
 			/* break out if we encountered an error */
 			if(result == ERROR)
 				break;
+
+			check_result_files++;
 			}
 		}
 
 	closedir(dirp);
 
-	return result;
+	return check_result_files;
 
 	}
 
 
+int process_check_result(check_result *cr)
+{
+	if (!cr)
+		return ERROR;
+	if (cr->object_check_type == SERVICE_CHECK) {
+		service *svc;
+		svc = find_service(cr->host_name, cr->service_description);
+		if (!svc)
+			return ERROR;
+		return handle_async_service_check_result(svc, cr);
+		}
+	if (cr->object_check_type == HOST_CHECK) {
+		host *hst;
+		hst = find_host(cr->host_name);
+		if (!hst)
+			return ERROR;
+		return handle_async_host_check_result_3x(hst, cr);
+		}
+	return ERROR;
+	}
 
-
 /* reads check result(s) from a file */
 int process_check_result_file(char *fname) {
 	mmapfile *thefile = NULL;
@@ -2096,11 +2131,12 @@
 	char *val = NULL;
 	char *v1 = NULL, *v2 = NULL;
 	time_t current_time;
-	check_result *new_cr = NULL;
+	check_result cr;
 
 	if(fname == NULL)
 		return ERROR;
 
+	init_check_result(&cr);
 	time(&current_time);
 
 	log_debug_info(DEBUGL_CHECKS, 1, "Processing check result file: '%s'\n", fname);
@@ -2131,26 +2167,18 @@
 		/* empty line indicates end of record */
 		else if(input[0] == '\n') {
 
-			/* we have something... */
-			if(new_cr) {
+			/* do we have the minimum amount of data? */
+			if(cr.host_name != NULL && cr.output != NULL) {
 
-				/* do we have the minimum amount of data? */
-				if(new_cr->host_name != NULL && new_cr->output != NULL) {
+				/* process the check result */
+				process_check_result(&cr);
 
-					/* add check result to list in memory */
-					add_check_result_to_list(new_cr);
-
-					/* reset pointer */
-					new_cr = NULL;
-					}
-
-				/* discard partial input */
-				else {
-					free_check_result(new_cr);
-					init_check_result(new_cr);
-					new_cr->output_file = (char *)strdup(fname);
-					}
 				}
+
+			/* cleanse for next check result */
+			free_check_result(&cr);
+			init_check_result(&cr);
+			cr.output_file = fname;
 			}
 
 		if((var = my_strtok(input, "=")) == NULL)
@@ -2170,88 +2198,63 @@
 
 		/* else we have check result data */
 		else {
-
-			/* allocate new check result if necessary */
-			if(new_cr == NULL) {
-
-				if((new_cr = (check_result *)malloc(sizeof(check_result))) == NULL)
-					continue;
-
-				/* init values */
-				init_check_result(new_cr);
-				new_cr->output_file = (char *)strdup(fname);
-				}
-
 			if(!strcmp(var, "host_name"))
-				new_cr->host_name = (char *)strdup(val);
+				cr.host_name = (char *)strdup(val);
 			else if(!strcmp(var, "service_description")) {
-				new_cr->service_description = (char *)strdup(val);
-				new_cr->object_check_type = SERVICE_CHECK;
+				cr.service_description = (char *)strdup(val);
+				cr.object_check_type = SERVICE_CHECK;
 				}
 			else if(!strcmp(var, "check_type"))
-				new_cr->check_type = atoi(val);
+				cr.check_type = atoi(val);
 			else if(!strcmp(var, "check_options"))
-				new_cr->check_options = atoi(val);
+				cr.check_options = atoi(val);
 			else if(!strcmp(var, "scheduled_check"))
-				new_cr->scheduled_check = atoi(val);
+				cr.scheduled_check = atoi(val);
 			else if(!strcmp(var, "reschedule_check"))
-				new_cr->reschedule_check = atoi(val);
+				cr.reschedule_check = atoi(val);
 			else if(!strcmp(var, "latency"))
-				new_cr->latency = strtod(val, NULL);
+				cr.latency = strtod(val, NULL);
 			else if(!strcmp(var, "start_time")) {
 				if((v1 = strtok(val, ".")) == NULL)
 					continue;
 				if((v2 = strtok(NULL, "\n")) == NULL)
 					continue;
-				new_cr->start_time.tv_sec = strtoul(v1, NULL, 0);
-				new_cr->start_time.tv_usec = strtoul(v2, NULL, 0);
+				cr.start_time.tv_sec = strtoul(v1, NULL, 0);
+				cr.start_time.tv_usec = strtoul(v2, NULL, 0);
 				}
 			else if(!strcmp(var, "finish_time")) {
 				if((v1 = strtok(val, ".")) == NULL)
 					continue;
 				if((v2 = strtok(NULL, "\n")) == NULL)
 					continue;
-				new_cr->finish_time.tv_sec = strtoul(v1, NULL, 0);
-				new_cr->finish_time.tv_usec = strtoul(v2, NULL, 0);
+				cr.finish_time.tv_sec = strtoul(v1, NULL, 0);
+				cr.finish_time.tv_usec = strtoul(v2, NULL, 0);
 				}
 			else if(!strcmp(var, "early_timeout"))
-				new_cr->early_timeout = atoi(val);
+				cr.early_timeout = atoi(val);
 			else if(!strcmp(var, "exited_ok"))
-				new_cr->exited_ok = atoi(val);
+				cr.exited_ok = atoi(val);
 			else if(!strcmp(var, "return_code"))
-				new_cr->return_code = atoi(val);
+				cr.return_code = atoi(val);
 			else if(!strcmp(var, "output"))
-				new_cr->output = (char *)strdup(val);
+				cr.output = (char *)strdup(val);
 			}
 		}
 
-	/* we have something */
-	if(new_cr) {
+	/* do we have the minimum amount of data? */
+	if(cr.host_name != NULL && cr.output != NULL) {
 
-		/* do we have the minimum amount of data? */
-		if(new_cr->host_name != NULL && new_cr->output != NULL) {
-
-			/* add check result to list in memory */
-			add_check_result_to_list(new_cr);
-
-			/* reset pointer */
-			new_cr = NULL;
-			}
-
-		/* discard partial input */
-		/* free memory for current check result record */
-		else {
-			free_check_result(new_cr);
-			my_free(new_cr);
-			}
+		/* process check result */
+		process_check_result(&cr);
 		}
 
+	free_check_result(&cr);
+
 	/* free memory and close file */
 	my_free(input);
 	mmap_fclose(thefile);
 
-	/* delete the file (as well its ok-to-go file) if it's too old */
-	/* other (current) files are deleted later (when results are processed) */
+	/* delete the file (as well its ok-to-go file) */
 	delete_check_result_file(fname);
 
 	return OK;
@@ -2278,21 +2281,6 @@
 
 
 
-/* reads the first host/service check result from the list in memory */
-check_result *read_check_result(void) {
-	check_result *first_cr = NULL;
-
-	if(check_result_list == NULL)
-		return NULL;
-
-	first_cr = check_result_list;
-	check_result_list = check_result_list->next;
-
-	return first_cr;
-	}
-
-
-
 /* initializes a host/service check result */
 int init_check_result(check_result *info) {
 
@@ -2318,73 +2306,12 @@
 	info->exited_ok = TRUE;
 	info->return_code = 0;
 	info->output = NULL;
-	info->next = NULL;
 
 	return OK;
 	}
 
 
 
-
-/* adds a new host/service check result to the list in memory */
-int add_check_result_to_list(check_result *new_cr) {
-	check_result *temp_cr = NULL;
-	check_result *last_cr = NULL;
-
-	if(new_cr == NULL)
-		return ERROR;
-
-	/* add to list, sorted by finish time (asc) */
-
-	/* find insertion point */
-	last_cr = check_result_list;
-	for(temp_cr = check_result_list; temp_cr != NULL; temp_cr = temp_cr->next) {
-		if(temp_cr->finish_time.tv_sec >= new_cr->finish_time.tv_sec) {
-			if(temp_cr->finish_time.tv_sec > new_cr->finish_time.tv_sec)
-				break;
-			else if(temp_cr->finish_time.tv_usec > new_cr->finish_time.tv_usec)
-				break;
-			}
-		last_cr = temp_cr;
-		}
-
-	/* item goes at head of list */
-	if(check_result_list == NULL || temp_cr == check_result_list) {
-		new_cr->next = check_result_list;
-		check_result_list = new_cr;
-		}
-
-	/* item goes in middle or at end of list */
-	else {
-		new_cr->next = temp_cr;
-		last_cr->next = new_cr;
-		}
-
-	return OK;
-	}
-
-
-
-
-/* frees all memory associated with the check result list */
-int free_check_result_list(void) {
-	check_result *this_cr = NULL;
-	check_result *next_cr = NULL;
-
-	for(this_cr = check_result_list; this_cr != NULL; this_cr = next_cr) {
-		next_cr = this_cr->next;
-		free_check_result(this_cr);
-		my_free(this_cr);
-		}
-
-	check_result_list = NULL;
-
-	return OK;
-	}
-
-
-
-
 /* frees memory associated with a host/service check result */
 int free_check_result(check_result *info) {
 
@@ -2393,7 +2320,6 @@
 
 	my_free(info->host_name);
 	my_free(info->service_description);
-	my_free(info->output_file);
 	my_free(info->output);
 
 	return OK;
@@ -3621,9 +3547,6 @@
 	/* free memory allocated to comments */
 	free_comment_data();
 
-	/* free check result list */
-	free_check_result_list();
-
 	/* free event queue data */
 	squeue_destroy(nagios_squeue, SQUEUE_FREE_DATA);
 	nagios_squeue = NULL;

Modified: nagioscore/trunk/include/nagios.h
===================================================================
--- nagioscore/trunk/include/nagios.h	2012-08-02 00:44:52 UTC (rev 2035)
+++ nagioscore/trunk/include/nagios.h	2012-08-02 00:45:14 UTC (rev 2036)
@@ -318,7 +318,6 @@
 	int return_code;				/* plugin return code */
 	char *output;	                                /* plugin output */
 	struct rusage rusage;			/* resource usage by this check */
-	struct check_result_struct *next;
 	} check_result;
 
 
@@ -444,10 +443,8 @@
 int move_check_result_to_queue(char *);
 int process_check_result_queue(char *);
 int process_check_result_file(char *);
-int add_check_result_to_list(check_result *);
-check_result *read_check_result(void);                  	/* reads a host/service check result from the list in memory */
+int process_check_result(check_result *);
 int delete_check_result_file(char *);
-int free_check_result_list(void);
 int init_check_result(check_result *);
 int free_check_result(check_result *);                  	/* frees memory associated with a host/service check result */
 int parse_check_output(char *, char **, char **, char **, int, int);

This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site.





More information about the Nagios-commits mailing list