/* * Read network log files (satlinkstats) and create a list of failures and duration. * * $Id: net-failures.c,v 1.10 2011/01/31 05:44:52 grog Exp $ */ #include #include #include #include #include #include #include #include "timefuncs.h" #define SMALLBUF 20 #define DAY 86400 time_t start_timestamp; /* time to start evaluation */ time_t end_timestamp = 0x7ff00000; /* time to end evaluation */ time_t midnight; /* timestamp of midnight today */ time_t start_outage; /* timestamp current outage started */ time_t last_outage_end; /* timestamp previous outage ended */ time_t uptime; /* time up between last outage and this one */ time_t first_sample; /* first entry in input */ int outages; /* total number of outages */ int outage_time; /* and total time */ /* these copies over entire time */ int total_outages; /* total number of outages */ int total_outage_time; /* and total time */ char todays_date [SMALLBUF]; /* YYYYMMDD date */ char last_date [SMALLBUF]; /* YYYYMMDD date */ /* Info from input */ time_t now; /* current entry timestamp */ float linkstat; /* link status */ int systems; /* number of systems available */ char comments [1024]; char now_date [SMALLBUF]; /* YYYYMMDD from last input */ int link_state = 1; /* current state of link */ char start_time [80]; /* start time for printing */ char end_time [80]; /* end time for printing */ int all_disconnects; /* set to count only link dropouts */ int summarystats; /* set if we want stats per day instead of per incident */ int verbose = 0; /* and no way to change it */ void usage (char *me) { fprintf (stderr, "Usage:\n" "\t%s [-c] [-v] [-s start-date] [-e end-date]\n" "\t-c:\toutput count and duration of outages per day\n" "\t-e\tend at date\n" "\t-s\tstart at date\n" "\t-v:\tverbose output\n" "\tOtherwise output details of each outage\n", me ); exit (1); } /* * Read in a line and parse to global locations. * Return 1 for success, 0 for failure (presumably EOF). */ int getinfo () { char line [1024]; if (fgets (line, 1024, stdin) == NULL) return 0; if (sscanf (line, "%d %f %d %s\n", &now, &linkstat, &systems, comments) != 4) { puts (line); return 0; } if ((all_disconnects == 0) && (systems > 0)) linkstat = 1.0; /* no fake link down messages */ strftime (now_date, 8, "%Y%m%d", localtime (&now)); return 1; } char *HMS (int seconds, char *result) { int minutes = seconds / 60; int hours = seconds / 3600; minutes -= hours* 60; seconds = seconds % 60; if (hours > 23) { int days = hours / 24; hours %= 24; sprintf (result, "%d days, %02d:%02d:%02d", days, hours, minutes, seconds); } else sprintf (result, "%02d:%02d:%02d", hours, minutes, seconds); return result; } /* * Print statistics for a day when using -c option. */ void print_stats () { int duration = DAY; /* default to a day at a time */ time_t now = time (NULL); if ((now - midnight) < duration) duration = now - midnight; if (link_state == 0) /* still down, */ { outage_time += midnight + duration - start_outage; /* * We're making the implicit assumption here that our data are * complete. If we miss a day, it means that we've been down all * that time. But I don't check this when I print things out, at * least not at the moment, so we could end up with outages * lasting more than a day (correct) and availability < 0 * (incorrect).. */ start_outage = midnight + duration; /* for next time */ } /* * Convert to readable format. * * output format: * timestamp for start of day * number of outages * duration of outages * % uptime * # date */ strftime (start_time, 80, "%e %B %Y", localtime (&midnight)); printf ("%d\t%3d\t%6d\t%6.2f%%\t# %s\n", midnight, outages, outage_time, (float) (duration - outage_time) * 100 / duration, start_time ); total_outage_time += outage_time; outage_time = 0; total_outages += outages; outages = 0; } int main (int argc, char *argv []) { char total_hms [20]; char average_hms [20]; char duration_hms [20]; time (&now); if (argc > 1) { int i; for (i = 1; i < argc; i++) { if (! strcmp (argv [i], "-a")) all_disconnects = 1; else if (! strcmp (argv [i], "-c")) summarystats = 1; else if (! strcmp (argv [i], "-v")) verbose = 1; else if (! strcmp (argv [i], "-s")) { i++; start_timestamp = get_date (argv, &i, now); } else if (! strcmp (argv [i], "-e")) { i++; end_timestamp = get_date (argv, &i, now); } else usage (argv [0]); } } /* Find the beginning of the period we're interested in. */ do getinfo (); while (now < start_timestamp); first_sample = now; /* we now have the first sample */ if (summarystats) /* per day version */ { int sample_duration; printf ("Date Outages Duration Availability\n"); do { strftime (todays_date, 80, "%e %B %Y", localtime (&now)); if (strcmp (todays_date, last_date)) /* new day */ { struct tm midnight_tm; if (*last_date) /* we already had something */ print_stats (); memset (&midnight_tm, 0, sizeof (midnight_tm)); midnight_tm.tm_isdst = -1; /* mktime should make up its own mind about DST */ strptime (todays_date, " %e %B %Y", &midnight_tm); midnight = mktime (&midnight_tm); strcpy (last_date, todays_date); } if (link_state) /* we were up */ { if (linkstat == 0.0) /* we've gone down now */ { outages++; /* another one */ start_outage = now; link_state = 0; } } else { if (linkstat != 0.0) /* we've come up */ { outage_time += now - start_outage; link_state = 1; } } if (verbose) printf ("%d %f %d %d %s\n", now, linkstat, link_state, systems, comments); } while (getinfo () && (now < (end_timestamp + DAY))); print_stats (); /* Summary over period */ /* * To calculate our uptime, don't go before start or beyond now */ if (first_sample > start_timestamp) start_timestamp = first_sample; if (time (NULL) < end_timestamp) end_timestamp = time (NULL); sample_duration = end_timestamp - start_timestamp; printf ("Total:\t\t%3d\t%6d\t%6.2f%%\n", total_outages, total_outage_time, (float) (sample_duration - total_outage_time) * 100 / sample_duration); exit (0); } /* Default: print info for each outage */ printf ("Start time End time\tDuration\tBadness\n"); do { if (verbose) printf ("%d %f %d %s\n", now, linkstat, systems, comments); if (link_state) /* we were up */ { if (linkstat == 0.0) /* we've gone down now */ { outages++; /* another one */ start_outage = now; link_state = 0; } } else { if (linkstat != 0.0) /* we've come up */ { outage_time += now - start_outage; if (last_outage_end) uptime = start_outage - last_outage_end; last_outage_end = now; /* convert to readable format */ strftime (start_time, 80, "%e %B %Y %H:%M:%S", localtime (&start_outage)); strftime (end_time, 80, "%e %B %Y %H:%M:%S", localtime (&now)); if (uptime) printf ("%d %d %6d\t%7.3f\t# %s %s\n", start_outage, now, now - start_outage, 3600 / (float) uptime, start_time, end_time ); else printf ("%d %d %6d\t\t# %s %s\n", start_outage, now, now - start_outage, start_time, end_time ); start_outage = 0; /* we don't really need this */ link_state = 1; } } } while (getinfo () && (now < (end_timestamp + DAY))); HMS (outage_time, total_hms); if (outages) { HMS ((now - first_sample) / outages, average_hms); HMS (outage_time / outages, duration_hms); printf ("\n" "Total %d outages, total time %d seconds (%s)\n" "Average time between outages:\t%d seconds (%s)\n" "Average duration:\t\t%d seconds (%s)\n" "Availability:\t\t\t%5.2f%%\n", outages, outage_time, total_hms, (now - first_sample) / outages, average_hms, outage_time / outages, duration_hms, (float) (now - first_sample - outage_time) * 100 / (float) (now - first_sample) ); } else printf ("\nNo outages\n" ); exit (0); }