Index: mergelog-4.5/configure.in =================================================================== --- mergelog-4.5.orig/configure.in +++ mergelog-4.5/configure.in @@ -2,7 +2,7 @@ dnl Process this file with autoconf to p AC_INIT(src/mergelog.c) PACKAGE=mergelog -VERSION=4.5 +VERSION=4.5-split AM_INIT_AUTOMAKE($PACKAGE,$VERSION,nosubst) AC_ARG_PROGRAM Index: mergelog-4.5/man/mergelog.1 =================================================================== --- mergelog-4.5.orig/man/mergelog.1 +++ mergelog-4.5/man/mergelog.1 @@ -1,8 +1,9 @@ -.TH MERGELOG 1 "22 Jan 2001" +.TH MERGELOG 1 "20 Jun 2001" .SH NAME mergelog \- a fast tool to merge http log files by date .SH SYNOPSIS .B mergelog +.IR [-vh]\ [-o\ outfmt] .IR logfile1 .IR logfile2 \ ... .SH DESCRIPTION Index: mergelog-4.5/man/zmergelog.1 =================================================================== --- mergelog-4.5.orig/man/zmergelog.1 +++ mergelog-4.5/man/zmergelog.1 @@ -1,8 +1,9 @@ -.TH ZMERGELOG 1 "22 Jan 2001" +.TH ZMERGELOG 1 "20 Jun 2001" .SH NAME zmergelog \- a fast tool to merge gzipped http log files by date .SH SYNOPSIS .B zmergelog +.IR [-vh]\ [-o\ outfmt] .IR logfile1 .IR logfile2 \ ... 
.SH DESCRIPTION Index: mergelog-4.5/src/mergelog.c =================================================================== --- mergelog-4.5.orig/src/mergelog.c +++ mergelog-4.5/src/mergelog.c @@ -45,12 +45,16 @@ #define mygets(a,b,c,d) fast_gzgets(a,b,c,d) #define myrewind gzrewind #define myclose gzclose +#define mywrite(f,b,l) gzwrite((f),(b),(l)) +#define MYSUFFIX ".gz" #else #define myFH FILE #define myopen fopen #define mygets(a,b,c,d) fgets(a,b,c) #define myrewind rewind #define myclose fclose +#define mywrite(f,b,l) fwrite((b),1,(l),(f)) +#define MYSUFFIX #endif @@ -112,20 +116,141 @@ int main (int argc, char *argv[]) { char *trans_digits[60]; char *trans_year[200]; char months[24]="anebarprayunulugepctovec"; + int goc; + int goUsage = 0, goVerbose = 0; + const char* goOutput = NULL; + const char* argv0 = *argv; + struct tm gotmFlags; + const char* goOutPtr; + myFH *outFile = NULL; + char outFileName[512]; + struct tm outFileTM; + char outFileNewName[512]; + size_t outBytes; + + while((goc=getopt(argc,argv,"vho:"))!=-1) { + switch(goc) { + case 'h': /* -h print usage */ + goUsage++; + break; + case 'v': /* -v verbose */ + goVerbose++; + break; + case 'o': /* -o fmt output specification */ + goOutput = optarg; + break; + default: /* whatever.. 
but we better print out usage */ + goUsage++; + break; + } + } + argv = &argv[optind]; + argc -= optind; - /* - print usage if necessary - */ - if (argc == 1) { - fprintf(stderr,"usage: %s logfile1 logfile2 ...\nmergelog %s Copyright (C) 2000-2001 Bertrand Demiddelaer\n",argv[0],VERSION); + if(argc<1) + goUsage++; + if(goUsage) { + printf( + "mergelog " VERSION " Copyright (c) 2000-2001 Bertrand Demiddelaer\n\n" + "I have at least %d reason(s) for lending you a helping hand on using the program.\n\n" + "Usage: %s [-vh] [-o fmt] logfile1 logfile2 ..\n\n" + " -h Print this message\n" + " -v Increase verbosity\n" + " -o Give template for output file name to write log to\n" + " instead of standard output in strftime(3) form.\n" + " ( eg. -o %%Y-%%m-%%d-access.log" MYSUFFIX ")\n", + goUsage,argv0); exit(1); } + /* Analyze output format if given. It's better than trying to + * regenerate file name and compare it to the filename currently being + * written each time we want to dump one line. */ + if(goOutput) { + memset(&gotmFlags,0,sizeof(gotmFlags)); + for(goOutPtr=goOutput;*goOutPtr;goOutPtr++) { + if( (*goOutPtr=='%') && goOutPtr[1]) { + switch(*(++goOutPtr)) { + case 'a': /* abbreviated weekday */ + case 'A': /* full weekday */ + case 'u': /* day of week number */ + case 'w': /* zerobased weekday number */ + gotmFlags.tm_wday = 1; + break; + case 'b': /* abbreviated month */ + case 'h': /* %b */ + case 'B': /* full month */ + case 'm': /* month number */ + gotmFlags.tm_mon = 1; + break; + case 'C': /* century */ + case 'y': /* centuryless year */ + case 'Y': /* year with a century */ + gotmFlags.tm_year = 1; + break; + case 'd': /* day of month */ + case 'e': /* day of month space padded */ + gotmFlags.tm_mday = 1; + break; + case 'D': /* %m/%d/%y americanism */ + gotmFlags.tm_mon = gotmFlags.tm_mday = gotmFlags.tm_year = 1; + break; + case 'G': /* ISO 8601 year representation. depends on week */ + case 'g': /* ISO 8601 year representation without century. 
depends on week */ + case 'U': /* sunday-based week number */ + case 'V': /* ISO 8601 week number */ + case 'W': /* monday-based week number */ + case 'x': /* preferred date representation for locale without the time */ + gotmFlags.tm_yday = gotmFlags.tm_year = 1; + break; + case 'H': /* 24h format hour */ + case 'I': /* 12h format hour */ + case 'k': /* 24h format hour space padded */ + case 'l': /* 12h format hour space padded */ + case 'p': /* AM/PM indicator */ + case 'P': /* am/pm indicator */ + gotmFlags.tm_hour = 1; + break; + case 'j': /* day of year */ + gotmFlags.tm_yday = 1; + break; + case 'M': /* minute */ + gotmFlags.tm_min = 1; + break; + case 'n': /* newline character */ + case 't': /* tab character */ + case 'z': /* time zone as an offset from GMT */ + case 'Z': /* timezone name or abbreviation */ + case '%': /* % character */ + break; + case 'r': /* %I:%M:%S %p time in am/pm notation*/ + case 'T': /* %H:%M:%S time in 24h notation */ + case 'X': /* preferred time representation for locale without the date */ + gotmFlags.tm_hour = gotmFlags.tm_min = gotmFlags.tm_sec = 1; + break; + case 'R': /* %H:%M time in 24h notation without seconds */ + gotmFlags.tm_hour = gotmFlags.tm_min = 1; + break; + case 'S': /* second */ + gotmFlags.tm_sec = 1; + break; + case 'c': /* preferred date and time representation for locale */ + case 'E': /* weird modifier */ + case 'O': /* weird modifier */ + case 's': /* number of seconds since epoch */ + case '+': /* date and time in date(1) format */ + memset(&gotmFlags,0xFF,sizeof(gotmFlags)); + break; + } + } + } + } + #ifdef USE_ZLIB /* check if there are enough gunzip buffers */ - if(argc>MAX_FILES) { + if(argc>=MAX_FILES) { fputs("too many gzipped log files, aborting\n",stderr); exit(1); } @@ -134,9 +259,9 @@ int main (int argc, char *argv[]) { /* open log files */ - for (i=1;itm_isdst=-1; for (j=0;((j<12)&&(memcmp(months+2*j,month+1,2) != 0));j++); if (j == 12) { - fprintf(stderr,"abort due to a problem with 
%s:\n%s\n",argv[i+1],log_buffer[i]); + fprintf(stderr,"abort due to a problem with %s:\n%s\n",argv[i],log_buffer[i]); exit(1); } date->tm_mon=j; @@ -303,6 +428,8 @@ int main (int argc, char *argv[]) { exit if we have only empty files */ if (nb_files == 0) { + if(outFile) + myclose(outFile); exit(0); } @@ -323,7 +450,7 @@ int main (int argc, char *argv[]) { /* start to compute since this date */ - nb_files_orig=argc-1; + nb_files_orig=argc; for(;;) { /* @@ -377,7 +504,55 @@ int main (int argc, char *argv[]) { write the log line faster than a puts and we are sure to find a '\0' in log_buffer[i] */ - write(1,log_buffer[i],(size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i])); + if(goOutput) { + /* Check whether any of the date/time components concerned + * have changed or if we have no output file opened. It + * doesn't make much sense to do all the comparisons in case + * we have no output file opened, but it makes even less sense + * to check for file handle each time we evaluate date. 
*/ + if( +# define TMCOMPARE(tmpart) ( gotmFlags.tm_##tmpart && (outFileTM.tm_##tmpart!=date->tm_##tmpart) ) + TMCOMPARE(sec) || TMCOMPARE(min) || TMCOMPARE(hour) + || TMCOMPARE(mday) || TMCOMPARE(mon) || TMCOMPARE(year) + || TMCOMPARE(wday) || TMCOMPARE(yday) + || !outFile +# undef TMCOMPARE + ) { + if(!strftime(outFileNewName,sizeof(outFileNewName)-1,goOutput,date)) { + fprintf(stderr,"abort due to a failed attempt to generate output file name\n"); + if(outFile) + myclose(outFile); + exit(1); + } + if( (!outFile) || strcmp(outFileName,outFileNewName) ) { + /* okay, we're about to change output file or start + * writing first file */ + if(outFile) { + myclose(outFile); + } + if(goVerbose) + fprintf(stderr, "Writing to \"%s\"..\n",outFileNewName); + outFile = myopen(outFileNewName,"a"); + if(!outFile) { + fprintf(stderr, "abort due to a failed attempt to open/create output file %s: %m\n",outFileNewName); + exit(1); + } + memmove(outFileName,outFileNewName,sizeof(outFileName)); + memmove(&outFileTM,date,sizeof(outFileTM)); + } + } + /* %Y-%m-%d_%H-%M-%S-access.log */ + outBytes = (size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i]); + if( mywrite(outFile,log_buffer[i],outBytes) != outBytes) { + fprintf(stderr,"abort due to a failed write operation on output file %s: %m\n",outFileName); + if(outFile) + myclose(outFile); + exit(1); + } + }else{ + /* do it the way we would do without */ + write(1,log_buffer[i],(size_t)((char *)memchr(log_buffer[i],0,BUFFER_SIZE)-log_buffer[i])); + } /* is it an end of file ? @@ -388,7 +563,9 @@ int main (int argc, char *argv[]) { close all log files and exit if all end of files are reached */ if (--nb_files == 0) { - for (j=0;j