Awk craziness: Processing log files

In: General|Linux

13 Apr 2013

A while back I wrote an awk script to process a custom log file to work out how an application was performing over a period of time.
The following script takes the timestamp from the first two fields ($1 and $2) and the value to analyse from an arbitrary field, $10 in this case.

For every line of the log file processed it calculates the min, max, avg and stddev as rolling stats over a window of the most recent lines (50 here, set by size in the script).

zcat Summary.2012-12-09.gz | grep '2012-12-09 20:[234]' | awk '
  BEGIN{
    size=50;
    tmax = -999999999;
    tmin = 999999999;
  }
  {   # Accumulate basic data
      cnt[$10]++;
      item[++n] = $10;
      if ($10 > max) max = $10; if ($10 < min) min = $10;
      if ($10 > tmax) tmax = $10; if ($10 < tmin) tmin = $10;
  }
  {
    mod=NR%size;
    if(NR<=size) {
      count++;
    } else {
      sum-=array[mod];
      min=max=$10;
      
    };
    total+=$10;
    sum+=$10; sumsq+=$10*$10;
    array[mod]=$10;  
    print $1" "$2 "\tvalue: " $10 "\tmin: " min "\tmax: " max "\tavg: " sum/count "\tstddev: " sqrt(sumsq/NR - (sum/NR)**2)  "\t"
  }
  END {
    printf("\n\n# Lines processed = %d\n", NR);
    printf("# Total = %d\n", total);
    printf("# Sqrt = %d\n", sqrt(sumsq/NR - (sum/NR)**2));
    # Print Descriptive Statistics
    printf("# Count = %d\n", n);
    printf("# Min = %d\n", tmin);
    decile = 1;
    for (decile = 10; decile < 100; decile += 10) {
      idx = int((decile * n) / 100) + 1;
      printf("# %d%% decile = %d\n", decile, item[idx]);
      if (decile == 50)
        median = item[idx];
    }
    printf("# Max = %d\n", tmax);
 
    printf("# Median = %d\n", median);
    for (i in cnt) {
      if (cnt[i] > cnt[mode])
        mode = i;
    }
    printf("# Mode = %d\n", mode);
  }
' | less
 
...
2012-12-09 20:20:01     value: 18       min:    max: 18 avg: 5.8        stddev: 6.20967 
2012-12-09 20:20:01     value: 7        min:    max: 18 avg: 6  stddev: 5.68624 
2012-12-09 20:20:01     value: 7        min:    max: 18 avg: 6.14286    stddev: 5.27605 
2012-12-09 20:20:01     value: 4        min:    max: 18 avg: 5.875      stddev: 4.98592 
2012-12-09 20:20:01     value: 2        min:    max: 18 avg: 5.44444    stddev: 4.85595 
2012-12-09 20:20:02     value: 3        min:    max: 18 avg: 5.2        stddev: 4.66476 
2012-12-09 20:20:02     value: 4        min:    max: 18 avg: 5.09091    stddev: 4.46103 
2012-12-09 20:20:02     value: 104      min:    max: 104        avg: 13.3333    stddev: 27.6687 
2012-12-09 20:20:02     value: 2        min:    max: 104        avg: 12.4615    stddev: 26.7542 
2012-12-09 20:20:02     value: 25       min:    max: 104        avg: 13.3571    stddev: 25.9824 
2012-12-09 20:20:03     value: 3        min:    max: 104        avg: 12.6667    stddev: 25.234  
2012-12-09 20:20:03     value: 3        min:    max: 104        avg: 12.0625    stddev: 24.5445 
2012-12-09 20:20:03     value: 3        min:    max: 104        avg: 11.5294    stddev: 23.907  
2012-12-09 20:20:03     value: 2        min:    max: 104        avg: 11 stddev: 23.3357 
2012-12-09 20:20:03     value: 6        min:    max: 104        avg: 10.7368    stddev: 22.7407 
2012-12-09 20:20:04     value: 4        min:    max: 104        avg: 10.4       stddev: 22.2135 
2012-12-09 20:20:04     value: 3        min:    max: 104        avg: 10.0476    stddev: 21.7354 
2012-12-09 20:20:04     value: 2        min:    max: 104        avg: 9.68182    stddev: 21.3017 
2012-12-09 20:20:04     value: 70       min:    max: 104        avg: 12.3043    stddev: 24.1938 
2012-12-09 20:20:04     value: 4        min:    max: 104        avg: 11.9583    stddev: 23.7425 
2012-12-09 20:20:04     value: 12       min:    max: 104        avg: 11.96      stddev: 23.2628 
2012-12-09 20:20:04     value: 14       min:    max: 104        avg: 12.0385    stddev: 22.8144 
2012-12-09 20:20:04     value: 6        min:    max: 104        avg: 11.8148    stddev: 22.417  
2012-12-09 20:20:04     value: 5        min:    max: 104        avg: 11.5714    stddev: 22.0493 
2012-12-09 20:20:04     value: 2        min:    max: 104        avg: 11.2414    stddev: 21.7361 
2012-12-09 20:20:04     value: 30       min:    max: 104        avg: 11.8667    stddev: 21.6344 
2012-12-09 20:20:05     value: 5        min:    max: 104        avg: 11.6452    stddev: 21.3172 
2012-12-09 20:20:05     value: 22       min:    max: 104        avg: 11.9688    stddev: 21.0587 
2012-12-09 20:20:05     value: 4        min:    max: 104        avg: 11.7273    stddev: 20.7821 
2012-12-09 20:20:05     value: 2        min:    max: 104        avg: 11.4412    stddev: 20.54   
2012-12-09 20:20:05     value: 25       min:    max: 104        avg: 11.8286    stddev: 20.3701 
2012-12-09 20:20:05     value: 27       min:    max: 104        avg: 12.25      stddev: 20.2394 
2012-12-09 20:20:05     value: 3        min:    max: 104        avg: 12 stddev: 20.0203 
2012-12-09 20:20:06     value: 2        min:    max: 104        avg: 11.7368    stddev: 19.8198 
2012-12-09 20:20:06     value: 1        min:    max: 104        avg: 11.4615    stddev: 19.6375 
2012-12-09 20:20:06     value: 2        min:    max: 104        avg: 11.225     stddev: 19.4467 
2012-12-09 20:20:06     value: 36       min:    max: 104        avg: 11.8293    stddev: 19.5846 
2012-12-09 20:20:06     value: 3        min:    max: 104        avg: 11.619     stddev: 19.3968 
2012-12-09 20:20:06     value: 27       min:    max: 104        avg: 11.9767    stddev: 19.3096 
2012-12-09 20:20:06     value: 2        min:    max: 104        avg: 11.75      stddev: 19.1467 
2012-12-09 20:20:07     value: 5        min:    max: 104        avg: 11.6       stddev: 18.9589 
2012-12-09 20:20:07     value: 2        min:    max: 104        avg: 11.3913    stddev: 18.8039 
2012-12-09 20:20:07     value: 5        min:    max: 104        avg: 11.2553    stddev: 18.6256 
2012-12-09 20:20:07     value: 4        min:    max: 104        avg: 11.1042    stddev: 18.4597 
2012-12-09 20:20:07     value: 2        min:    max: 104        avg: 10.9184    stddev: 18.3156 
2012-12-09 20:20:07     value: 4        min:    max: 104        avg: 10.78      stddev: 18.1574 
...
 
# Lines processed = 9863
# Total = 80137
# Sqrt = 15
# Count = 9863
# Min = 1
# 10% decile = 8
# 20% decile = 1
# 30% decile = 3
# 40% decile = 3
# 50% decile = 17
# 60% decile = 7
# 70% decile = 5
# 80% decile = 1
# 90% decile = 4
# Max = 140
# Median = 17
# Mode = 2

Comment Form

About this blog

I have been a developer for roughly 10 years and have worked with an extensive range of technologies. Whilst working for relatively small companies, I have worked with all aspects of the development life cycle, which has given me a broad and in-depth experience.