Commit fc9e8beb authored by Poul-Henning Kamp's avatar Poul-Henning Kamp

Overhaul NCSA logging.

Reduce memory footprint by only allocating space for a fd first time
we see it.  Few if any servers will ever see 64k fd's in use.

Move the sbuf into the logline structure for simplicity.

Access the logline structure through a pointer instead of indexing
the array all the time.

Reduce amount of malloc/free traffic for performance.

Use the "cooked" SHMlog api where the length, fd, etc are broken out
as arguments for us.  Pass the FILE* where we want the stuff to
go as private data (stdout for now).

Add FlexeLint files.

Now over to Anders again...



git-svn-id: http://www.varnish-cache.org/svn/trunk/varnish-cache@981 d4fa192b-c00b-0410-8231-f00ffab90ce4
parent 7b672b69
-passes=3
-header(../../config.h)
+d__attribute__()=
// Fix strchr() semtics, it can only return NULL if arg2 != 0
-sem(strchr, 1p, type(1), 2n == 0 ? (@p < 1p) : (@p < 1p || @p == 0 ))
-ffc // No automatic custody
-e763 // Redundant declaration for symbol '...' previously declared
-e726 // Extraneous comma ignored
-e728 // Symbol ... not explicitly initialized
-e716 // while(1) ...
-e785 // Too few initializers for aggregate
-e786 // String concatenation within initializer
-emacro(740, TAILQ_PREV) // Unusual pointer cast (incompatible indirect types)
-emacro((826), TAILQ_PREV) // Suspicious pointer-to-pointer conversion (area too small)
-esym(534, printf) // Ignoring return value of function
-esym(534, fprintf) // Ignoring return value of function
-esym(534, memset) // Ignoring return value of function
-esym(534, memcpy) // Ignoring return value of function
-esym(534, memmove) // Ignoring return value of function
-esym(534, strcpy) // Ignoring return value of function
-esym(534, vsb_printf) // Ignoring return value of function
-esym(534, vsb_cat) // Ignoring return value of function
// cache.h
-emacro(506, INCOMPL) // Constant value Boolean
// cache_center.c
-efunc(525, CNT_Session) // Negative indentation from line
-efunc(525, http_FilterHeader) // Negative indentation from line
-efunc(539, http_FilterHeader) // Positive indentation from line
// cache_vcl.c
-efunc(525, vcl_handlingname) // Negative indentation from line
-esym(528, vcl_handlingname) // Not referenced
-e641 // Converting enum 'cli_status_e' to int
// Review all below this line ///////////////////////////////////////////////
-e767 // Macro redef (system queue.h vs ours )
-e574 // Signed-unsigned mix with relational
-e712 // Loss of precision (assignment) (long long to
-e747 // Significant prototype coercion (arg. no. 2) long
-e713 // Loss of precision (assignment) (unsigned long long to long long)
-e506 // Constant value boolean
-e818 // Pointer parameter '...' could be declared as pointing to const
-e774 // Boolean within 'if' always evaluates to False
-e534 // Ignoring return value of function
-e557 // unrecog format
-e732 // Loss of sign (arg. no. 2) (int to unsigned
-e737 // [45] Loss of sign in promotion from int to unsigned
#!/bin/sh
T=/tmp/_$$
flexelint \
-I/usr/include \
-I. \
-I../../include \
flint.lnt \
*.c > $T 2>&1
for t in Error Warning Info
do
sed -n "/$t [0-9][0-9][0-9]:/s/.*\($t [0-9][0-9][0-9]\).*/\1/p" $T
done | awk '
$2 == 830 { next }
$2 == 831 { next }
{
i=$2"_"$1
h[i]++
n++
}
END {
printf "%5d %s\n", n, "Total"
for (i in h)
printf "%5d %s\n", h[i], i
}
' | sort -rn
cat $T
......@@ -27,385 +27,133 @@
#include "shmlog.h"
#include "varnishapi.h"
/* Ordering-----------------------------------------------------------*/
/* Adding a struct to hold the data for the logline
*
*/
struct logline {
char df_h[4 * (3 + 1)]; // Datafield for %h (IP adress)
int df_hfini; // Set to 1 when a SessionClose is seen.
unsigned char *df_r; // Datafield for %r (Request)
int df_rfini; // Set to 1 when a ReqServTime has come.
unsigned char *df_s; // Datafield for %s, Status
int df_sfini;
unsigned char *df_b; // Datafield for %b, Bytes
int df_bfini;
struct tm *logline_time; // Datafield for %t
unsigned char *df_R; // Datafield for %{Referer}i
int df_Rfini;
unsigned char *df_U; // Datafield for %{User-agent}i
int df_Ufini;
int bogus_req; // Set to 1 if we discover a bogus request
};
/* We make a array of pointers to vsb's. Sbuf is a string buffer.
* * The buffer can be made/extended/cleared etc. through a API.
* * The array is 65536 long because we will use sessionid as key.
* *
* */
static struct vsb *ob[65536];
static struct logline ll[65536];
/*
* Clean order is called once in a while. It clears all the sessions that
* where never finished (SLT_SessionClose). Because the data is not complete
* we disregard the data.
*
*/
static void
clean_order(void)
static struct logline {
char df_h[4 * (3 + 1)]; /* Datafield for %h (IP adress) */
char df_s[4]; /* Datafield for %s, Status */
char df_b[12]; /* Datafield for %b, Bytes */
char *df_R; /* Datafield for %{Referer} */
char *df_U; /* Datafield for %{User-agent} */
int bogus_req; /* bogus request */
struct vsb *sb;
} *ll[65536];
/* Check if string starts with pfx */
static int
ispfx(const char *ptr, unsigned len, const char *pfx)
{
unsigned u;
for (u = 0; u < 65536; u++) {
if (ob[u] == NULL)
continue;
vsb_finish(ob[u]);
vsb_clear(ob[u]);
}
}
static struct tm *make_timestruct(char *tmpPtra){
char *tmpPtrb;
int timesec = 0;
time_t req_time; // Timeobject used for making the requesttime.
int i = 0;
char temp_time[27];
struct tm *timestring;
temp_time[0] = '\0';
for ( tmpPtrb = strtok(tmpPtra," "); tmpPtrb != NULL; tmpPtrb = strtok(NULL, " ")){
if (i == 2){
// We have the right time
//printf("Time: %s\n", tmpPtrb);
tmpPtra = tmpPtrb;
strncpy(temp_time, tmpPtra, 10);
temp_time[11] = '\0';
}
//printf("ReqServTime number %d: %s\n", i, tmpPtrb);
i++;
}
//strncpy(temp_time, tmpPtra, 20);
//printf("inten: %s\n",temp_time);
timesec = atoi(temp_time);
req_time = timesec;
timestring = localtime(&req_time);
return timestring;
unsigned l;
l = strlen(pfx);
if (l > len)
return (0);
if (strncasecmp(ptr, pfx, l))
return (0);
return (1);
}
static void
extended_log_format(unsigned char *p)
static int
extended_log_format(void *priv, unsigned tag, unsigned fd, unsigned len, unsigned spec, const char *ptr)
{
unsigned u,v,w;
// Used for getting IP.
unsigned char *tmpPtr;
int j;
// Used for requesttime.
char *tmpPtra;
u = (p[2] << 8) | p[3];
if (ob[u] == NULL) {
ob[u] = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
assert(ob[u] != NULL);
const char *p;
char *q;
FILE *fo;
time_t t;
long l;
struct tm tm;
char tbuf[40];
struct logline *lp;
if (!(spec &VSL_S_CLIENT))
return (0);
if (ll[fd] == NULL) {
ll[fd] = calloc(sizeof *ll[fd], 1);
assert(ll[fd] != NULL);
ll[fd]->sb = vsb_new(NULL, NULL, 0, VSB_AUTOEXTEND);
assert(ll[fd]->sb != NULL);
}
v = 0;
w = 0;
j = 0;
//ll[u].df_rfini = 0;
//ll[u].df_hfini = 0;
lp = ll[fd];
switch (p[0]) {
switch (tag) {
case SLT_SessionOpen:
// We catch the IP adress of the session.
// We also catch IP in SessionReuse because we can not always
// be sure we see a SessionOpen when we start logging.
//printf("IP %s\n", p+4);
ll[u].df_h[0] = '\0';
tmpPtr = strchr(p + 4, ' ');
if (tmpPtr != NULL){
j = strlen(p + 4) - strlen(tmpPtr); // length of IP
strncpy(ll[u].df_h, p + 4, j);
ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
}
//printf("New session [%d]: %s \n",u, ll[u].df_h);
for (p = ptr, q = lp->df_h; *p && *p != ' ';)
*q++ = *p++;
*q = '\0';
break;
case SLT_ReqStart:
// We use XID to catch that a new request is comming inn.
vsb_clear(lp->sb);
break;
case SLT_RxRequest:
vsb_clear(ob[u]);
if (p[1] >= 4 && !strncasecmp((void *)&p[4], "HEAD",4)){
vsb_bcat(ob[u], p + 4, strlen(p + 4));
//printf("Got a HEAD\n");
if (ispfx(ptr, len, "HEAD")) {
vsb_bcat(lp->sb, ptr, len);
} else if (ispfx(ptr, len, "POST")) {
vsb_bcat(lp->sb, ptr, len);
} else if (ispfx(ptr, len, "GET")) {
vsb_bcat(lp->sb, ptr, len);
} else {
lp->bogus_req = 1;
}
else if (p[1] >= 4 && !strncasecmp((void *)&p[4], "POST",4)){
vsb_bcat(ob[u], p + 4, strlen(p + 4));
//printf("Got a POST\n");
}
else if (p[1] >= 3 && !strncasecmp((void *)&p[4], "GET",3)){
vsb_bcat(ob[u], p + 4, strlen(p + 4));
//printf("Got a GET\n");
}
else {
//vsb_bcat(ob[u], p + 4, strlen(p + 4));
//printf("Got something other than HEAD, POST, GET\n");
ll[u].bogus_req = 1;
}
break;
break;
case SLT_RxURL:
vsb_cat(ob[u], " ");
vsb_bcat(ob[u], p + 4, strlen(p + 4));
vsb_cat(lp->sb, " ");
vsb_bcat(lp->sb, ptr, len);
break;
case SLT_RxProtocol:
vsb_cat(ob[u], " ");
vsb_bcat(ob[u], p + 4, strlen(p + 4));
vsb_cat(lp->sb, " ");
vsb_bcat(lp->sb, ptr, len);
break;
case SLT_TxStatus:
ll[u].df_s = strdup(p + 4);
ll[u].df_sfini = 1;
strcpy(lp->df_s, ptr);
break;
case SLT_RxHeader:
if (p[1] >= 11 && !strncasecmp((void *)&p[4], "user-agent:",11)){
ll[u].df_U = strdup(p + 4);
// Jump ahead past "User-Agent: "
ll[u].df_U = ll[u].df_U + 12;
ll[u].df_Ufini = 1;
}
if (p[1] >= 8 && !strncasecmp((void *)&p[4], "referer:",8)){
ll[u].df_R = strdup(p + 4);
ll[u].df_R = ll[u].df_R + 9;
ll[u].df_Rfini = 1;
}
break;
case SLT_ReqEnd:
// We use ReqServTime to find how the time the request was delivered
// also to define that a request is finished.
tmpPtra = strdup(p + 4);
ll[u].logline_time = make_timestruct(tmpPtra);
free(tmpPtra);
ll[u].df_rfini = 1;
//printf("ReqServTime %s\n", temp_time);
if (ispfx(ptr, len, "user-agent:"))
lp->df_U = strdup(ptr + 12);
else if (ispfx(ptr, len, "referer:"))
lp->df_R = strdup(ptr + 9);
break;
case SLT_Length:
// XXX ask DES or PHK about this one. Am I overflowing?
ll[u].df_b = strdup(p + 4);
ll[u].df_bfini = 1;
if (!atoi(ll[u].df_b)){
ll[u].df_b = malloc(2);
ll[u].df_b[0] = '-';
ll[u].df_b[1] = '\0';
}
break;
case SLT_SessionClose:
// Session is closed, we clean up things. But do not write.
//printf("Session close [%d]\n", u);
ll[u].df_hfini = 1;
break;
case SLT_SessionReuse:
// We use SessionReuse to catch the IP adress of a session that has already
// started with a SessionOpen that we did not catch.
// Other than that it is not used.
// Catch IP if not already done.
if (ll[u].df_h[0] == '\0'){
// We don't have IP, fetch it.
//
tmpPtr = strchr(p + 4, ' ');
if (tmpPtr != NULL){
j = strlen(p + 4) - strlen(tmpPtr); // length of IP
strncpy(ll[u].df_h, p + 4, j);
ll[u].df_h[j] = '\0'; // put on a NULL at end of buffer.
}
//printf("Got IP from Reuse [%d] : %s\n", u, ll[u].df_h);
}
//printf("Session reuse [%d]\n", u);
if (strcmp(ptr, "0"))
strcpy(lp->df_b, ptr);
else
strcpy(lp->df_b, "-");
break;
default:
// printf("DEBUG: %s\n", p+4);
break;
}
if (ll[u].df_rfini) {
// We have a ReqServTime. Lets print the logline
// and clear variables that are different for each request.
//
char temp_time[27]; // Where we store the string we take from the log
// make temp_time
strftime (temp_time, 29, "[%d/%b/%Y:%X %z] ", ll[u].logline_time);
if (ll[u].df_h[0] == '\0' || ll[u].bogus_req){
ll[u].bogus_req = 0;
//printf("Tom IP \n");
}
else{
printf("%s - - %s ", ll[u].df_h, temp_time );
vsb_finish(ob[u]);
printf("\"%s\"", vsb_data(ob[u]));
printf(" %s %s ", ll[u].df_s, ll[u].df_b);
if (ll[u].df_Rfini){
printf(" \"%s\" ", ll[u].df_R);
}
else {
printf(" \"-\" ");
}
if (ll[u].df_Ufini){
printf(" \"%s\" ", ll[u].df_U);
}
else {
printf(" \"-\" ");
}
printf("\n");
}
fflush(stdout);
//vsb_finish(ob[u]);
vsb_clear(ob[u]);
temp_time[0] = '\0';
ll[u].df_rfini = 0;
// Clear the TxStaus
if (ll[u].df_sfini){
free(ll[u].df_s);
ll[u].df_sfini = 0;
//printf("Freed df_s [%d]\n", u);
}
if (ll[u].df_bfini){
free(ll[u].df_b);
ll[u].df_bfini = 0;
//printf("Freed df_b [%d]\n", u);
}
// Clean User-Agent and Referer
if (ll[u].df_Ufini){
ll[u].df_U = ll[u].df_U - 12;
free(ll[u].df_U);
ll[u].df_Ufini = 0;
ll[u].df_U[0] = '\0';
//printf("Freed df_U [%d]\n", u);
}
if (ll[u].df_Rfini){
ll[u].df_R = ll[u].df_R - 9;
free(ll[u].df_R);
ll[u].df_R[0] = '\0';
ll[u].df_Rfini = 0;
//printf("Freed df_R [%d]\n", u);
}
// Clean up ReqEnd/Time variables
//if (tmpPtra != NULL){
// free(tmpPtra);
//printf("Freed tmpPtra [%d]\n", u);
//}
/*
if (tmpPtrb != NULL){
free(tmpPtrb);
//printf("Freed tmpPtrb [%d]\n", u);
}
if (tmpPtrc != NULL){
free(tmpPtrc);
//printf("Freed tmpPtrc [%d]\n", u);
}
*/
temp_time[0] = '\0';
if (ll[u].df_hfini) {
// We have a SessionClose. Lets clean data.
//
// Clean IP adress
ll[u].df_h[0] = '\0';
//printf("Clearer [%d]\n", u);
ll[u].df_hfini = 0;
}
if (tag != SLT_ReqEnd)
return (0);
fo = priv;
assert(1 == sscanf(ptr, "%*u %*u.%*u %ld.", &l));
t = l;
localtime_r(&t, &tm);
strftime(tbuf, sizeof tbuf, "%d/%b/%Y:%T %z", &tm);
fprintf(fo, "%s - - %s", lp->df_h, tbuf);
vsb_finish(lp->sb);
fprintf(fo, " \"%s\"", vsb_data(lp->sb));
fprintf(fo, " %s", lp->df_b);
if (lp->df_R != NULL) {
fprintf(fo, " \"%s\"", lp->df_R);
free(lp->df_R);
lp->df_R = NULL;
}
if (lp->df_U != NULL) {
fprintf(fo, " \"%s\"", lp->df_U);
free(lp->df_U);
lp->df_U = NULL;
}
fprintf(fo, "\n");
return (0);
}
/*--------------------------------------------------------------------*/
......@@ -421,15 +169,11 @@ int
main(int argc, char **argv)
{
int i, c;
unsigned u, v;
unsigned char *p;
char *w_opt = NULL;
FILE *wfile = NULL;
struct VSL_data *vd;
vd = VSL_New();
while ((c = getopt(argc, argv, VSL_ARGS "w:")) != -1) {
while ((c = getopt(argc, argv, VSL_ARGS "")) != -1) {
i = VSL_Arg(vd, c, optarg);
if (i < 0)
exit (1);
......@@ -439,9 +183,6 @@ main(int argc, char **argv)
case 'V':
varnish_version("varnishncsa");
exit(0);
case 'w':
w_opt = optarg;
break;
default:
usage();
}
......@@ -450,36 +191,11 @@ main(int argc, char **argv)
if (VSL_OpenLog(vd))
exit (1);
if (w_opt != NULL) {
wfile = fopen(w_opt, "w");
if (wfile == NULL) {
perror(w_opt);
exit (1);
}
}
u = 0;
v = 0;
while (1) {
i = VSL_NextLog(vd, &p);
i = VSL_Dispatch(vd, extended_log_format, stdout);
if (i < 0)
break;
if (i == 0) {
if (w_opt == NULL) {
if (++v == 100){
clean_order();
fflush(stdout);
}
}
usleep(50000);
continue;
}
v = 0;
extended_log_format(p);
}
clean_order();
return (0);
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment