Fixed bug in parsing code that led to buffer overflow and corrupted

memory. Also made some minor optimizations in same.

Fixed bug in parsing code that led to buffer overflow and corrupted
memory. Also made some minor optimizations in the same code.
768146f8 · Jim Fulton · a3ed0b73 · 768146f8
Commit 768146f8 authored Jul 31, 1997 by Jim Fulton
Show whitespace changes
Inline Side-by-side

Showing with 83 additions and 67 deletions

lib/python/SearchIndex/WordSequence.c lib/python/SearchIndex/WordSequence.c +83 -67

No files found.
--- a/lib/python/SearchIndex/WordSequence.c
+++ b/lib/python/SearchIndex/WordSequence.c
@@ -151,40 +151,47 @@ check_synstop(WordSequence *self, PyObject *word)
    return value;		/* Which must be None! */
 }
+#define MAX_WORD 256
 static PyObject *
 next_word(WordSequence *self)
 {
-    char wbuf[256];
+  char wbuf[MAX_WORD];
-    char *tmp_ptr;
+  char *p, *end, *here, *b;
  int size = PyString_Size(self->wordletters), i = 0;
  PyObject *pyword, *res;
-    while (self->here < self->end)
+  here=self->here;
+  end=self->end;
+  b=wbuf;
+  while (here < end)
    {
      /* skip hyphens */ 
-        if ((i > 0) && (*self->here == '-'))
+      if ((i > 0) && (*here == '-'))
        {
-            tmp_ptr = self->here;
+	  while ((*++here <= ' ') && (here < end));
-            while ((*(++tmp_ptr) <= ' ') && (tmp_ptr < self->end));
+	  continue;
-            if ((tmp_ptr < self->end) && (tmp_ptr - self->here) > 1)
-            {
-                self->here = tmp_ptr;
-	    }
 	}
      /* Check to see if this character is part of a word */
-        if (memchr(self->cwordletters, *self->here, size))
+      if (memchr(self->cwordletters, *here, size))
        {
-            wbuf[i++] = *self->here;
+	  if(i++ < MAX_WORD) *b++ = *here;
        }
      else if (i != 0)
        {
+	  if(i >= MAX_WORD)
+	    {
+	      /* Ridiculously long word! */
+	      i=0;
+	      b=wbuf;
+	      here++;
+	      continue;
+	    }
-	    /* We've found the end of a word */
+	  /* Check for and skip a phone number (I know it's lame :) */
+	  if(i == 8 &&
-	    /* Check for a phone number (I know it's lame :) */
+	     wbuf[0] >= '0' && wbuf[0] <= '9' && 
-	    if(wbuf[0] >= '0' && wbuf[0] <= '9' && 
 	     wbuf[1] >= '0' && wbuf[1] <= '9' && 
 	     wbuf[2] >= '0' && wbuf[2] <= '9' &&
 	     wbuf[3] == '-' &&
@@ -194,23 +201,29 @@ next_word(WordSequence *self)
 	     wbuf[7] >= '0' && wbuf[7] <= '9')
 	    {
 	      i=0;
-		self->here++;
+	      b=wbuf;
+	      here++;
 	      continue;
 	    }
 	  UNLESS(pyword = PyString_FromStringAndSize(wbuf, i))
            {
+	      self->here=here;
 	      return NULL;
 	    }
+	  /* We've found the end of a word */
 	  UNLESS(res = check_synstop(self, pyword))
            {
+	      self->here=here;
 	      Py_DECREF(pyword);
 	      return NULL;
 	    }
 	  if (res != Py_None)
            {
+	      self->here=here;
 	      Py_DECREF(pyword);
 	      return res;
 	    }
@@ -220,11 +233,14 @@ next_word(WordSequence *self)
 	  Py_DECREF(res);          
 	  Py_DECREF(pyword);
 	  i = 0;
+	  b=wbuf;
        }            
-        self->here++;
+      here++;
    }
+  self->here=here;
  /* We've reached the end of the string */
  if (i == 0)
@@ -538,7 +554,7 @@ static char WordSequence_module_documentation[] =
 "\n"
 "for use in an inverted index\n"
 "\n"
-"$Id: WordSequence.c,v 1.8 1997/07/17 14:44:45 jim Exp $\n"
+"$Id: WordSequence.c,v 1.9 1997/07/31 22:31:58 jim Exp $\n"
 ;
 void