su_bm.c 6.39 KB
Newer Older
Pekka Pessi's avatar
Pekka Pessi committed
1 2 3 4 5 6 7
/*
 * This file is part of the Sofia-SIP package
 *
 * Copyright (C) 2005 Nokia Corporation.
 *
 * Contact: Pekka Pessi <pekka.pessi@nokia.com>
 *
8
 * This library is free software; you can redistribute it and/or
Pekka Pessi's avatar
Pekka Pessi committed
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA
 *
 */

Pekka Pessi's avatar
Pekka Pessi committed
25
/**@internal
26
 * @file su_bm.c
27
 * @brief Search with Boyer-Moore algorithm
Pekka Pessi's avatar
Pekka Pessi committed
28 29 30 31 32 33 34 35 36
 *  
 * @author Pekka Pessi <Pekka.Pessi@nokia.com>
 *
 * @date Created: Mon Apr 11 16:35:16 2005 ppessi
 * 
 */

#include "config.h"

37
#include <sofia-sip/su_bm.h>
38

Pekka Pessi's avatar
Pekka Pessi committed
39 40 41 42
#include <sys/types.h>
#include <stddef.h>
#include <limits.h>
#include <ctype.h>
43
#include <stdlib.h>
Remi Denis-Courmont's avatar
Remi Denis-Courmont committed
44
#include <string.h>
Pekka Pessi's avatar
Pekka Pessi committed
45 46 47 48 49 50

#ifndef TORTURELOG
#define TORTURELOG(x) (void)0
#endif

struct bw_fwd_table { 
Pekka Pessi's avatar
Pekka Pessi committed
51
  unsigned char table[UCHAR_MAX + 1];
Pekka Pessi's avatar
Pekka Pessi committed
52 53
};

Pekka Pessi's avatar
Pekka Pessi committed
54
/** Build forward skip table #bm_fwd_table_t for Boyer-Moore algorithm. */
Pekka Pessi's avatar
Pekka Pessi committed
55 56 57 58 59 60 61 62 63 64 65
static
bm_fwd_table_t *
bm_memmem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
{
  size_t i;

  if (nlen >= UCHAR_MAX) {
    needle += nlen - UCHAR_MAX;
    nlen = UCHAR_MAX;
  }

66
  memset(&fwd->table, (unsigned char)nlen, sizeof fwd->table);
Pekka Pessi's avatar
Pekka Pessi committed
67 68

  for (i = 0; i < nlen; i++) {
Pekka Pessi's avatar
Pekka Pessi committed
69
    fwd->table[(unsigned short)needle[i]] = (unsigned char)(nlen - i - 1);
Pekka Pessi's avatar
Pekka Pessi committed
70 71 72 73 74
  }

  return fwd;
}

Pekka Pessi's avatar
Pekka Pessi committed
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
/** @defgroup su_bm Fast string searching with Boyer-Moore algorithm
 *
 * The Boyer-Moore algorithm is used to implement fast substring search. The
 * algorithm has some overhead caused by filling a table. Substring search
 * then requires at most 1 / substring-length less string comparisons. On
 * modern desktop hardware, Boyer-Moore algorithm is seldom faster than the
 * naive implementation if the searched substring is shorter than the cache
 * line.
 *
 */ 

/**@ingroup su_bm
 * @typedef struct bw_fwd_table bm_fwd_table_t;
 *
 * Forward skip table for Boyer-Moore algorithm.
 *
 */

/** Build case-sensitive forward skip table #bm_fwd_table_t 
 *  for Boyer-Moore algorithm.
 * @ingroup su_bm
 */
Pekka Pessi's avatar
Pekka Pessi committed
97 98 99 100 101
bm_fwd_table_t *
bm_memmem_study(char const *needle, size_t nlen)
{
  bm_fwd_table_t *fwd = malloc(sizeof *fwd);

102 103 104 105
  if (fwd)
    bm_memmem_study0(needle, nlen, fwd);

  return fwd;
Pekka Pessi's avatar
Pekka Pessi committed
106 107
}

Pekka Pessi's avatar
Pekka Pessi committed
108 109 110
/** Search for a substring using Boyer-Moore algorithm.
 * @ingroup su_bm
 */
111
char *
Pekka Pessi's avatar
Pekka Pessi committed
112 113 114 115 116
bm_memmem(char const *haystack, size_t hlen,
	  char const *needle, size_t nlen,
	  bm_fwd_table_t *fwd)
{
  size_t i, j;
117
  bm_fwd_table_t fwd0[1];
Pekka Pessi's avatar
Pekka Pessi committed
118 119

  if (nlen == 0)
120
    return (char *)haystack;
Pekka Pessi's avatar
Pekka Pessi committed
121 122 123 124 125 126
  if (needle == NULL || haystack == NULL || nlen > hlen)
    return NULL;

  if (nlen == 1) {
    for (i = 0; i < hlen; i++)
      if (haystack[i] == needle[0])
127
	return (char *)haystack + i;
Pekka Pessi's avatar
Pekka Pessi committed
128 129 130
    return NULL;
  }

131
  if (!fwd)
Pekka Pessi's avatar
Pekka Pessi committed
132 133 134 135 136 137
    fwd = bm_memmem_study0(needle, nlen, fwd0);

  for (i = j = nlen - 1; i < hlen;) {
    unsigned char h = haystack[i];
    if (h == needle[j]) {
      TORTURELOG(("match \"%s\" at %u\nwith  %*s\"%.*s*%s\": %s\n", 
138 139
		  haystack, (unsigned)i, 
		  (int)(i - j), "", (int)j, needle, needle + j + 1, 
Pekka Pessi's avatar
Pekka Pessi committed
140 141
		  j == 0 ? "match!" : "back by 1"));
      if (j == 0)
142
	return (char *)haystack + i;
Pekka Pessi's avatar
Pekka Pessi committed
143 144 145 146 147
      i--, j--;
    }
    else {
      if (fwd->table[h] > nlen - j) {
	TORTURELOG(("match \"%s\" at %u\n"
148 149 150 151
		    "last  %*s\"%.*s*%s\": (by %u)\n", 
		    haystack, (unsigned)i,
		    (int)(i - j), "", 
		    (int)j, needle, needle + j + 1, fwd->table[h]));
Pekka Pessi's avatar
Pekka Pessi committed
152 153 154 155
      	i += fwd->table[h];
      }
      else {
	TORTURELOG(("match \"%s\" at %u\n"
156 157 158 159
		    "2nd   %*s\"%.*s*%s\": (by %u)\n", 
		    haystack, (unsigned)i,
		    (int)(i - j), "", 
		    (int)j, needle, needle + j + 1, (unsigned)(nlen - j)));
Pekka Pessi's avatar
Pekka Pessi committed
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
	i += nlen - j;
      }
      j = nlen - 1;
    }
  }
  
  return NULL;
}


/** Build forward skip table for Boyer-Moore algorithm */
static
bm_fwd_table_t *
bm_memcasemem_study0(char const *needle, size_t nlen, bm_fwd_table_t *fwd)
{
  size_t i;

  if (nlen >= UCHAR_MAX) {
    needle += nlen - UCHAR_MAX;
    nlen = UCHAR_MAX;
  }

  for (i = 0; i < UCHAR_MAX; i++)
    fwd->table[i] = (unsigned char)nlen;

  for (i = 0; i < nlen; i++) {
    unsigned char n = tolower(needle[i]);
    fwd->table[n] = (unsigned char)(nlen - i - 1);
  }

  return fwd;
}

Pekka Pessi's avatar
Pekka Pessi committed
193 194 195
/** Build case-insensitive forward skip table for Boyer-Moore algorithm.
 * @ingroup su_bm
 */
Pekka Pessi's avatar
Pekka Pessi committed
196 197 198 199 200
bm_fwd_table_t *
bm_memcasemem_study(char const *needle, size_t nlen)
{
  bm_fwd_table_t *fwd = malloc(sizeof *fwd);

201 202 203 204
  if (fwd)
    bm_memcasemem_study0(needle, nlen, fwd);

  return fwd;
Pekka Pessi's avatar
Pekka Pessi committed
205 206
}

Pekka Pessi's avatar
Pekka Pessi committed
207 208 209
/** Search for substring using Boyer-Moore algorithm.
 * @ingroup su_bm
 */
210
char *
Pekka Pessi's avatar
Pekka Pessi committed
211
bm_memcasemem(char const *haystack, size_t hlen,
Pekka Pessi's avatar
Pekka Pessi committed
212 213
	      char const *needle, size_t nlen,
	      bm_fwd_table_t *fwd)
Pekka Pessi's avatar
Pekka Pessi committed
214 215
{
  size_t i, j;
Pekka Pessi's avatar
Pekka Pessi committed
216
  bm_fwd_table_t fwd0[1];
Pekka Pessi's avatar
Pekka Pessi committed
217 218

  if (nlen == 0)
219
    return (char *)haystack;
Pekka Pessi's avatar
Pekka Pessi committed
220 221 222 223 224 225
  if (needle == 0 || haystack == 0 || nlen > hlen)
    return NULL;

  if (nlen == 1) {
    for (i = 0; i < hlen; i++)
      if (haystack[i] == needle[0])
226
	return (char *)haystack + i;
Pekka Pessi's avatar
Pekka Pessi committed
227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
    return NULL;
  }

  if (!fwd) {
    fwd = bm_memcasemem_study0(needle, nlen, fwd0);
  }

  for (i = j = nlen - 1; i < hlen;) {
    unsigned char h = haystack[i], n = needle[j];
    if (isupper(h)) 
      h = tolower(h);
    if (isupper(n))
      n = tolower(n);

    if (h == n) {
      TORTURELOG(("match \"%s\" at %u\n"
243 244 245
		  "with  %*s\"%.*s*%s\": %s\n",
		  haystack, (unsigned)i,
		  (int)(i - j), "", (int)j, needle, needle + j + 1, 
Pekka Pessi's avatar
Pekka Pessi committed
246 247
		  j == 0 ? "match!" : "back by 1"));
      if (j == 0)
248
	return (char *)haystack + i;
Pekka Pessi's avatar
Pekka Pessi committed
249 250 251 252 253
      i--, j--;
    }
    else {
      if (fwd->table[h] > nlen - j) {
	TORTURELOG(("match \"%s\" at %u\n"
254 255 256 257
		    "last  %*s\"%.*s*%s\": (by %u)\n",
		    haystack, (unsigned)i,
		    (int)(i - j), "", (int)j, needle, needle + j + 1, 
		    fwd->table[h]));
Pekka Pessi's avatar
Pekka Pessi committed
258 259 260 261
      	i += fwd->table[h];
      }
      else {
	TORTURELOG(("match \"%s\" at %u\n"
262 263 264 265
		    "2nd   %*s\"%.*s*%s\": (by %u)\n", 
		    haystack, (unsigned)i,
		    (int)(i - j), "", (int)j, needle, needle + j + 1, 
		    (unsigned)(nlen - j)));
Pekka Pessi's avatar
Pekka Pessi committed
266 267 268 269 270 271 272 273
	i += nlen - j;
      }
      j = nlen - 1;
    }
  }
  
  return NULL;
}