| 1 | %% Boyer-Moore searching on ASCII encoded binary |
|---|
| 2 | -module(bmsearch). |
|---|
| 3 | -export([compile/1, match/3]). |
|---|
| 4 | |
|---|
| 5 | -record(bmCtx, {pat, len, tab}). |
|---|
| 6 | |
|---|
| 7 | compile(Str) -> |
|---|
| 8 | Len = length(Str), |
|---|
| 9 | Default = dict:from_list([{C, Len} || C <- lists:seq(1, 255)]), |
|---|
| 10 | Dict = set_shifts(Str, Len, 1, Default), |
|---|
| 11 | Tab = list_to_tuple([Pos || {_, Pos} <- lists:sort(dict:to_list(Dict))]), |
|---|
| 12 | #bmCtx{pat = lists:reverse(Str), len = Len, tab = Tab}. |
|---|
| 13 | |
|---|
| 14 | set_shifts([], _, _, Dict) -> Dict; |
|---|
| 15 | set_shifts([C|T], StrLen, Pos, Dict) -> |
|---|
| 16 | set_shifts(T, StrLen, Pos + 1, dict:store(C, StrLen - Pos, Dict)). |
|---|
| 17 | |
|---|
| 18 | %% @spec match(Bin, Start, #bmCtx) -> {true, Len} | {false, SkipLen} |
|---|
| 19 | match(Bin, S, #bmCtx{pat=Pat, len=Len, tab=Tab}) -> |
|---|
| 20 | match_1(Bin, S + Len - 1, Pat, Len, Tab, 0). |
|---|
| 21 | match_1(Bin, S, [C|T], Len, Tab, Count) -> |
|---|
| 22 | <<_:S/binary, C1, _/binary>> = Bin, |
|---|
| 23 | case C1 of |
|---|
| 24 | C -> |
|---|
| 25 | match_1(Bin, S - 1, T, Len, Tab, Count + 1); |
|---|
| 26 | _ -> |
|---|
| 27 | case element(C1, Tab) of |
|---|
| 28 | Len -> {false, Len}; |
|---|
| 29 | Shift when Shift =< Count -> {false, 1}; |
|---|
| 30 | Shift -> {false, Shift - Count} |
|---|
| 31 | end |
|---|
| 32 | end; |
|---|
| 33 | match_1(_, _, [], Len, _, _) -> {true, Len}. |
|---|