KunyitTst is a ternary search tree library in Python. Normally, it takes a str or unicode string as the key, since I use it for Thai word segmentation and similar tasks. Now I also want to use lists of strings as keys, so that it can store a (Moses) phrase table, and that requires some modification. I’m still not sure how much it will affect performance. Anyway, the patch is as follows:
Index: tests/test_basic.py
===================================================================
--- tests/test_basic.py (revision 15)
+++ tests/test_basic.py (working copy)
@@ -26,6 +26,27 @@
def setup_method(self, method):
self.setup()
+ def test_insert_has_key_list(self):
+ self.tst.insert(['i', 'have', 'a', 'key'], 10)
+ self.tst.insert(['i', 'have', 'a', 'book'], 20)
+ self.tst.insert(['a', 'book'], 30)
+ assert self.tst.has_key(['i', 'have', 'a', 'key']) == True
+ assert self.tst.has_key(['i', 'have', 'a', 'book']) == True
+ assert self.tst.has_key(['a', 'book']) == True
+ assert self.tst.has_key(['a', 'look']) == False
+
+ def test_iterator_list(self):
+ self.tst.insert(['a', 'book'], 30)
+ i = self.tst.iterator()
+ assert i.apply(u"a") == True
+ assert i.is_break_pos() == False
+ assert i.apply(u"book") == True
+ assert i.is_break_pos() == True
+ assert i.get_value() == 30
+
+ i = self.tst.iterator()
+ assert i.apply(u"book") == False
+
def test_happy_insert(self):
self.tst.insert(u"abc", 1)
Index: kunyittst/tst.py
===================================================================
--- kunyittst/tst.py (revision 15)
+++ kunyittst/tst.py (working copy)
@@ -27,7 +27,8 @@
if not self._valid:
return False
if isinstance(ch, str) or isinstance(ch, unicode):
- ch = ord(ch)
+ if len(ch) == 1:
+ ch = ord(ch)
self._n = self._tst.goto_child(self._n)
if self._n == None:
return False
@@ -163,7 +164,10 @@
def insert(self, k, v):
p = self._root * Tst.SIZE
for i, ch in enumerate(k):
- c = ord(ch)
+ if len(ch) == 1:
+ c = ord(ch)
+ else:
+ c = ch
while self._nodes[p + Tst.KEY] != c:
if self._nodes[p + Tst.KEY] == None:
self._nodes[p + Tst.KEY] = c






