KunyitTST: Taking a list of strings as key

KunyitTST is a ternary search tree library in Python. Normally, it takes a string or unicode object as the key, since I use it for Thai word segmentation and similar tasks. Now I also want to use it with lists of strings as keys, for storing a (Moses) phrase table, so it needs some modification. I’m still not sure how much this will affect performance. Anyway, the patch is as follows:

Index: tests/test_basic.py
===================================================================
--- tests/test_basic.py	(revision 15)
+++ tests/test_basic.py	(working copy)
@@ -26,6 +26,27 @@
     def setup_method(self, method):
         self.setup()

+    def test_insert_has_key_list(self):
+        self.tst.insert(['i', 'have', 'a', 'key'], 10)
+        self.tst.insert(['i', 'have', 'a', 'book'], 20)
+        self.tst.insert(['a', 'book'], 30)
+        assert self.tst.has_key(['i', 'have', 'a', 'key']) == True
+        assert self.tst.has_key(['i', 'have', 'a', 'book']) == True
+        assert self.tst.has_key(['a', 'book']) == True
+        assert self.tst.has_key(['a', 'look']) == False
+
+    def test_iterator_list(self):
+        self.tst.insert(['a', 'book'], 30)
+        i = self.tst.iterator()
+        assert i.apply(u"a") == True
+        assert i.is_break_pos() == False
+        assert i.apply(u"book") == True
+        assert i.is_break_pos() == True
+        assert i.get_value() == 30
+
+        i = self.tst.iterator()
+        assert i.apply(u"book") == False
+
     def test_happy_insert(self):
         self.tst.insert(u"abc", 1)

Index: kunyittst/tst.py
===================================================================
--- kunyittst/tst.py	(revision 15)
+++ kunyittst/tst.py	(working copy)
@@ -27,7 +27,8 @@
         if not self._valid:
             return False
         if isinstance(ch, str) or isinstance(ch, unicode):
-            ch = ord(ch)
+            if len(ch) == 1:
+                ch = ord(ch)
         self._n = self._tst.goto_child(self._n)
         if self._n == None:
             return False
@@ -163,7 +164,10 @@
     def insert(self, k, v):
         p = self._root * Tst.SIZE
         for i, ch in enumerate(k):
-            c = ord(ch)
+            if len(ch) == 1:
+                c = ord(ch)
+            else:
+                c = ch
             while self._nodes[p + Tst.KEY] != c:
                 if self._nodes[p + Tst.KEY] == None:
                     self._nodes[p + Tst.KEY] = c

ใส่ความเห็น

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / เปลี่ยนแปลง )

Twitter picture

You are commenting using your Twitter account. Log Out / เปลี่ยนแปลง )

Facebook photo

You are commenting using your Facebook account. Log Out / เปลี่ยนแปลง )

Google+ photo

You are commenting using your Google+ account. Log Out / เปลี่ยนแปลง )

Connecting to %s