After talking to @kengggg, we agreed to implement NS_GetComplexLineBreaks without using the deprecated UCFindTextBreak, so I tried to rewrite nsCarbonBreaker using the Core Foundation API. However, I haven't had a Mac since 2008, so I haven't tested it. In fact, I haven't even tried to compile the code.

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 * The Original Code is mozilla.org code.
 * The Initial Developer of the Original Code is
 * Theppitak Karoonboonyanan <thep@linux.thai.net>.
 * Portions created by the Initial Developer are Copyright (C) 2007
 * the Initial Developer. All Rights Reserved.
 * Contributor(s):
 * - Theppitak Karoonboonyanan <thep@linux.thai.net>
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 * ***** END LICENSE BLOCK ***** */

#include "nsComplexBreaker.h"
#include <CoreFoundation/CFStringTokenizer.h>
#include <CoreFoundation/CFBase.h>

// An imaginary nsCoreFoundationBreaker.
// I haven't had a Mac since 2008, so this is untested (Vee Satayamas).

NS_GetComplexLineBreaks(const PRUnichar* aText, PRUint32 aLength,
                        PRPackedBool* aBreakBefore)
  NS_ASSERTION(aText, "aText shouldn't be null");

  // I don't know whether CFStringCreateWithCharactersNoCopy will works.
  // Some conversions may be needed
  CFStringRef cfAText = CFStringCreateWithCharactersNoCopy(NULL, aText, aLength,
  NS_ASSERTION(cfAText, "cfAText shouldn't be null");

  CFStringTokenizerRef tokenizer = tokenizerForString(cfAText);

    CFStringTokenizerTokenType  tokenType; 
    // CFStringTokenizerGetCurrentTokenRange may have to be called before
    // CFStringTokenizerAdvanceToNextToken
    while(CFStringTokenizerAdvanceToNextToken(tokenizer) != 
      CFRange tokenRange = CFStringTokenizerGetCurrentTokenRange(tokenizer);
      aBreakBefore[tokenRange.location + tokenRange.length] = PR_TRUE;

  // Foundation.h must be included for CFRelease?

This is my download speed test:

$ curl http://mirrors.rit.edu/ubuntu-releases/karmic/ubuntu-9.10-desktop-i386.iso > /dev/null
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  2  689M    2 14.5M    0     0  89418      0  2:14:51  0:02:50  2:12:01   99k

GIZA++ 1.0.3 บน Ubuntu 9.10

ผมพยายามจะลง Giza++ 1.0.3 บน Ubuntu 9.10 จริงๆ มี .deb ให้ใช้ แต่ว่าผมไม่ได้ใช้ เพราะอยากจะดู code ของ Giza++ ด้วย

แต่ว่าก็มีปัญหานิดหน่อย เพราะว่า string ที่จองมาเก็บปี จองมาน้อยไปหน่อย คล้ายๆ ปัญหา y2k ก็เลย patch ไปแบบนี้

diff -Nur giza-pp/GIZA++-v2/file_spec.h giza-pp.orig/GIZA++-v2/file_spec.h
--- giza-pp/GIZA++-v2/file_spec.h	2010-02-06 19:05:55.000000000 +0000
+++ giza-pp.orig/GIZA++-v2/file_spec.h	2009-03-20 11:41:12.000000000 +0000
@@ -37,13 +37,13 @@
   struct tm *local;
   time_t t;
   char *user;
-  char time_stmp[18];
+  char time_stmp[17];
   char *file_spec = 0;
   t = time(NULL);
   local = localtime(&t);
-  sprintf(time_stmp, "%03d-%02d-%02d.%02d%02d%02d.", local->tm_year, 
+  sprintf(time_stmp, "%02d-%02d-%02d.%02d%02d%02d.", local->tm_year, 
 	  (local->tm_mon + 1), local->tm_mday, local->tm_hour, 
 	  local->tm_min, local->tm_sec);
   user = getenv("USER");

รายงานบั๊กไปแล้ว เหมือนจะซ้ำด้วย http://code.google.com/p/giza-pp/issues/detail?id=20 บรรยากาศก็เงียบๆ

ทดสอบ mirror1.ku.ac.th


$ curl http://mirror1.ku.ac.th/ubuntu-dvd/karmic/release/ubuntu-9.10-dvd-amd64.iso > test
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 3981M  100 3981M    0     0  6865k      0  0:09:53  0:09:53 --:--:-- 9034k

ทดสอบเวลาประมาณ 10:30น. 8/2/2553 จากภาควิชาวิศวคอมฯ ม.เกษตรศาสตร์ ที่บางเขน ครับ