[Date Prev][Date Next] [Thread Prev][Thread Next] [Date Index] [Thread Index]

[graphite2] 15/69: Imported Upstream version 1.0.3.real



This is an automated email from the git hooks/post-receive script.

rene pushed a commit to branch master
in repository graphite2.

commit bf5fc4858bc9318688b5234551e738917ce8ae42
Author: Rene Engelhard <rene@debian.org>
Date:   Thu Apr 21 14:48:42 2016 +0200

    Imported Upstream version 1.0.3.real
---
 .hg_archival.txt                                   |   4 +-
 .hgtags                                            |   4 +
 contrib/android/jni/Android.mk                     |   4 +-
 contrib/android/jni/graphite/Android.mk            |   2 +-
 contrib/android/jni/graphite_layer.cpp             |   6 +-
 contrib/android/jni/loadgr_jni.cpp                 |   2 +-
 .../src/org/sil/palaso/helloworld/HelloWorld.java  |   2 +-
 src/CMakeLists.txt                                 |   1 +
 src/CmapCache.cpp                                  |  46 +-
 src/CmapCache.h                                    |  47 +-
 src/Face.cpp                                       |  21 +-
 src/Face.h                                         |   8 +-
 src/Main.h                                         |   2 +-
 src/NameTable.cpp                                  |  50 +--
 src/SegCacheStore.cpp                              |  21 +-
 src/Segment.cpp                                    | 120 +----
 src/{CmapCache.h => UtfCodec.cpp}                  |  37 +-
 src/UtfCodec.h                                     | 208 +++++++++
 src/files.mk                                       |   4 +-
 src/gr_segment.cpp                                 |  81 ++--
 src/processUTF.h                                   | 494 ---------------------
 tests/CMakeLists.txt                               |   1 +
 tests/segcache/segcachetest.cpp                    |  42 +-
 tests/utftest/CMakeLists.txt                       |  15 +
 tests/utftest/utftest.cpp                          |  56 +++
 tests/vm/CMakeLists.txt                            |   1 +
 26 files changed, 505 insertions(+), 774 deletions(-)

diff --git a/.hg_archival.txt b/.hg_archival.txt
index 2db55cc..2d78a37 100644
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,5 +1,5 @@
 repo: 999e2033695c3bcf2f65d611737ac9008805bd58
-node: cb735be7d86d894f0667cb63dffc4273fd53d9fe
+node: 418e55d88178b9bd870bab38be8768aecb743829
 branch: default
 latesttag: 1.0.3
-latesttagdistance: 2
+latesttagdistance: 1
diff --git a/.hgtags b/.hgtags
index 094d794..82d1a33 100644
--- a/.hgtags
+++ b/.hgtags
@@ -14,3 +14,7 @@ bedb05f72d56f24ca0fc333fd14eabb1ec553902 1.0.1
 0fa690ff089ce0bc382a553cc01c0b721fbdee5c 1.0.2
 b10bcaf1302411513a5961d1854ff8c02e5ad5e6 1.0.2
 8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3
+8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3
+0000000000000000000000000000000000000000 1.0.3
+0000000000000000000000000000000000000000 1.0.3
+f148746a0d99d2f9bc050906ce78815565a0d0b4 1.0.3
diff --git a/contrib/android/jni/Android.mk b/contrib/android/jni/Android.mk
index 7ae1d28..224e329 100644
--- a/contrib/android/jni/Android.mk
+++ b/contrib/android/jni/Android.mk
@@ -28,9 +28,9 @@ LOCAL_PATH := $(call my-dir)
 
 MY_ANDROID_SRC := $(HOME)/Work/android/android-src
 MY_ANDROID_LIBS := $(MY_ANDROID_SRC)/out/target/product/generic/symbols/system/lib
-#MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib
+MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib
 MY_SKIA := $(MY_ANDROID_SRC)/external/skia
-#MY_SKIA := $(HOME)/Work/android/skia/8
+MY_SKIA := $(HOME)/Work/android/skia/8
 
 include $(CLEAR_VARS)
 
diff --git a/contrib/android/jni/graphite/Android.mk b/contrib/android/jni/graphite/Android.mk
index 9a5990a..ffb6d3e 100644
--- a/contrib/android/jni/graphite/Android.mk
+++ b/contrib/android/jni/graphite/Android.mk
@@ -11,7 +11,7 @@ include ../../src/files.mk
 LOCAL_MODULE := graphite2
 #LOCAL_SRC_FILES := $(foreach v,$(GR2_SOURCES),./$(v))
 LOCAL_SRC_FILES := $(GR2_SOURCES)
-LOCAL_C_INCLUDES := ../../include
+LOCAL_C_INCLUDES := ../../include ../../src
 LOCAL_EXPORT_C_INCLUDES := ../../include
 #LOCAL_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include
 #LOCAL_EXPORT_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include
diff --git a/contrib/android/jni/graphite_layer.cpp b/contrib/android/jni/graphite_layer.cpp
index 425e9e4..29b1a5f 100644
--- a/contrib/android/jni/graphite_layer.cpp
+++ b/contrib/android/jni/graphite_layer.cpp
@@ -448,11 +448,11 @@ func_map thismap[] = {
     { "_ZN8SkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", "_ZN10mySkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", 0, 0 },
     // SkTypeface::CreateFromName                        mySkTypeface::CreateFromName
     { "_ZN10SkTypeface14CreateFromNameEPKcNS_5StyleE",   "_ZN12mySkTypeface14CreateFromNameEPKcN10SkTypeface5StyleE", 0, 0 },
-    // SkPaint::measureText                         SkPaint::measureText
+    // SkPaint::measureText                         mySkPaint::measureText
     { "_ZNK7SkPaint11measureTextEPKvjP6SkRectf",    "_ZNK9mySkPaint11measureTextEPKvjP6SkRectf", 0, 0 },
-    // SkPaint::measureText                         SkPaint::measureText
+    // SkPaint::measureText                         mySkPaint::measureText
     { "_ZNK7SkPaint11measureTextEPKvj",             "_ZNK9mySkPaint11measureTextEPKvj", 0, 0},
-    // SkPaint::getTextWidths
+    // SkPaint::getTextWidths                       mySkPaint::getTextWidths
     { "_ZNK7SkPaint13getTextWidthsEPKvjPfP6SkRect", "_ZNK9mySkPaint13getTextWidthsEPKvjPfP6SkRect", 0, 0}
 };
 
diff --git a/contrib/android/jni/loadgr_jni.cpp b/contrib/android/jni/loadgr_jni.cpp
index 5b6e3b0..64a9ecf 100644
--- a/contrib/android/jni/loadgr_jni.cpp
+++ b/contrib/android/jni/loadgr_jni.cpp
@@ -180,7 +180,7 @@ extern "C" jobject Java_org_sil_palaso_Graphite_addFontResource( JNIEnv *env, jo
     f->next = myfonts;
     f->tf = tf;
     f->name = rtl ? "" : name;
-    f->rtl = rtl ? 3 : 0;
+    f->rtl = rtl ? 7 : 0;
     if (!gFTLibrary && FT_Init_FreeType(&gFTLibrary))
     {
         delete f->tf;
diff --git a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
index 57ee06a..b1a3922 100644
--- a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
+++ b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
@@ -47,7 +47,7 @@ public class HelloWorld extends Activity {
     	TextView tv;
     	WebView wv;
 //    	String s = "မဂင်္ဂလာ|မဘ္ဘာ၊ ဤကဲ့|သို့|ရာ|ဇ|ဝင်|တင်|မည့် ကြေ|ညာ|ချက်|ကို ပြု|လုပ်|ပြီး|နောက် ဤညီ|လာ|ခံ|အ|စည်း|အ|ဝေး|ကြီး|က ကမ္ဘာ့|ကု|လ|သ|မဂ္ဂ|အ|ဖွဲ့|ဝင် နိုင်|ငံ အား|လုံး|အား ထို|ကြေ|ညာ|စာ|တမ်း|ကြီး၏ စာ|သား|ကို|အ|များ|ပြည်|သူ|တို့ ကြား|သိ|စေ|ရန် ကြေ|ညာ|ပါ|မည့် အ|ကြောင်း|ကို|လည်း|ကောင်း၊ ထို့|ပြင်|နိုင်|ငံ|များ၊ သို့|တည်း|မ|ဟုတ် နယ်|မြေ|များ၏ နိုင်|ငံ|ရေး အ|ဆင့်|အ|တ|န်း|ကို လိုက်၍ ခွဲ|ခြား|ခြင်း မ|ပြု|ဘဲ|အ|ဓိ|က|အား|ဖြင့် စာ|သင်|ကျောင်း|များ|နှင့် အ|ခြား|ပ|ညာ|ရေး အ|ဖွဲ့|အ|စည်း|များ|တ [...]
-    	String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة البشرية وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم.";
+    	String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة (البشرية) وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم.";
     	String w = "\uFEFF<html><body style=\"font-family: Scheh\">Test: "
                                     + s + "</body></html>";                     // <3>
     	
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0d50933..d47ce04 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(graphite2 SHARED
     Slot.cpp
     Sparse.cpp
     TtfUtil.cpp
+    UtfCodec.cpp
     XmlTraceLog.cpp
     XmlTraceLogTags.cpp)
 
diff --git a/src/CmapCache.cpp b/src/CmapCache.cpp
index 0fc85ee..dc2e43e 100644
--- a/src/CmapCache.cpp
+++ b/src/CmapCache.cpp
@@ -39,8 +39,8 @@ CmapCache::CmapCache(const void* cmapTable, size_t length)
     const void * table310 = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, length);
     m_isBmpOnly = (!table310);
     int rangeKey = 0;
-    unsigned int codePoint = 0;
-    unsigned int prevCodePoint = 0;
+    uint32 	codePoint = 0,
+    		prevCodePoint = 0;
     if (table310 && TtfUtil::CheckCmap310Subtable(table310))
     {
         m_blocks = grzeroalloc<uint16*>(0x1100);
@@ -92,12 +92,50 @@ CmapCache::CmapCache(const void* cmapTable, size_t length)
     }
 }
 
-CmapCache::~CmapCache()
+CmapCache::~CmapCache() throw()
 {
     unsigned int numBlocks = (m_isBmpOnly)? 0x100 : 0x1100;
     for (unsigned int i = 0; i < numBlocks; i++)
     	free(m_blocks[i]);
     free(m_blocks);
-    m_blocks = NULL;
+}
+
+uint16 CmapCache::operator [] (const uint32 usv) const throw()
+{
+    if ((m_isBmpOnly && usv > 0xFFFF) || (usv > 0x10FFFF))
+        return 0;
+    const uint32 block = 0xFFFF & (usv >> 8);
+    if (m_blocks[block])
+        return m_blocks[block][usv & 0xFF];
+    return 0;
+};
+
+CmapCache::operator bool() const throw()
+{
+	return m_blocks;
+}
+
+
+DirectCmap::DirectCmap(const void* cmap, size_t length)
+{
+    _ctable = TtfUtil::FindCmapSubtable(cmap, 3, 1, length);
+    if (!_ctable || !TtfUtil::CheckCmap31Subtable(_ctable))
+    {
+        _ctable =  0;
+        return;
+    }
+    _stable = TtfUtil::FindCmapSubtable(cmap, 3, 10, length);
+    if (_stable && !TtfUtil::CheckCmap310Subtable(_stable))
+    	_stable = 0;
+}
+
+uint16 DirectCmap::operator [] (const uint32 usv) const throw()
+{
+    return usv > 0xFFFF ? (_stable ? TtfUtil::Cmap310Lookup(_stable, usv) : 0) : TtfUtil::Cmap31Lookup(_ctable, usv);
+}
+
+DirectCmap::operator bool () const throw()
+{
+	return _ctable;
 }
 
diff --git a/src/CmapCache.h b/src/CmapCache.h
index dc1603b..1facde3 100644
--- a/src/CmapCache.h
+++ b/src/CmapCache.h
@@ -26,24 +26,45 @@ of the License or (at your option) any later version.
 */
 #pragma once
 
-#include <graphite2/Types.h>
+#include <Main.h>
 
 namespace graphite2 {
 
-class CmapCache
+class Face;
+
+class Cmap
+{
+public:
+	virtual ~Cmap() throw() {}
+
+	virtual uint16 operator [] (const uint32) const throw() { return 0; }
+
+	virtual operator bool () const throw() { return false; }
+
+	CLASS_NEW_DELETE;
+};
+
+class DirectCmap : public Cmap
+{
+public:
+	DirectCmap(const void* cmap, size_t length);
+	virtual uint16 operator [] (const uint32 usv) const throw();
+	virtual operator bool () const throw();
+
+    CLASS_NEW_DELETE;
+private:
+    const void *_stable,
+    		   *_ctable;
+};
+
+class CmapCache : public Cmap
 {
 public:
-    CmapCache(const void * cmapTable, size_t length);
-    ~CmapCache();
-    uint16 lookup(unsigned int unicode) const {
-        if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF))
-            return 0;
-        unsigned int block = (0xFFFFFF & unicode) >> 8;
-        if (m_blocks && m_blocks[block])
-            return m_blocks[block][unicode & 0xFF];
-        return 0;
-    };
-    CLASS_NEW_DELETE
+	CmapCache(const void * cmapTable, size_t length);
+	virtual ~CmapCache() throw();
+	virtual uint16 operator [] (const uint32 usv) const throw();
+	virtual operator bool () const throw();
+    CLASS_NEW_DELETE;
 private:
     bool m_isBmpOnly;
     uint16 ** m_blocks;
diff --git a/src/Face.cpp b/src/Face.cpp
index 0c0af0c..060a4da 100644
--- a/src/Face.cpp
+++ b/src/Face.cpp
@@ -40,10 +40,10 @@ using namespace graphite2;
 Face::~Face()
 {
     delete m_pGlyphFaceCache;
-    delete m_cmapCache;
+    delete m_cmap;
     delete[] m_silfs;
     m_pGlyphFaceCache = NULL;
-    m_cmapCache = NULL;
+    m_cmap = NULL;
     m_silfs = NULL;
     delete m_pFileFace;
     delete m_pNames;
@@ -58,13 +58,18 @@ bool Face::readGlyphs(unsigned int faceOptions)
 
     m_pGlyphFaceCache = GlyphFaceCache::makeCache(hdr);
     if (!m_pGlyphFaceCache) return false;
+
+    size_t length = 0;
+    const byte * table = getTable(Tag::cmap, &length);
+    if (!table) return false;
+
     if (faceOptions & gr_face_cacheCmap)
-    {
-        size_t length = 0;
-        const byte * table = getTable(Tag::cmap, &length);
-        if (!table) return false;
-        m_cmapCache = new CmapCache(table, length);
-    }
+    	m_cmap = new CmapCache(table, length);
+    else
+    	m_cmap = new DirectCmap(table, length);
+
+    if (!m_cmap || !*m_cmap) return false;
+
     if (faceOptions & gr_face_preloadGlyphs)
     {
         m_pGlyphFaceCache->loadAllGlyphs();
diff --git a/src/Face.h b/src/Face.h
index b1a0fab..461a1bc 100644
--- a/src/Face.h
+++ b/src/Face.h
@@ -46,7 +46,7 @@ namespace graphite2 {
 class Segment;
 class FeatureVal;
 class NameTable;
-class CmapCache;
+class Cmap;
 
 using TtfUtil::Tag;
 
@@ -116,7 +116,7 @@ public:
 public:
     Face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn getTable2) : 
         m_appFaceHandle(appFaceHandle), m_getTable(getTable2), m_pGlyphFaceCache(NULL),
-        m_cmapCache(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL),
+        m_cmap(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL),
         m_pNames(NULL) {}
     virtual ~Face();
 public:
@@ -142,7 +142,7 @@ public:
 
     const GlyphFaceCache* getGlyphFaceCache() const { return m_pGlyphFaceCache; }      //never NULL
     void takeFileFace(FileFace* pFileFace/*takes ownership*/);
-    CmapCache * getCmapCache() const { return m_cmapCache; };
+    Cmap & cmap() const { return *m_cmap; };
     NameTable * nameTable() const;
     uint16 languageForLocale(const char * locale) const;
 
@@ -156,7 +156,7 @@ private:
     // unsigned short m_readglyphs;    // how many glyphs have we in m_glyphs?
     // unsigned short m_capacity;      // how big is m_glyphs
     mutable GlyphFaceCache* m_pGlyphFaceCache;      //owned - never NULL
-    mutable CmapCache* m_cmapCache; // cmap cache if available
+    mutable Cmap * m_cmap; // cmap cache if available
     unsigned short m_upem;          // design units per em
 protected:
     unsigned short m_numSilf;       // number of silf subtables in the silf table
diff --git a/src/Main.h b/src/Main.h
index 1cfd809..29dc4a7 100644
--- a/src/Main.h
+++ b/src/Main.h
@@ -63,7 +63,7 @@ template <typename T> T * grzeroalloc(size_t n)
     void operator delete   (void * p) throw() { free(p);} \
     void operator delete   (void *, void *) throw() {} \
     void operator delete[] (void * p)throw() { free(p); } \
-    void operator delete[] (void *, void *) throw() {} \
+    void operator delete[] (void *, void *) throw() {}
 
 #ifdef __GNUC__
 #define GR_MAYBE_UNUSED __attribute__((unused))
diff --git a/src/NameTable.cpp b/src/NameTable.cpp
index c471767..e82a5f7 100644
--- a/src/NameTable.cpp
+++ b/src/NameTable.cpp
@@ -28,8 +28,7 @@ of the License or (at your option) any later version.
 #include "Endian.h"
 
 #include "NameTable.h"
-#include "processUTF.h"
-
+#include "UtfCodec.h"
 
 using namespace graphite2;
 
@@ -146,42 +145,37 @@ void* NameTable::getName(uint16& languageId, uint16 nameId, gr_encform enc, uint
         return NULL;
     }
     utf16Length >>= 1; // in utf16 units
-    uint16 * utf16Name = gralloc<uint16>(utf16Length + 1);
+    utf16::codeunit_t * utf16Name = gralloc<utf16::codeunit_t>(utf16Length);
     const uint8* pName = m_nameData + offset;
     for (size_t i = 0; i < utf16Length; i++)
     {
         utf16Name[i] = be::read<uint16>(pName);
     }
-    utf16Name[utf16Length] = 0;
-    if (enc == gr_utf16)
+    switch (enc)
     {
-        length = utf16Length;
-        return utf16Name;
-    }
-    else if (enc == gr_utf8)
+    case gr_utf8:
     {
-        uint8* uniBuffer = gralloc<uint8>(3 * utf16Length + 1);
-        ToUtf8Processor processor(uniBuffer, 3 * utf16Length + 1);
-        IgnoreErrors ignore;
-        BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length));
-        processUTF<BufferLimit, ToUtf8Processor, IgnoreErrors>(bufferLimit, &processor, &ignore);
-        length = processor.bytesProcessed();
-        uniBuffer[processor.bytesProcessed()] = 0;
-        free(utf16Name);
+    	utf8::codeunit_t* uniBuffer = gralloc<utf8::codeunit_t>(3 * utf16Length + 1);
+        utf8::iterator d = uniBuffer;
+        for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+        	*d = *s;
+        length = d - uniBuffer;
+        uniBuffer[length] = 0;
         return uniBuffer;
     }
-    else if (enc == gr_utf32)
+    case gr_utf16:
+    	length = utf16Length;
+    	return utf16Name;
+    case gr_utf32:
     {
-        uint32 * uniBuffer = gralloc<uint32>(utf16Length  + 1);
-        IgnoreErrors ignore;
-        BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length));
-
-        ToUtf32Processor processor(uniBuffer, utf16Length);
-        processUTF(bufferLimit, &processor, &ignore);
-        length = processor.charsProcessed();
-        uniBuffer[length] = 0;
-        free(utf16Name);
-        return uniBuffer;
+    	utf32::codeunit_t * uniBuffer = gralloc<utf32::codeunit_t>(utf16Length  + 1);
+		utf32::iterator d = uniBuffer;
+		for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+			*d = *s;
+		length = d - uniBuffer;
+		uniBuffer[length] = 0;
+		return uniBuffer;
+    }
     }
     length = 0;
     return NULL;
diff --git a/src/SegCacheStore.cpp b/src/SegCacheStore.cpp
index 1ce0aa1..5e13c1f 100644
--- a/src/SegCacheStore.cpp
+++ b/src/SegCacheStore.cpp
@@ -40,26 +40,9 @@ SegCacheStore::SegCacheStore(const Face *face, unsigned int numSilf, size_t maxS
     assert(face);
     assert(face->getGlyphFaceCache());
     m_maxCmapGid = face->getGlyphFaceCache()->numGlyphs();
-    if (face->getCmapCache())
-    {
-        m_spaceGid = face->getCmapCache()->lookup(0x20);
-        m_zwspGid = face->getCmapCache()->lookup(0x200B);
-    }
-    else
-    {
-        size_t cmapSize = 0;
-        const void * cmapTable = face->getTable(Tag::cmap, &cmapSize);
-        const void * bmpTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 1, cmapSize);
-        //const void * supplementaryTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, cmapSize);
 
-        if (bmpTable)
-        {
-            m_spaceGid = TtfUtil::Cmap31Lookup(bmpTable, 0x20);
-            m_zwspGid = TtfUtil::Cmap31Lookup(bmpTable, 0x200B);
-            // TODO find out if the Cmap(s) can be parsed to find a m_maxCmapGid < num_glyphs
-            // The Pseudo glyphs may mean that it isn't worth the effort
-        }
-    }
+    m_spaceGid = face->cmap()[0x20];
+    m_zwspGid = face->cmap()[0x200B];
 }
 
 #endif
diff --git a/src/Segment.cpp b/src/Segment.cpp
index 1fb97e0..b860643 100644
--- a/src/Segment.cpp
+++ b/src/Segment.cpp
@@ -24,7 +24,7 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
 License, as published by the Free Software Foundation, either version 2
 of the License or (at your option) any later version.
 */
-#include "processUTF.h"
+#include "UtfCodec.h"
 #include <string.h>
 #include <stdlib.h>
 
@@ -425,105 +425,33 @@ void Segment::logSegment() const
 
 #endif
 
-
-
-class SlotBuilder
+template <typename utf_iter>
+inline void process_utf_data(Segment & seg, const Face & face, const int fid, utf_iter c, size_t n_chars)
 {
-public:
-      SlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2)
-      :	  m_face(face2), 
-	  m_pDest(pDest2), 
-	  m_ctable(NULL),
-	  m_stable(NULL),
-	  m_fid(pDest2->addFeatures(*pFeats)),
-	  m_nCharsProcessed(0) 
-      {
-          size_t cmapSize = 0;
-          const void * table = face2->getTable(Tag::cmap, &cmapSize);
-          if (!table) return;
-          m_ctable = TtfUtil::FindCmapSubtable(table, 3, 1, cmapSize);
-          if (!m_ctable || !TtfUtil::CheckCmap31Subtable(m_ctable))
-          {
-              m_ctable = NULL;
-              return;
-          }
-          m_stable = TtfUtil::FindCmapSubtable(table, 3, 10, cmapSize);
-          if (m_stable && !TtfUtil::CheckCmap310Subtable(m_stable)) m_stable = NULL;
-      }
-
-      bool processChar(uint32 cid/*unicode character*/, size_t coffset)		//return value indicates if should stop processing
-      {
-          if (!m_ctable) return false;
-          uint16 gid = cid > 0xFFFF ? (m_stable ? TtfUtil::Cmap310Lookup(m_stable, cid) : 0) : (m_ctable ? TtfUtil::Cmap31Lookup(m_ctable, cid) : 0);
-          if (!gid)
-              gid = m_face->findPseudo(cid);
-          m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset);
-          ++m_nCharsProcessed;
-          return true;
-      }
-
-      size_t charsProcessed() const { return m_nCharsProcessed; }
-
-private:
-      const Face *m_face;
-      Segment *m_pDest;
-      const void *   m_ctable;
-      const void *   m_stable;
-      const unsigned int m_fid;
-      size_t m_nCharsProcessed ;
-};
-
-class CachedSlotBuilder
-{
-public:
-    CachedSlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2)
-    :  m_face(face2),
-    m_cmap(face2->getCmapCache()),
-    m_pDest(pDest2),
-    m_breakAttr(pDest2->silf()->aBreak()),
-    m_fid(pDest2->addFeatures(*pFeats)),
-    m_nCharsProcessed(0)
-    {
-    }
-
-    bool processChar(uint32 cid/*unicode character*/, size_t coffset)     //return value indicates if should stop processing
-    {
-        if (!m_cmap) return false;
-        uint16 gid = m_cmap->lookup(cid);
-        if (!gid)
-            gid = m_face->findPseudo(cid);
-        //int16 bw = m_face->glyphAttr(gid, m_breakAttr);
-        m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset);
-        ++m_nCharsProcessed;
-        return true;
-    }
-
-      size_t charsProcessed() const { return m_nCharsProcessed; }
-
-private:
-      const Face *m_face;
-      const CmapCache *m_cmap;
-      Segment *m_pDest;
-      uint8 m_breakAttr;
-      const unsigned int m_fid;
-      size_t m_nCharsProcessed ;
-};
+	const Cmap    & cmap = face.cmap();
+	int slotid = 0;
+
+	const typename utf_iter::codeunit_type * const base = c;
+	for (; n_chars; --n_chars, ++c, ++slotid)
+	{
+		const uint32 usv = *c;
+		uint16 gid = cmap[usv];
+		if (!gid)	gid = face.findPseudo(usv);
+		seg.appendSlot(slotid, usv, gid, fid, c - base);
+	}
+}
 
 void Segment::read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars)
 {
-    assert(pFeats);
-    CharacterCountLimit limit(enc, pStart, nChars);
-    IgnoreErrors ignoreErrors;
-    if (face->getCmapCache())
-    {
-        CachedSlotBuilder slotBuilder(face, pFeats, this);
-        processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors);
-    }
-    else
-    {
-        SlotBuilder slotBuilder(face, pFeats, this);
-        processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors);
-    }
+	assert(face);
+	assert(pFeats);
+
+	switch (enc)
+	{
+	case gr_utf8:	process_utf_data(*this, *face, addFeatures(*pFeats), utf8::const_iterator(pStart), nChars); break;
+	case gr_utf16:	process_utf_data(*this, *face, addFeatures(*pFeats), utf16::const_iterator(pStart), nChars); break;
+	case gr_utf32:	process_utf_data(*this, *face, addFeatures(*pFeats), utf32::const_iterator(pStart), nChars); break;
+	}
 }
 
 void Segment::prepare_pos(const Font * /*font*/)
diff --git a/src/CmapCache.h b/src/UtfCodec.cpp
similarity index 68%
copy from src/CmapCache.h
copy to src/UtfCodec.cpp
index dc1603b..2064075 100644
--- a/src/CmapCache.h
+++ b/src/UtfCodec.cpp
@@ -15,8 +15,8 @@
 
     You should also have received a copy of the GNU Lesser General Public
     License along with this library in the file named "LICENSE".
-    If not, write to the Free Software Foundation, 51 Franklin Street,
-    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+    If not, write to the Free Software Foundation, 51 Franklin Street, 
+    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the 
     internet at http://www.fsf.org/licenses/lgpl.html.
 
 Alternatively, the contents of this file may be used under the terms of the
@@ -24,29 +24,22 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
 License, as published by the Free Software Foundation, either version 2
 of the License or (at your option) any later version.
 */
-#pragma once
-
-#include <graphite2/Types.h>
+#include "UtfCodec.h"
+//using namespace graphite2;
 
 namespace graphite2 {
 
-class CmapCache
+}
+
+using namespace graphite2;
+
+const int8 _utf_codec<8>::sz_lut[16] =
 {
-public:
-    CmapCache(const void * cmapTable, size_t length);
-    ~CmapCache();
-    uint16 lookup(unsigned int unicode) const {
-        if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF))
-            return 0;
-        unsigned int block = (0xFFFFFF & unicode) >> 8;
-        if (m_blocks && m_blocks[block])
-            return m_blocks[block][unicode & 0xFF];
-        return 0;
-    };
-    CLASS_NEW_DELETE
-private:
-    bool m_isBmpOnly;
-    uint16 ** m_blocks;
+		1,1,1,1,1,1,1,1,	// 1 byte
+		0,0,0,0,  			// trailing byte
+		2,2,				// 2 bytes
+		3,					// 3 bytes
+		4					// 4 bytes
 };
 
-} // namespace graphite2
+const byte  _utf_codec<8>::mask_lut[5] = {0x7f, 0xff, 0x3f, 0x1f, 0x0f};
diff --git a/src/UtfCodec.h b/src/UtfCodec.h
new file mode 100644
index 0000000..5d5192c
--- /dev/null
+++ b/src/UtfCodec.h
@@ -0,0 +1,208 @@
+/*  GRAPHITE2 LICENSING
+
+    Copyright 2010, SIL International
+    All rights reserved.
+
+    This library is free software; you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published
+    by the Free Software Foundation; either version 2.1 of License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should also have received a copy of the GNU Lesser General Public
+    License along with this library in the file named "LICENSE".
+    If not, write to the Free Software Foundation, 51 Franklin Street,
+    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+    internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <iterator>
+#include "Main.h"
+#include "graphite2/Segment.h"
+
+namespace graphite2 {
+
+typedef uint32 	uchar_t;
+
+template <int N>
+struct _utf_codec
+{
+	typedef	uchar_t	codeunit_t;
+
+	static void 	put(codeunit_t * cp, const uchar_t , int8 & len) throw();
+	static uchar_t	get(const codeunit_t * cp, int8 & len) throw();
+};
+
+
+template <>
+struct _utf_codec<32>
+{
+private:
+	static const uchar_t	limit = 0x110000;
+public:
+	typedef	uint32	codeunit_t;
+
+	inline
+	static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+	{
+		*cp = usv; l = 1;
+	}
+
+	inline
+	static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+	{
+		if (cp[0] < limit)	{ l = 1;  return cp[0]; }
+		else				{ l = -1; return 0xFFFD; }
+	}
+};
+
+
+template <>
+struct _utf_codec<16>
+{
+private:
+	static const int32	lead_offset		 = 0xD800 - (0x10000 >> 10);
+	static const int32	surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00;
+public:
+	typedef	uint16	codeunit_t;
+
+	inline
+	static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+	{
+		if (usv < 0x10000)	{ l = 1; cp[0] = codeunit_t(usv); }
+		else
+		{
+			cp[0] = codeunit_t(lead_offset + (usv >> 10));
+			cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF));
+			l = 2;
+		}
+	}
+
+	inline
+	static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+	{
+		const uint32	uh = cp[0];
+		l = 1;
+
+		if (0xD800 > uh || uh > 0xDFFF)	{ return uh; }
+		const uint32 ul = cp[1];
+		if (uh > 0xDBFF || 0xDC00 > ul || ul > 0xDFFF) { l = -1; return 0xFFFD; }
+		++l;
+		return (uh<<10) + ul + surrogate_offset;
+	}
+};
+
+
+template <>
+struct _utf_codec<8>
+{
+private:
+	static const int8 sz_lut[16];
+	static const byte mask_lut[5];
+
+
+public:
+	typedef	uint8	codeunit_t;
+
+	inline
+	static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+	{
+		if (usv < 0x80)		{l = 1; cp[0] = usv; return; }
+        if (usv < 0x0800)	{l = 2; cp[0] = 0xC0 + (usv >> 6);  cp[1] = 0x80 + (usv & 0x3F); return; }
+        if (usv < 0x10000)	{l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F);  cp[2] = 0x80 + (usv & 0x3F); return; }
+        else				{l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; }
+ 	}
+
+	inline
+	static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+	{
+		const int8 seq_sz = sz_lut[*cp >> 4];
+		uchar_t	u = *cp & mask_lut[seq_sz];
+		l = 1;
+		bool toolong = false;
+
+		switch(seq_sz) {
+			case 4:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong  = (u < 0x10);
+			case 3:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20);
+			case 2:     u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80);
+			case 1:		break;
+			case 0:     l = -1; return 0xFFFD;
+		}
+
+		if (l != seq_sz || toolong)
+		{
+			l = -l;
+			return 0xFFFD;
+		}
+		return u;
+	}
+};
+
+
+template <typename C>
+class _utf_iterator
+{
+	typedef _utf_codec<sizeof(C)*8>	codec;
+
+	C 	  		  * cp;
+	mutable int8	sl;
+
+public:
+	typedef C 			codeunit_type;
+	typedef uchar_t		value_type;
+	typedef uchar_t	  * pointer;
+
+	class reference
+	{
+		const _utf_iterator & _i;
+
+		reference(const _utf_iterator & i): _i(i) {}
+	public:
+		operator value_type () const throw () 					{ return codec::get(_i.cp, _i.sl); }
+		reference & operator = (const value_type usv) throw() 	{ codec::put(_i.cp, usv, _i.sl); return *this; }
+
+		friend class _utf_iterator;
+	};
+
+
+	_utf_iterator(const void * us=0)	: cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { }
+
+	_utf_iterator   & operator ++ () 	{ cp += abs(sl); return *this; }
+	_utf_iterator 	operator ++ (int) 	{ _utf_iterator tmp(*this); operator++(); return tmp; }
+
+	bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; }
+	bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); }
+
+	reference 	operator * () const throw() { return *this; }
+	pointer		operator ->() const throw() { return &operator *(); }
+
+	operator codeunit_type * () const throw() { return cp; }
+
+	bool error() const throw()	{ return sl < 1; }
+};
+
+template <typename C>
+struct utf
+{
+	typedef	typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t;
+
+	typedef _utf_iterator<C>		iterator;
+	typedef _utf_iterator<const C>	const_iterator;
+};
+
+
+typedef utf<uint32>	utf32;
+typedef utf<uint16>	utf16;
+typedef utf<uint8>	utf8;
+
+} // namespace graphite2
diff --git a/src/files.mk b/src/files.mk
index 9504e60..355e374 100644
--- a/src/files.mk
+++ b/src/files.mk
@@ -63,7 +63,8 @@ $(_NS)_SOURCES = \
     $($(_NS)_BASE)/src/Silf.cpp \
     $($(_NS)_BASE)/src/Slot.cpp \
     $($(_NS)_BASE)/src/Sparse.cpp \
-    $($(_NS)_BASE)/src/TtfUtil.cpp
+    $($(_NS)_BASE)/src/TtfUtil.cpp \
+    $($(_NS)_BASE)/src/UtfCodec.cpp
 
 $(_NS)_PRIVATE_HEADERS = \
     $($(_NS)_BASE)/src/CachedFace.h \
@@ -96,6 +97,7 @@ $(_NS)_PRIVATE_HEADERS = \
     $($(_NS)_BASE)/src/Sparse.h \
     $($(_NS)_BASE)/src/TtfTypes.h \
     $($(_NS)_BASE)/src/TtfUtil.h \
+    $($(_NS)_BASE)/src/UtfCodec.h \
     $($(_NS)_BASE)/src/XmlTraceLog.h \
     $($(_NS)_BASE)/src/XmlTraceLogTags.h 
 
diff --git a/src/gr_segment.cpp b/src/gr_segment.cpp
index f6cf52d..0c1d6d4 100644
--- a/src/gr_segment.cpp
+++ b/src/gr_segment.cpp
@@ -25,46 +25,13 @@ License, as published by the Free Software Foundation, either version 2
 of the License or (at your option) any later version.
 */
 #include "graphite2/Segment.h"
-#include "processUTF.h"
+#include "UtfCodec.h"
 #include "Segment.h"
 
 using namespace graphite2;
 
 namespace 
 {
-  template <class LIMIT, class CHARPROCESSOR>
-  size_t doCountUnicodeCharacters(const LIMIT& limit, CHARPROCESSOR* pProcessor, const void** pError)
-  {
-      BreakOnError breakOnError;
-      
-      processUTF(limit/*when to stop processing*/, pProcessor, &breakOnError);
-      if (pError) {
-          *pError = breakOnError.m_pErrorPos;
-      }
-      return pProcessor->charsProcessed();
-  }
-
-  class CharCounterToNul
-  {
-  public:
-        CharCounterToNul()
-        :	  m_nCharsProcessed(0) 
-        {
-        }	  
-
-        bool processChar(uint32 cid/*unicode character*/, size_t /*offset*/)		//return value indicates if should stop processing
-        {
-            if (cid==0)
-                return false;
-            ++m_nCharsProcessed;
-            return true;
-        }
-
-        size_t charsProcessed() const { return m_nCharsProcessed; }
-
-  private:
-        size_t m_nCharsProcessed ;
-  };
 
   gr_segment* makeAndInitialize(const Font *font, const Face *face, uint32 script, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars, int dir)
   {
@@ -92,23 +59,43 @@ namespace
 }
 
 
-extern "C" {
+template <typename utf_iter>	// utf_iter: an iterator type exposing operator*, operator++ and error()
+inline size_t count_unicode_chars(utf_iter first, const utf_iter last, const void **error)
+{	// Returns how many values were read from first before a zero value, an iterator error, or last stopped the scan.
+	size_t n_chars = 0;
+	uint32 usv = 0;
+	// A null last means no end position was supplied: only a zero value or an error ends the scan.
+	if (last)
+	{
+		for (;first != last; ++first, ++n_chars)
+			if ((usv = *first) == 0 || first.error()) break;	// the zero/erroring value itself is not counted
+	}
+	else
+	{
+		while ((usv = *first) != 0 && !first.error())
+		{
+			++first;
+			++n_chars;
+		}
+	}
+	// When requested, report where the scan stood if the iterator flagged an error, otherwise null.
+	if (error)	*error = first.error() ? first : 0;
+	return n_chars;
+}
 
+extern "C" {
 
 size_t gr_count_unicode_characters(gr_encform enc, const void* buffer_begin, const void* buffer_end/*don't go on or past end, If NULL then ignored*/, const void** pError)   //Also stops on nul. Any nul is not in the count
 {
-  if (buffer_end)
-  {
-    BufferLimit limit(enc, buffer_begin, buffer_end);
-    CharCounterToNul counter;
-    return doCountUnicodeCharacters(limit, &counter, pError);
-  }
-  else
-  {
-    NoLimit limit(enc, buffer_begin);
-    CharCounterToNul counter;
-    return doCountUnicodeCharacters(limit, &counter, pError);
-  }
+	assert(buffer_begin);	// a null start pointer is treated as a caller error
+	// Dispatch on the encoding form to count_unicode_chars instantiated with the matching const_iterator.
+	switch (enc)
+	{
+	case gr_utf8:	return count_unicode_chars<utf8::const_iterator>(buffer_begin, buffer_end, pError); break;
+	case gr_utf16:	return count_unicode_chars<utf16::const_iterator>(buffer_begin, buffer_end, pError); break;
+	case gr_utf32:	return count_unicode_chars<utf32::const_iterator>(buffer_begin, buffer_end, pError); break;
+	default:		return 0;	// NOTE(review): unrecognised enc returns 0 and leaves *pError unwritten
+	}
 }
 
 
diff --git a/src/processUTF.h b/src/processUTF.h
deleted file mode 100644
index 18c5e6a..0000000
--- a/src/processUTF.h
+++ /dev/null
@@ -1,494 +0,0 @@
-/*  GRAPHITE2 LICENSING
-
-    Copyright 2010, SIL International
-    All rights reserved.
-
-    This library is free software; you can redistribute it and/or modify
-    it under the terms of the GNU Lesser General Public License as published
-    by the Free Software Foundation; either version 2.1 of License, or
-    (at your option) any later version.
-
-    This program is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should also have received a copy of the GNU Lesser General Public
-    License along with this library in the file named "LICENSE".
-    If not, write to the Free Software Foundation, 51 Franklin Street, 
-    Suite 500, Boston, MA 02110-1335, USA or visit their web page on the 
-    internet at http://www.fsf.org/licenses/lgpl.html.
-
-Alternatively, the contents of this file may be used under the terms of the
-Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
-License, as published by the Free Software Foundation, either version 2
-of the License or (at your option) any later version.
-*/
-#pragma once 
-
-#include "Main.h"
-#include "graphite2/Segment.h"
-
-namespace graphite2 {
-
-class NoLimit		//relies on the processor.processChar() failing, such as because of a terminating nul character
-{
-public:
-    NoLimit(gr_encform enc2, const void* pStart2) : m_enc(enc2), m_pStart(pStart2) {}
-    gr_encform enc() const { return m_enc; }
-    const void* pStart() const { return m_pStart; }
-
-    bool inBuffer(const void* /*pCharLastSurrogatePart*/, uint32 /*val*/) const { return true; }
-    bool needMoreChars(const void* /*pCharStart*/, size_t /*nProcessed*/) const { return true; }
-    
-private:
-    gr_encform m_enc;
-    const void* m_pStart;
-};
-
-
-class CharacterCountLimit
-{
-public:
-    CharacterCountLimit(gr_encform enc2, const void* pStart2, size_t numchars) : m_numchars(numchars), m_enc(enc2), m_pStart(pStart2) {}
-    gr_encform enc() const { return m_enc; }
-    const void* pStart() const { return m_pStart; }
-
-    bool inBuffer (const void* /*pCharLastSurrogatePart*/, uint32 val) const { return (val != 0); }
-    bool needMoreChars (const void* /*pCharStart*/, size_t nProcessed) const { return nProcessed<m_numchars; }
-    
-private:
-    size_t m_numchars;
-    gr_encform m_enc;
-    const void* m_pStart;
-};
-
-
-class BufferLimit
-{
-public:
-    BufferLimit(gr_encform enc2, const void* pStart2, const void* pEnd/*as in stl i.e. don't use end*/) : m_enc(enc2), m_pStart(pStart2) {
-	size_t nFullTokens = (static_cast<const char*>(pEnd)-static_cast<const char *>(m_pStart))/int(m_enc); //rounds off partial tokens
-	m_pEnd = static_cast<const char *>(m_pStart) + (nFullTokens*int(m_enc));
-    }
-    gr_encform enc() const { return m_enc; }
-    const void* pStart() const { return m_pStart; }
-  
-    bool inBuffer (const void* pCharLastSurrogatePart, uint32 /*val*/) const { return pCharLastSurrogatePart<m_pEnd; }	//also called on charstart by needMoreChars()
-
-    bool needMoreChars (const void* pCharStart, size_t /*nProcessed*/) const { return inBuffer(pCharStart, 1); }
-     
-private:
-    const void* m_pEnd;
-    gr_encform m_enc;
-    const void* m_pStart;
-};
-
-
-class IgnoreErrors
-{
-public:
-    //for all of the ignore* methods is the parameter is false, the return result must be true
-    static bool ignoreUnicodeOutOfRangeErrors(bool /*isBad*/) { return true; }
-    static bool ignoreBadSurrogatesErrors(bool /*isBad*/) { return true; }
-
-    static bool handleError(const void* /*pPositionOfError*/) { return true;}
-};
-
-
-class BreakOnError
-{
-public:
-    BreakOnError() : m_pErrorPos(NULL) {}
-    
-    //for all of the ignore* methods is the parameter is false, the return result must be true
-    static bool ignoreUnicodeOutOfRangeErrors(bool isBad) { return !isBad; }
-    static bool ignoreBadSurrogatesErrors(bool isBad) { return !isBad; }
-
-    bool handleError(const void* pPositionOfError) { m_pErrorPos=pPositionOfError; return false;}
-
-public:
-    const void* m_pErrorPos;
-};
-
-
-
-
-
-/*
-  const int utf8_extrabytes_lut[16] = {0,0,0,0,0,0,0,0,        // 1 byte
-                                          3,3,3,3,  // errors since trailing byte, catch later
-                                          1,1,            // 2 bytes
-                                          2,                 // 3 bytes
-                                          3};                // 4 bytes
-   quicker to implement directly:
-*/
-
-inline unsigned int utf8_extrabytes(const unsigned int topNibble) { return (0xE5FF0000>>(2*topNibble))&0x3; }
-
-inline unsigned int utf8_mask(const unsigned int seq_extra) { return ((0xFEC0>>(4*seq_extra))&0xF)<<4; }
-
-class Utf8Consumer
-{
-public:
-    Utf8Consumer(const uint8* pCharStart2) : m_pCharStart(pCharStart2) {}
-    
-    const uint8* pCharStart() const { return m_pCharStart; }
-
-private:
-    template <class ERRORHANDLER>
-    bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) {       //return value is if should stop parsing
-        *pRes = 0xFFFD;
-        if (!pErrHandler->handleError(m_pCharStart)) {
-            return false;
-        }                          
-        ++m_pCharStart; 
-        return true;
-    }
-    
-public:
-    template <class LIMIT, class ERRORHANDLER>
-    inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) {			//At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
-        const unsigned int seq_extra = utf8_extrabytes(*m_pCharStart >> 4);        //length of sequence including *m_pCharStart is 1+seq_extra
-        if (!limit.inBuffer(m_pCharStart+(seq_extra), *m_pCharStart)) {
-            return false;
-        }
-    
-        *pRes = *m_pCharStart ^ utf8_mask(seq_extra);
-        
-        if (seq_extra) {
-            switch(seq_extra) {    //hopefully the optimizer will implement this as a jump table. If not the above if should cover the majority case.    
-                case 3: {	
-                    if (pErrHandler->ignoreUnicodeOutOfRangeErrors(*m_pCharStart>=0xF8)) {		//the good case
-                        ++m_pCharStart;
-                        if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
-                            return respondToError(pRes, pErrHandler);
-                        }           
-                        
-                        *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F;		//drop through
-                    }
-                    else {
-                        return respondToError(pRes, pErrHandler);
-                    }		    
-                }
-                case 2: {
-                    ++m_pCharStart;
-                    if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
-                        return respondToError(pRes, pErrHandler);
-                    }
-                }           
-                *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F;       //drop through
-                case 1: {
-                    ++m_pCharStart;
-                    if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
-                        return respondToError(pRes, pErrHandler);
-                    }
-                }           
-                *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F;
-             }
-        }
-        ++m_pCharStart; 
-        return true;
-    }	
-  
-private:
-    const uint8 *m_pCharStart;
-};
-
-
-
-class Utf16Consumer
-{
-public:
-      Utf16Consumer(const uint16* pCharStart2) : m_pCharStart(pCharStart2) {}
-      
-      const uint16* pCharStart() const { return m_pCharStart; }
-  
-private:
-    template <class ERRORHANDLER>
-    bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) {       //return value is if should stop parsing
-        *pRes = 0xFFFD;
-        if (!pErrHandler->handleError(m_pCharStart)) {
-            return false;
-        }                          
-        ++m_pCharStart; 
-        return true;
-    }
-    
-public:
-      template <class LIMIT, class ERRORHANDLER>
-      inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler)			//At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
-      {
-	  *pRes = *m_pCharStart;
-      if (0xD800 > *pRes || !pErrHandler->ignoreUnicodeOutOfRangeErrors(*pRes >= 0xE000)) {
-          ++m_pCharStart;
-          return true;
-      }
-      
-      if (!pErrHandler->ignoreBadSurrogatesErrors(*pRes >= 0xDC00)) {        //second surrogate is incorrectly coming first
-          return respondToError(pRes, pErrHandler);
-      }
-
-      ++m_pCharStart;
-	  if (!limit.inBuffer(m_pCharStart, *pRes)) {
-	      return false;
-	  }
-
-	  uint32 ul = *(m_pCharStart);
-	  if (!pErrHandler->ignoreBadSurrogatesErrors(0xDC00 > ul || ul > 0xDFFF)) {
-          return respondToError(pRes, pErrHandler);
-	  }
-	  ++m_pCharStart;
-	  *pRes =  ((*pRes - 0xD800)<<10) + ul - 0xDC00;
-	  return true;
-      }
-
-private:
-      const uint16 *m_pCharStart;
-};
-
-
-class Utf32Consumer
-{
-public:
-      Utf32Consumer(const uint32* pCharStart2) : m_pCharStart(pCharStart2) {}
-      
-      const uint32* pCharStart() const { return m_pCharStart; }
-  
-private:
-    template <class ERRORHANDLER>
-    bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) {       //return value is if should stop parsing
-        *pRes = 0xFFFD;
-        if (!pErrHandler->handleError(m_pCharStart)) {
-            return false;
-        }                          
-        ++m_pCharStart; 
-        return true;
-    }
-
-public:
-      template <class LIMIT, class ERRORHANDLER>
-      inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler)			//At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
-      {
-	  *pRes = *m_pCharStart;
-      if (pErrHandler->ignoreUnicodeOutOfRangeErrors(!(*pRes<0xD800 || (*pRes>=0xE000 && *pRes<0x110000)))) {
-          if (!limit.inBuffer(++m_pCharStart, *pRes))
-            return false;
-          else
-            return true;
-      }
-      
-      return respondToError(pRes, pErrHandler);
-      }
-
-private:
-      const uint32 *m_pCharStart;
-};
-
-
-
-
-/* The following template function assumes that LIMIT and CHARPROCESSOR have the following methods and semantics:
-
-class LIMIT
-{
-public:
-    SegmentHandle::encform enc() const;		//which of the below overloads of inBuffer() and needMoreChars() are called
-    const void* pStart() const;			//start of first character to process
-  
-    bool inBuffer(const uint8* pCharLastSurrogatePart) const;	//whether or not the input is considered to be in the range of the buffer.
-    bool inBuffer(const uint16* pCharLastSurrogatePart) const;	//whether or not the input is considered to be in the range of the buffer.
-
-    bool needMoreChars(const uint8* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
-    bool needMoreChars(const uint16* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
-    bool needMoreChars(const uint32* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
-};
-
-class ERRORHANDLER
-{
-public:
-    //for all of the ignore* methods is the parameter is false, the return result must be true
-    bool ignoreUnicodeOutOfRangeErrors(bool isBad) const;
-    bool ignoreBadSurrogatesErrors(bool isBad) const;
-
-    bool handleError(const void* pPositionOfError);     //returns true iff error handled and should continue
-};
-
-class CHARPROCESSOR
-{
-public:
-    bool processChar(uint32 cid);		//return value indicates if should stop processing
-    size_t charsProcessed() const;	//number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars
-};
-
-Useful reusable examples of LIMIT are:
-NoLimit		//relies on the CHARPROCESSOR.processChar() failing, such as because of a terminating nul character
-CharacterCountLimit //doesn't care about where the input buffer may end, but limits the number of unicode characters processed.
-BufferLimit	//processes how ever many characters there are until the buffer end. characters straggling the end are not processed.
-BufferAndCharacterCountLimit //processes a maximum number of characters there are until the buffer end. characters straggling the end are not processed.
-
-Useful examples of ERRORHANDLER are IgnoreErrors, BreakOnError.
-*/
-
-template <class LIMIT, class CHARPROCESSOR, class ERRORHANDLER>
-void processUTF(const LIMIT& limit/*when to stop processing*/, CHARPROCESSOR* pProcessor, ERRORHANDLER* pErrHandler)
-{
-     uint32             cid;
-     switch (limit.enc()) {
-       case gr_utf8 : {
-        const uint8 *pInit = static_cast<const uint8 *>(limit.pStart());
-	    Utf8Consumer consumer(pInit);
-        for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
-            const uint8 *pCur = consumer.pCharStart();
-		    if (!consumer.consumeChar(limit, &cid, pErrHandler))
-		        break;
-		    if (!pProcessor->processChar(cid, pCur - pInit))
-		        break;
-        }
-        break; }
-       case gr_utf16: {
-        const uint16* pInit = static_cast<const uint16 *>(limit.pStart());
-        Utf16Consumer consumer(pInit);
-        for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
-            const uint16 *pCur = consumer.pCharStart();
-    		if (!consumer.consumeChar(limit, &cid, pErrHandler))
-	    	    break;
-		    if (!pProcessor->processChar(cid, pCur - pInit))
-		        break;
-            }
-	    break;
-        }
-       case gr_utf32 : default: {
-        const uint32 *pInit = static_cast<const uint32 *>(limit.pStart());
-	    Utf32Consumer consumer(pInit);
-        for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
-            const uint32 *pCur = consumer.pCharStart();
-		    if (!consumer.consumeChar(limit, &cid, pErrHandler))
-		        break;
-		    if (!pProcessor->processChar(cid, pCur - pInit))
-		        break;
-            }
-        break;
-        }
-    }
-}
-
-    class ToUtf8Processor
-    {
-    public:
-        // buffer length should be three times the utf16 length or
-        // four times the utf32 length to cover the worst case
-        ToUtf8Processor(uint8 * buffer, size_t maxLength) :
-            m_count(0), m_byteLength(0), m_maxLength(maxLength), m_buffer(buffer)
-        {}
-        bool processChar(uint32 cid, size_t /*offset*/)
-        {
-            // taken from Unicode Book ch3.9
-            if (cid <= 0x7F)
-                m_buffer[m_byteLength++] = cid;
-            else if (cid <= 0x07FF)
-            {
-                if (m_byteLength + 2 >= m_maxLength)
-                    return false;
-                m_buffer[m_byteLength++] = 0xC0 + (cid >> 6);
-                m_buffer[m_byteLength++] = 0x80 + (cid & 0x3F);
-            }
-            else if (cid <= 0xFFFF)
-            {
-                if (m_byteLength + 3 >= m_maxLength)
-                    return false;
-                m_buffer[m_byteLength++] = 0xE0 + (cid >> 12);
-                m_buffer[m_byteLength++] = 0x80 + ((cid & 0x0FC0) >> 6);
-                m_buffer[m_byteLength++] = 0x80 +  (cid & 0x003F);
-            }
-            else if (cid <= 0x10FFFF)
-            {
-                if (m_byteLength + 4 >= m_maxLength)
-                    return false;
-                m_buffer[m_byteLength++] = 0xF0 + (cid >> 18);
-                m_buffer[m_byteLength++] = 0x80 + ((cid & 0x3F000) >> 12);
-                m_buffer[m_byteLength++] = 0x80 + ((cid & 0x00FC0) >> 6);
-                m_buffer[m_byteLength++] = 0x80 +  (cid & 0x0003F);
-            }
-            else
-            {
-                // ignore
-            }
-            m_count++;
-            if (m_byteLength >= m_maxLength)
-                return false;
-            return true;
-        }
-        size_t charsProcessed() const { return m_count; }
-        size_t bytesProcessed() const { return m_byteLength; }
-    private:
-        size_t m_count;
-        size_t m_byteLength;
-        size_t m_maxLength;
-        uint8 * m_buffer;
-    };
-
-    class ToUtf16Processor
-    {
-    public:
-        // buffer length should be twice the utf32 length
-        // to cover the worst case
-        ToUtf16Processor(uint16 * buffer, size_t maxLength) :
-            m_count(0), m_uint16Length(0), m_maxLength(maxLength), m_buffer(buffer)
-        {}
-        bool processChar(uint32 cid, size_t /*offset*/)
-        {
-            // taken from Unicode Book ch3.9
-            if (cid <= 0xD800)
-                m_buffer[m_uint16Length++] = cid;
-            else if (cid < 0xE000)
-            {
-                // skip for now
-            }
-            else if (cid >= 0xE000 && cid <= 0xFFFF)
-                m_buffer[m_uint16Length++] = cid;
-            else if (cid <= 0x10FFFF)
-            {
-                if (m_uint16Length + 2 >= m_maxLength)
-                    return false;
-                m_buffer[m_uint16Length++] = 0xD800 + ((cid & 0xFC00) >> 10) + ((cid >> 16) - 1);
-                m_buffer[m_uint16Length++] = 0xDC00 + ((cid & 0x03FF) >> 12);
-            }
-            else
-            {
-                // ignore
-            }
-            m_count++;
-            if (m_uint16Length == m_maxLength)
-                return false;
-            return true;
-        }
-        size_t charsProcessed() const { return m_count; }
-        size_t uint16Processed() const { return m_uint16Length; }
-    private:
-        size_t m_count;
-        size_t m_uint16Length;
-        size_t m_maxLength;
-        uint16 * m_buffer;
-    };
-
-    class ToUtf32Processor
-    {
-    public:
-        ToUtf32Processor(uint32 * buffer, size_t maxLength) :
-            m_count(0), m_maxLength(maxLength), m_buffer(buffer) {}
-        bool processChar(uint32 cid, size_t /*offset*/)
-        {
-            m_buffer[m_count++] = cid;
-            if (m_count == m_maxLength)
-                return false;
-            return true;
-        }
-        size_t charsProcessed() const { return m_count; }
-    private:
-        size_t m_count;
-        size_t m_maxLength;
-        uint32 * m_buffer;
-    };
-
-} // namespace graphite2
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 541fb2e..4e9f7a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ add_subdirectory(nametabletest)
 add_subdirectory(examples)
 add_subdirectory(grlist)
 add_subdirectory(endian)
+add_subdirectory(utftest)
 
 enable_testing()
 
diff --git a/tests/segcache/segcachetest.cpp b/tests/segcache/segcachetest.cpp
index 25cea8f..3a4021c 100644
--- a/tests/segcache/segcachetest.cpp
+++ b/tests/segcache/segcachetest.cpp
@@ -27,7 +27,7 @@
 #include "Segment.h"
 #include "SegCache.h"
 #include "SegCacheStore.h"
-#include "processUTF.h"
+#include "UtfCodec.h"
 #include "TtfTypes.h"
 #include "TtfUtil.h"
 
@@ -35,24 +35,18 @@ using namespace graphite2;
 
 inline gr_face * api_cast(CachedFace *p) { return static_cast<gr_face*>(static_cast<Face*>(p)); }
 
-class CmapProcessor
+template <typename utf_itr>	// Test helper: maps n_chars values read from first to cmap lookups written into glyphs.
+void resolve_unicode_to_glyphs(const Face & face, utf_itr first, size_t n_chars, uint16 * glyphs)
 {
-public:
-    CmapProcessor(Face * face, uint16 * buffer) :
-        m_cmapTable(TtfUtil::FindCmapSubtable(face->getTable("cmap", NULL), 3, 1)),
-        m_buffer(buffer), m_pos(0) {};
-    bool processChar(uint32 cid, size_t /*offset*/)      //return value indicates if should stop processing
-    {
-        assert(cid < 0xFFFF); // only lower plane supported for this test
-        m_buffer[m_pos++] = TtfUtil::Cmap31Lookup(m_cmapTable, cid);
-        return true;
-    }
-    size_t charsProcessed() const { return m_pos; } //number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars
-private:
-    const void * m_cmapTable;
-    uint16 * m_buffer;
-    size_t m_pos;
-};
+	const void * cmap = TtfUtil::FindCmapSubtable(face.getTable("cmap", NULL), 3, 1);
+	// Writes exactly n_chars entries, so glyphs must have room for that many; iterator errors are not checked here.
+	for (; n_chars; --n_chars, ++first)
+	{
+		const uint32 usv = *first;
+		assert(usv < 0xFFFF); 	// only lower plane supported for this test
+		*glyphs++ = TtfUtil::Cmap31Lookup(cmap, usv);
+	}
+}
 
 bool checkEntries(CachedFace
  * face, const char * testString, uint16 * glyphString, size_t testLength)
@@ -120,10 +114,7 @@ bool testSeg(CachedFace
                                                     testString + strlen(testString),
                                                     &badUtf8);
     *testGlyphString = gralloc<uint16>(*testLength + 1);
-    CharacterCountLimit limit(gr_utf8, testString, *testLength);
-    CmapProcessor cmapProcessor(face, *testGlyphString);
-    IgnoreErrors ignoreErrors;
-    processUTF(limit, &cmapProcessor, &ignoreErrors);
+    resolve_unicode_to_glyphs(*face, utf8::iterator(testString), *testLength, *testGlyphString);
 
     gr_segment * segA = gr_make_seg(sizedFont, api_cast(face), 0, NULL, gr_utf8, testString,
                         *testLength, 0);
@@ -149,11 +140,8 @@ int main(int argc, char ** argv)
     }
     FILE * log = fopen("grsegcache.xml", "w");
     graphite_start_logging(log, GRLOG_SEGMENT);
-    CachedFace
- *face = static_cast<CachedFace
-*>(static_cast<Face
-*>(
-        (gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default))));
+    CachedFace *face = static_cast<CachedFace *>(static_cast<Face *>(
+        gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default)));
     if (!face)
     {
         fprintf(stderr, "Invalid font, failed to parse tables\n");
diff --git a/tests/utftest/CMakeLists.txt b/tests/utftest/CMakeLists.txt
new file mode 100644
index 0000000..5048202
--- /dev/null
+++ b/tests/utftest/CMakeLists.txt
@@ -0,0 +1,15 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0 FATAL_ERROR)
+project(utftest)
+include(Graphite)
+include_directories(../../src)
+# On Windows, define the SCL/CRT "secure" warning-suppression macros and UNICODE.
+if  (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+    add_definitions(-D_SCL_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_WARNINGS -DUNICODE)
+endif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+
+# target_objects is not a built-in CMake command; presumably supplied by include(Graphite) -- confirm.
+target_objects(OBJS graphite2)
+add_executable(utftest utftest.cpp ${OBJS})
+add_dependencies(utftest graphite2)
+# Register the built executable as the test named "utftest".
+add_test(NAME utftest COMMAND $<TARGET_FILE:utftest>)
diff --git a/tests/utftest/utftest.cpp b/tests/utftest/utftest.cpp
new file mode 100644
index 0000000..bdcc9e7
--- /dev/null
+++ b/tests/utftest/utftest.cpp
@@ -0,0 +1,56 @@
+#include <graphite2/Segment.h>
+#include <stdio.h>
+
+struct test
+{
+    int len,                // expected return value of gr_count_unicode_characters for str
+    	error;              // expected byte offset in str of the reported error; negative means no error is expected
+    unsigned char str[12];  // bytes passed to gr_count_unicode_characters as gr_utf8
+};
+struct test tests[] = {
+    { 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0,    0} },   // U+7F, U+7FF, U+FFFF, U+10FFFF
+    { 2,  3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} },   // U+7F, U+7FF, long(U+FFFF), U+10FFFF
+    { 1,  1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} },   // U+7F, long(U+7FF), U+FFFF, U+10FFFF
+    { 0,  0, {0xC1, 0xBF, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0xBF, 0xBF, 0xBF, 0} },   // long(U+7F), U+7FF, U+FFFF, U+10FFF
+    { 4, -1, {0x01, 0xC2, 0x80, 0xE0, 0xA0, 0x80, 0xF0, 0x90, 0x80, 0x80, 0,    0} },   // U+01, U+80, U+800, U+10000
+    { 1,  1, {0x65, 0x9F, 0x65, 0x65, 0,    0,    0,    0,    0,    0,    0,    0} },   // U+65 bad(1) U+65 U+65
+    { 2,  2, {0x65, 0x65, 0xC2, 0xC2, 0x65, 0x65, 0,    0,    0,    0,    0,    0} },   // U+65 U+65 bad(1) bad(1) U+65 U+65
+    { 2,  2, {0x65, 0x75, 0xE3, 0x84, 0x75, 0x75, 0,    0,    0,    0,    0,    0} },   // U+65 U+75 bad(2) U+75 U+75
+    { 2,  2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0x75, 0x75, 0,    0,    0,    0,    0} },   // U+65 U+75 bad(3) U+75 U+75
+    { 2,  2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0xF5, 0x75, 0,    0,    0,    0,    0} },   // U+65 U+75 bad(3) bad(1) U+75
+};
+
+const int numtests = sizeof(tests)/sizeof(test);
+
+int main(int argc, char * argv[]) {    // Runs every entry of tests[]; returns the 1-based index of the first failing test.
+    int i;
+    const void * error;
+    // For each test: count the characters in the whole str buffer, then check the error position and the count.
+    for (i = 0; i < numtests; ++i)
+    {
+        int res = gr_count_unicode_characters(gr_utf8, tests[i].str, tests[i].str + sizeof(tests[i].str), &error);
+        if (tests[i].error >= 0)
+        {
+            if (!error)
+            {
+                fprintf(stderr, "%s: test %d failed: expected error condition did not occur\n", argv[0], i + 1);
+                return (i+1);
+            }
+            else if (ptrdiff_t(error) - ptrdiff_t(tests[i].str) != tests[i].error)
+            {   // Report the expected error offset (tests[i].error), not the expected character count.
+                fprintf(stderr, "%s: test %d failed: error at codepoint %d expected at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str)), tests[i].error);
+                return (i+1);
+            }
+        }
+        else if (error)
+        {
+            fprintf(stderr, "%s: test %d failed: unexpected error occured at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str)));
+            return (i+1);
+        }
+        if (res != tests[i].len)
+        {
+            fprintf(stderr, "%s: test %d failed: character count failure %d != %d\n", argv[0], i + 1, res, tests[i].len);
+            return (i+1);
+        }
+    }
+}
diff --git a/tests/vm/CMakeLists.txt b/tests/vm/CMakeLists.txt
index f2c2a3e..d5abe62 100644
--- a/tests/vm/CMakeLists.txt
+++ b/tests/vm/CMakeLists.txt
@@ -33,6 +33,7 @@ target_objects(OBJS graphite2
     Slot.cpp
     Sparse.cpp
     TtfUtil.cpp
+    UtfCodec.cpp
     ${TRACESUPPORT})
 add_library(vm-test-common STATIC basic_test.cpp ${OBJS})
 add_dependencies(vm-test-common graphite2)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/graphite2.git


Reply to: