[graphite2] 15/69: Imported Upstream version 1.0.3.real
This is an automated email from the git hooks/post-receive script.
rene pushed a commit to branch master
in repository graphite2.
commit bf5fc4858bc9318688b5234551e738917ce8ae42
Author: Rene Engelhard <rene@debian.org>
Date: Thu Apr 21 14:48:42 2016 +0200
Imported Upstream version 1.0.3.real
---
.hg_archival.txt | 4 +-
.hgtags | 4 +
contrib/android/jni/Android.mk | 4 +-
contrib/android/jni/graphite/Android.mk | 2 +-
contrib/android/jni/graphite_layer.cpp | 6 +-
contrib/android/jni/loadgr_jni.cpp | 2 +-
.../src/org/sil/palaso/helloworld/HelloWorld.java | 2 +-
src/CMakeLists.txt | 1 +
src/CmapCache.cpp | 46 +-
src/CmapCache.h | 47 +-
src/Face.cpp | 21 +-
src/Face.h | 8 +-
src/Main.h | 2 +-
src/NameTable.cpp | 50 +--
src/SegCacheStore.cpp | 21 +-
src/Segment.cpp | 120 +----
src/{CmapCache.h => UtfCodec.cpp} | 37 +-
src/UtfCodec.h | 208 +++++++++
src/files.mk | 4 +-
src/gr_segment.cpp | 81 ++--
src/processUTF.h | 494 ---------------------
tests/CMakeLists.txt | 1 +
tests/segcache/segcachetest.cpp | 42 +-
tests/utftest/CMakeLists.txt | 15 +
tests/utftest/utftest.cpp | 56 +++
tests/vm/CMakeLists.txt | 1 +
26 files changed, 505 insertions(+), 774 deletions(-)
diff --git a/.hg_archival.txt b/.hg_archival.txt
index 2db55cc..2d78a37 100644
--- a/.hg_archival.txt
+++ b/.hg_archival.txt
@@ -1,5 +1,5 @@
repo: 999e2033695c3bcf2f65d611737ac9008805bd58
-node: cb735be7d86d894f0667cb63dffc4273fd53d9fe
+node: 418e55d88178b9bd870bab38be8768aecb743829
branch: default
latesttag: 1.0.3
-latesttagdistance: 2
+latesttagdistance: 1
diff --git a/.hgtags b/.hgtags
index 094d794..82d1a33 100644
--- a/.hgtags
+++ b/.hgtags
@@ -14,3 +14,7 @@ bedb05f72d56f24ca0fc333fd14eabb1ec553902 1.0.1
0fa690ff089ce0bc382a553cc01c0b721fbdee5c 1.0.2
b10bcaf1302411513a5961d1854ff8c02e5ad5e6 1.0.2
8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3
+8795e344f7964bdf8ef4607004f01b94c41e5775 1.0.3
+0000000000000000000000000000000000000000 1.0.3
+0000000000000000000000000000000000000000 1.0.3
+f148746a0d99d2f9bc050906ce78815565a0d0b4 1.0.3
diff --git a/contrib/android/jni/Android.mk b/contrib/android/jni/Android.mk
index 7ae1d28..224e329 100644
--- a/contrib/android/jni/Android.mk
+++ b/contrib/android/jni/Android.mk
@@ -28,9 +28,9 @@ LOCAL_PATH := $(call my-dir)
MY_ANDROID_SRC := $(HOME)/Work/android/android-src
MY_ANDROID_LIBS := $(MY_ANDROID_SRC)/out/target/product/generic/symbols/system/lib
-#MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib
+MY_ANDROID_LIBS := $(HOME)/Work/android/android-sdk-linux_x86/platforms/android-8/symbols/system/lib
MY_SKIA := $(MY_ANDROID_SRC)/external/skia
-#MY_SKIA := $(HOME)/Work/android/skia/8
+MY_SKIA := $(HOME)/Work/android/skia/8
include $(CLEAR_VARS)
diff --git a/contrib/android/jni/graphite/Android.mk b/contrib/android/jni/graphite/Android.mk
index 9a5990a..ffb6d3e 100644
--- a/contrib/android/jni/graphite/Android.mk
+++ b/contrib/android/jni/graphite/Android.mk
@@ -11,7 +11,7 @@ include ../../src/files.mk
LOCAL_MODULE := graphite2
#LOCAL_SRC_FILES := $(foreach v,$(GR2_SOURCES),./$(v))
LOCAL_SRC_FILES := $(GR2_SOURCES)
-LOCAL_C_INCLUDES := ../../include
+LOCAL_C_INCLUDES := ../../include ../../src
LOCAL_EXPORT_C_INCLUDES := ../../include
#LOCAL_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include
#LOCAL_EXPORT_C_INCLUDES := /home/mhosken/Work/dev/Graphite/graphiteng/include
diff --git a/contrib/android/jni/graphite_layer.cpp b/contrib/android/jni/graphite_layer.cpp
index 425e9e4..29b1a5f 100644
--- a/contrib/android/jni/graphite_layer.cpp
+++ b/contrib/android/jni/graphite_layer.cpp
@@ -448,11 +448,11 @@ func_map thismap[] = {
{ "_ZN8SkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", "_ZN10mySkDevice8drawTextERK6SkDrawPKvjffRK7SkPaint", 0, 0 },
// SkTypeface::CreateFromName mySkTypeface::CreateFromName
{ "_ZN10SkTypeface14CreateFromNameEPKcNS_5StyleE", "_ZN12mySkTypeface14CreateFromNameEPKcN10SkTypeface5StyleE", 0, 0 },
- // SkPaint::measureText SkPaint::measureText
+ // SkPaint::measureText mySkPaint::measureText
{ "_ZNK7SkPaint11measureTextEPKvjP6SkRectf", "_ZNK9mySkPaint11measureTextEPKvjP6SkRectf", 0, 0 },
- // SkPaint::measureText SkPaint::measureText
+ // SkPaint::measureText mySkPaint::measureText
{ "_ZNK7SkPaint11measureTextEPKvj", "_ZNK9mySkPaint11measureTextEPKvj", 0, 0},
- // SkPaint::getTextWidths
+ // SkPaint::getTextWidths mySkPaint::getTextWidths
{ "_ZNK7SkPaint13getTextWidthsEPKvjPfP6SkRect", "_ZNK9mySkPaint13getTextWidthsEPKvjPfP6SkRect", 0, 0}
};
diff --git a/contrib/android/jni/loadgr_jni.cpp b/contrib/android/jni/loadgr_jni.cpp
index 5b6e3b0..64a9ecf 100644
--- a/contrib/android/jni/loadgr_jni.cpp
+++ b/contrib/android/jni/loadgr_jni.cpp
@@ -180,7 +180,7 @@ extern "C" jobject Java_org_sil_palaso_Graphite_addFontResource( JNIEnv *env, jo
f->next = myfonts;
f->tf = tf;
f->name = rtl ? "" : name;
- f->rtl = rtl ? 3 : 0;
+ f->rtl = rtl ? 7 : 0;
if (!gFTLibrary && FT_Init_FreeType(&gFTLibrary))
{
delete f->tf;
diff --git a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
index 57ee06a..b1a3922 100644
--- a/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
+++ b/contrib/android/src/org/sil/palaso/helloworld/HelloWorld.java
@@ -47,7 +47,7 @@ public class HelloWorld extends Activity {
TextView tv;
WebView wv;
// String s = "မဂင်္ဂလာ|မဘ္ဘာ၊ ဤကဲ့|သို့|ရာ|ဇ|ဝင်|တင်|မည့် ကြေ|ညာ|ချက်|ကို ပြု|လုပ်|ပြီး|နောက် ဤညီ|လာ|ခံ|အ|စည်း|အ|ဝေး|ကြီး|က ကမ္ဘာ့|ကု|လ|သ|မဂ္ဂ|အ|ဖွဲ့|ဝင် နိုင်|ငံ အား|လုံး|အား ထို|ကြေ|ညာ|စာ|တမ်း|ကြီး၏ စာ|သား|ကို|အ|များ|ပြည်|သူ|တို့ ကြား|သိ|စေ|ရန် ကြေ|ညာ|ပါ|မည့် အ|ကြောင်း|ကို|လည်း|ကောင်း၊ ထို့|ပြင်|နိုင်|ငံ|များ၊ သို့|တည်း|မ|ဟုတ် နယ်|မြေ|များ၏ နိုင်|ငံ|ရေး အ|ဆင့်|အ|တ|န်း|ကို လိုက်၍ ခွဲ|ခြား|ခြင်း မ|ပြု|ဘဲ|အ|ဓိ|က|အား|ဖြင့် စာ|သင်|ကျောင်း|များ|နှင့် အ|ခြား|ပ|ညာ|ရေး အ|ဖွဲ့|အ|စည်း|များ|တ [...]
- String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة البشرية وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم.";
+ String s = "لمّا كان الاعتراف بالكرامة المتأصلة في جميع أعضاء الأسرة (البشرية) وبحقوقهم المتساوية الثابتة هو أساس الحرية والعدل \u06F1\u06F2\u06F3 والسلام في العالم.";
String w = "\uFEFF<html><body style=\"font-family: Scheh\">Test: "
+ s + "</body></html>"; // <3>
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 0d50933..d47ce04 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -85,6 +85,7 @@ add_library(graphite2 SHARED
Slot.cpp
Sparse.cpp
TtfUtil.cpp
+ UtfCodec.cpp
XmlTraceLog.cpp
XmlTraceLogTags.cpp)
diff --git a/src/CmapCache.cpp b/src/CmapCache.cpp
index 0fc85ee..dc2e43e 100644
--- a/src/CmapCache.cpp
+++ b/src/CmapCache.cpp
@@ -39,8 +39,8 @@ CmapCache::CmapCache(const void* cmapTable, size_t length)
const void * table310 = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, length);
m_isBmpOnly = (!table310);
int rangeKey = 0;
- unsigned int codePoint = 0;
- unsigned int prevCodePoint = 0;
+ uint32 codePoint = 0,
+ prevCodePoint = 0;
if (table310 && TtfUtil::CheckCmap310Subtable(table310))
{
m_blocks = grzeroalloc<uint16*>(0x1100);
@@ -92,12 +92,50 @@ CmapCache::CmapCache(const void* cmapTable, size_t length)
}
}
-CmapCache::~CmapCache()
+CmapCache::~CmapCache() throw()
{
unsigned int numBlocks = (m_isBmpOnly)? 0x100 : 0x1100;
for (unsigned int i = 0; i < numBlocks; i++)
free(m_blocks[i]);
free(m_blocks);
- m_blocks = NULL;
+}
+
+uint16 CmapCache::operator [] (const uint32 usv) const throw()
+{
+ if ((m_isBmpOnly && usv > 0xFFFF) || (usv > 0x10FFFF))
+ return 0;
+ const uint32 block = 0xFFFF & (usv >> 8);
+ if (m_blocks[block])
+ return m_blocks[block][usv & 0xFF];
+ return 0;
+};
+
+CmapCache::operator bool() const throw()
+{
+ return m_blocks;
+}
+
+
+DirectCmap::DirectCmap(const void* cmap, size_t length)
+{
+ _ctable = TtfUtil::FindCmapSubtable(cmap, 3, 1, length);
+ if (!_ctable || !TtfUtil::CheckCmap31Subtable(_ctable))
+ {
+ _ctable = 0;
+ return;
+ }
+ _stable = TtfUtil::FindCmapSubtable(cmap, 3, 10, length);
+ if (_stable && !TtfUtil::CheckCmap310Subtable(_stable))
+ _stable = 0;
+}
+
+uint16 DirectCmap::operator [] (const uint32 usv) const throw()
+{
+ return usv > 0xFFFF ? (_stable ? TtfUtil::Cmap310Lookup(_stable, usv) : 0) : TtfUtil::Cmap31Lookup(_ctable, usv);
+}
+
+DirectCmap::operator bool () const throw()
+{
+ return _ctable;
}
diff --git a/src/CmapCache.h b/src/CmapCache.h
index dc1603b..1facde3 100644
--- a/src/CmapCache.h
+++ b/src/CmapCache.h
@@ -26,24 +26,45 @@ of the License or (at your option) any later version.
*/
#pragma once
-#include <graphite2/Types.h>
+#include <Main.h>
namespace graphite2 {
-class CmapCache
+class Face;
+
+class Cmap
+{
+public:
+ virtual ~Cmap() throw() {}
+
+ virtual uint16 operator [] (const uint32) const throw() { return 0; }
+
+ virtual operator bool () const throw() { return false; }
+
+ CLASS_NEW_DELETE;
+};
+
+class DirectCmap : public Cmap
+{
+public:
+ DirectCmap(const void* cmap, size_t length);
+ virtual uint16 operator [] (const uint32 usv) const throw();
+ virtual operator bool () const throw();
+
+ CLASS_NEW_DELETE;
+private:
+ const void *_stable,
+ *_ctable;
+};
+
+class CmapCache : public Cmap
{
public:
- CmapCache(const void * cmapTable, size_t length);
- ~CmapCache();
- uint16 lookup(unsigned int unicode) const {
- if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF))
- return 0;
- unsigned int block = (0xFFFFFF & unicode) >> 8;
- if (m_blocks && m_blocks[block])
- return m_blocks[block][unicode & 0xFF];
- return 0;
- };
- CLASS_NEW_DELETE
+ CmapCache(const void * cmapTable, size_t length);
+ virtual ~CmapCache() throw();
+ virtual uint16 operator [] (const uint32 usv) const throw();
+ virtual operator bool () const throw();
+ CLASS_NEW_DELETE;
private:
bool m_isBmpOnly;
uint16 ** m_blocks;
diff --git a/src/Face.cpp b/src/Face.cpp
index 0c0af0c..060a4da 100644
--- a/src/Face.cpp
+++ b/src/Face.cpp
@@ -40,10 +40,10 @@ using namespace graphite2;
Face::~Face()
{
delete m_pGlyphFaceCache;
- delete m_cmapCache;
+ delete m_cmap;
delete[] m_silfs;
m_pGlyphFaceCache = NULL;
- m_cmapCache = NULL;
+ m_cmap = NULL;
m_silfs = NULL;
delete m_pFileFace;
delete m_pNames;
@@ -58,13 +58,18 @@ bool Face::readGlyphs(unsigned int faceOptions)
m_pGlyphFaceCache = GlyphFaceCache::makeCache(hdr);
if (!m_pGlyphFaceCache) return false;
+
+ size_t length = 0;
+ const byte * table = getTable(Tag::cmap, &length);
+ if (!table) return false;
+
if (faceOptions & gr_face_cacheCmap)
- {
- size_t length = 0;
- const byte * table = getTable(Tag::cmap, &length);
- if (!table) return false;
- m_cmapCache = new CmapCache(table, length);
- }
+ m_cmap = new CmapCache(table, length);
+ else
+ m_cmap = new DirectCmap(table, length);
+
+ if (!m_cmap || !*m_cmap) return false;
+
if (faceOptions & gr_face_preloadGlyphs)
{
m_pGlyphFaceCache->loadAllGlyphs();
diff --git a/src/Face.h b/src/Face.h
index b1a0fab..461a1bc 100644
--- a/src/Face.h
+++ b/src/Face.h
@@ -46,7 +46,7 @@ namespace graphite2 {
class Segment;
class FeatureVal;
class NameTable;
-class CmapCache;
+class Cmap;
using TtfUtil::Tag;
@@ -116,7 +116,7 @@ public:
public:
Face(const void* appFaceHandle/*non-NULL*/, gr_get_table_fn getTable2) :
m_appFaceHandle(appFaceHandle), m_getTable(getTable2), m_pGlyphFaceCache(NULL),
- m_cmapCache(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL),
+ m_cmap(NULL), m_numSilf(0), m_silfs(NULL), m_pFileFace(NULL),
m_pNames(NULL) {}
virtual ~Face();
public:
@@ -142,7 +142,7 @@ public:
const GlyphFaceCache* getGlyphFaceCache() const { return m_pGlyphFaceCache; } //never NULL
void takeFileFace(FileFace* pFileFace/*takes ownership*/);
- CmapCache * getCmapCache() const { return m_cmapCache; };
+ Cmap & cmap() const { return *m_cmap; };
NameTable * nameTable() const;
uint16 languageForLocale(const char * locale) const;
@@ -156,7 +156,7 @@ private:
// unsigned short m_readglyphs; // how many glyphs have we in m_glyphs?
// unsigned short m_capacity; // how big is m_glyphs
mutable GlyphFaceCache* m_pGlyphFaceCache; //owned - never NULL
- mutable CmapCache* m_cmapCache; // cmap cache if available
+ mutable Cmap * m_cmap; // cmap cache if available
unsigned short m_upem; // design units per em
protected:
unsigned short m_numSilf; // number of silf subtables in the silf table
diff --git a/src/Main.h b/src/Main.h
index 1cfd809..29dc4a7 100644
--- a/src/Main.h
+++ b/src/Main.h
@@ -63,7 +63,7 @@ template <typename T> T * grzeroalloc(size_t n)
void operator delete (void * p) throw() { free(p);} \
void operator delete (void *, void *) throw() {} \
void operator delete[] (void * p)throw() { free(p); } \
- void operator delete[] (void *, void *) throw() {} \
+ void operator delete[] (void *, void *) throw() {}
#ifdef __GNUC__
#define GR_MAYBE_UNUSED __attribute__((unused))
diff --git a/src/NameTable.cpp b/src/NameTable.cpp
index c471767..e82a5f7 100644
--- a/src/NameTable.cpp
+++ b/src/NameTable.cpp
@@ -28,8 +28,7 @@ of the License or (at your option) any later version.
#include "Endian.h"
#include "NameTable.h"
-#include "processUTF.h"
-
+#include "UtfCodec.h"
using namespace graphite2;
@@ -146,42 +145,37 @@ void* NameTable::getName(uint16& languageId, uint16 nameId, gr_encform enc, uint
return NULL;
}
utf16Length >>= 1; // in utf16 units
- uint16 * utf16Name = gralloc<uint16>(utf16Length + 1);
+ utf16::codeunit_t * utf16Name = gralloc<utf16::codeunit_t>(utf16Length);
const uint8* pName = m_nameData + offset;
for (size_t i = 0; i < utf16Length; i++)
{
utf16Name[i] = be::read<uint16>(pName);
}
- utf16Name[utf16Length] = 0;
- if (enc == gr_utf16)
+ switch (enc)
{
- length = utf16Length;
- return utf16Name;
- }
- else if (enc == gr_utf8)
+ case gr_utf8:
{
- uint8* uniBuffer = gralloc<uint8>(3 * utf16Length + 1);
- ToUtf8Processor processor(uniBuffer, 3 * utf16Length + 1);
- IgnoreErrors ignore;
- BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length));
- processUTF<BufferLimit, ToUtf8Processor, IgnoreErrors>(bufferLimit, &processor, &ignore);
- length = processor.bytesProcessed();
- uniBuffer[processor.bytesProcessed()] = 0;
- free(utf16Name);
+ utf8::codeunit_t* uniBuffer = gralloc<utf8::codeunit_t>(3 * utf16Length + 1);
+ utf8::iterator d = uniBuffer;
+ for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+ *d = *s;
+ length = d - uniBuffer;
+ uniBuffer[length] = 0;
return uniBuffer;
}
- else if (enc == gr_utf32)
+ case gr_utf16:
+ length = utf16Length;
+ return utf16Name;
+ case gr_utf32:
{
- uint32 * uniBuffer = gralloc<uint32>(utf16Length + 1);
- IgnoreErrors ignore;
- BufferLimit bufferLimit(gr_utf16, reinterpret_cast<void*>(utf16Name), reinterpret_cast<void*>(utf16Name + utf16Length));
-
- ToUtf32Processor processor(uniBuffer, utf16Length);
- processUTF(bufferLimit, &processor, &ignore);
- length = processor.charsProcessed();
- uniBuffer[length] = 0;
- free(utf16Name);
- return uniBuffer;
+ utf32::codeunit_t * uniBuffer = gralloc<utf32::codeunit_t>(utf16Length + 1);
+ utf32::iterator d = uniBuffer;
+ for (utf16::const_iterator s = utf16Name, e = utf16Name + utf16Length; s != e; ++s, ++d)
+ *d = *s;
+ length = d - uniBuffer;
+ uniBuffer[length] = 0;
+ return uniBuffer;
+ }
}
length = 0;
return NULL;
diff --git a/src/SegCacheStore.cpp b/src/SegCacheStore.cpp
index 1ce0aa1..5e13c1f 100644
--- a/src/SegCacheStore.cpp
+++ b/src/SegCacheStore.cpp
@@ -40,26 +40,9 @@ SegCacheStore::SegCacheStore(const Face *face, unsigned int numSilf, size_t maxS
assert(face);
assert(face->getGlyphFaceCache());
m_maxCmapGid = face->getGlyphFaceCache()->numGlyphs();
- if (face->getCmapCache())
- {
- m_spaceGid = face->getCmapCache()->lookup(0x20);
- m_zwspGid = face->getCmapCache()->lookup(0x200B);
- }
- else
- {
- size_t cmapSize = 0;
- const void * cmapTable = face->getTable(Tag::cmap, &cmapSize);
- const void * bmpTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 1, cmapSize);
- //const void * supplementaryTable = TtfUtil::FindCmapSubtable(cmapTable, 3, 10, cmapSize);
- if (bmpTable)
- {
- m_spaceGid = TtfUtil::Cmap31Lookup(bmpTable, 0x20);
- m_zwspGid = TtfUtil::Cmap31Lookup(bmpTable, 0x200B);
- // TODO find out if the Cmap(s) can be parsed to find a m_maxCmapGid < num_glyphs
- // The Pseudo glyphs may mean that it isn't worth the effort
- }
- }
+ m_spaceGid = face->cmap()[0x20];
+ m_zwspGid = face->cmap()[0x200B];
}
#endif
diff --git a/src/Segment.cpp b/src/Segment.cpp
index 1fb97e0..b860643 100644
--- a/src/Segment.cpp
+++ b/src/Segment.cpp
@@ -24,7 +24,7 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
License, as published by the Free Software Foundation, either version 2
of the License or (at your option) any later version.
*/
-#include "processUTF.h"
+#include "UtfCodec.h"
#include <string.h>
#include <stdlib.h>
@@ -425,105 +425,33 @@ void Segment::logSegment() const
#endif
-
-
-class SlotBuilder
+template <typename utf_iter>
+inline void process_utf_data(Segment & seg, const Face & face, const int fid, utf_iter c, size_t n_chars)
{
-public:
- SlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2)
- : m_face(face2),
- m_pDest(pDest2),
- m_ctable(NULL),
- m_stable(NULL),
- m_fid(pDest2->addFeatures(*pFeats)),
- m_nCharsProcessed(0)
- {
- size_t cmapSize = 0;
- const void * table = face2->getTable(Tag::cmap, &cmapSize);
- if (!table) return;
- m_ctable = TtfUtil::FindCmapSubtable(table, 3, 1, cmapSize);
- if (!m_ctable || !TtfUtil::CheckCmap31Subtable(m_ctable))
- {
- m_ctable = NULL;
- return;
- }
- m_stable = TtfUtil::FindCmapSubtable(table, 3, 10, cmapSize);
- if (m_stable && !TtfUtil::CheckCmap310Subtable(m_stable)) m_stable = NULL;
- }
-
- bool processChar(uint32 cid/*unicode character*/, size_t coffset) //return value indicates if should stop processing
- {
- if (!m_ctable) return false;
- uint16 gid = cid > 0xFFFF ? (m_stable ? TtfUtil::Cmap310Lookup(m_stable, cid) : 0) : (m_ctable ? TtfUtil::Cmap31Lookup(m_ctable, cid) : 0);
- if (!gid)
- gid = m_face->findPseudo(cid);
- m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset);
- ++m_nCharsProcessed;
- return true;
- }
-
- size_t charsProcessed() const { return m_nCharsProcessed; }
-
-private:
- const Face *m_face;
- Segment *m_pDest;
- const void * m_ctable;
- const void * m_stable;
- const unsigned int m_fid;
- size_t m_nCharsProcessed ;
-};
-
-class CachedSlotBuilder
-{
-public:
- CachedSlotBuilder(const Face *face2, const Features* pFeats/*must not be NULL*/, Segment* pDest2)
- : m_face(face2),
- m_cmap(face2->getCmapCache()),
- m_pDest(pDest2),
- m_breakAttr(pDest2->silf()->aBreak()),
- m_fid(pDest2->addFeatures(*pFeats)),
- m_nCharsProcessed(0)
- {
- }
-
- bool processChar(uint32 cid/*unicode character*/, size_t coffset) //return value indicates if should stop processing
- {
- if (!m_cmap) return false;
- uint16 gid = m_cmap->lookup(cid);
- if (!gid)
- gid = m_face->findPseudo(cid);
- //int16 bw = m_face->glyphAttr(gid, m_breakAttr);
- m_pDest->appendSlot(m_nCharsProcessed, cid, gid, m_fid, coffset);
- ++m_nCharsProcessed;
- return true;
- }
-
- size_t charsProcessed() const { return m_nCharsProcessed; }
-
-private:
- const Face *m_face;
- const CmapCache *m_cmap;
- Segment *m_pDest;
- uint8 m_breakAttr;
- const unsigned int m_fid;
- size_t m_nCharsProcessed ;
-};
+ const Cmap & cmap = face.cmap();
+ int slotid = 0;
+
+ const typename utf_iter::codeunit_type * const base = c;
+ for (; n_chars; --n_chars, ++c, ++slotid)
+ {
+ const uint32 usv = *c;
+ uint16 gid = cmap[usv];
+ if (!gid) gid = face.findPseudo(usv);
+ seg.appendSlot(slotid, usv, gid, fid, c - base);
+ }
+}
void Segment::read_text(const Face *face, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars)
{
- assert(pFeats);
- CharacterCountLimit limit(enc, pStart, nChars);
- IgnoreErrors ignoreErrors;
- if (face->getCmapCache())
- {
- CachedSlotBuilder slotBuilder(face, pFeats, this);
- processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors);
- }
- else
- {
- SlotBuilder slotBuilder(face, pFeats, this);
- processUTF(limit/*when to stop processing*/, &slotBuilder, &ignoreErrors);
- }
+ assert(face);
+ assert(pFeats);
+
+ switch (enc)
+ {
+ case gr_utf8: process_utf_data(*this, *face, addFeatures(*pFeats), utf8::const_iterator(pStart), nChars); break;
+ case gr_utf16: process_utf_data(*this, *face, addFeatures(*pFeats), utf16::const_iterator(pStart), nChars); break;
+ case gr_utf32: process_utf_data(*this, *face, addFeatures(*pFeats), utf32::const_iterator(pStart), nChars); break;
+ }
}
void Segment::prepare_pos(const Font * /*font*/)
diff --git a/src/CmapCache.h b/src/UtfCodec.cpp
similarity index 68%
copy from src/CmapCache.h
copy to src/UtfCodec.cpp
index dc1603b..2064075 100644
--- a/src/CmapCache.h
+++ b/src/UtfCodec.cpp
@@ -15,8 +15,8 @@
You should also have received a copy of the GNU Lesser General Public
License along with this library in the file named "LICENSE".
- If not, write to the Free Software Foundation, 51 Franklin Street,
- Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
internet at http://www.fsf.org/licenses/lgpl.html.
Alternatively, the contents of this file may be used under the terms of the
@@ -24,29 +24,22 @@ Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
License, as published by the Free Software Foundation, either version 2
of the License or (at your option) any later version.
*/
-#pragma once
-
-#include <graphite2/Types.h>
+#include "UtfCodec.h"
+//using namespace graphite2;
namespace graphite2 {
-class CmapCache
+}
+
+using namespace graphite2;
+
+const int8 _utf_codec<8>::sz_lut[16] =
{
-public:
- CmapCache(const void * cmapTable, size_t length);
- ~CmapCache();
- uint16 lookup(unsigned int unicode) const {
- if ((m_isBmpOnly && unicode > 0xFFFF) || (unicode > 0x10FFFF))
- return 0;
- unsigned int block = (0xFFFFFF & unicode) >> 8;
- if (m_blocks && m_blocks[block])
- return m_blocks[block][unicode & 0xFF];
- return 0;
- };
- CLASS_NEW_DELETE
-private:
- bool m_isBmpOnly;
- uint16 ** m_blocks;
+ 1,1,1,1,1,1,1,1, // 1 byte
+ 0,0,0,0, // trailing byte
+ 2,2, // 2 bytes
+ 3, // 3 bytes
+ 4 // 4 bytes
};
-} // namespace graphite2
+const byte _utf_codec<8>::mask_lut[5] = {0x7f, 0xff, 0x3f, 0x1f, 0x0f};
diff --git a/src/UtfCodec.h b/src/UtfCodec.h
new file mode 100644
index 0000000..5d5192c
--- /dev/null
+++ b/src/UtfCodec.h
@@ -0,0 +1,208 @@
+/* GRAPHITE2 LICENSING
+
+ Copyright 2010, SIL International
+ All rights reserved.
+
+ This library is free software; you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 2.1 of License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should also have received a copy of the GNU Lesser General Public
+ License along with this library in the file named "LICENSE".
+ If not, write to the Free Software Foundation, 51 Franklin Street,
+ Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
+ internet at http://www.fsf.org/licenses/lgpl.html.
+
+Alternatively, the contents of this file may be used under the terms of the
+Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
+License, as published by the Free Software Foundation, either version 2
+of the License or (at your option) any later version.
+*/
+#pragma once
+
+#include <iterator>
+#include "Main.h"
+#include "graphite2/Segment.h"
+
+namespace graphite2 {
+
+typedef uint32 uchar_t;
+
+template <int N>
+struct _utf_codec
+{
+ typedef uchar_t codeunit_t;
+
+ static void put(codeunit_t * cp, const uchar_t , int8 & len) throw();
+ static uchar_t get(const codeunit_t * cp, int8 & len) throw();
+};
+
+
+template <>
+struct _utf_codec<32>
+{
+private:
+ static const uchar_t limit = 0x110000;
+public:
+ typedef uint32 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+ {
+ *cp = usv; l = 1;
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+ {
+ if (cp[0] < limit) { l = 1; return cp[0]; }
+ else { l = -1; return 0xFFFD; }
+ }
+};
+
+
+template <>
+struct _utf_codec<16>
+{
+private:
+ static const int32 lead_offset = 0xD800 - (0x10000 >> 10);
+ static const int32 surrogate_offset = 0x10000 - (0xD800 << 10) - 0xDC00;
+public:
+ typedef uint16 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+ {
+ if (usv < 0x10000) { l = 1; cp[0] = codeunit_t(usv); }
+ else
+ {
+ cp[0] = codeunit_t(lead_offset + (usv >> 10));
+ cp[1] = codeunit_t(0xDC00 + (usv & 0x3FF));
+ l = 2;
+ }
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+ {
+ const uint32 uh = cp[0];
+ l = 1;
+
+ if (0xD800 > uh || uh > 0xDFFF) { return uh; }
+ const uint32 ul = cp[1];
+ if (uh > 0xDBFF || 0xDC00 > ul || ul > 0xDFFF) { l = -1; return 0xFFFD; }
+ ++l;
+ return (uh<<10) + ul + surrogate_offset;
+ }
+};
+
+
+template <>
+struct _utf_codec<8>
+{
+private:
+ static const int8 sz_lut[16];
+ static const byte mask_lut[5];
+
+
+public:
+ typedef uint8 codeunit_t;
+
+ inline
+ static void put(codeunit_t * cp, const uchar_t usv, int8 & l) throw()
+ {
+ if (usv < 0x80) {l = 1; cp[0] = usv; return; }
+ if (usv < 0x0800) {l = 2; cp[0] = 0xC0 + (usv >> 6); cp[1] = 0x80 + (usv & 0x3F); return; }
+ if (usv < 0x10000) {l = 3; cp[0] = 0xE0 + (usv >> 12); cp[1] = 0x80 + ((usv >> 6) & 0x3F); cp[2] = 0x80 + (usv & 0x3F); return; }
+ else {l = 4; cp[0] = 0xF0 + (usv >> 18); cp[1] = 0x80 + ((usv >> 12) & 0x3F); cp[2] = 0x80 + ((usv >> 6) & 0x3F); cp[3] = 0x80 + (usv & 0x3F); return; }
+ }
+
+ inline
+ static uchar_t get(const codeunit_t * cp, int8 & l) throw()
+ {
+ const int8 seq_sz = sz_lut[*cp >> 4];
+ uchar_t u = *cp & mask_lut[seq_sz];
+ l = 1;
+ bool toolong = false;
+
+ switch(seq_sz) {
+ case 4: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong = (u < 0x10);
+ case 3: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x20);
+ case 2: u <<= 6; u |= *++cp & 0x3F; if (*cp >> 6 != 2) break; ++l; toolong |= (u < 0x80);
+ case 1: break;
+ case 0: l = -1; return 0xFFFD;
+ }
+
+ if (l != seq_sz || toolong)
+ {
+ l = -l;
+ return 0xFFFD;
+ }
+ return u;
+ }
+};
+
+
+template <typename C>
+class _utf_iterator
+{
+ typedef _utf_codec<sizeof(C)*8> codec;
+
+ C * cp;
+ mutable int8 sl;
+
+public:
+ typedef C codeunit_type;
+ typedef uchar_t value_type;
+ typedef uchar_t * pointer;
+
+ class reference
+ {
+ const _utf_iterator & _i;
+
+ reference(const _utf_iterator & i): _i(i) {}
+ public:
+ operator value_type () const throw () { return codec::get(_i.cp, _i.sl); }
+ reference & operator = (const value_type usv) throw() { codec::put(_i.cp, usv, _i.sl); return *this; }
+
+ friend class _utf_iterator;
+ };
+
+
+ _utf_iterator(const void * us=0) : cp(reinterpret_cast<C *>(const_cast<void *>(us))), sl(1) { }
+
+ _utf_iterator & operator ++ () { cp += abs(sl); return *this; }
+ _utf_iterator operator ++ (int) { _utf_iterator tmp(*this); operator++(); return tmp; }
+
+ bool operator == (const _utf_iterator & rhs) const throw() { return cp >= rhs.cp; }
+ bool operator != (const _utf_iterator & rhs) const throw() { return !operator==(rhs); }
+
+ reference operator * () const throw() { return *this; }
+ pointer operator ->() const throw() { return &operator *(); }
+
+ operator codeunit_type * () const throw() { return cp; }
+
+ bool error() const throw() { return sl < 1; }
+};
+
+template <typename C>
+struct utf
+{
+ typedef typename _utf_codec<sizeof(C)*8>::codeunit_t codeunit_t;
+
+ typedef _utf_iterator<C> iterator;
+ typedef _utf_iterator<const C> const_iterator;
+};
+
+
+typedef utf<uint32> utf32;
+typedef utf<uint16> utf16;
+typedef utf<uint8> utf8;
+
+} // namespace graphite2
diff --git a/src/files.mk b/src/files.mk
index 9504e60..355e374 100644
--- a/src/files.mk
+++ b/src/files.mk
@@ -63,7 +63,8 @@ $(_NS)_SOURCES = \
$($(_NS)_BASE)/src/Silf.cpp \
$($(_NS)_BASE)/src/Slot.cpp \
$($(_NS)_BASE)/src/Sparse.cpp \
- $($(_NS)_BASE)/src/TtfUtil.cpp
+ $($(_NS)_BASE)/src/TtfUtil.cpp \
+ $($(_NS)_BASE)/src/UtfCodec.cpp
$(_NS)_PRIVATE_HEADERS = \
$($(_NS)_BASE)/src/CachedFace.h \
@@ -96,6 +97,7 @@ $(_NS)_PRIVATE_HEADERS = \
$($(_NS)_BASE)/src/Sparse.h \
$($(_NS)_BASE)/src/TtfTypes.h \
$($(_NS)_BASE)/src/TtfUtil.h \
+ $($(_NS)_BASE)/src/UtfCodec.h \
$($(_NS)_BASE)/src/XmlTraceLog.h \
$($(_NS)_BASE)/src/XmlTraceLogTags.h
diff --git a/src/gr_segment.cpp b/src/gr_segment.cpp
index f6cf52d..0c1d6d4 100644
--- a/src/gr_segment.cpp
+++ b/src/gr_segment.cpp
@@ -25,46 +25,13 @@ License, as published by the Free Software Foundation, either version 2
of the License or (at your option) any later version.
*/
#include "graphite2/Segment.h"
-#include "processUTF.h"
+#include "UtfCodec.h"
#include "Segment.h"
using namespace graphite2;
namespace
{
- template <class LIMIT, class CHARPROCESSOR>
- size_t doCountUnicodeCharacters(const LIMIT& limit, CHARPROCESSOR* pProcessor, const void** pError)
- {
- BreakOnError breakOnError;
-
- processUTF(limit/*when to stop processing*/, pProcessor, &breakOnError);
- if (pError) {
- *pError = breakOnError.m_pErrorPos;
- }
- return pProcessor->charsProcessed();
- }
-
- class CharCounterToNul
- {
- public:
- CharCounterToNul()
- : m_nCharsProcessed(0)
- {
- }
-
- bool processChar(uint32 cid/*unicode character*/, size_t /*offset*/) //return value indicates if should stop processing
- {
- if (cid==0)
- return false;
- ++m_nCharsProcessed;
- return true;
- }
-
- size_t charsProcessed() const { return m_nCharsProcessed; }
-
- private:
- size_t m_nCharsProcessed ;
- };
gr_segment* makeAndInitialize(const Font *font, const Face *face, uint32 script, const Features* pFeats/*must not be NULL*/, gr_encform enc, const void* pStart, size_t nChars, int dir)
{
@@ -92,23 +59,43 @@ namespace
}
-extern "C" {
+template <typename utf_iter>
+inline size_t count_unicode_chars(utf_iter first, const utf_iter last, const void **error)
+{
+ size_t n_chars = 0;
+ uint32 usv = 0;
+
+ if (last)
+ {
+ for (;first != last; ++first, ++n_chars)
+ if ((usv = *first) == 0 || first.error()) break;
+ }
+ else
+ {
+ while ((usv = *first) != 0 && !first.error())
+ {
+ ++first;
+ ++n_chars;
+ }
+ }
+
+ if (error) *error = first.error() ? first : 0;
+ return n_chars;
+}
+extern "C" {
size_t gr_count_unicode_characters(gr_encform enc, const void* buffer_begin, const void* buffer_end/*don't go on or past end, If NULL then ignored*/, const void** pError) //Also stops on nul. Any nul is not in the count
{
- if (buffer_end)
- {
- BufferLimit limit(enc, buffer_begin, buffer_end);
- CharCounterToNul counter;
- return doCountUnicodeCharacters(limit, &counter, pError);
- }
- else
- {
- NoLimit limit(enc, buffer_begin);
- CharCounterToNul counter;
- return doCountUnicodeCharacters(limit, &counter, pError);
- }
+ assert(buffer_begin);
+
+ switch (enc)
+ {
+ case gr_utf8: return count_unicode_chars<utf8::const_iterator>(buffer_begin, buffer_end, pError); break;
+ case gr_utf16: return count_unicode_chars<utf16::const_iterator>(buffer_begin, buffer_end, pError); break;
+ case gr_utf32: return count_unicode_chars<utf32::const_iterator>(buffer_begin, buffer_end, pError); break;
+ default: return 0;
+ }
}
diff --git a/src/processUTF.h b/src/processUTF.h
deleted file mode 100644
index 18c5e6a..0000000
--- a/src/processUTF.h
+++ /dev/null
@@ -1,494 +0,0 @@
-/* GRAPHITE2 LICENSING
-
- Copyright 2010, SIL International
- All rights reserved.
-
- This library is free software; you can redistribute it and/or modify
- it under the terms of the GNU Lesser General Public License as published
- by the Free Software Foundation; either version 2.1 of License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should also have received a copy of the GNU Lesser General Public
- License along with this library in the file named "LICENSE".
- If not, write to the Free Software Foundation, 51 Franklin Street,
- Suite 500, Boston, MA 02110-1335, USA or visit their web page on the
- internet at http://www.fsf.org/licenses/lgpl.html.
-
-Alternatively, the contents of this file may be used under the terms of the
-Mozilla Public License (http://mozilla.org/MPL) or the GNU General Public
-License, as published by the Free Software Foundation, either version 2
-of the License or (at your option) any later version.
-*/
-#pragma once
-
-#include "Main.h"
-#include "graphite2/Segment.h"
-
-namespace graphite2 {
-
-class NoLimit //relies on the processor.processChar() failing, such as because of a terminating nul character
-{
-public:
- NoLimit(gr_encform enc2, const void* pStart2) : m_enc(enc2), m_pStart(pStart2) {}
- gr_encform enc() const { return m_enc; }
- const void* pStart() const { return m_pStart; }
-
- bool inBuffer(const void* /*pCharLastSurrogatePart*/, uint32 /*val*/) const { return true; }
- bool needMoreChars(const void* /*pCharStart*/, size_t /*nProcessed*/) const { return true; }
-
-private:
- gr_encform m_enc;
- const void* m_pStart;
-};
-
-
-class CharacterCountLimit
-{
-public:
- CharacterCountLimit(gr_encform enc2, const void* pStart2, size_t numchars) : m_numchars(numchars), m_enc(enc2), m_pStart(pStart2) {}
- gr_encform enc() const { return m_enc; }
- const void* pStart() const { return m_pStart; }
-
- bool inBuffer (const void* /*pCharLastSurrogatePart*/, uint32 val) const { return (val != 0); }
- bool needMoreChars (const void* /*pCharStart*/, size_t nProcessed) const { return nProcessed<m_numchars; }
-
-private:
- size_t m_numchars;
- gr_encform m_enc;
- const void* m_pStart;
-};
-
-
-class BufferLimit
-{
-public:
- BufferLimit(gr_encform enc2, const void* pStart2, const void* pEnd/*as in stl i.e. don't use end*/) : m_enc(enc2), m_pStart(pStart2) {
- size_t nFullTokens = (static_cast<const char*>(pEnd)-static_cast<const char *>(m_pStart))/int(m_enc); //rounds off partial tokens
- m_pEnd = static_cast<const char *>(m_pStart) + (nFullTokens*int(m_enc));
- }
- gr_encform enc() const { return m_enc; }
- const void* pStart() const { return m_pStart; }
-
- bool inBuffer (const void* pCharLastSurrogatePart, uint32 /*val*/) const { return pCharLastSurrogatePart<m_pEnd; } //also called on charstart by needMoreChars()
-
- bool needMoreChars (const void* pCharStart, size_t /*nProcessed*/) const { return inBuffer(pCharStart, 1); }
-
-private:
- const void* m_pEnd;
- gr_encform m_enc;
- const void* m_pStart;
-};
-
-
-class IgnoreErrors
-{
-public:
- //for all of the ignore* methods is the parameter is false, the return result must be true
- static bool ignoreUnicodeOutOfRangeErrors(bool /*isBad*/) { return true; }
- static bool ignoreBadSurrogatesErrors(bool /*isBad*/) { return true; }
-
- static bool handleError(const void* /*pPositionOfError*/) { return true;}
-};
-
-
-class BreakOnError
-{
-public:
- BreakOnError() : m_pErrorPos(NULL) {}
-
- //for all of the ignore* methods is the parameter is false, the return result must be true
- static bool ignoreUnicodeOutOfRangeErrors(bool isBad) { return !isBad; }
- static bool ignoreBadSurrogatesErrors(bool isBad) { return !isBad; }
-
- bool handleError(const void* pPositionOfError) { m_pErrorPos=pPositionOfError; return false;}
-
-public:
- const void* m_pErrorPos;
-};
-
-
-
-
-
-/*
- const int utf8_extrabytes_lut[16] = {0,0,0,0,0,0,0,0, // 1 byte
- 3,3,3,3, // errors since trailing byte, catch later
- 1,1, // 2 bytes
- 2, // 3 bytes
- 3}; // 4 bytes
- quicker to implement directly:
-*/
-
-inline unsigned int utf8_extrabytes(const unsigned int topNibble) { return (0xE5FF0000>>(2*topNibble))&0x3; }
-
-inline unsigned int utf8_mask(const unsigned int seq_extra) { return ((0xFEC0>>(4*seq_extra))&0xF)<<4; }
-
-class Utf8Consumer
-{
-public:
- Utf8Consumer(const uint8* pCharStart2) : m_pCharStart(pCharStart2) {}
-
- const uint8* pCharStart() const { return m_pCharStart; }
-
-private:
- template <class ERRORHANDLER>
- bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing
- *pRes = 0xFFFD;
- if (!pErrHandler->handleError(m_pCharStart)) {
- return false;
- }
- ++m_pCharStart;
- return true;
- }
-
-public:
- template <class LIMIT, class ERRORHANDLER>
- inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) { //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
- const unsigned int seq_extra = utf8_extrabytes(*m_pCharStart >> 4); //length of sequence including *m_pCharStart is 1+seq_extra
- if (!limit.inBuffer(m_pCharStart+(seq_extra), *m_pCharStart)) {
- return false;
- }
-
- *pRes = *m_pCharStart ^ utf8_mask(seq_extra);
-
- if (seq_extra) {
- switch(seq_extra) { //hopefully the optimizer will implement this as a jump table. If not the above if should cover the majority case.
- case 3: {
- if (pErrHandler->ignoreUnicodeOutOfRangeErrors(*m_pCharStart>=0xF8)) { //the good case
- ++m_pCharStart;
- if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
- return respondToError(pRes, pErrHandler);
- }
-
- *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F; //drop through
- }
- else {
- return respondToError(pRes, pErrHandler);
- }
- }
- case 2: {
- ++m_pCharStart;
- if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
- return respondToError(pRes, pErrHandler);
- }
- }
- *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F; //drop through
- case 1: {
- ++m_pCharStart;
- if (!pErrHandler->ignoreBadSurrogatesErrors((*m_pCharStart&0xC0)!=0x80)) {
- return respondToError(pRes, pErrHandler);
- }
- }
- *pRes <<= 6; *pRes |= *m_pCharStart & 0x3F;
- }
- }
- ++m_pCharStart;
- return true;
- }
-
-private:
- const uint8 *m_pCharStart;
-};
-
-
-
-class Utf16Consumer
-{
-public:
- Utf16Consumer(const uint16* pCharStart2) : m_pCharStart(pCharStart2) {}
-
- const uint16* pCharStart() const { return m_pCharStart; }
-
-private:
- template <class ERRORHANDLER>
- bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing
- *pRes = 0xFFFD;
- if (!pErrHandler->handleError(m_pCharStart)) {
- return false;
- }
- ++m_pCharStart;
- return true;
- }
-
-public:
- template <class LIMIT, class ERRORHANDLER>
- inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
- {
- *pRes = *m_pCharStart;
- if (0xD800 > *pRes || !pErrHandler->ignoreUnicodeOutOfRangeErrors(*pRes >= 0xE000)) {
- ++m_pCharStart;
- return true;
- }
-
- if (!pErrHandler->ignoreBadSurrogatesErrors(*pRes >= 0xDC00)) { //second surrogate is incorrectly coming first
- return respondToError(pRes, pErrHandler);
- }
-
- ++m_pCharStart;
- if (!limit.inBuffer(m_pCharStart, *pRes)) {
- return false;
- }
-
- uint32 ul = *(m_pCharStart);
- if (!pErrHandler->ignoreBadSurrogatesErrors(0xDC00 > ul || ul > 0xDFFF)) {
- return respondToError(pRes, pErrHandler);
- }
- ++m_pCharStart;
- *pRes = ((*pRes - 0xD800)<<10) + ul - 0xDC00;
- return true;
- }
-
-private:
- const uint16 *m_pCharStart;
-};
-
-
-class Utf32Consumer
-{
-public:
- Utf32Consumer(const uint32* pCharStart2) : m_pCharStart(pCharStart2) {}
-
- const uint32* pCharStart() const { return m_pCharStart; }
-
-private:
- template <class ERRORHANDLER>
- bool respondToError(uint32* pRes, ERRORHANDLER* pErrHandler) { //return value is if should stop parsing
- *pRes = 0xFFFD;
- if (!pErrHandler->handleError(m_pCharStart)) {
- return false;
- }
- ++m_pCharStart;
- return true;
- }
-
-public:
- template <class LIMIT, class ERRORHANDLER>
- inline bool consumeChar(const LIMIT& limit, uint32* pRes, ERRORHANDLER* pErrHandler) //At start, limit.inBuffer(m_pCharStart) is true. return value is iff character contents does not go past limit
- {
- *pRes = *m_pCharStart;
- if (pErrHandler->ignoreUnicodeOutOfRangeErrors(!(*pRes<0xD800 || (*pRes>=0xE000 && *pRes<0x110000)))) {
- if (!limit.inBuffer(++m_pCharStart, *pRes))
- return false;
- else
- return true;
- }
-
- return respondToError(pRes, pErrHandler);
- }
-
-private:
- const uint32 *m_pCharStart;
-};
-
-
-
-
-/* The following template function assumes that LIMIT and CHARPROCESSOR have the following methods and semantics:
-
-class LIMIT
-{
-public:
- SegmentHandle::encform enc() const; //which of the below overloads of inBuffer() and needMoreChars() are called
- const void* pStart() const; //start of first character to process
-
- bool inBuffer(const uint8* pCharLastSurrogatePart) const; //whether or not the input is considered to be in the range of the buffer.
- bool inBuffer(const uint16* pCharLastSurrogatePart) const; //whether or not the input is considered to be in the range of the buffer.
-
- bool needMoreChars(const uint8* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
- bool needMoreChars(const uint16* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
- bool needMoreChars(const uint32* pCharStart, size_t nProcessed) const; //whether or not the input is considered to be in the range of the buffer, and sufficient characters have been processed.
-};
-
-class ERRORHANDLER
-{
-public:
- //for all of the ignore* methods is the parameter is false, the return result must be true
- bool ignoreUnicodeOutOfRangeErrors(bool isBad) const;
- bool ignoreBadSurrogatesErrors(bool isBad) const;
-
- bool handleError(const void* pPositionOfError); //returns true iff error handled and should continue
-};
-
-class CHARPROCESSOR
-{
-public:
- bool processChar(uint32 cid); //return value indicates if should stop processing
- size_t charsProcessed() const; //number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars
-};
-
-Useful reusable examples of LIMIT are:
-NoLimit //relies on the CHARPROCESSOR.processChar() failing, such as because of a terminating nul character
-CharacterCountLimit //doesn't care about where the input buffer may end, but limits the number of unicode characters processed.
-BufferLimit //processes how ever many characters there are until the buffer end. characters straggling the end are not processed.
-BufferAndCharacterCountLimit //processes a maximum number of characters there are until the buffer end. characters straggling the end are not processed.
-
-Useful examples of ERRORHANDLER are IgnoreErrors, BreakOnError.
-*/
-
-template <class LIMIT, class CHARPROCESSOR, class ERRORHANDLER>
-void processUTF(const LIMIT& limit/*when to stop processing*/, CHARPROCESSOR* pProcessor, ERRORHANDLER* pErrHandler)
-{
- uint32 cid;
- switch (limit.enc()) {
- case gr_utf8 : {
- const uint8 *pInit = static_cast<const uint8 *>(limit.pStart());
- Utf8Consumer consumer(pInit);
- for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
- const uint8 *pCur = consumer.pCharStart();
- if (!consumer.consumeChar(limit, &cid, pErrHandler))
- break;
- if (!pProcessor->processChar(cid, pCur - pInit))
- break;
- }
- break; }
- case gr_utf16: {
- const uint16* pInit = static_cast<const uint16 *>(limit.pStart());
- Utf16Consumer consumer(pInit);
- for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
- const uint16 *pCur = consumer.pCharStart();
- if (!consumer.consumeChar(limit, &cid, pErrHandler))
- break;
- if (!pProcessor->processChar(cid, pCur - pInit))
- break;
- }
- break;
- }
- case gr_utf32 : default: {
- const uint32 *pInit = static_cast<const uint32 *>(limit.pStart());
- Utf32Consumer consumer(pInit);
- for (;limit.needMoreChars(consumer.pCharStart(), pProcessor->charsProcessed());) {
- const uint32 *pCur = consumer.pCharStart();
- if (!consumer.consumeChar(limit, &cid, pErrHandler))
- break;
- if (!pProcessor->processChar(cid, pCur - pInit))
- break;
- }
- break;
- }
- }
-}
-
- class ToUtf8Processor
- {
- public:
- // buffer length should be three times the utf16 length or
- // four times the utf32 length to cover the worst case
- ToUtf8Processor(uint8 * buffer, size_t maxLength) :
- m_count(0), m_byteLength(0), m_maxLength(maxLength), m_buffer(buffer)
- {}
- bool processChar(uint32 cid, size_t /*offset*/)
- {
- // taken from Unicode Book ch3.9
- if (cid <= 0x7F)
- m_buffer[m_byteLength++] = cid;
- else if (cid <= 0x07FF)
- {
- if (m_byteLength + 2 >= m_maxLength)
- return false;
- m_buffer[m_byteLength++] = 0xC0 + (cid >> 6);
- m_buffer[m_byteLength++] = 0x80 + (cid & 0x3F);
- }
- else if (cid <= 0xFFFF)
- {
- if (m_byteLength + 3 >= m_maxLength)
- return false;
- m_buffer[m_byteLength++] = 0xE0 + (cid >> 12);
- m_buffer[m_byteLength++] = 0x80 + ((cid & 0x0FC0) >> 6);
- m_buffer[m_byteLength++] = 0x80 + (cid & 0x003F);
- }
- else if (cid <= 0x10FFFF)
- {
- if (m_byteLength + 4 >= m_maxLength)
- return false;
- m_buffer[m_byteLength++] = 0xF0 + (cid >> 18);
- m_buffer[m_byteLength++] = 0x80 + ((cid & 0x3F000) >> 12);
- m_buffer[m_byteLength++] = 0x80 + ((cid & 0x00FC0) >> 6);
- m_buffer[m_byteLength++] = 0x80 + (cid & 0x0003F);
- }
- else
- {
- // ignore
- }
- m_count++;
- if (m_byteLength >= m_maxLength)
- return false;
- return true;
- }
- size_t charsProcessed() const { return m_count; }
- size_t bytesProcessed() const { return m_byteLength; }
- private:
- size_t m_count;
- size_t m_byteLength;
- size_t m_maxLength;
- uint8 * m_buffer;
- };
-
- class ToUtf16Processor
- {
- public:
- // buffer length should be twice the utf32 length
- // to cover the worst case
- ToUtf16Processor(uint16 * buffer, size_t maxLength) :
- m_count(0), m_uint16Length(0), m_maxLength(maxLength), m_buffer(buffer)
- {}
- bool processChar(uint32 cid, size_t /*offset*/)
- {
- // taken from Unicode Book ch3.9
- if (cid <= 0xD800)
- m_buffer[m_uint16Length++] = cid;
- else if (cid < 0xE000)
- {
- // skip for now
- }
- else if (cid >= 0xE000 && cid <= 0xFFFF)
- m_buffer[m_uint16Length++] = cid;
- else if (cid <= 0x10FFFF)
- {
- if (m_uint16Length + 2 >= m_maxLength)
- return false;
- m_buffer[m_uint16Length++] = 0xD800 + ((cid & 0xFC00) >> 10) + ((cid >> 16) - 1);
- m_buffer[m_uint16Length++] = 0xDC00 + ((cid & 0x03FF) >> 12);
- }
- else
- {
- // ignore
- }
- m_count++;
- if (m_uint16Length == m_maxLength)
- return false;
- return true;
- }
- size_t charsProcessed() const { return m_count; }
- size_t uint16Processed() const { return m_uint16Length; }
- private:
- size_t m_count;
- size_t m_uint16Length;
- size_t m_maxLength;
- uint16 * m_buffer;
- };
-
- class ToUtf32Processor
- {
- public:
- ToUtf32Processor(uint32 * buffer, size_t maxLength) :
- m_count(0), m_maxLength(maxLength), m_buffer(buffer) {}
- bool processChar(uint32 cid, size_t /*offset*/)
- {
- m_buffer[m_count++] = cid;
- if (m_count == m_maxLength)
- return false;
- return true;
- }
- size_t charsProcessed() const { return m_count; }
- private:
- size_t m_count;
- size_t m_maxLength;
- uint32 * m_buffer;
- };
-
-} // namespace graphite2
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 541fb2e..4e9f7a6 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -14,6 +14,7 @@ add_subdirectory(nametabletest)
add_subdirectory(examples)
add_subdirectory(grlist)
add_subdirectory(endian)
+add_subdirectory(utftest)
enable_testing()
diff --git a/tests/segcache/segcachetest.cpp b/tests/segcache/segcachetest.cpp
index 25cea8f..3a4021c 100644
--- a/tests/segcache/segcachetest.cpp
+++ b/tests/segcache/segcachetest.cpp
@@ -27,7 +27,7 @@
#include "Segment.h"
#include "SegCache.h"
#include "SegCacheStore.h"
-#include "processUTF.h"
+#include "UtfCodec.h"
#include "TtfTypes.h"
#include "TtfUtil.h"
@@ -35,24 +35,18 @@ using namespace graphite2;
inline gr_face * api_cast(CachedFace *p) { return static_cast<gr_face*>(static_cast<Face*>(p)); }
-class CmapProcessor
+template <typename utf_itr> // utf_itr: iterator whose dereference yields a value assignable to uint32
+void resolve_unicode_to_glyphs(const Face & face, utf_itr first, size_t n_chars, uint16 * glyphs) // stores one Cmap31Lookup result per input character into glyphs
{
-public:
- CmapProcessor(Face * face, uint16 * buffer) :
- m_cmapTable(TtfUtil::FindCmapSubtable(face->getTable("cmap", NULL), 3, 1)),
- m_buffer(buffer), m_pos(0) {};
- bool processChar(uint32 cid, size_t /*offset*/) //return value indicates if should stop processing
- {
- assert(cid < 0xFFFF); // only lower plane supported for this test
- m_buffer[m_pos++] = TtfUtil::Cmap31Lookup(m_cmapTable, cid);
- return true;
- }
- size_t charsProcessed() const { return m_pos; } //number of characters processed. Usually starts from 0 and incremented by processChar(). Passed in to LIMIT::needMoreChars
-private:
- const void * m_cmapTable;
- uint16 * m_buffer;
- size_t m_pos;
-};
+ const void * cmap = TtfUtil::FindCmapSubtable(face.getTable("cmap", NULL), 3, 1); // NOTE(review): 3, 1 presumably select platform id / encoding id - confirm against TtfUtil
+
+ for (; n_chars; --n_chars, ++first) // exactly n_chars reads and writes; no zero or error check, and glyphs must have room for them all
+ {
+ const uint32 usv = *first;
+ assert(usv < 0xFFFF); // only lower plane supported for this test
+ *glyphs++ = TtfUtil::Cmap31Lookup(cmap, usv);
+ }
+}
bool checkEntries(CachedFace
* face, const char * testString, uint16 * glyphString, size_t testLength)
@@ -120,10 +114,7 @@ bool testSeg(CachedFace
testString + strlen(testString),
&badUtf8);
*testGlyphString = gralloc<uint16>(*testLength + 1);
- CharacterCountLimit limit(gr_utf8, testString, *testLength);
- CmapProcessor cmapProcessor(face, *testGlyphString);
- IgnoreErrors ignoreErrors;
- processUTF(limit, &cmapProcessor, &ignoreErrors);
+ resolve_unicode_to_glyphs(*face, utf8::iterator(testString), *testLength, *testGlyphString);
gr_segment * segA = gr_make_seg(sizedFont, api_cast(face), 0, NULL, gr_utf8, testString,
*testLength, 0);
@@ -149,11 +140,8 @@ int main(int argc, char ** argv)
}
FILE * log = fopen("grsegcache.xml", "w");
graphite_start_logging(log, GRLOG_SEGMENT);
- CachedFace
- *face = static_cast<CachedFace
-*>(static_cast<Face
-*>(
- (gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default))));
+ CachedFace *face = static_cast<CachedFace *>(static_cast<Face *>(
+ gr_make_file_face_with_seg_cache(fileName, 10, gr_face_default)));
if (!face)
{
fprintf(stderr, "Invalid font, failed to parse tables\n");
diff --git a/tests/utftest/CMakeLists.txt b/tests/utftest/CMakeLists.txt
new file mode 100644
index 0000000..5048202
--- /dev/null
+++ b/tests/utftest/CMakeLists.txt
@@ -0,0 +1,15 @@
+CMAKE_MINIMUM_REQUIRED(VERSION 2.8.0 FATAL_ERROR)
+project(utftest)
+include(Graphite) # NOTE(review): presumably supplies target_objects used below - confirm
+include_directories(../../src) # add the library source directory to the include path
+
+if (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+ add_definitions(-D_SCL_SECURE_NO_WARNINGS -D_CRT_SECURE_NO_WARNINGS -DUNICODE) # preprocessor defines applied on Windows only
+endif (${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
+
+
+target_objects(OBJS graphite2) # NOTE(review): appears to fill OBJS from the graphite2 target - confirm against the Graphite module
+add_executable(utftest utftest.cpp ${OBJS}) # build the test from its own source plus ${OBJS}
+add_dependencies(utftest graphite2) # graphite2 must be built before utftest
+
+add_test(NAME utftest COMMAND $<TARGET_FILE:utftest>) # register the built executable as a test
diff --git a/tests/utftest/utftest.cpp b/tests/utftest/utftest.cpp
new file mode 100644
index 0000000..bdcc9e7
--- /dev/null
+++ b/tests/utftest/utftest.cpp
@@ -0,0 +1,56 @@
+#include <graphite2/Segment.h>
+#include <stdio.h>
+
+struct test // one input/expectation pair for gr_count_unicode_characters
+{
+ int len, // expected character count returned by the call
+ error; // expected byte offset of the reported error within str; negative means no error is expected
+ unsigned char str[12]; // raw input bytes; the whole array is passed as the buffer
+};
+struct test tests[] = {
+ { 4, -1, {0x7F, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0, 0} }, // U+7F, U+7FF, U+FFFF, U+10FFFF
+ { 2, 3, {0x7F, 0xDF, 0xBF, 0xF0, 0x8F, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, U+7FF, long(U+FFFF), U+10FFFF
+ { 1, 1, {0x7F, 0xE0, 0x9F, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0x8F, 0xBF, 0xBF, 0} }, // U+7F, long(U+7FF), U+FFFF, U+10FFFF
+ { 0, 0, {0xC1, 0xBF, 0xDF, 0xBF, 0xEF, 0xBF, 0xBF, 0xF4, 0xBF, 0xBF, 0xBF, 0} }, // long(U+7F), U+7FF, U+FFFF, out-of-range (F4 BF BF BF decodes above U+10FFFF)
+ { 4, -1, {0x01, 0xC2, 0x80, 0xE0, 0xA0, 0x80, 0xF0, 0x90, 0x80, 0x80, 0, 0} }, // U+01, U+80, U+800, U+10000
+ { 1, 1, {0x65, 0x9F, 0x65, 0x65, 0, 0, 0, 0, 0, 0, 0, 0} }, // U+65 bad(1) U+65 U+65
+ { 2, 2, {0x65, 0x65, 0xC2, 0xC2, 0x65, 0x65, 0, 0, 0, 0, 0, 0} }, // U+65 U+65 bad(1) bad(1) U+65 U+65
+ { 2, 2, {0x65, 0x75, 0xE3, 0x84, 0x75, 0x75, 0, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(2) U+75 U+75
+ { 2, 2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0x75, 0x75, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(3) U+75 U+75
+ { 2, 2, {0x65, 0x75, 0xF3, 0x84, 0xA5, 0xF5, 0x75, 0, 0, 0, 0, 0} }, // U+65 U+75 bad(3) bad(1) U+75
+};
+
+const int numtests = sizeof(tests)/sizeof(test); // number of entries in tests[]
+
+int main(int argc, char * argv[]) { // runs every entry of tests[]; exit status is the 1-based index of the first failing test
+ int i;
+ const void * error; // receives the error position reported by the library, or null
+
+ for (i = 0; i < numtests; ++i)
+ {
+ int res = gr_count_unicode_characters(gr_utf8, tests[i].str, tests[i].str + sizeof(tests[i].str), &error); // the full 12-byte array is the buffer
+ if (tests[i].error >= 0) // this case expects an error to be reported
+ {
+ if (!error)
+ {
+ fprintf(stderr, "%s: test %d failed: expected error condition did not occur\n", argv[0], i + 1);
+ return (i+1);
+ }
+ else if (ptrdiff_t(error) - ptrdiff_t(tests[i].str) != tests[i].error) // compare byte offsets from the start of str
+ {
+ fprintf(stderr, "%s: test %d failed: error at codepoint %d expected at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str)), tests[i].error); // report the expected offset that was compared, not the expected count
+ return (i+1);
+ }
+ }
+ else if (error) // this case expects a clean run, so any reported position is a failure
+ {
+ fprintf(stderr, "%s: test %d failed: unexpected error occured at codepoint %d\n", argv[0], i + 1, int(ptrdiff_t(error) - ptrdiff_t(tests[i].str)));
+ return (i+1);
+ }
+ if (res != tests[i].len) // the count is checked whether or not an error was expected
+ {
+ fprintf(stderr, "%s: test %d failed: character count failure %d != %d\n", argv[0], i + 1, res, tests[i].len);
+ return (i+1);
+ }
+ }
+} // falling off the end of main yields exit status 0 in C++
diff --git a/tests/vm/CMakeLists.txt b/tests/vm/CMakeLists.txt
index f2c2a3e..d5abe62 100644
--- a/tests/vm/CMakeLists.txt
+++ b/tests/vm/CMakeLists.txt
@@ -33,6 +33,7 @@ target_objects(OBJS graphite2
Slot.cpp
Sparse.cpp
TtfUtil.cpp
+ UtfCodec.cpp
${TRACESUPPORT})
add_library(vm-test-common STATIC basic_test.cpp ${OBJS})
add_dependencies(vm-test-common graphite2)
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-openoffice/graphite2.git
Reply to: