summary | refs | log | tree | commit | diff
path: root/ConvertUTF.c
diff options
context:
space:
mode:
Diffstat (limited to 'ConvertUTF.c')
-rw-r--r-- ConvertUTF.c 24
1 files changed, 19 insertions, 5 deletions
diff --git a/ConvertUTF.c b/ConvertUTF.c
index 9b3deeb..67ab49f 100644
--- a/ConvertUTF.c
+++ b/ConvertUTF.c
@@ -33,6 +33,7 @@
July 2003: slight mods to back out aggressive FFFE detection.
Jan 2004: updated switches in from-UTF8 conversions.
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
+ May 2006: updated isLegalUTF8Sequence.
See the header file "ConvertUTF.h" for complete documentation.
@@ -305,7 +306,7 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
switch (*source) {
/* no fall-through in this inner switch */
case 0xE0: if (a < 0xA0) return false; break;
- case 0xED: if (a > 0x9F) return false; break;
+ case 0xED: if ((a < 0x80) || (a > 0x9F)) return false; break;
case 0xF0: if (a < 0x90) return false; break;
case 0xF4: if (a > 0x8F) return false; break;
default: if (a < 0x80) return false;
@@ -323,12 +324,25 @@ static Boolean isLegalUTF8(const UTF8 *source, int length) {
* Exported function to return whether a UTF-8 sequence is legal or not.
* This is not used here; it's just exported.
*/
+
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
- int length = trailingBytesForUTF8[*source]+1;
- if (source+length > sourceEnd) {
- return false;
+ int length;
+ if (source == sourceEnd) {
+ return true;
+ }
+ while (true) {
+ length = trailingBytesForUTF8[*source]+1;
+ if (source+length > sourceEnd) {
+ return false;
+ }
+ if (!isLegalUTF8(source, length)) {
+ return false;
+ }
+ source += length;
+ if (source >= sourceEnd) {
+ return true;
+ }
}
- return isLegalUTF8(source, length);
}
/* --------------------------------------------------------------------- */