summaryrefslogtreecommitdiff
path: root/harness.c
diff options
context:
space:
mode:
author Luke Shumaker <lukeshu@lukeshu.com> 2004-10-22 05:57:51 -0500
committer Luke Shumaker <lukeshu@lukeshu.com> 2004-10-22 05:57:51 -0500
commit 7d347a05ce025a9aef28bcf72089e1388dd48d13 (patch)
tree f2eb4b50da34c4823dc4f0f2141323829ff924e0 /harness.c
parent 766942acf8f0c0d9ef6c16ffbdedefdfda0af4b2 (diff)
http://web.archive.org/web/20041022055751/http:/www.unicode.org:80/Public/BETA/CVTUTF-1-3/BETA/CVTUTF-1-3
Diffstat (limited to 'harness.c')
-rw-r--r-- harness.c 61
1 files changed, 55 insertions, 6 deletions
diff --git a/harness.c b/harness.c
index 1e3dfb9..25b3e9e 100644
--- a/harness.c
+++ b/harness.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2001 Unicode, Inc.
+ * Copyright 2001-2004 Unicode, Inc.
*
* Disclaimer
*
@@ -34,6 +34,8 @@
* Sept 19, 2002: Corrected error on line 234: utf16_buf[2] becomes utf16_result[2]
* per report from Iain Murray.
* July 3, 2003: Updated printout message.
+ * Oct 19, 2004: Updated isLegalUTF8 test data and corrected switch statements to catch
+ * illegal surrogate use in UTF-8, per report from Frank Tang.
*
*/
@@ -54,7 +56,9 @@
00-7F 0000- 007F
C2-DF 80-BF 0080- 07FF
E0 A0-BF 80-BF 0800- 0FFF
- E1-EF 80-BF 80-BF 1000- FFFF
+ E1-EC 80-BF 80-BF 1000- CFFF
+ ED 80-9F 80-BF D000- D7FF
+ EE-EF 80-BF 80-BF E000- FFFF
F0 90-BF 80-BF 80-BF 10000- 3FFFF
F1-F3 80-BF 80-BF 80-BF 40000- FFFFF
F4 80-8F 80-BF 80-BF 100000-10FFFF
@@ -88,9 +92,16 @@ struct utf8_test utf8_testData[] = {
{ 0, 2, { 0xC0, 0xAF, 0x00, 0x00, 0x00 }}, /* 15 */
{ 0, 3, { 0xE0, 0x9F, 0x80, 0x00, 0x00 }}, /* 16 */
{ 0, 4, { 0xF0, 0x93, 0xB2, 0xC1, 0x00 }}, /* 17 */
-/* for all > 17 use "short" buffer lengths to detect over-run */
+
+ { 1, 3, { 0xED, 0x9F, 0xBF, 0x00, 0x00 }}, /* 18 */
+ { 1, 3, { 0xEE, 0x80, 0x80, 0x00, 0x00 }}, /* 19 */
+ { 0, 3, { 0xED, 0xA0, 0x80, 0x00, 0x00 }}, /* 20 */
+ { 0, 3, { 0xED, 0xBF, 0xBF, 0x00, 0x00 }}, /* 21 */
+
+/* for all > 21 use "short" buffer lengths to detect over-run */
{ 0, 4, { 0xF0, 0x93, 0xB2, 0xC3, 0x00 }}, /* 22 use short buflen */
- { 0, 0, { 0x00, 0x00, 0x00, 0x00, 0x00 }}
+ { 0, 0, { 0x00, 0x00, 0x00, 0x00, 0x00 }},
+
};
int test01() {
@@ -103,8 +114,8 @@ int test01() {
for (i = 0; utf8_testData[i].utf8_len; i++) {
wantVal1 = wantVal2 = utf8_testData[i].utf8_legal;
gotVal1 = isLegalUTF8(&(utf8_testData[i].utf8_seq[0]), utf8_testData[i].utf8_len);
- /* use truncated length for tests over 17 */
- if (i <= 17) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; }
+ /* use truncated length for tests over 21 */
+ if (i <= 21) { len2 = 4; } else { len2 = utf8_testData[i].utf8_len-1; wantVal2 = 0; }
gotVal2 = isLegalUTF8Sequence(&(utf8_testData[i].utf8_seq[0]), &(utf8_testData[i].utf8_seq[0])+len2);
if ((gotVal1 != wantVal1) || (gotVal2 != wantVal2)) {
printf("Test01 error: seq %d is %d & %d (should be %d & %d) for bytes (%x,%x,%x,%x,%x,) & len %d\n",
@@ -380,10 +391,46 @@ int test03() {
return 1;
}
+/* ---------------------------------------------------------------------
+ test04 - Test an illegal UTF-32 value > 10FFFF conversion to UTF-8.
+ Expect it will be turned into UNI_REPLACEMENT_CHAR. Only the returned status is checked here (it must be sourceIllegal); the bytes written to utf8_buf are not inspected.
+ Returns 1 when the status is as expected; on any other status it reports to stderr and calls exit(1), so it never returns 0.
+ --------------------------------------------------------------------- */
+
+int test04() {
+ int i, n;
+ ConversionResult result;
+ UTF32 utf32_buf[2];
+ UTF8 utf8_buf[8];
+ UTF32 *utf32SourceStart, *utf32TargetStart; /* utf32TargetStart is never used in this test */
+ UTF8 *utf8SourceStart, *utf8TargetStart; /* utf8SourceStart is never used in this test */
+
+ printf("Begin Test04\n"); fflush(stdout);
+
+ i = 0x10FFFF + 21; /* an arbitrary value above 0x10FFFF, i.e. not a legal code point */
+
+ utf32_buf[0] = i; utf32_buf[1] = 0; /* a single input value; &utf32_buf[1] is passed below as the second argument (presumably the source end - confirm against the converter's prototype) */
+ for (n = 0; n < 8; n++) utf8_buf[n] = 0; /* zero all 8 bytes of the output buffer */
+
+ utf32SourceStart = utf32_buf;
+ utf8TargetStart = utf8_buf;
+
+ /*
+ * Test UTF32 -> UTF8, with legality check on (strictConversion is passed as the flags argument).
+ * NOTE(review): the target limit passed is &utf8_buf[7], not &utf8_buf[8]; if that pointer is an exclusive end, only 7 of the 8 bytes are offered - confirm.
+ */
+ result = ConvertUTF32toUTF8((const UTF32 **) &utf32SourceStart, &(utf32_buf[1]), & utf8TargetStart, &(utf8_buf[7]), strictConversion);
+ if (result != sourceIllegal) {
+ fprintf(stderr, "Test04A fatal error: result %d for input %08x\n", result, utf32_buf[0]); exit(1); /* terminates the whole program. NOTE(review): %08x assumes UTF32 is int-sized; the typedef is not visible here - confirm */
+ }
+
+ return 1;
+}
+
/* --------------------------------------------------------------------- */
main() {
printf("Three tests of round-trip conversions will be performed.\n");
+ printf("One test of illegal UTF-32 will be peroformed.\n");
printf("Two illegal result messages are expected; one in test 02A; one in test 03A.\n");
printf("These are for tests of Surrogate conversion.\n\n");
fflush(stdout);
@@ -393,4 +440,6 @@ main() {
else { printf("-------- Test02 failed. --------\n\n"); }
if (test03()) { printf("******** Test03 succeeded without error. ********\n\n"); }
else { printf("-------- Test03 failed. --------\n\n"); }
+ if (test04()) { printf("******** Test04 succeeded without error. ********\n\n"); }
+ else { printf("-------- Test04 failed. --------\n\n"); }
}