Update UTF-8 validation checks.

pull/467/head
Roger A. Light 8 years ago
parent df9ad5f0bd
commit b11855821e

@@ -60,11 +60,11 @@ int mosquitto_validate_utf8(const char *str, int len)
} }
/* Reconstruct full code point */ /* Reconstruct full code point */
for(j=0; j<codelen-1; j++){ if(i == len-codelen+1){
if(i == len-1){
/* Not enough data */ /* Not enough data */
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
} }
for(j=0; j<codelen-1; j++){
if((ustr[++i] & 0xC0) != 0x80){ if((ustr[++i] & 0xC0) != 0x80){
/* Not a continuation byte */ /* Not a continuation byte */
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
@@ -77,12 +77,12 @@ int mosquitto_validate_utf8(const char *str, int len)
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
} }
/* Check for overlong encodings */ /* Check for overlong or out of range encodings */
if(codelen == 2 && codepoint < 0x0080){ if(codelen == 2 && codepoint < 0x0080){
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
}else if(codelen == 3 && codepoint < 0x0800){ }else if(codelen == 3 && codepoint < 0x0800){
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
}else if(codelen == 4 && codepoint < 0x10000){ }else if(codelen == 4 && (codepoint < 0x10000 || codepoint > 0x10FFFF)){
return MOSQ_ERR_MALFORMED_UTF8; return MOSQ_ERR_MALFORMED_UTF8;
} }
} }

@@ -53,7 +53,8 @@ int main(int argc, char *argv[])
assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\""); assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\"");
assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\""); assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\"");
assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\""); assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\"");
assert_valid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\""); /* This used to be valid in pre-2003 utf-8 */
assert_invalid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* 3 Malformed sequences */ /* 3 Malformed sequences */
/* 3.1 Unexpected continuation bytes */ /* 3.1 Unexpected continuation bytes */

@@ -53,7 +53,8 @@ int main(int argc, char *argv[])
assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\""); assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\"");
assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\""); assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\"");
assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\""); assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\"");
assert_valid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\""); /* This used to be valid in pre-2003 utf-8 */
assert_invalid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* 3 Malformed sequences */ /* 3 Malformed sequences */
/* 3.1 Unexpected continuation bytes */ /* 3.1 Unexpected continuation bytes */

Loading…
Cancel
Save