Update UTF-8 validation checks.

pull/467/head
Roger A. Light 8 years ago
parent df9ad5f0bd
commit b11855821e

@ -60,11 +60,11 @@ int mosquitto_validate_utf8(const char *str, int len)
}
/* Reconstruct full code point */
if(i == len-codelen+1){
/* Not enough data */
return MOSQ_ERR_MALFORMED_UTF8;
}
for(j=0; j<codelen-1; j++){
if(i == len-1){
/* Not enough data */
return MOSQ_ERR_MALFORMED_UTF8;
}
if((ustr[++i] & 0xC0) != 0x80){
/* Not a continuation byte */
return MOSQ_ERR_MALFORMED_UTF8;
@ -77,12 +77,12 @@ int mosquitto_validate_utf8(const char *str, int len)
return MOSQ_ERR_MALFORMED_UTF8;
}
/* Check for overlong encodings */
/* Check for overlong or out of range encodings */
if(codelen == 2 && codepoint < 0x0080){
return MOSQ_ERR_MALFORMED_UTF8;
}else if(codelen == 3 && codepoint < 0x0800){
return MOSQ_ERR_MALFORMED_UTF8;
}else if(codelen == 4 && codepoint < 0x10000){
}else if(codelen == 4 && (codepoint < 0x10000 || codepoint > 0x10FFFF)){
return MOSQ_ERR_MALFORMED_UTF8;
}
}

@ -53,7 +53,8 @@ int main(int argc, char *argv[])
assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\"");
assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\"");
assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\"");
assert_valid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* U+110000 was encodable in pre-2003 UTF-8 (RFC 2279); RFC 3629 limits code points to U+10FFFF, so it must now be rejected */
assert_invalid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* 3 Malformed sequences */
/* 3.1 Unexpected continuation bytes */

@ -53,7 +53,8 @@ int main(int argc, char *argv[])
assert_valid("2.3.2 U-0000E000 = ee 80 80 = \"\"");
assert_valid("2.3.3 U-0000FFFD = ef bf bd = \"<EFBFBD>\"");
assert_valid("2.3.4 U-0010FFFF = f4 8f bf bf = \"􏿿\"");
assert_valid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* U+110000 was encodable in pre-2003 UTF-8 (RFC 2279); RFC 3629 limits code points to U+10FFFF, so it must now be rejected */
assert_invalid("2.3.5 U-00110000 = f4 90 80 80 = \"<EFBFBD><EFBFBD><EFBFBD><EFBFBD>\"");
/* 3 Malformed sequences */
/* 3.1 Unexpected continuation bytes */

Loading…
Cancel
Save