Check client topic inputs for valid UTF-8.

pull/207/head
Roger A. Light 9 years ago
parent fa2d3143a4
commit a8a5daf06b

@ -89,4 +89,4 @@ Create a new bug:
Be sure to search for existing bugs before you create another one. Remember
that contributions are always welcome!
- [Create new Paho bug](https://github.com/eclipse/mosquitto/issues)
- [Create new Mosquitto bug](https://github.com/eclipse/mosquitto/issues)

@ -39,6 +39,10 @@ Client library:
processing messages from a broker very straightforward. An example of its use
is in examples/subscribe_simple.
- Connections now default to using MQTT v3.1.1.
- Add mosquitto_validate_utf8() to check whether a string is valid UTF-8
according to the UTF-8 spec and to the additional restrictions imposed by
the MQTT spec.
- Topic inputs are checked for UTF-8 validity.
Client:
- Add -x to mosquitto_sub for printing the payload in hexadecimal format.

@ -240,8 +240,12 @@ int client_config_load(struct mosq_config *cfg, int pub_or_sub, int argc, char *
return MOSQ_ERR_SUCCESS;
}
int cfg_add_topic(struct mosq_config *cfg, int pub_or_sub, char *topic)
int cfg_add_topic(struct mosq_config *cfg, int pub_or_sub, char *topic, const char *arg)
{
if(mosquitto_validate_utf8(topic, strlen(topic))){
fprintf(stderr, "Error: Malformed UTF-8 in %s argument.\n\n", arg);
return 1;
}
if(pub_or_sub == CLIENT_PUB){
if(mosquitto_pub_topic_check(topic) == MOSQ_ERR_INVAL){
fprintf(stderr, "Error: Invalid publish topic '%s', does it contain '+' or '#'?\n", topic);
@ -439,7 +443,7 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c
topic = strchr(url, '/');
*topic++ = 0;
if(cfg_add_topic(cfg, pub_or_sub, topic))
if(cfg_add_topic(cfg, pub_or_sub, topic, "-L topic"))
return 1;
tmp = strchr(url, '@');
@ -594,7 +598,7 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c
fprintf(stderr, "Error: -t argument given but no topic specified.\n\n");
return 1;
}else{
if(cfg_add_topic(cfg, pub_or_sub, argv[i + 1]))
if(cfg_add_topic(cfg, pub_or_sub, argv[i + 1], "-t"))
return 1;
i++;
}
@ -606,6 +610,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c
fprintf(stderr, "Error: -T argument given but no topic filter specified.\n\n");
return 1;
}else{
if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){
fprintf(stderr, "Error: Malformed UTF-8 in -T argument.\n\n");
return 1;
}
if(mosquitto_sub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){
fprintf(stderr, "Error: Invalid filter topic '%s', are all '+' and '#' wildcards correct?\n", argv[i+1]);
return 1;
@ -627,6 +635,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c
fprintf(stderr, "Error: -U argument given but no unsubscribe topic specified.\n\n");
return 1;
}else{
if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){
fprintf(stderr, "Error: Malformed UTF-8 in -U argument.\n\n");
return 1;
}
if(mosquitto_sub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){
fprintf(stderr, "Error: Invalid unsubscribe topic '%s', are all '+' and '#' wildcards correct?\n", argv[i+1]);
return 1;
@ -694,6 +706,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c
fprintf(stderr, "Error: --will-topic argument given but no will topic specified.\n\n");
return 1;
}else{
if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){
fprintf(stderr, "Error: Malformed UTF-8 in --will-topic argument.\n\n");
return 1;
}
if(mosquitto_pub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){
fprintf(stderr, "Error: Invalid will topic '%s', does it contain '+' or '#'?\n", argv[i+1]);
return 1;

@ -57,6 +57,7 @@ set(C_SRC
thread_mosq.c
time_mosq.c
tls_mosq.c
utf8_mosq.c
util_mosq.c util_mosq.h
will_mosq.c will_mosq.h)

@ -29,6 +29,7 @@ MOSQ_OBJS=mosquitto.o \
thread_mosq.o \
time_mosq.o \
tls_mosq.o \
utf8_mosq.o \
util_mosq.o \
will_mosq.o
@ -152,6 +153,9 @@ time_mosq.o : time_mosq.c
tls_mosq.o : tls_mosq.c
${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@
utf8_mosq.o : utf8_mosq.c
${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@
util_mosq.o : util_mosq.c util_mosq.h
${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@

@ -106,6 +106,11 @@ int topic_matches_sub(const char *sub, const char *topic, bool *result)
return mosquitto_topic_matches_sub(sub, topic, result);
}
/* mosqpp convenience wrapper: forwards str and len unchanged to the C
 * library's mosquitto_validate_utf8() and hands back its result code. */
int validate_utf8(const char *str, int len)
{
	const int rc = mosquitto_validate_utf8(str, len);
	return rc;
}
int subscribe_simple(
struct mosquitto_message **messages,
int msg_count,

@ -41,6 +41,7 @@ mosqpp_EXPORT int lib_version(int *major, int *minor, int *revision);
mosqpp_EXPORT int lib_init();
mosqpp_EXPORT int lib_cleanup();
mosqpp_EXPORT int topic_matches_sub(const char *sub, const char *topic, bool *result);
mosqpp_EXPORT int validate_utf8(const char *str, int len);
mosqpp_EXPORT int subscribe_simple(
struct mosquitto_message **messages,
int msg_count,

@ -84,4 +84,5 @@ MOSQ_1.5 {
mosquitto_subscribe_simple;
mosquitto_subscribe_callback;
mosquitto_message_free_contents;
mosquitto_validate_utf8;
} MOSQ_1.4;

@ -549,6 +549,7 @@ int mosquitto_publish(struct mosquitto *mosq, int *mid, const char *topic, int p
if(!mosq || !topic || qos<0 || qos>2) return MOSQ_ERR_INVAL;
if(STREMPTY(topic)) return MOSQ_ERR_INVAL;
/* mosquitto_validate_utf8() yields a non-zero error code for a bad topic, so
 * reject on a non-zero result (the previous negated test rejected valid
 * topics and let malformed ones through). */
if(mosquitto_validate_utf8(topic, strlen(topic))) return MOSQ_ERR_MALFORMED_UTF8;
if(payloadlen < 0 || payloadlen > MQTT_MAX_PAYLOAD) return MOSQ_ERR_PAYLOAD_SIZE;
if(mosquitto_pub_topic_check(topic) != MOSQ_ERR_SUCCESS){
@ -614,6 +615,7 @@ int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const char *sub, int q
if(mosq->sock == INVALID_SOCKET) return MOSQ_ERR_NO_CONN;
if(mosquitto_sub_topic_check(sub)) return MOSQ_ERR_INVAL;
if(mosquitto_validate_utf8(sub, strlen(sub))) return MOSQ_ERR_MALFORMED_UTF8;
return send__subscribe(mosq, mid, sub, qos);
}
@ -624,6 +626,7 @@ int mosquitto_unsubscribe(struct mosquitto *mosq, int *mid, const char *sub)
if(mosq->sock == INVALID_SOCKET) return MOSQ_ERR_NO_CONN;
if(mosquitto_sub_topic_check(sub)) return MOSQ_ERR_INVAL;
if(mosquitto_validate_utf8(sub, strlen(sub))) return MOSQ_ERR_MALFORMED_UTF8;
return send__unsubscribe(mosq, mid, sub);
}
@ -1293,6 +1296,8 @@ const char *mosquitto_strerror(int mosq_errno)
return "Lookup error.";
case MOSQ_ERR_PROXY:
return "Proxy error.";
case MOSQ_ERR_MALFORMED_UTF8:
return "Malformed UTF-8";
default:
return "Unknown error.";
}

@ -81,7 +81,8 @@ enum mosq_err_t {
MOSQ_ERR_ERRNO = 14,
MOSQ_ERR_EAI = 15,
MOSQ_ERR_PROXY = 16,
MOSQ_ERR_PLUGIN_DEFER = 17
MOSQ_ERR_PLUGIN_DEFER = 17,
MOSQ_ERR_MALFORMED_UTF8 = 18
};
/* Error values */
@ -280,9 +281,10 @@ libmosq_EXPORT int mosquitto_reinitialise(struct mosquitto *mosq, const char *id
*
* Returns:
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large.
* MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8.
*/
libmosq_EXPORT int mosquitto_will_set(struct mosquitto *mosq, const char *topic, int payloadlen, const void *payload, int qos, bool retain);
@ -587,13 +589,14 @@ libmosq_EXPORT int mosquitto_disconnect(struct mosquitto *mosq);
* retain - set to true to make the message retained.
*
* Returns:
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_PROTOCOL - if there is a protocol error communicating with the
* broker.
* MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large.
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_PROTOCOL - if there is a protocol error communicating with the
* broker.
* MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large.
* MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8
*
* See Also:
* <mosquitto_max_inflight_messages_set>
@ -615,10 +618,11 @@ libmosq_EXPORT int mosquitto_publish(struct mosquitto *mosq, int *mid, const cha
* qos - the requested Quality of Service for this subscription.
*
* Returns:
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8
*/
libmosq_EXPORT int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const char *sub, int qos);
@ -636,10 +640,11 @@ libmosq_EXPORT int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const c
* sub - the unsubscription pattern.
*
* Returns:
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_SUCCESS - on success.
* MOSQ_ERR_INVAL - if the input parameters were invalid.
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_NO_CONN - if the client isn't connected to a broker.
* MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8
*/
libmosq_EXPORT int mosquitto_unsubscribe(struct mosquitto *mosq, int *mid, const char *sub);
@ -1424,8 +1429,9 @@ libmosq_EXPORT const char *mosquitto_connack_string(int connack_code);
* count - an int pointer to store the number of items in the topics array.
*
* Returns:
* MOSQ_ERR_SUCCESS - on success
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_SUCCESS - on success
* MOSQ_ERR_NOMEM - if an out of memory condition occurred.
* MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8
*
* Example:
*
@ -1501,8 +1507,9 @@ libmosq_EXPORT int mosquitto_topic_matches_sub(const char *sub, const char *topi
* topic - the topic to check
*
* Returns:
* MOSQ_ERR_SUCCESS - for a valid topic
* MOSQ_ERR_INVAL - if the topic contains a + or a #, or if it is too long.
* MOSQ_ERR_SUCCESS - for a valid topic
* MOSQ_ERR_INVAL - if the topic contains a + or a #, or if it is too long.
* MOSQ_ERR_MALFORMED_UTF8 - if topic is not valid UTF-8
*
* See Also:
* <mosquitto_sub_topic_check>
@ -1527,9 +1534,10 @@ libmosq_EXPORT int mosquitto_pub_topic_check(const char *topic);
* topic - the topic to check
*
* Returns:
* MOSQ_ERR_SUCCESS - for a valid topic
* MOSQ_ERR_INVAL - if the topic contains a + or a # that is in an invalid
* position, or if it is too long.
* MOSQ_ERR_SUCCESS - for a valid topic
* MOSQ_ERR_INVAL - if the topic contains a + or a # that is in an
* invalid position, or if it is too long.
* MOSQ_ERR_MALFORMED_UTF8 - if topic is not valid UTF-8
*
* See Also:
* <mosquitto_sub_topic_check>
@ -1665,6 +1673,25 @@ libmosq_EXPORT int mosquitto_subscribe_callback(
const char *password,
const struct libmosquitto_will *will,
const struct libmosquitto_tls *tls);
/*
* Function: mosquitto_validate_utf8
*
* Helper function to check whether a string is valid UTF-8, according to
* the UTF-8 spec and the additional restrictions imposed by the MQTT spec.
*
* Parameters:
* str - a string to check
* len - the length of the string in bytes
*
* Returns:
* MOSQ_ERR_SUCCESS - on success
* MOSQ_ERR_INVAL - if str is NULL or len<1 or len>65536
* MOSQ_ERR_MALFORMED_UTF8 - if str is not valid UTF-8
*/
libmosq_EXPORT int mosquitto_validate_utf8(const char *str, int len);
#ifdef __cplusplus
}
#endif

@ -0,0 +1,91 @@
/*
Copyright (c) 2016 Roger Light <roger@atchoo.org>
All rights reserved. This program and the accompanying materials
are made available under the terms of the Eclipse Public License v1.0
and Eclipse Distribution License v1.0 which accompany this distribution.
The Eclipse Public License is available at
http://www.eclipse.org/legal/epl-v10.html
and the Eclipse Distribution License is available at
http://www.eclipse.org/org/documents/edl-v10.php.
Contributors:
Roger Light - initial implementation.
*/
#include <stdio.h>
#include "mosquitto.h"
/*
 * Check that the first len bytes of str are well-formed UTF-8.
 *
 * Only len bytes are read; the terminating NUL (if any) is not required and
 * an embedded NUL byte inside the checked range is treated as an error.
 *
 * Parameters:
 *  str - the bytes to check.
 *  len - the number of bytes to check.
 *
 * Returns:
 *  MOSQ_ERR_SUCCESS        - every sequence in the range is valid.
 *  MOSQ_ERR_INVAL          - str is NULL, len < 1 or len > 65536.
 *  MOSQ_ERR_MALFORMED_UTF8 - the range contains a NUL byte, an invalid lead
 *                            byte, a truncated sequence, a bad continuation
 *                            byte, a UTF-16 surrogate, an overlong encoding
 *                            or a code point above U+10FFFF.
 */
int mosquitto_validate_utf8(const char *str, int len)
{
	int i;
	int j;
	int codelen;
	int codepoint;
	const unsigned char *ustr = (const unsigned char *)str;

	if(!str) return MOSQ_ERR_INVAL;
	/* NOTE(review): an MQTT string carries a 16-bit length, so 65535 may be
	 * the intended upper bound - confirm before tightening. */
	if(len < 1 || len > 65536) return MOSQ_ERR_INVAL;

	for(i=0; i<len; i++){
		/* Classify the lead byte and extract its payload bits. */
		if(ustr[i] == 0){
			return MOSQ_ERR_MALFORMED_UTF8;
		}else if(ustr[i] <= 0x7f){
			codelen = 1;
			codepoint = ustr[i];
		}else if((ustr[i] & 0xE0) == 0xC0){
			/* 110xxxxx - 2 byte sequence */
			if(ustr[i] == 0xC0 || ustr[i] == 0xC1){
				/* These lead bytes can only start overlong encodings. */
				return MOSQ_ERR_MALFORMED_UTF8;
			}
			codelen = 2;
			codepoint = (ustr[i] & 0x1F);
		}else if((ustr[i] & 0xF0) == 0xE0){
			/* 1110xxxx - 3 byte sequence */
			codelen = 3;
			codepoint = (ustr[i] & 0x0F);
		}else if((ustr[i] & 0xF8) == 0xF0){
			/* 11110xxx - 4 byte sequence */
			if(ustr[i] > 0xF4){
				/* Invalid, this would produce values > 0x10FFFF. */
				return MOSQ_ERR_MALFORMED_UTF8;
			}
			codelen = 4;
			codepoint = (ustr[i] & 0x07);
		}else{
			/* Unexpected continuation byte. */
			return MOSQ_ERR_MALFORMED_UTF8;
		}

		/* Reconstruct full code point */
		for(j=0; j<codelen-1; j++){
			if(i == len-1){
				/* Not enough data */
				return MOSQ_ERR_MALFORMED_UTF8;
			}
			if((ustr[++i] & 0xC0) != 0x80){
				/* Not a continuation byte */
				return MOSQ_ERR_MALFORMED_UTF8;
			}
			codepoint = (codepoint<<6) | (ustr[i] & 0x3F);
		}

		/* Check for UTF-16 high/low surrogates */
		if(codepoint >= 0xD800 && codepoint <= 0xDFFF){
			return MOSQ_ERR_MALFORMED_UTF8;
		}

		/* A 0xF4 lead byte followed by 0x90..0xBF still decodes past the
		 * end of the Unicode range, so bound the code point itself. */
		if(codepoint > 0x10FFFF){
			return MOSQ_ERR_MALFORMED_UTF8;
		}

		/* Check for overlong encodings */
		if(codelen == 2 && codepoint < 0x0080){
			return MOSQ_ERR_MALFORMED_UTF8;
		}else if(codelen == 3 && codepoint < 0x0800){
			return MOSQ_ERR_MALFORMED_UTF8;
		}else if(codelen == 4 && codepoint < 0x10000){
			return MOSQ_ERR_MALFORMED_UTF8;
		}
	}
	return MOSQ_ERR_SUCCESS;
}

@ -37,6 +37,7 @@ int will__set(struct mosquitto *mosq, const char *topic, int payloadlen, const v
if(payloadlen > 0 && !payload) return MOSQ_ERR_INVAL;
if(mosquitto_pub_topic_check(topic)) return MOSQ_ERR_INVAL;
if(mosquitto_validate_utf8(topic, strlen(topic))) return MOSQ_ERR_MALFORMED_UTF8;
if(mosq->will){
mosquitto__free(mosq->will->topic);

@ -45,6 +45,7 @@ ifeq ($(WITH_TLS),yes)
endif
./09-util-topic-matching.py $@/09-util-topic-matching.test
./09-util-topic-tokenise.py $@/09-util-topic-tokenise.test
./09-util-utf8-validate.py $@/09-util-utf8-validate.test
clean :
$(MAKE) -C c clean

@ -80,6 +80,9 @@ all : 01 02 03 04 08 09
09-util-topic-tokenise.test : 09-util-topic-tokenise.c
$(CC) $< -o $@ $(CFLAGS) $(LIBS)
09-util-utf8-validate.test : 09-util-utf8-validate.c
$(CC) $< -o $@ $(CFLAGS) $(LIBS)
01 : 01-con-discon-success.test 01-will-set.test 01-unpwd-set.test 01-will-unpwd-set.test 01-no-clean-session.test 01-keepalive-pingreq.test
02 : 02-subscribe-qos0.test 02-subscribe-qos1.test 02-subscribe-qos2.test 02-unsubscribe.test
@ -90,7 +93,7 @@ all : 01 02 03 04 08 09
08 : 08-ssl-connect-no-auth.test 08-ssl-connect-cert-auth.test 08-ssl-connect-cert-auth-enc.test 08-ssl-bad-cacert.test 08-ssl-fake-cacert.test
09 : 09-util-topic-matching.test 09-util-topic-tokenise.test
09 : 09-util-topic-matching.test 09-util-topic-tokenise.test 09-util-utf8-validate.test
reallyclean : clean
-rm -f *.orig

Loading…
Cancel
Save