diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 005ca232..f873b378 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -89,4 +89,4 @@ Create a new bug: Be sure to search for existing bugs before you create another one. Remember that contributions are always welcome! -- [Create new Paho bug](https://github.com/eclipse/mosquitto/issues) +- [Create new Mosquitto bug](https://github.com/eclipse/mosquitto/issues) diff --git a/ChangeLog.txt b/ChangeLog.txt index 535e268f..55907cef 100644 --- a/ChangeLog.txt +++ b/ChangeLog.txt @@ -39,6 +39,10 @@ Client library: processing messages from a broker very straightforward. An example of its use is in examples/subscribe_simple. - Connections now default to using MQTT v3.1.1. +- Add mosquitto_validate_utf8() to check whether a string is valid UTF-8 + according to the UTF-8 spec and to the additional restrictions imposed by + the MQTT spec. +- Topic inputs are checked for UTF-8 validity. Client: - Add -x to mosquitto_sub for printing the payload in hexadecimal format. 
diff --git a/client/client_shared.c b/client/client_shared.c index 8fe0be36..10a53bfd 100644 --- a/client/client_shared.c +++ b/client/client_shared.c @@ -240,8 +240,12 @@ int client_config_load(struct mosq_config *cfg, int pub_or_sub, int argc, char * return MOSQ_ERR_SUCCESS; } -int cfg_add_topic(struct mosq_config *cfg, int pub_or_sub, char *topic) +int cfg_add_topic(struct mosq_config *cfg, int pub_or_sub, char *topic, const char *arg) { + if(mosquitto_validate_utf8(topic, strlen(topic))){ + fprintf(stderr, "Error: Malformed UTF-8 in %s argument.\n\n", arg); + return 1; + } if(pub_or_sub == CLIENT_PUB){ if(mosquitto_pub_topic_check(topic) == MOSQ_ERR_INVAL){ fprintf(stderr, "Error: Invalid publish topic '%s', does it contain '+' or '#'?\n", topic); @@ -439,7 +443,7 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c topic = strchr(url, '/'); *topic++ = 0; - if(cfg_add_topic(cfg, pub_or_sub, topic)) + if(cfg_add_topic(cfg, pub_or_sub, topic, "-L topic")) return 1; tmp = strchr(url, '@'); @@ -594,7 +598,7 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c fprintf(stderr, "Error: -t argument given but no topic specified.\n\n"); return 1; }else{ - if(cfg_add_topic(cfg, pub_or_sub, argv[i + 1])) + if(cfg_add_topic(cfg, pub_or_sub, argv[i + 1], "-t")) return 1; i++; } @@ -606,6 +610,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c fprintf(stderr, "Error: -T argument given but no topic filter specified.\n\n"); return 1; }else{ + if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){ + fprintf(stderr, "Error: Malformed UTF-8 in -T argument.\n\n"); + return 1; + } if(mosquitto_sub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){ fprintf(stderr, "Error: Invalid filter topic '%s', are all '+' and '#' wildcards correct?\n", argv[i+1]); return 1; @@ -627,6 +635,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c fprintf(stderr, "Error: -U 
argument given but no unsubscribe topic specified.\n\n"); return 1; }else{ + if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){ + fprintf(stderr, "Error: Malformed UTF-8 in -U argument.\n\n"); + return 1; + } if(mosquitto_sub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){ fprintf(stderr, "Error: Invalid unsubscribe topic '%s', are all '+' and '#' wildcards correct?\n", argv[i+1]); return 1; @@ -694,6 +706,10 @@ int client_config_line_proc(struct mosq_config *cfg, int pub_or_sub, int argc, c fprintf(stderr, "Error: --will-topic argument given but no will topic specified.\n\n"); return 1; }else{ + if(mosquitto_validate_utf8(argv[i+1], strlen(argv[i+1]))){ + fprintf(stderr, "Error: Malformed UTF-8 in --will-topic argument.\n\n"); + return 1; + } if(mosquitto_pub_topic_check(argv[i+1]) == MOSQ_ERR_INVAL){ fprintf(stderr, "Error: Invalid will topic '%s', does it contain '+' or '#'?\n", argv[i+1]); return 1; diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index c7c2f53f..8ad9325e 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -57,6 +57,7 @@ set(C_SRC thread_mosq.c time_mosq.c tls_mosq.c + utf8_mosq.c util_mosq.c util_mosq.h will_mosq.c will_mosq.h) diff --git a/lib/Makefile b/lib/Makefile index 28d4c555..e99f19e4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -29,6 +29,7 @@ MOSQ_OBJS=mosquitto.o \ thread_mosq.o \ time_mosq.o \ tls_mosq.o \ + utf8_mosq.o \ util_mosq.o \ will_mosq.o @@ -152,6 +153,9 @@ time_mosq.o : time_mosq.c tls_mosq.o : tls_mosq.c ${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@ +utf8_mosq.o : utf8_mosq.c + ${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@ + util_mosq.o : util_mosq.c util_mosq.h ${CROSS_COMPILE}$(CC) $(LIB_CFLAGS) -c $< -o $@ diff --git a/lib/cpp/mosquittopp.cpp b/lib/cpp/mosquittopp.cpp index 541d5760..090910e4 100644 --- a/lib/cpp/mosquittopp.cpp +++ b/lib/cpp/mosquittopp.cpp @@ -106,6 +106,11 @@ int topic_matches_sub(const char *sub, const char *topic, bool *result) return mosquitto_topic_matches_sub(sub, topic, 
result); } +int validate_utf8(const char *str, int len) +{ + return mosquitto_validate_utf8(str, len); +} + int subscribe_simple( struct mosquitto_message **messages, int msg_count, diff --git a/lib/cpp/mosquittopp.h b/lib/cpp/mosquittopp.h index 36b05ec4..4150caf4 100644 --- a/lib/cpp/mosquittopp.h +++ b/lib/cpp/mosquittopp.h @@ -41,6 +41,7 @@ mosqpp_EXPORT int lib_version(int *major, int *minor, int *revision); mosqpp_EXPORT int lib_init(); mosqpp_EXPORT int lib_cleanup(); mosqpp_EXPORT int topic_matches_sub(const char *sub, const char *topic, bool *result); +mosqpp_EXPORT int validate_utf8(const char *str, int len); mosqpp_EXPORT int subscribe_simple( struct mosquitto_message **messages, int msg_count, diff --git a/lib/linker.version b/lib/linker.version index f2ea6acd..6b03f66d 100644 --- a/lib/linker.version +++ b/lib/linker.version @@ -84,4 +84,5 @@ MOSQ_1.5 { mosquitto_subscribe_simple; mosquitto_subscribe_callback; mosquitto_message_free_contents; + mosquitto_validate_utf8; } MOSQ_1.4; diff --git a/lib/mosquitto.c b/lib/mosquitto.c index e90346c7..d6501f16 100644 --- a/lib/mosquitto.c +++ b/lib/mosquitto.c @@ -549,6 +549,7 @@ int mosquitto_publish(struct mosquitto *mosq, int *mid, const char *topic, int p if(!mosq || !topic || qos<0 || qos>2) return MOSQ_ERR_INVAL; if(STREMPTY(topic)) return MOSQ_ERR_INVAL; + if(mosquitto_validate_utf8(topic, strlen(topic))) return MOSQ_ERR_MALFORMED_UTF8; if(payloadlen < 0 || payloadlen > MQTT_MAX_PAYLOAD) return MOSQ_ERR_PAYLOAD_SIZE; if(mosquitto_pub_topic_check(topic) != MOSQ_ERR_SUCCESS){ @@ -614,6 +615,7 @@ int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const char *sub, int q if(mosq->sock == INVALID_SOCKET) return MOSQ_ERR_NO_CONN; if(mosquitto_sub_topic_check(sub)) return MOSQ_ERR_INVAL; + if(mosquitto_validate_utf8(sub, strlen(sub))) return MOSQ_ERR_MALFORMED_UTF8; return send__subscribe(mosq, mid, sub, qos); } @@ -624,6 +626,7 @@ int mosquitto_unsubscribe(struct mosquitto *mosq, int *mid, const char 
*sub) if(mosq->sock == INVALID_SOCKET) return MOSQ_ERR_NO_CONN; if(mosquitto_sub_topic_check(sub)) return MOSQ_ERR_INVAL; + if(mosquitto_validate_utf8(sub, strlen(sub))) return MOSQ_ERR_MALFORMED_UTF8; return send__unsubscribe(mosq, mid, sub); } @@ -1293,6 +1296,8 @@ const char *mosquitto_strerror(int mosq_errno) return "Lookup error."; case MOSQ_ERR_PROXY: return "Proxy error."; + case MOSQ_ERR_MALFORMED_UTF8: + return "Malformed UTF-8"; default: return "Unknown error."; } diff --git a/lib/mosquitto.h b/lib/mosquitto.h index b41a94c5..70268260 100644 --- a/lib/mosquitto.h +++ b/lib/mosquitto.h @@ -81,7 +81,8 @@ enum mosq_err_t { MOSQ_ERR_ERRNO = 14, MOSQ_ERR_EAI = 15, MOSQ_ERR_PROXY = 16, - MOSQ_ERR_PLUGIN_DEFER = 17 + MOSQ_ERR_PLUGIN_DEFER = 17, + MOSQ_ERR_MALFORMED_UTF8 = 18 }; /* Error values */ @@ -280,9 +281,10 @@ libmosq_EXPORT int mosquitto_reinitialise(struct mosquitto *mosq, const char *id * * Returns: * MOSQ_ERR_SUCCESS - on success. - * MOSQ_ERR_INVAL - if the input parameters were invalid. - * MOSQ_ERR_NOMEM - if an out of memory condition occurred. - * MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large. + * MOSQ_ERR_INVAL - if the input parameters were invalid. + * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large. + * MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8. */ libmosq_EXPORT int mosquitto_will_set(struct mosquitto *mosq, const char *topic, int payloadlen, const void *payload, int qos, bool retain); @@ -587,13 +589,14 @@ libmosq_EXPORT int mosquitto_disconnect(struct mosquitto *mosq); * retain - set to true to make the message retained. * * Returns: - * MOSQ_ERR_SUCCESS - on success. - * MOSQ_ERR_INVAL - if the input parameters were invalid. - * MOSQ_ERR_NOMEM - if an out of memory condition occurred. - * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. - * MOSQ_ERR_PROTOCOL - if there is a protocol error communicating with the - * broker. 
- * MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large. + * MOSQ_ERR_SUCCESS - on success. + * MOSQ_ERR_INVAL - if the input parameters were invalid. + * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. + * MOSQ_ERR_PROTOCOL - if there is a protocol error communicating with the + * broker. + * MOSQ_ERR_PAYLOAD_SIZE - if payloadlen is too large. + * MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8 * * See Also: * @@ -615,10 +618,11 @@ libmosq_EXPORT int mosquitto_publish(struct mosquitto *mosq, int *mid, const cha * qos - the requested Quality of Service for this subscription. * * Returns: - * MOSQ_ERR_SUCCESS - on success. - * MOSQ_ERR_INVAL - if the input parameters were invalid. - * MOSQ_ERR_NOMEM - if an out of memory condition occurred. - * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. + * MOSQ_ERR_SUCCESS - on success. + * MOSQ_ERR_INVAL - if the input parameters were invalid. + * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. + * MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8 */ libmosq_EXPORT int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const char *sub, int qos); @@ -636,10 +640,11 @@ libmosq_EXPORT int mosquitto_subscribe(struct mosquitto *mosq, int *mid, const c * sub - the unsubscription pattern. * * Returns: - * MOSQ_ERR_SUCCESS - on success. - * MOSQ_ERR_INVAL - if the input parameters were invalid. - * MOSQ_ERR_NOMEM - if an out of memory condition occurred. - * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. + * MOSQ_ERR_SUCCESS - on success. + * MOSQ_ERR_INVAL - if the input parameters were invalid. + * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_NO_CONN - if the client isn't connected to a broker. 
+ * MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8 */ libmosq_EXPORT int mosquitto_unsubscribe(struct mosquitto *mosq, int *mid, const char *sub); @@ -1424,8 +1429,9 @@ libmosq_EXPORT const char *mosquitto_connack_string(int connack_code); * count - an int pointer to store the number of items in the topics array. * * Returns: - * MOSQ_ERR_SUCCESS - on success - * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_SUCCESS - on success + * MOSQ_ERR_NOMEM - if an out of memory condition occurred. + * MOSQ_ERR_MALFORMED_UTF8 - if the topic is not valid UTF-8 * * Example: * @@ -1501,8 +1507,9 @@ libmosq_EXPORT int mosquitto_topic_matches_sub(const char *sub, const char *topi * topic - the topic to check * * Returns: - * MOSQ_ERR_SUCCESS - for a valid topic - * MOSQ_ERR_INVAL - if the topic contains a + or a #, or if it is too long. + * MOSQ_ERR_SUCCESS - for a valid topic + * MOSQ_ERR_INVAL - if the topic contains a + or a #, or if it is too long. + * MOSQ_ERR_MALFORMED_UTF8 - if topic is not valid UTF-8 * * See Also: * @@ -1527,9 +1534,10 @@ libmosq_EXPORT int mosquitto_pub_topic_check(const char *topic); * topic - the topic to check * * Returns: - * MOSQ_ERR_SUCCESS - for a valid topic - * MOSQ_ERR_INVAL - if the topic contains a + or a # that is in an invalid - * position, or if it is too long. + * MOSQ_ERR_SUCCESS - for a valid topic + * MOSQ_ERR_INVAL - if the topic contains a + or a # that is in an + * invalid position, or if it is too long. + * MOSQ_ERR_MALFORMED_UTF8 - if topic is not valid UTF-8 * * See Also: * @@ -1665,6 +1673,25 @@ libmosq_EXPORT int mosquitto_subscribe_callback( const char *password, const struct libmosquitto_will *will, const struct libmosquitto_tls *tls); + + +/* + * Function: mosquitto_validate_utf8 + * + * Helper function to validate whether a UTF-8 string is valid, according to + * the UTF-8 spec and the MQTT additions. 
+ * + * Parameters: + * str - a string to check + * len - the length of the string in bytes + * + * Returns: + * MOSQ_ERR_SUCCESS - on success + * MOSQ_ERR_INVAL - if str is NULL or len<0 or len>65536 + * MOSQ_ERR_MALFORMED_UTF8 - if str is not valid UTF-8 + */ +libmosq_EXPORT int mosquitto_validate_utf8(const char *str, int len); + #ifdef __cplusplus } #endif diff --git a/lib/utf8_mosq.c b/lib/utf8_mosq.c new file mode 100644 index 00000000..f20e9931 --- /dev/null +++ b/lib/utf8_mosq.c @@ -0,0 +1,91 @@ +/* +Copyright (c) 2016 Roger Light + +All rights reserved. This program and the accompanying materials +are made available under the terms of the Eclipse Public License v1.0 +and Eclipse Distribution License v1.0 which accompany this distribution. + +The Eclipse Public License is available at + http://www.eclipse.org/legal/epl-v10.html +and the Eclipse Distribution License is available at + http://www.eclipse.org/org/documents/edl-v10.php. + +Contributors: + Roger Light - initial implementation. +*/ + +#include +#include "mosquitto.h" + +int mosquitto_validate_utf8(const char *str, int len) +{ + int i; + int j; + int codelen; + int codepoint; + const unsigned char *ustr = (const unsigned char *)str; + + if(!str) return MOSQ_ERR_INVAL; + if(len < 1 || len > 65536) return MOSQ_ERR_INVAL; + + for(i=0; i 0xF4){ + /* Invalid, this would produce values > 0x10FFFF. */ + return MOSQ_ERR_MALFORMED_UTF8; + } + codelen = 4; + codepoint = (ustr[i] & 0x07); + }else{ + /* Unexpected continuation byte. 
*/ + return MOSQ_ERR_MALFORMED_UTF8; + } + + /* Reconstruct full code point */ + for(j=0; j= 0xD800 && codepoint <= 0xDFFF){ + return MOSQ_ERR_MALFORMED_UTF8; + } + + /* Check for overlong encodings */ + if(codelen == 2 && codepoint < 0x0080){ + return MOSQ_ERR_MALFORMED_UTF8; + }else if(codelen == 3 && codepoint < 0x0800){ + return MOSQ_ERR_MALFORMED_UTF8; + }else if(codelen == 4 && codepoint < 0x10000){ + return MOSQ_ERR_MALFORMED_UTF8; + } + } + return MOSQ_ERR_SUCCESS; +} + diff --git a/lib/will_mosq.c b/lib/will_mosq.c index 728ca299..eac40d5e 100644 --- a/lib/will_mosq.c +++ b/lib/will_mosq.c @@ -37,6 +37,7 @@ int will__set(struct mosquitto *mosq, const char *topic, int payloadlen, const v if(payloadlen > 0 && !payload) return MOSQ_ERR_INVAL; if(mosquitto_pub_topic_check(topic)) return MOSQ_ERR_INVAL; + if(mosquitto_validate_utf8(topic, strlen(topic))) return MOSQ_ERR_MALFORMED_UTF8; if(mosq->will){ mosquitto__free(mosq->will->topic); diff --git a/test/lib/Makefile b/test/lib/Makefile index dc27edd2..c069bbdb 100644 --- a/test/lib/Makefile +++ b/test/lib/Makefile @@ -45,6 +45,7 @@ ifeq ($(WITH_TLS),yes) endif ./09-util-topic-matching.py $@/09-util-topic-matching.test ./09-util-topic-tokenise.py $@/09-util-topic-tokenise.test + ./09-util-utf8-validate.py $@/09-util-utf8-validate.test clean : $(MAKE) -C c clean diff --git a/test/lib/c/Makefile b/test/lib/c/Makefile index 0484fedd..323d23ed 100644 --- a/test/lib/c/Makefile +++ b/test/lib/c/Makefile @@ -80,6 +80,9 @@ all : 01 02 03 04 08 09 09-util-topic-tokenise.test : 09-util-topic-tokenise.c $(CC) $< -o $@ $(CFLAGS) $(LIBS) +09-util-utf8-validate.test : 09-util-utf8-validate.c + $(CC) $< -o $@ $(CFLAGS) $(LIBS) + 01 : 01-con-discon-success.test 01-will-set.test 01-unpwd-set.test 01-will-unpwd-set.test 01-no-clean-session.test 01-keepalive-pingreq.test 02 : 02-subscribe-qos0.test 02-subscribe-qos1.test 02-subscribe-qos2.test 02-unsubscribe.test @@ -90,7 +93,7 @@ all : 01 02 03 04 08 09 08 : 
08-ssl-connect-no-auth.test 08-ssl-connect-cert-auth.test 08-ssl-connect-cert-auth-enc.test 08-ssl-bad-cacert.test 08-ssl-fake-cacert.test -09 : 09-util-topic-matching.test 09-util-topic-tokenise.test +09 : 09-util-topic-matching.test 09-util-topic-tokenise.test 09-util-utf8-validate.test reallyclean : clean -rm -f *.orig