Просмотр исходного кода

Merge pull request #7175 from JimMoen/mqtt-frame-utf8-check

feat(frame): utf-8 string check in `strict_mode`
JimMoen 4 лет назад
Родитель
Сommit
88b69dd806
2 измененных файлов с 226 добавлено и 145 удалено
  1. 169 107
      apps/emqx/src/emqx_frame.erl
  2. 57 38
      apps/emqx/test/emqx_frame_SUITE.erl

+ 169 - 107
apps/emqx/src/emqx_frame.erl

@@ -218,8 +218,10 @@ packet(Header, Variable) ->
 packet(Header, Variable, Payload) ->
     #mqtt_packet{header = Header, variable = Variable, payload = Payload}.
 
-parse_packet(#mqtt_packet_header{type = ?CONNECT}, FrameBin, _Options) ->
-    {ProtoName, Rest} = parse_utf8_string(FrameBin),
+parse_packet(#mqtt_packet_header{type = ?CONNECT}
+            , FrameBin,
+             #{strict_mode := StrictMode}) ->
+    {ProtoName, Rest} = parse_utf8_string(FrameBin, StrictMode),
     <<BridgeTag:4, ProtoVer:4, Rest1/binary>> = Rest,
     % Note: Crash when reserved flag doesn't equal to 0, there is no strict
     % compliance with the MQTT5.0.
@@ -233,8 +235,8 @@ parse_packet(#mqtt_packet_header{type = ?CONNECT}, FrameBin, _Options) ->
       KeepAlive    : 16/big,
       Rest2/binary>> = Rest1,
 
-    {Properties, Rest3} = parse_properties(Rest2, ProtoVer),
-    {ClientId, Rest4} = parse_utf8_string(Rest3),
+    {Properties, Rest3} = parse_properties(Rest2, ProtoVer, StrictMode),
+    {ClientId, Rest4} = parse_utf8_string(Rest3, StrictMode),
     ConnPacket = #mqtt_packet_connect{proto_name  = ProtoName,
                                       proto_ver   = ProtoVer,
                                       is_bridge   = (BridgeTag =:= 8),
@@ -246,29 +248,31 @@ parse_packet(#mqtt_packet_header{type = ?CONNECT}, FrameBin, _Options) ->
                                       properties  = Properties,
                                       clientid    = ClientId
                                      },
-    {ConnPacket1, Rest5} = parse_will_message(ConnPacket, Rest4),
-    {Username, Rest6} = parse_utf8_string(Rest5, bool(UsernameFlag)),
-    {Password, <<>>} = parse_utf8_string(Rest6, bool(PasswordFlag)),
+    {ConnPacket1, Rest5} = parse_will_message(ConnPacket, Rest4, StrictMode),
+    {Username, Rest6} = parse_utf8_string(Rest5, StrictMode, bool(UsernameFlag)),
+    {Password, <<>>}  = parse_utf8_string(Rest6, StrictMode, bool(PasswordFlag)),
     ConnPacket1#mqtt_packet_connect{username = Username, password = Password};
 
 parse_packet(#mqtt_packet_header{type = ?CONNACK},
-             <<AckFlags:8, ReasonCode:8, Rest/binary>>, #{version := Ver}) ->
-    {Properties, <<>>} = parse_properties(Rest, Ver),
+             <<AckFlags:8, ReasonCode:8, Rest/binary>>,
+             #{version := Ver, strict_mode := StrictMode}) ->
+    {Properties, <<>>} = parse_properties(Rest, Ver, StrictMode),
     #mqtt_packet_connack{ack_flags   = AckFlags,
                          reason_code = ReasonCode,
                          properties  = Properties
                         };
 
-parse_packet(#mqtt_packet_header{type = ?PUBLISH, qos = QoS}, Bin,
+parse_packet(#mqtt_packet_header{type = ?PUBLISH, qos = QoS},
+             Bin,
              #{strict_mode := StrictMode, version := Ver}) ->
-    {TopicName, Rest} = parse_utf8_string(Bin),
+    {TopicName, Rest} = parse_utf8_string(Bin, StrictMode),
     {PacketId, Rest1} = case QoS of
                             ?QOS_0 -> {undefined, Rest};
                             _ -> parse_packet_id(Rest)
                         end,
     (PacketId =/= undefined) andalso
       StrictMode andalso validate_packet_id(PacketId),
-    {Properties, Payload} = parse_properties(Rest1, Ver),
+    {Properties, Payload} = parse_properties(Rest1, Ver, StrictMode),
     Publish = #mqtt_packet_publish{topic_name = TopicName,
                                    packet_id  = PacketId,
                                    properties = Properties
@@ -284,7 +288,7 @@ parse_packet(#mqtt_packet_header{type = PubAck}, <<PacketId:16/big, ReasonCode,
              #{strict_mode := StrictMode, version := Ver = ?MQTT_PROTO_V5})
   when ?PUBACK =< PubAck, PubAck =< ?PUBCOMP ->
     StrictMode andalso validate_packet_id(PacketId),
-    {Properties, <<>>} = parse_properties(Rest, Ver),
+    {Properties, <<>>} = parse_properties(Rest, Ver, StrictMode),
     #mqtt_packet_puback{packet_id   = PacketId,
                         reason_code = ReasonCode,
                         properties  = Properties
@@ -293,7 +297,7 @@ parse_packet(#mqtt_packet_header{type = PubAck}, <<PacketId:16/big, ReasonCode,
 parse_packet(#mqtt_packet_header{type = ?SUBSCRIBE}, <<PacketId:16/big, Rest/binary>>,
              #{strict_mode := StrictMode, version := Ver}) ->
     StrictMode andalso validate_packet_id(PacketId),
-    {Properties, Rest1} = parse_properties(Rest, Ver),
+    {Properties, Rest1} = parse_properties(Rest, Ver, StrictMode),
     TopicFilters = parse_topic_filters(subscribe, Rest1),
     ok = validate_subqos([QoS || {_, #{qos := QoS}} <- TopicFilters]),
     #mqtt_packet_subscribe{packet_id     = PacketId,
@@ -304,7 +308,7 @@ parse_packet(#mqtt_packet_header{type = ?SUBSCRIBE}, <<PacketId:16/big, Rest/bin
 parse_packet(#mqtt_packet_header{type = ?SUBACK}, <<PacketId:16/big, Rest/binary>>,
              #{strict_mode := StrictMode, version := Ver}) ->
     StrictMode andalso validate_packet_id(PacketId),
-    {Properties, Rest1} = parse_properties(Rest, Ver),
+    {Properties, Rest1} = parse_properties(Rest, Ver, StrictMode),
     ReasonCodes = parse_reason_codes(Rest1),
     #mqtt_packet_suback{packet_id    = PacketId,
                         properties   = Properties,
@@ -314,7 +318,7 @@ parse_packet(#mqtt_packet_header{type = ?SUBACK}, <<PacketId:16/big, Rest/binary
 parse_packet(#mqtt_packet_header{type = ?UNSUBSCRIBE}, <<PacketId:16/big, Rest/binary>>,
              #{strict_mode := StrictMode, version := Ver}) ->
     StrictMode andalso validate_packet_id(PacketId),
-    {Properties, Rest1} = parse_properties(Rest, Ver),
+    {Properties, Rest1} = parse_properties(Rest, Ver, StrictMode),
     TopicFilters = parse_topic_filters(unsubscribe, Rest1),
     #mqtt_packet_unsubscribe{packet_id     = PacketId,
                              properties    = Properties,
@@ -329,7 +333,7 @@ parse_packet(#mqtt_packet_header{type = ?UNSUBACK}, <<PacketId:16/big>>,
 parse_packet(#mqtt_packet_header{type = ?UNSUBACK}, <<PacketId:16/big, Rest/binary>>,
              #{strict_mode := StrictMode, version := Ver}) ->
     StrictMode andalso validate_packet_id(PacketId),
-    {Properties, Rest1} = parse_properties(Rest, Ver),
+    {Properties, Rest1} = parse_properties(Rest, Ver, StrictMode),
     ReasonCodes = parse_reason_codes(Rest1),
     #mqtt_packet_unsuback{packet_id    = PacketId,
                           properties   = Properties,
@@ -337,116 +341,118 @@ parse_packet(#mqtt_packet_header{type = ?UNSUBACK}, <<PacketId:16/big, Rest/bina
                          };
 
 parse_packet(#mqtt_packet_header{type = ?DISCONNECT}, <<ReasonCode, Rest/binary>>,
-             #{version := ?MQTT_PROTO_V5}) ->
-    {Properties, <<>>} = parse_properties(Rest, ?MQTT_PROTO_V5),
+             #{strict_mode := StrictMode, version := ?MQTT_PROTO_V5}) ->
+    {Properties, <<>>} = parse_properties(Rest, ?MQTT_PROTO_V5, StrictMode),
     #mqtt_packet_disconnect{reason_code = ReasonCode,
                             properties  = Properties
                            };
 
 parse_packet(#mqtt_packet_header{type = ?AUTH}, <<ReasonCode, Rest/binary>>,
-             #{version := ?MQTT_PROTO_V5}) ->
-    {Properties, <<>>} = parse_properties(Rest, ?MQTT_PROTO_V5),
+             #{strict_mode := StrictMode, version := ?MQTT_PROTO_V5}) ->
+    {Properties, <<>>} = parse_properties(Rest, ?MQTT_PROTO_V5, StrictMode),
     #mqtt_packet_auth{reason_code = ReasonCode, properties = Properties}.
 
-parse_will_message(Packet = #mqtt_packet_connect{will_flag = true,
-                                                 proto_ver = Ver}, Bin) ->
-    {Props, Rest} = parse_properties(Bin, Ver),
-    {Topic, Rest1} = parse_utf8_string(Rest),
+parse_will_message( Packet = #mqtt_packet_connect{will_flag = true,
+                                                 proto_ver = Ver}
+                  , Bin
+                  , StrictMode) ->
+    {Props, Rest} = parse_properties(Bin, Ver, StrictMode),
+    {Topic, Rest1} = parse_utf8_string(Rest, StrictMode),
     {Payload, Rest2} = parse_binary_data(Rest1),
     {Packet#mqtt_packet_connect{will_props   = Props,
                                 will_topic   = Topic,
                                 will_payload = Payload
                                }, Rest2};
-parse_will_message(Packet, Bin) -> {Packet, Bin}.
+parse_will_message(Packet, Bin, _StrictMode) -> {Packet, Bin}.
 
 -compile({inline, [parse_packet_id/1]}).
 parse_packet_id(<<PacketId:16/big, Rest/binary>>) ->
     {PacketId, Rest}.
 
-parse_properties(Bin, Ver) when Ver =/= ?MQTT_PROTO_V5 ->
+parse_properties(Bin, Ver, _StrictMode) when Ver =/= ?MQTT_PROTO_V5 ->
     {#{}, Bin};
 %% TODO: version mess?
-parse_properties(<<>>, ?MQTT_PROTO_V5) ->
+parse_properties(<<>>, ?MQTT_PROTO_V5, _StrictMode) ->
     {#{}, <<>>};
-parse_properties(<<0, Rest/binary>>, ?MQTT_PROTO_V5) ->
+parse_properties(<<0, Rest/binary>>, ?MQTT_PROTO_V5, _StrictMode) ->
     {#{}, Rest};
-parse_properties(Bin, ?MQTT_PROTO_V5) ->
+parse_properties(Bin, ?MQTT_PROTO_V5, StrictMode) ->
     {Len, Rest} = parse_variable_byte_integer(Bin),
     <<PropsBin:Len/binary, Rest1/binary>> = Rest,
-    {parse_property(PropsBin, #{}), Rest1}.
+    {parse_property(PropsBin, #{}, StrictMode), Rest1}.
 
-parse_property(<<>>, Props) ->
+parse_property(<<>>, Props, _StrictMode) ->
     Props;
-parse_property(<<16#01, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Payload-Format-Indicator' => Val});
-parse_property(<<16#02, Val:32/big, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Message-Expiry-Interval' => Val});
-parse_property(<<16#03, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Content-Type' => Val});
-parse_property(<<16#08, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Response-Topic' => Val});
-parse_property(<<16#09, Len:16/big, Val:Len/binary, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Correlation-Data' => Val});
-parse_property(<<16#0B, Bin/binary>>, Props) ->
+parse_property(<<16#01, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Payload-Format-Indicator' => Val}, StrictMode);
+parse_property(<<16#02, Val:32/big, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Message-Expiry-Interval' => Val}, StrictMode);
+parse_property(<<16#03, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Content-Type' => Val}, StrictMode);
+parse_property(<<16#08, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Response-Topic' => Val}, StrictMode);
+parse_property(<<16#09, Len:16/big, Val:Len/binary, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Correlation-Data' => Val}, StrictMode);
+parse_property(<<16#0B, Bin/binary>>, Props, StrictMode) ->
     {Val, Rest} = parse_variable_byte_integer(Bin),
-    parse_property(Rest, Props#{'Subscription-Identifier' => Val});
-parse_property(<<16#11, Val:32/big, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Session-Expiry-Interval' => Val});
-parse_property(<<16#12, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Assigned-Client-Identifier' => Val});
-parse_property(<<16#13, Val:16, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Server-Keep-Alive' => Val});
-parse_property(<<16#15, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Authentication-Method' => Val});
-parse_property(<<16#16, Len:16/big, Val:Len/binary, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Authentication-Data' => Val});
-parse_property(<<16#17, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Request-Problem-Information' => Val});
-parse_property(<<16#18, Val:32, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Will-Delay-Interval' => Val});
-parse_property(<<16#19, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Request-Response-Information' => Val});
-parse_property(<<16#1A, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Response-Information' => Val});
-parse_property(<<16#1C, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Server-Reference' => Val});
-parse_property(<<16#1F, Bin/binary>>, Props) ->
-    {Val, Rest} = parse_utf8_string(Bin),
-    parse_property(Rest, Props#{'Reason-String' => Val});
-parse_property(<<16#21, Val:16/big, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Receive-Maximum' => Val});
-parse_property(<<16#22, Val:16/big, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Topic-Alias-Maximum' => Val});
-parse_property(<<16#23, Val:16/big, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Topic-Alias' => Val});
-parse_property(<<16#24, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Maximum-QoS' => Val});
-parse_property(<<16#25, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Retain-Available' => Val});
-parse_property(<<16#26, Bin/binary>>, Props) ->
-    {Pair, Rest} = parse_utf8_pair(Bin),
+    parse_property(Rest, Props#{'Subscription-Identifier' => Val}, StrictMode);
+parse_property(<<16#11, Val:32/big, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Session-Expiry-Interval' => Val}, StrictMode);
+parse_property(<<16#12, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Assigned-Client-Identifier' => Val}, StrictMode);
+parse_property(<<16#13, Val:16, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Server-Keep-Alive' => Val}, StrictMode);
+parse_property(<<16#15, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Authentication-Method' => Val}, StrictMode);
+parse_property(<<16#16, Len:16/big, Val:Len/binary, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Authentication-Data' => Val}, StrictMode);
+parse_property(<<16#17, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Request-Problem-Information' => Val}, StrictMode);
+parse_property(<<16#18, Val:32, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Will-Delay-Interval' => Val}, StrictMode);
+parse_property(<<16#19, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Request-Response-Information' => Val}, StrictMode);
+parse_property(<<16#1A, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Response-Information' => Val}, StrictMode);
+parse_property(<<16#1C, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Server-Reference' => Val}, StrictMode);
+parse_property(<<16#1F, Bin/binary>>, Props, StrictMode) ->
+    {Val, Rest} = parse_utf8_string(Bin, StrictMode),
+    parse_property(Rest, Props#{'Reason-String' => Val}, StrictMode);
+parse_property(<<16#21, Val:16/big, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Receive-Maximum' => Val}, StrictMode);
+parse_property(<<16#22, Val:16/big, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Topic-Alias-Maximum' => Val}, StrictMode);
+parse_property(<<16#23, Val:16/big, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Topic-Alias' => Val}, StrictMode);
+parse_property(<<16#24, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Maximum-QoS' => Val}, StrictMode);
+parse_property(<<16#25, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Retain-Available' => Val}, StrictMode);
+parse_property(<<16#26, Bin/binary>>, Props, StrictMode) ->
+    {Pair, Rest} = parse_utf8_pair(Bin, StrictMode),
     case maps:find('User-Property', Props) of
         {ok, UserProps} ->
             UserProps1 = lists:append(UserProps, [Pair]),
-            parse_property(Rest, Props#{'User-Property' := UserProps1});
+            parse_property(Rest, Props#{'User-Property' := UserProps1}, StrictMode);
         error ->
-            parse_property(Rest, Props#{'User-Property' => [Pair]})
+            parse_property(Rest, Props#{'User-Property' => [Pair]}, StrictMode)
     end;
-parse_property(<<16#27, Val:32, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Maximum-Packet-Size' => Val});
-parse_property(<<16#28, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Wildcard-Subscription-Available' => Val});
-parse_property(<<16#29, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Subscription-Identifier-Available' => Val});
-parse_property(<<16#2A, Val, Bin/binary>>, Props) ->
-    parse_property(Bin, Props#{'Shared-Subscription-Available' => Val});
-parse_property(<<Property:8, _Rest/binary>>, _Props) ->
+parse_property(<<16#27, Val:32, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Maximum-Packet-Size' => Val}, StrictMode);
+parse_property(<<16#28, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Wildcard-Subscription-Available' => Val}, StrictMode);
+parse_property(<<16#29, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Subscription-Identifier-Available' => Val}, StrictMode);
+parse_property(<<16#2A, Val, Bin/binary>>, Props, StrictMode) ->
+    parse_property(Bin, Props#{'Shared-Subscription-Available' => Val}, StrictMode);
+parse_property(<<Property:8, _Rest/binary>>, _Props, _StrictMode) ->
     ?PARSE_ERR(#{invalid_property_code => Property}).
 %% TODO: invalid property in specific packet.
 
@@ -470,39 +476,46 @@ parse_topic_filters(unsubscribe, Bin) ->
 parse_reason_codes(Bin) ->
     [Code || <<Code>> <= Bin].
 
-parse_utf8_pair(<<Len1:16/big, Key:Len1/binary,
-                  Len2:16/big, Val:Len2/binary, Rest/binary>>) ->
+parse_utf8_pair( <<Len1:16/big, Key:Len1/binary,
+                   Len2:16/big, Val:Len2/binary, Rest/binary>>
+               , true) ->
+    {{validate_utf8(Key), validate_utf8(Val)}, Rest};
+parse_utf8_pair( <<Len1:16/big, Key:Len1/binary,
+                   Len2:16/big, Val:Len2/binary, Rest/binary>>
+               , false) ->
     {{Key, Val}, Rest};
-parse_utf8_pair(<<LenK:16/big, Rest/binary>>)
+parse_utf8_pair(<<LenK:16/big, Rest/binary>>, _StrictMode)
   when LenK > byte_size(Rest) ->
     ?PARSE_ERR(#{ hint => user_property_not_enough_bytes
                 , parsed_key_length => LenK
                 , remaining_bytes_length => byte_size(Rest)});
 parse_utf8_pair(<<LenK:16/big, _Key:LenK/binary, %% key maybe malformed
-                  LenV:16/big, Rest/binary>>)
+                  LenV:16/big, Rest/binary>>, _StrictMode)
   when LenV > byte_size(Rest) ->
     ?PARSE_ERR(#{ hint => malformed_user_property_value
                 , parsed_key_length => LenK
                 , parsed_value_length => LenV
                 , remaining_bytes_length => byte_size(Rest)});
-parse_utf8_pair(Bin)
+parse_utf8_pair(Bin, _StrictMode)
   when 4 > byte_size(Bin) ->
     ?PARSE_ERR(#{ hint => user_property_not_enough_bytes
                 , total_bytes => byte_size(Bin)}).
 
-parse_utf8_string(Bin, false) ->
+parse_utf8_string(Bin, _StrictMode, false) ->
     {undefined, Bin};
-parse_utf8_string(Bin, true) ->
-    parse_utf8_string(Bin).
+parse_utf8_string(Bin, StrictMode, true) ->
+    parse_utf8_string(Bin, StrictMode).
 
-parse_utf8_string(<<Len:16/big, Str:Len/binary, Rest/binary>>) ->
+parse_utf8_string(<<Len:16/big, Str:Len/binary, Rest/binary>>, true) ->
+    {validate_utf8(Str), Rest};
+parse_utf8_string(<<Len:16/big, Str:Len/binary, Rest/binary>>, false) ->
     {Str, Rest};
-parse_utf8_string(<<Len:16/big, Rest/binary>>)
+parse_utf8_string(<<Len:16/big, Rest/binary>>, _)
   when Len > byte_size(Rest) ->
     ?PARSE_ERR(#{ hint => malformed_utf8_string
                 , parsed_length => Len
                 , remaining_bytes_length => byte_size(Rest)});
-parse_utf8_string(Bin)
+parse_utf8_string(Bin, _)
   when 2 > byte_size(Bin) ->
     ?PARSE_ERR(malformed_utf8_string_length).
 
@@ -853,3 +866,52 @@ fixqos(?PUBREL, 0)      -> 1;
 fixqos(?SUBSCRIBE, 0)   -> 1;
 fixqos(?UNSUBSCRIBE, 0) -> 1;
 fixqos(_Type, QoS)      -> QoS.
+
+validate_utf8(Bin) ->
+    case unicode:characters_to_binary(Bin) of
+        {error, _, _} ->
+            ?PARSE_ERR(utf8_string_invalid);
+        {incomplete, _, _} ->
+            ?PARSE_ERR(utf8_string_invalid);
+        Bin when is_binary(Bin) ->
+            case validate_mqtt_utf8_char(Bin) of
+                true -> Bin;
+                false -> ?PARSE_ERR(utf8_string_invalid)
+            end
+    end.
+
+%% Is the utf8 string respecting UTF-8 characters defined by MQTT Spec?
+%% i.e. contains invalid UTF-8 char or control char
+validate_mqtt_utf8_char(<<>>) ->
+    true;
+%% ==== 1-Byte UTF-8 invalid: [[U+0000 .. U+001F] && [U+007F]]
+validate_mqtt_utf8_char(<<B1, Bs/binary>>)
+  when B1 >= 16#20, B1 =< 16#7E ->
+    validate_mqtt_utf8_char(Bs);
+validate_mqtt_utf8_char(<<B1, _Bs/binary>>)
+  when B1 >=  16#00, B1 =< 16#1F;
+       B1 =:= 16#7F ->
+    %% [U+0000 .. U+001F] && [U+007F]
+    false;
+%% ==== 2-Bytes UTF-8 invalid: [U+0080 .. U+009F]
+validate_mqtt_utf8_char(<<B1, B2, Bs/binary>>)
+  when B1 =:= 16#C2;
+       B2 >=  16#A0, B2 =< 16#BF;
+       B1  >  16#C3, B1 =< 16#DE;
+       B2 >=  16#80, B2 =< 16#BF ->
+    validate_mqtt_utf8_char(Bs);
+validate_mqtt_utf8_char(<<16#C2, B2, _Bs/binary>>)
+  when B2 >= 16#80, B2 =< 16#9F ->
+    %% [U+0080 .. U+009F]
+    false;
+%% ==== 3-Bytes UTF-8 invalid: [U+D800 .. U+DFFF]
+validate_mqtt_utf8_char(<<B1, _B2, _B3, Bs/binary>>)
+  when B1 >= 16#E0, B1 =< 16#EE;
+       B1 =:= 16#EF ->
+    validate_mqtt_utf8_char(Bs);
+validate_mqtt_utf8_char(<<16#ED, _B2, _B3, _Bs/binary>>) ->
+    false;
+%% ==== 4-Bytes UTF-8
+validate_mqtt_utf8_char(<<B1, _B2, _B3, _B4, Bs/binary>>)
+  when B1 =:= 16#0F ->
+    validate_mqtt_utf8_char(Bs).

+ 57 - 38
apps/emqx/test/emqx_frame_SUITE.erl

@@ -43,66 +43,68 @@ all() ->
 
 groups() ->
     [{parse, [parallel],
-      [t_parse_cont,
-       t_parse_frame_too_large,
-       t_parse_frame_malformed_variable_byte_integer
+      [ t_parse_cont
+      , t_parse_frame_too_large
+      , t_parse_frame_malformed_variable_byte_integer
+      , t_parse_malformed_utf8_string
       ]},
      {connect, [parallel],
-      [t_serialize_parse_v3_connect,
-       t_serialize_parse_v4_connect,
-       t_serialize_parse_v5_connect,
-       t_serialize_parse_connect_without_clientid,
-       t_serialize_parse_connect_with_will,
-       t_serialize_parse_bridge_connect
+      [ t_serialize_parse_v3_connect
+      , t_serialize_parse_v4_connect
+      , t_serialize_parse_v5_connect
+      , t_serialize_parse_connect_without_clientid
+      , t_serialize_parse_connect_with_will
+      , t_serialize_parse_bridge_connect
       ]},
      {connack, [parallel],
-      [t_serialize_parse_connack,
-       t_serialize_parse_connack_v5
+      [ t_serialize_parse_connack
+      , t_serialize_parse_connack_v5
       ]},
      {publish, [parallel],
-      [t_parse_sticky_frames,
-       t_serialize_parse_qos0_publish,
-       t_serialize_parse_qos1_publish,
-       t_serialize_parse_qos2_publish,
-       t_serialize_parse_publish_v5
+      [ t_parse_sticky_frames
+      , t_serialize_parse_qos0_publish
+      , t_serialize_parse_qos1_publish
+      , t_serialize_parse_qos2_publish
+      , t_serialize_parse_publish_v5
       ]},
      {puback, [parallel],
-      [t_serialize_parse_puback,
-       t_serialize_parse_puback_v3_4,
-       t_serialize_parse_puback_v5,
-       t_serialize_parse_pubrec,
-       t_serialize_parse_pubrec_v5,
-       t_serialize_parse_pubrel,
-       t_serialize_parse_pubrel_v5,
-       t_serialize_parse_pubcomp,
-       t_serialize_parse_pubcomp_v5
+      [ t_serialize_parse_puback
+      , t_serialize_parse_puback_v3_4
+      , t_serialize_parse_puback_v5
+      , t_serialize_parse_pubrec
+      , t_serialize_parse_pubrec_v5
+      , t_serialize_parse_pubrel
+      , t_serialize_parse_pubrel_v5
+      , t_serialize_parse_pubcomp
+      , t_serialize_parse_pubcomp_v5
       ]},
      {subscribe, [parallel],
-      [t_serialize_parse_subscribe,
-       t_serialize_parse_subscribe_v5
+      [ t_serialize_parse_subscribe
+      , t_serialize_parse_subscribe_v5
       ]},
      {suback, [parallel],
-      [t_serialize_parse_suback,
-       t_serialize_parse_suback_v5
+      [ t_serialize_parse_suback
+      , t_serialize_parse_suback_v5
       ]},
      {unsubscribe, [parallel],
-      [t_serialize_parse_unsubscribe,
-       t_serialize_parse_unsubscribe_v5
+      [ t_serialize_parse_unsubscribe
+      , t_serialize_parse_unsubscribe_v5
       ]},
      {unsuback, [parallel],
-      [t_serialize_parse_unsuback,
-       t_serialize_parse_unsuback_v5
+      [ t_serialize_parse_unsuback
+      , t_serialize_parse_unsuback_v5
       ]},
      {ping, [parallel],
-      [t_serialize_parse_pingreq,
-       t_serialize_parse_pingresp
+      [ t_serialize_parse_pingreq
+      , t_serialize_parse_pingresp
       ]},
      {disconnect, [parallel],
-      [t_serialize_parse_disconnect,
-       t_serialize_parse_disconnect_v5
+      [ t_serialize_parse_disconnect
+      , t_serialize_parse_disconnect_v5
       ]},
      {auth, [parallel],
-      [t_serialize_parse_auth_v5]
+      [ t_serialize_parse_auth_v5
+      ]
      }].
 
 init_per_suite(Config) ->
@@ -139,6 +141,23 @@ t_parse_frame_malformed_variable_byte_integer(_) ->
     ?ASSERT_FRAME_THROW(malformed_variable_byte_integer,
                         emqx_frame:parse(MalformedPayload, ParseState)).
 
+t_parse_malformed_utf8_string(_) ->
+    MalformedPacket = <<16,31,0,4,
+                        %% Specification name, should be "MQTT"
+                        %% 77,81,84,84,
+                        %% malformed 1-Byte UTF-8 in (U+0000 .. U+001F] && [U+007F])
+                        16#00,16#01,16#1F,16#7F,
+
+                        4,194,0,60,
+                        0,4,101,109,
+                        113,120,0,5,
+                        97,100,109,105,
+                        110,0,6,112,
+                        117,98,108,105,
+                        99>>,
+    ParseState = emqx_frame:initial_parse_state(#{strict_mode => true}),
+    ?ASSERT_FRAME_THROW(utf8_string_invalid, emqx_frame:parse(MalformedPacket, ParseState)).
+
 t_serialize_parse_v3_connect(_) ->
     Bin = <<16,37,0,6,77,81,73,115,100,112,3,2,0,60,0,23,109,111,115,
             113,112,117, 98,47,49,48,52,53,49,45,105,77,97,99,46,108,