Преглед изворни кода

fix(frame): fixed and optimized the UTF8 validation for MQTT packets

firest пре 1 година
родитељ
комит
bb24403636
2 измењених фајлова са 9 додато и 36 уклоњено
  1. 8 36
      apps/emqx/src/emqx_frame.erl
  2. 1 0
      changes/ce/fix-13842.en.md

+ 8 - 36
apps/emqx/src/emqx_frame.erl

@@ -1258,43 +1258,15 @@ validate_utf8(Bin) ->
     end.
 
 %% Is the utf8 string respecting UTF-8 characters defined by MQTT Spec?
-%% i.e. contains invalid UTF-8 char or control char
+%% i.e. is the string free of control characters (U+0000..U+001F, U+007F..U+009F)?
+%% Note: this is under the assumption that the string is already validated by `unicode:characters_to_binary/1`
+%% hence there is no need to validate utf8 byte sequence integrity
 validate_mqtt_utf8_char(<<>>) ->
     true;
-%% ==== 1-Byte UTF-8 invalid: [[U+0000 .. U+001F] && [U+007F]]
-validate_mqtt_utf8_char(<<B1, Bs/binary>>) when
-    B1 >= 16#20, B1 =< 16#7E
+validate_mqtt_utf8_char(<<H/utf8, _Rest/binary>>) when
+    H >= 16#00, H =< 16#1F;
+    H >= 16#7F, H =< 16#9F
 ->
-    validate_mqtt_utf8_char(Bs);
-validate_mqtt_utf8_char(<<B1, _Bs/binary>>) when
-    B1 >= 16#00, B1 =< 16#1F;
-    B1 =:= 16#7F
-->
-    %% [U+0000 .. U+001F] && [U+007F]
-    false;
-%% ==== 2-Bytes UTF-8 invalid: [U+0080 .. U+009F]
-validate_mqtt_utf8_char(<<B1, B2, Bs/binary>>) when
-    B1 =:= 16#C2;
-    B2 >= 16#A0, B2 =< 16#BF;
-    B1 > 16#C3, B1 =< 16#DE;
-    B2 >= 16#80, B2 =< 16#BF
-->
-    validate_mqtt_utf8_char(Bs);
-validate_mqtt_utf8_char(<<16#C2, B2, _Bs/binary>>) when
-    B2 >= 16#80, B2 =< 16#9F
-->
-    %% [U+0080 .. U+009F]
     false;
-%% ==== 3-Bytes UTF-8 invalid: [U+D800 .. U+DFFF]
-validate_mqtt_utf8_char(<<B1, _B2, _B3, Bs/binary>>) when
-    B1 >= 16#E0, B1 =< 16#EE;
-    B1 =:= 16#EF
-->
-    validate_mqtt_utf8_char(Bs);
-validate_mqtt_utf8_char(<<16#ED, _B2, _B3, _Bs/binary>>) ->
-    false;
-%% ==== 4-Bytes UTF-8
-validate_mqtt_utf8_char(<<B1, _B2, _B3, _B4, Bs/binary>>) when
-    B1 =:= 16#0F
-->
-    validate_mqtt_utf8_char(Bs).
+validate_mqtt_utf8_char(<<_H/utf8, Rest/binary>>) ->
+    validate_mqtt_utf8_char(Rest).

+ 1 - 0
changes/ce/fix-13842.en.md

@@ -0,0 +1 @@
+Fixed a UTF-8 string validation exception.