multipart_parser.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641
/**
 * Copyright (c) 2019 Trevor Herselman. All rights reserved.
 *
 * MIT License
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
  24. #include "multipart_parser.h"
  25. #include <assert.h>
  26. #include <stddef.h>
  27. #include <ctype.h>
  28. #include <string.h>
  29. #include <limits.h>
  30. #include <stdio.h>
  31. #include "utils/Log.h"
  32. #ifndef MIN
  33. # define MIN(a,b) ((a) < (b) ? (a) : (b))
  34. #endif
  35. #define SET_ERRNO(e) \
  36. do { \
  37. parser->multipart_errno = (e); \
  38. } while(0)
  39. #ifdef __GNUC__
  40. # define LIKELY(X) __builtin_expect(!!(X), 1)
  41. # define UNLIKELY(X) __builtin_expect(!!(X), 0)
  42. #else
  43. # define LIKELY(X) (X)
  44. # define UNLIKELY(X) (X)
  45. #endif
  46. #ifndef UNREACHABLE
  47. # ifdef _MSC_VER
  48. # define UNREACHABLE __assume(0)
  49. # else /* GCC, Clang & Intel C++ */
  50. # define UNREACHABLE __builtin_unreachable()
  51. # endif
  52. #endif
  53. #ifndef FALLTHROUGH
  54. # if defined(__GNUC__) || defined(__clang__)
  55. # define FALLTHROUGH __attribute__ ((fallthrough))
  56. # else
  57. # define FALLTHROUGH ((void)0)
  58. # endif
  59. #endif
  60. enum state
  61. { s_start
  62. , s_start_dash
  63. , s_boundary
  64. , s_boundary_cr
  65. , s_boundary_almost_done
  66. , s_header_field_start
  67. , s_header_field
  68. , s_header_value_discard_ws
  69. , s_header_value
  70. , s_header_value_lws
  71. , s_header_almost_done
  72. , s_headers_almost_done
  73. , s_headers_done
  74. , s_body_part_start
  75. , s_body_part
  76. , s_body_part_boundary_cr
  77. , s_body_part_boundary_cr_lf
  78. , s_body_part_boundary_cr_lf_dash
  79. , s_body_part_boundary_cr_lf_dash_dash
  80. , s_end
  81. };
  82. /* Macros for character classes */
  83. #define CR '\r'
  84. #define LF '\n'
  85. #define LOWER(c) (unsigned char)(c | 0x20)
  86. #define IS_ALPHA(c) (LOWER(c) >= 'a' && LOWER(c) <= 'z')
  87. #define IS_NUM(c) ((c) >= '0' && (c) <= '9')
  88. #define IS_ALPHANUM(c) (IS_ALPHA(c) || IS_NUM(c))
  89. #define IS_HEX(c) (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
  90. void multipart_parser_init(multipart_parser *parser)
  91. {
  92. parser->state = s_start;
  93. }
  94. void multipart_parser_settings_init(multipart_parser_settings *settings)
  95. {
  96. memset(settings, 0, sizeof(*settings));
  97. }
  98. int multipart_parser_execute(multipart_parser *parser,
  99. const multipart_parser_settings *settings,
  100. const char *data,
  101. size_t len)
  102. {
  103. const char* buf_end = &data[len];
  104. const char* p = data;
  105. const char* body_start = NULL;
  106. for (; p < buf_end; ++p)
  107. {
  108. const char ch = *p;
  109. switch (parser->state)
  110. {
  111. case s_start:
  112. if (LIKELY(ch == '-')) {
  113. parser->state = s_start_dash;
  114. }
  115. continue;
  116. case s_start_dash:
  117. if (LIKELY(ch == '-')) {
  118. parser->nread = 0;
  119. parser->state = s_boundary;
  120. continue;
  121. }
  122. return -1;
  123. case s_boundary:
  124. if (LIKELY(parser->nread < parser->boundary_len)) {
  125. if (LIKELY(ch == parser->boundary[parser->nread++])) {
  126. continue;
  127. }
  128. } else {
  129. if (LIKELY(ch == '\r')) {
  130. parser->state = s_boundary_cr;
  131. continue;
  132. } else if (ch == '-') {
  133. parser->state = s_boundary_almost_done;
  134. continue;
  135. }
  136. }
  137. return -1;
  138. case s_boundary_cr:
  139. if (LIKELY(ch == '\n')) {
  140. if (LIKELY(settings->on_boundary_begin(parser) == 0)) {
  141. parser->state = s_header_field_start;
  142. continue;
  143. }
  144. }
  145. return -1;
  146. case s_boundary_almost_done:
  147. if (LIKELY(ch == '-')) {
  148. parser->state = s_end;
  149. return settings->on_body_parts_complete(parser);
  150. }
  151. return -1;
  152. case s_headers_almost_done:
  153. if (ch == '\r') {
  154. parser->state = s_headers_done;
  155. continue;
  156. }
  157. FALLTHROUGH;
  158. case s_header_field_start:
  159. if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
  160. parser->nread = 1;
  161. memset(parser->header_field, 0, sizeof(parser->header_field));
  162. parser->header_field[0] = ch;
  163. parser->state = s_header_field;
  164. continue;
  165. }
  166. return -1;
  167. case s_header_field:
  168. if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '-') {
  169. if (parser->nread < sizeof(parser->header_field)) {
  170. parser->header_field[parser->nread] = ch;
  171. }
  172. parser->nread++;
  173. continue;
  174. } else if (ch == ':') {
  175. if (settings->on_header_field(
  176. parser,
  177. /*p - parser->nread*/ parser->header_field,
  178. /*parser->nread*/ strlen(parser->header_field)) == 0) {
  179. parser->state = s_header_value_discard_ws;
  180. continue;
  181. }
  182. }
  183. return -1;
  184. case s_header_value_discard_ws:
  185. if (ch > ' ') {
  186. parser->nread = 1;
  187. memset(parser->header_value, 0, sizeof(parser->header_value));
  188. parser->header_value[0] = ch;
  189. parser->state = s_header_value;
  190. continue;
  191. } if (ch == ' ') {
  192. continue;
  193. }
  194. return -1;
  195. case s_header_value:
  196. if (ch != '\r') {
  197. if (parser->nread < sizeof(parser->header_value)) {
  198. parser->header_value[parser->nread] = ch;
  199. }
  200. parser->nread++;
  201. continue;
  202. }
  203. if (settings->on_header_value(
  204. parser,
  205. /*p - parser->nread*/ parser->header_value,
  206. /*parser->nread*/ strlen(parser->header_value)) == 0) {
  207. parser->state = s_header_almost_done;
  208. continue;
  209. }
  210. return -1;
  211. case s_header_almost_done:
  212. if (ch == '\n') {
  213. parser->state = s_headers_almost_done;
  214. continue;
  215. }
  216. case s_header_value_lws:
  217. return -1;
  218. case s_headers_done:
  219. if (ch == '\n') {
  220. if (LIKELY( settings->on_headers_complete(parser) == 0)) {
  221. parser->state = s_body_part_start;
  222. continue;
  223. }
  224. }
  225. return -1;
  226. case s_body_part_start:
  227. body_start = p;
  228. parser->state = s_body_part;
  229. FALLTHROUGH;
  230. case s_body_part:
  231. if (LIKELY(ch != '\r')) {
  232. continue;
  233. }
  234. if (body_start == NULL) {
  235. settings->on_body(parser, data, p - data);
  236. } else {
  237. settings->on_body(parser, body_start, p - body_start);
  238. }
  239. parser->state = s_body_part_boundary_cr;
  240. continue;
  241. case s_body_part_boundary_cr:
  242. if (ch == '\n') {
  243. parser->state = s_body_part_boundary_cr_lf;
  244. continue;
  245. }
  246. settings->on_body(parser, "\r", 1);
  247. if (ch == '\r') {
  248. continue;
  249. }
  250. body_start = p;
  251. parser->state = s_body_part;
  252. continue;
  253. case s_body_part_boundary_cr_lf:
  254. if (ch == '-') {
  255. parser->state = s_body_part_boundary_cr_lf_dash;
  256. continue;
  257. }
  258. settings->on_body(parser, "\r\n", 2);
  259. if (ch == '\r') {
  260. parser->state = s_body_part_boundary_cr;
  261. continue;
  262. }
  263. body_start = p;
  264. parser->state = s_body_part;
  265. continue;
  266. case s_body_part_boundary_cr_lf_dash:
  267. if (ch == '-') {
  268. parser->nread = 0;
  269. parser->state = s_body_part_boundary_cr_lf_dash_dash;
  270. continue;
  271. }
  272. settings->on_body(parser, "\r\n-", 3);
  273. if (ch == '\r') {
  274. parser->state = s_body_part_boundary_cr;
  275. continue;
  276. }
  277. body_start = p;
  278. parser->state = s_body_part;
  279. continue;
  280. case s_body_part_boundary_cr_lf_dash_dash:
  281. if (LIKELY(parser->nread < parser->boundary_len)) {
  282. if (LIKELY(ch == parser->boundary[parser->nread++])) {
  283. continue;
  284. }
  285. settings->on_body(parser, "\r\n--", 4);
  286. if (parser->nread > 0) {
  287. settings->on_body(parser, parser->boundary, parser->nread - 1);
  288. }
  289. if (ch == '\r') {
  290. parser->state = s_body_part_boundary_cr;
  291. continue;
  292. }
  293. body_start = p;
  294. parser->state = s_body_part;
  295. continue;
  296. } else {
  297. if (LIKELY(ch == '\r')) {
  298. parser->state = s_boundary_cr;
  299. continue;
  300. }
  301. if (ch == '-') {
  302. parser->state = s_boundary_almost_done;
  303. continue;
  304. }
  305. }
  306. return -1;
  307. case s_end:
  308. return 0;
  309. default:
  310. UNREACHABLE;
  311. }
  312. UNREACHABLE;
  313. }
  314. switch (parser->state) {
  315. case s_body_part_start:
  316. if ((body_start != NULL) ) {
  317. settings->on_body(parser, body_start, buf_end - body_start);
  318. }
  319. break;
  320. case s_body_part:
  321. if (body_start == NULL ) {
  322. settings->on_body(parser, data, len);
  323. } else {
  324. settings->on_body(parser, body_start, buf_end - body_start);
  325. }
  326. break;
  327. default:
  328. break;
  329. }
  330. return 0;
  331. }
  332. const char* multipart_get_name(const char* str, size_t len,
  333. size_t* value_len)
  334. {
  335. const char* str_end = &str[len];
  336. const char* p = str;
  337. const char* value_start;
  338. typedef enum
  339. { s_seek
  340. , s_N
  341. , s_NA
  342. , s_NAM
  343. , s_NAME
  344. , s_NAME_EQ
  345. , s_NAME_EQ_QUOT
  346. , s_value_start
  347. , s_value
  348. , s_value_end
  349. } e_state;
  350. for (e_state state = s_seek; p < str_end; ++p)
  351. {
  352. const char ch = *p;
  353. switch (state)
  354. {
  355. case s_seek:
  356. _reset:
  357. if (UNLIKELY(LOWER(ch) == 'n')) {
  358. state = s_N;
  359. }
  360. continue;
  361. case s_N:
  362. if (LIKELY(ch == 'a' || ch == 'A')) {
  363. state = s_NA;
  364. } else {
  365. state = s_seek;
  366. goto _reset;
  367. }
  368. continue;
  369. case s_NA:
  370. if (LIKELY(ch == 'm' || ch == 'M')) {
  371. state = s_NAM;
  372. } else {
  373. state = s_seek;
  374. goto _reset;
  375. }
  376. continue;
  377. case s_NAM:
  378. if (LIKELY(ch == 'e' || ch == 'E')) {
  379. state = s_NAME;
  380. } else {
  381. state = s_seek;
  382. goto _reset;
  383. }
  384. continue;
  385. case s_NAME:
  386. if (LIKELY(ch == '=')) {
  387. state = s_NAME_EQ;
  388. } else {
  389. if (ch == ' ') { /* Skip whitespace */
  390. continue;
  391. }
  392. state = s_seek;
  393. goto _reset;
  394. }
  395. continue;
  396. case s_NAME_EQ:
  397. if (LIKELY(ch == '"')) {
  398. state = s_value_start;
  399. } else {
  400. if (ch == ' ') { /* Skip whitespace */
  401. continue;
  402. }
  403. state = s_seek;
  404. goto _reset;
  405. }
  406. continue;
  407. case s_value_start:
  408. value_start = p;
  409. if (LIKELY(ch != '"')) {
  410. state = s_value;
  411. } else {
  412. *value_len = 0; /* detected an empty value */
  413. return value_start;
  414. }
  415. continue;
  416. case s_value:
  417. if (LIKELY(ch != '"')) {
  418. continue;
  419. } else {
  420. *value_len = p - value_start;
  421. return value_start;
  422. }
  423. default:
  424. UNREACHABLE;
  425. }
  426. }
  427. return NULL;
  428. }
  429. const char* multipart_get_filename(const char* str, size_t len,
  430. size_t* value_len)
  431. {
  432. const char* str_end = &str[len];
  433. const char* p = str;
  434. const char* value_start;
  435. typedef enum
  436. { s_F
  437. , s_FI
  438. , s_FIL
  439. , s_FILE
  440. , s_FILEN
  441. , s_FILENA
  442. , s_FILENAM
  443. , s_FILENAME
  444. , s_FILENAME_EQ
  445. , s_FILENAME_EQ_QUOT
  446. , s_value_start
  447. , s_value
  448. } e_state;
  449. for (e_state state = s_F; p < str_end; ++p)
  450. {
  451. const char ch = *p;
  452. switch (state)
  453. {
  454. case s_F:
  455. _reset:
  456. if (UNLIKELY(LOWER(ch) == 'f')) {
  457. state = s_FI;
  458. }
  459. continue;
  460. case s_FI:
  461. if (LIKELY(ch == 'i') || ch == 'I') {
  462. state = s_FIL;
  463. } else {
  464. state = s_F;
  465. goto _reset;
  466. }
  467. continue;
  468. case s_FIL:
  469. if (LIKELY(ch == 'l') || ch == 'L') {
  470. state = s_FILE;
  471. } else {
  472. state = s_F;
  473. goto _reset;
  474. }
  475. continue;
  476. case s_FILE:
  477. if (LIKELY(ch == 'e') || ch == 'E') {
  478. state = s_FILEN;
  479. } else {
  480. state = s_F;
  481. goto _reset;
  482. }
  483. continue;
  484. case s_FILEN:
  485. if (LIKELY(ch == 'n') || ch == 'N') {
  486. state = s_FILENA;
  487. } else {
  488. state = s_F;
  489. goto _reset;
  490. }
  491. continue;
  492. case s_FILENA:
  493. if (LIKELY(ch == 'a') || ch == 'A') {
  494. state = s_FILENAM;
  495. } else {
  496. state = s_F;
  497. goto _reset;
  498. }
  499. continue;
  500. case s_FILENAM:
  501. if (LIKELY(ch == 'm') || ch == 'M') {
  502. state = s_FILENAME;
  503. } else {
  504. state = s_F;
  505. goto _reset;
  506. }
  507. continue;
  508. case s_FILENAME:
  509. if (LIKELY(ch == 'e') || ch == 'E') {
  510. state = s_FILENAME_EQ;
  511. } else {
  512. state = s_F;
  513. goto _reset;
  514. }
  515. continue;
  516. case s_FILENAME_EQ:
  517. if (LIKELY(ch == '=')) {
  518. state = s_FILENAME_EQ_QUOT;
  519. } else {
  520. if (ch == ' ') { /* Skip whitespace */
  521. continue;
  522. }
  523. state = s_F;
  524. goto _reset;
  525. }
  526. continue;
  527. case s_FILENAME_EQ_QUOT:
  528. if (LIKELY(ch == '"')) {
  529. state = s_value_start;
  530. } else {
  531. if (ch == ' ') { /* Skip whitespace */
  532. continue;
  533. }
  534. state = s_F;
  535. goto _reset;
  536. }
  537. continue;
  538. case s_value_start:
  539. value_start = p;
  540. state = s_value;
  541. FALLTHROUGH;
  542. case s_value:
  543. if (LIKELY(ch != '"')) {
  544. continue;
  545. } else {
  546. *value_len = p - value_start;
  547. return value_start;
  548. }
  549. default:
  550. UNREACHABLE;
  551. }
  552. }
  553. return NULL;
  554. }