123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641 |
- /**
- * Copyright (c) 2019 Trevor Herselman. All rights reserved.
- *
- * MIT License
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
- #include "multipart_parser.h"
- #include <assert.h>
- #include <stddef.h>
- #include <ctype.h>
- #include <string.h>
- #include <limits.h>
- #include <stdio.h>
- #include "utils/Log.h"
/* Smaller of two values. Both arguments are evaluated more than once. */
#ifndef MIN
# define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

/* Store an error code in the parser; expects a variable named `parser` in scope. */
#define SET_ERRNO(e)                                                 \
do {                                                                 \
  parser->multipart_errno = (e);                                     \
} while(0)

/* Branch-prediction hints; plain pass-through on non-GNU compilers. */
#ifdef __GNUC__
# define LIKELY(X) __builtin_expect(!!(X), 1)
# define UNLIKELY(X) __builtin_expect(!!(X), 0)
#else
# define LIKELY(X) (X)
# define UNLIKELY(X) (X)
#endif

/* Tells the optimiser that control never reaches this point. */
#ifndef UNREACHABLE
# ifdef _MSC_VER
#  define UNREACHABLE __assume(0)
# else /* GCC, Clang & Intel C++ */
#  define UNREACHABLE __builtin_unreachable()
# endif
#endif

/*
 * Marks an intentional switch fall-through.
 *
 * The statement attribute is only used when the compiler is known to
 * support it (checked with __has_attribute, or GCC >= 7); everywhere else
 * the macro expands to a harmless no-op statement.
 */
#ifndef FALLTHROUGH
# if defined(__has_attribute)
#  if __has_attribute(fallthrough)
#   define FALLTHROUGH __attribute__ ((fallthrough))
#  endif
# endif
# if !defined(FALLTHROUGH) && defined(__GNUC__) && !defined(__clang__)
#  if __GNUC__ >= 7
#   define FALLTHROUGH __attribute__ ((fallthrough))
#  endif
# endif
# ifndef FALLTHROUGH
#  define FALLTHROUGH ((void)0)
# endif
#endif
/**
 * States of the multipart_parser_execute() state machine.
 *
 * The enumerators are listed in their original order, so their numeric
 * values are unchanged.
 */
enum state {
  s_start,                              /* skipping bytes until the first '-' */
  s_start_dash,                         /* one '-' seen, a second one must follow */
  s_boundary,                           /* matching the boundary string, then CR or '-' */
  s_boundary_cr,                        /* CR after a boundary, LF must follow */
  s_boundary_almost_done,               /* boundary plus '-', a closing '-' must follow */
  s_header_field_start,                 /* expecting the first letter of a header name */
  s_header_field,                       /* collecting a header name up to ':' */
  s_header_value_discard_ws,            /* skipping spaces in front of a header value */
  s_header_value,                       /* collecting a header value up to CR */
  s_header_value_lws,                   /* never entered by the parser; rejected with -1 */
  s_header_almost_done,                 /* CR after a header value, LF must follow */
  s_headers_almost_done,                /* header line finished: CR ends the block, else next header */
  s_headers_done,                       /* CR of the empty line seen, LF must follow */
  s_body_part_start,                    /* next byte is the first byte of a part body */
  s_body_part,                          /* inside a part body, scanning for CR */
  s_body_part_boundary_cr,              /* CR seen inside a body */
  s_body_part_boundary_cr_lf,           /* CR LF seen inside a body */
  s_body_part_boundary_cr_lf_dash,      /* CR LF '-' seen inside a body */
  s_body_part_boundary_cr_lf_dash_dash, /* CR LF "--" seen, matching the boundary string */
  s_end                                 /* closing boundary consumed; further input is ignored */
};
/*
 * Macros for character classes.
 *
 * Every macro argument and every expansion is fully parenthesised so that
 * compound expressions (e.g. a conditional expression) can be passed
 * safely. Arguments may still be evaluated more than once, so they must
 * not have side effects.
 */
#define CR                  '\r'
#define LF                  '\n'
/* Sets bit 0x20, which maps ASCII 'A'..'Z' onto 'a'..'z'. */
#define LOWER(c)            ((unsigned char)((c) | 0x20))
#define IS_ALPHA(c)         (LOWER(c) >= 'a' && LOWER(c) <= 'z')
#define IS_NUM(c)           ((c) >= '0' && (c) <= '9')
#define IS_ALPHANUM(c)      (IS_ALPHA(c) || IS_NUM(c))
#define IS_HEX(c)           (IS_NUM(c) || (LOWER(c) >= 'a' && LOWER(c) <= 'f'))
- void multipart_parser_init(multipart_parser *parser)
- {
- parser->state = s_start;
- }
- void multipart_parser_settings_init(multipart_parser_settings *settings)
- {
- memset(settings, 0, sizeof(*settings));
- }
- int multipart_parser_execute(multipart_parser *parser,
- const multipart_parser_settings *settings,
- const char *data,
- size_t len)
- {
- const char* buf_end = &data[len];
- const char* p = data;
- const char* body_start = NULL;
- for (; p < buf_end; ++p)
- {
- const char ch = *p;
- switch (parser->state)
- {
- case s_start:
- if (LIKELY(ch == '-')) {
- parser->state = s_start_dash;
- }
- continue;
- case s_start_dash:
- if (LIKELY(ch == '-')) {
- parser->nread = 0;
- parser->state = s_boundary;
- continue;
- }
- return -1;
- case s_boundary:
- if (LIKELY(parser->nread < parser->boundary_len)) {
- if (LIKELY(ch == parser->boundary[parser->nread++])) {
- continue;
- }
- } else {
- if (LIKELY(ch == '\r')) {
- parser->state = s_boundary_cr;
- continue;
- } else if (ch == '-') {
- parser->state = s_boundary_almost_done;
- continue;
- }
- }
- return -1;
- case s_boundary_cr:
- if (LIKELY(ch == '\n')) {
- if (LIKELY(settings->on_boundary_begin(parser) == 0)) {
- parser->state = s_header_field_start;
- continue;
- }
- }
- return -1;
- case s_boundary_almost_done:
- if (LIKELY(ch == '-')) {
- parser->state = s_end;
- return settings->on_body_parts_complete(parser);
- }
- return -1;
- case s_headers_almost_done:
- if (ch == '\r') {
- parser->state = s_headers_done;
- continue;
- }
- FALLTHROUGH;
- case s_header_field_start:
- if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
- parser->nread = 1;
- memset(parser->header_field, 0, sizeof(parser->header_field));
- parser->header_field[0] = ch;
- parser->state = s_header_field;
- continue;
- }
- return -1;
- case s_header_field:
- if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '-') {
- if (parser->nread < sizeof(parser->header_field)) {
- parser->header_field[parser->nread] = ch;
- }
- parser->nread++;
- continue;
- } else if (ch == ':') {
- if (settings->on_header_field(
- parser,
- /*p - parser->nread*/ parser->header_field,
- /*parser->nread*/ strlen(parser->header_field)) == 0) {
- parser->state = s_header_value_discard_ws;
- continue;
- }
- }
- return -1;
- case s_header_value_discard_ws:
- if (ch > ' ') {
- parser->nread = 1;
- memset(parser->header_value, 0, sizeof(parser->header_value));
- parser->header_value[0] = ch;
- parser->state = s_header_value;
- continue;
- } if (ch == ' ') {
- continue;
- }
- return -1;
- case s_header_value:
- if (ch != '\r') {
- if (parser->nread < sizeof(parser->header_value)) {
- parser->header_value[parser->nread] = ch;
- }
- parser->nread++;
- continue;
- }
- if (settings->on_header_value(
- parser,
- /*p - parser->nread*/ parser->header_value,
- /*parser->nread*/ strlen(parser->header_value)) == 0) {
- parser->state = s_header_almost_done;
- continue;
- }
- return -1;
- case s_header_almost_done:
- if (ch == '\n') {
- parser->state = s_headers_almost_done;
- continue;
- }
- case s_header_value_lws:
- return -1;
- case s_headers_done:
- if (ch == '\n') {
- if (LIKELY( settings->on_headers_complete(parser) == 0)) {
- parser->state = s_body_part_start;
- continue;
- }
- }
- return -1;
- case s_body_part_start:
- body_start = p;
- parser->state = s_body_part;
- FALLTHROUGH;
- case s_body_part:
- if (LIKELY(ch != '\r')) {
- continue;
- }
- if (body_start == NULL) {
- settings->on_body(parser, data, p - data);
- } else {
- settings->on_body(parser, body_start, p - body_start);
- }
- parser->state = s_body_part_boundary_cr;
- continue;
- case s_body_part_boundary_cr:
- if (ch == '\n') {
- parser->state = s_body_part_boundary_cr_lf;
- continue;
- }
- settings->on_body(parser, "\r", 1);
- if (ch == '\r') {
- continue;
- }
- body_start = p;
- parser->state = s_body_part;
- continue;
- case s_body_part_boundary_cr_lf:
- if (ch == '-') {
- parser->state = s_body_part_boundary_cr_lf_dash;
- continue;
- }
- settings->on_body(parser, "\r\n", 2);
- if (ch == '\r') {
- parser->state = s_body_part_boundary_cr;
- continue;
- }
- body_start = p;
- parser->state = s_body_part;
- continue;
- case s_body_part_boundary_cr_lf_dash:
- if (ch == '-') {
- parser->nread = 0;
- parser->state = s_body_part_boundary_cr_lf_dash_dash;
- continue;
- }
- settings->on_body(parser, "\r\n-", 3);
- if (ch == '\r') {
- parser->state = s_body_part_boundary_cr;
- continue;
- }
- body_start = p;
- parser->state = s_body_part;
- continue;
- case s_body_part_boundary_cr_lf_dash_dash:
- if (LIKELY(parser->nread < parser->boundary_len)) {
- if (LIKELY(ch == parser->boundary[parser->nread++])) {
- continue;
- }
- settings->on_body(parser, "\r\n--", 4);
- if (parser->nread > 0) {
- settings->on_body(parser, parser->boundary, parser->nread - 1);
- }
- if (ch == '\r') {
- parser->state = s_body_part_boundary_cr;
- continue;
- }
- body_start = p;
- parser->state = s_body_part;
- continue;
- } else {
- if (LIKELY(ch == '\r')) {
- parser->state = s_boundary_cr;
- continue;
- }
- if (ch == '-') {
- parser->state = s_boundary_almost_done;
- continue;
- }
- }
- return -1;
- case s_end:
- return 0;
- default:
- UNREACHABLE;
- }
- UNREACHABLE;
- }
- switch (parser->state) {
- case s_body_part_start:
- if ((body_start != NULL) ) {
- settings->on_body(parser, body_start, buf_end - body_start);
- }
- break;
- case s_body_part:
- if (body_start == NULL ) {
- settings->on_body(parser, data, len);
- } else {
- settings->on_body(parser, body_start, buf_end - body_start);
- }
- break;
- default:
- break;
- }
- return 0;
- }
/*
 * Locate `key="value"` inside str[0..len) and return the value.
 *
 * `key` must be lower-case ASCII letters; it is matched case-insensitively
 * and only where it starts a parameter name, i.e. at the start of the
 * string or after a character that is not a letter, digit, '-' or '_'
 * (so "name" is not found inside "filename"). Spaces are allowed on both
 * sides of '='. The value runs up to the next '"'; escaped quotes are not
 * interpreted.
 *
 * Returns a pointer to the first value character (for an empty value, to
 * the closing quote) and stores the length in *value_len when value_len is
 * not NULL. Returns NULL when no complete quoted value is found.
 */
static const char *multipart_find_quoted_param(const char *str, size_t len,
                                               const char *key, size_t key_len,
                                               size_t *value_len)
{
  for (size_t i = 0; i + key_len <= len; ++i) {
    /* Reject matches that begin in the middle of a longer name. */
    if (i > 0) {
      const unsigned char prev = (unsigned char) str[i - 1];
      if (isalnum(prev) || prev == '-' || prev == '_') {
        continue;
      }
    }

    size_t matched = 0;
    while (matched < key_len
           && (unsigned char) (str[i + matched] | 0x20)
              == (unsigned char) key[matched]) {
      ++matched;
    }
    if (matched != key_len) {
      continue;
    }

    size_t pos = i + key_len;
    while (pos < len && str[pos] == ' ') {
      ++pos;
    }
    if (pos >= len || str[pos] != '=') {
      continue;
    }
    ++pos;
    while (pos < len && str[pos] == ' ') {
      ++pos;
    }
    if (pos >= len || str[pos] != '"') {
      continue;
    }
    ++pos;

    /* Value found: it must be closed by a quote inside the buffer. */
    const size_t value_pos = pos;
    while (pos < len && str[pos] != '"') {
      ++pos;
    }
    if (pos >= len) {
      return NULL;
    }
    if (value_len != NULL) {
      *value_len = pos - value_pos;
    }
    return &str[value_pos];
  }
  return NULL;
}

/**
 * Extract the quoted `name` parameter from a header value such as
 * `form-data; name="field"; filename="a.txt"`.
 *
 * @param str        header value; need not be NUL-terminated
 * @param len        number of bytes in `str`
 * @param value_len  receives the length of the value; may be NULL
 * @return pointer into `str` at the first value character (not
 *         NUL-terminated), or NULL when there is no `name="..."` parameter
 */
const char* multipart_get_name(const char* str, size_t len,
                               size_t* value_len)
{
  return multipart_find_quoted_param(str, len, "name", sizeof("name") - 1,
                                     value_len);
}

/**
 * Extract the quoted `filename` parameter from a header value such as
 * `form-data; name="field"; filename="a.txt"`.
 *
 * @param str        header value; need not be NUL-terminated
 * @param len        number of bytes in `str`
 * @param value_len  receives the length of the value; may be NULL
 * @return pointer into `str` at the first value character (not
 *         NUL-terminated), or NULL when there is no `filename="..."`
 *         parameter
 */
const char* multipart_get_filename(const char* str, size_t len,
                                   size_t* value_len)
{
  return multipart_find_quoted_param(str, len, "filename",
                                     sizeof("filename") - 1, value_len);
}
|