#include "../include/url.h"
#include "../include/error.h"
#include "../include/util.h"

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// characters (besides letters and digits) that are accepted in the path
char valid_path[] = "-._~:/?#[]@!$&'()*+,;%=";

// States of the URL parser in lm_url_init. The numeric order matters: the
// parser moves to the next section of the URL by incrementing the state.
typedef enum lm_state {
  URL_PROTOCOL_0 = 0, // reading the protocol, terminated by ':'
  URL_SPLIT_1    = 1, // reading the "//" that follows the protocol
  URL_HOST_2     = 2, // reading the host, terminated by '/' or end of string
  URL_PATH_3     = 3, // reading the path (everything after the host's '/')
} lm_state_t;

/*
 * Returns the default port for the given protocol name.
 *
 * Known protocols: ftp (21), ftps (990), http (80), https (443), mptp (5858).
 * Returns 0 when the protocol is not one of these.
 */
uint16_t lm_url_default_port(char *protocol) {
  if (eq(protocol, "ftp"))
    return 21;

  if (eq(protocol, "ftps"))
    return 990;

  if (eq(protocol, "http"))
    return 80;

  if (eq(protocol, "https"))
    return 443;

  if (eq(protocol, "mptp"))
    return 5858;

  return 0;
}

// Returns a newly allocated, NUL-terminated copy of the first `len` bytes of
// `src`, or NULL if the allocation fails. The caller owns the result.
static char *url_copy_str(const char *src, size_t len) {
  char *copy = malloc(len + 1);

  if (NULL == copy)
    return NULL;

  memcpy(copy, src, len);
  copy[len] = 0;
  return copy;
}

/*
 * Parses `str` ("protocol://host[/path]") into `url`.
 *
 * On success returns true; url->host and url->path are heap allocated and
 * should be released with lm_url_free(). When the string has no path, the
 * path is set to "/". When the host parsing leaves the port at 0, the port is
 * taken from lm_url_default_port().
 *
 * On failure returns false, sets an error with lm_error_set() (except for
 * allocation failures, see below) and leaves url->host and url->path NULL, so
 * calling lm_url_free() afterwards is safe.
 */
bool lm_url_init(lm_url_t *url, char *str) {
  if (NULL == url) {
    lm_error_set(LM_ERR_ArgNULL);
    return false;
  }

  // clear out every variable
  memset(url->protocol, 0, sizeof(url->protocol));
  url->host = NULL;
  url->path = NULL;
  url->port = 0;

  if (NULL == str) {
    lm_error_set(LM_ERR_ArgNULL);
    return false;
  }

  // make sure the URL string size is not too large
  // extra 4 for "://" and ":"
  size_t strl = strlen(str);

  if (strl > URL_PROTOCOL_MAX + URL_PATH_MAX + URL_HOST_MAX + 4) {
    lm_error_set(LM_ERR_URLTooLarge);
    return false;
  }

  lm_state_t state = URL_PROTOCOL_0;
  size_t     index = 0;        // write position in the temporary buffer
  size_t     pos   = 0;        // read position in str
  char       buffer[strl + 1]; // holds the section that is currently parsed
  bool       ret = false;      // return value

  // clear out the temporary buffer
  memset(buffer, 0, strl + 1);

  // copy the string into the buffer one char at a time, the buffer is reset
  // every time a section of the URL is completed
  while ((buffer[index] = str[pos]) != 0) {
    switch (state) {
    case URL_PROTOCOL_0:
      if (index > URL_PROTOCOL_MAX) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != ':') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != ':')
        break;

      // ':' as the very first char means the protocol is empty
      if (0 == index) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      // replace the ':' with the terminator and store the protocol
      buffer[index] = 0;
      memcpy(url->protocol, buffer, index + 1);
      goto next;

    case URL_SPLIT_1:
      if (index > 1) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // wait for the second '/'
      if (index != 1)
        break;

      if (buffer[index - 1] != '/' || buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      goto next;

    case URL_HOST_2:
      if (index > URL_HOST_MAX) {
        lm_error_set(LM_ERR_URLHostLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != '.' &&
          buffer[index] != ':' && buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != '/')
        break;

      // '/' as the very first char means the host is empty
      if (index == 0) {
        lm_error_set(LM_ERR_URLBadHost);
        goto end;
      }

      // the '/' is not part of the host
      buffer[index] = 0;

      // NOTE(review): no allocation error code is visible in this file, so an
      // allocation failure returns false without calling lm_error_set()
      if (NULL == (url->host = url_copy_str(buffer, index)))
        goto end;

      goto next;

    case URL_PATH_3:
      if (index > URL_PATH_MAX) {
        lm_error_set(LM_ERR_URLPathLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) &&
          !contains(valid_path, buffer[index])) {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      break;

    default:
      assert(false);
    }

    // current section continues, move on to the next char
    index++;
    pos++;
    continue;

  next:
    // current section is completed, reset the buffer for the next section
    memset(buffer, 0, strl + 1);
    state++;
    index = 0;
    pos++;
  }

  // the string ended, whatever is left in the buffer belongs to the last state
  switch (state) {
  case URL_HOST_2:
    // no '/' after the host, so the buffer holds the host and there is no path
    if (index == 0) {
      lm_error_set(LM_ERR_URLBadHost);
      goto end;
    }

    if (index > URL_HOST_MAX) {
      lm_error_set(LM_ERR_URLHostLarge);
      goto end;
    }

    if (NULL == (url->host = url_copy_str(buffer, index)))
      goto end;

    // use "/" as the path
    if (NULL == (url->path = url_copy_str("/", 1)))
      goto end;

    break;

  case URL_PATH_3:
    // same limit as the one applied inside the loop, the loop only checks it
    // when there is a following char so the last char needs this extra check
    if (index > URL_PATH_MAX) {
      lm_error_set(LM_ERR_URLPathLarge);
      goto end;
    }

    // the buffer does not contain the '/' that ended the host, put it back
    if (NULL == (url->path = malloc(index + 2)))
      goto end;

    url->path[0] = '/';
    memcpy(url->path + 1, buffer, index);
    url->path[index + 1] = 0;
    break;

  default:
    // the string ended before reaching the host
    lm_error_set(LM_ERR_URLEnd);
    goto end;
  }

  // parse_host is declared elsewhere; here the host is passed as both the
  // first and the second argument, together with a pointer to the port
  // (presumably it splits an optional ":port" off the host - TODO confirm)
  if (parse_host(url->host, url->host, &url->port)) {
    // port is still 0, fall back to the default port of the protocol
    if (url->port == 0) {
      url->port = lm_url_default_port(url->protocol);

      if (url->port == 0) {
        lm_error_set(LM_ERR_URLPortUnknown);
        goto end;
      }
    }

    ret = true;
    goto end;
  }

  // translate the host parsing errors to the URL errors
  switch (lm_error()) {
  case LM_ERR_BadHost:
    lm_error_set(LM_ERR_URLBadHost);
    break;

  case LM_ERR_BadPort:
    lm_error_set(LM_ERR_URLBadPort);
    break;

  default:
    break;
  }

end:
  if (!ret) {
    // release everything and clear the pointers so that a later
    // lm_url_free() call does not free them a second time
    free(url->host);
    url->host = NULL;

    free(url->path);
    url->path = NULL;
  }

  return ret;
}

/*
 * Frees the host and the path of `url` and zeroes the whole structure.
 * Does nothing when `url` is NULL.
 */
void lm_url_free(lm_url_t *url) {
  if (NULL == url)
    return;

  // free(NULL) is a no-op, so no need to check the pointers
  free(url->host);
  free(url->path);

  memset(url, 0, sizeof(*url));
}