libmp/src/url.c

240 lines
4.8 KiB
C

#include "../include/url.h"
#include "../include/error.h"
#include "../include/util.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Punctuation characters that lm_url_init() accepts in the path part of a URL,
// in addition to letters and digits (it is looked up with contains()).
char valid_path[] = "-._~:/?#[]@!$&'()*+,;%=";
/*
 * States of the URL parser in lm_url_init().
 *
 * The parser moves to the following state with `state++`, so the enumerators
 * must stay consecutive and in parsing order.
 */
typedef enum lm_state {
  URL_PROTOCOL_0 = 0, // reading the protocol, up to the ':'
  URL_SPLIT_1,        // reading the two '/' that follow the ':'
  URL_HOST_2,         // reading the host (letters, digits, '.', ':') up to a '/'
  URL_PATH_3,         // reading the rest of the string as the path
} lm_state_t;
/*
 * Look up the default port for a protocol name.
 *
 * protocol: protocol name, compared with eq() against each known entry.
 * returns:  the port for "ftp", "ftps", "http", "https" or "mptp";
 *           0 when the protocol matches none of them.
 */
uint16_t lm_url_default_port(char *protocol) {
  // Checked in this order; the first entry that eq() accepts wins.
  static const struct {
    char    *scheme;
    uint16_t port;
  } defaults[] = {
      {"ftp", 21},
      {"ftps", 990},
      {"http", 80},
      {"https", 443},
      {"mptp", 5858},
  };

  for (size_t i = 0; i < sizeof(defaults) / sizeof(defaults[0]); i++) {
    if (eq(protocol, defaults[i].scheme))
      return defaults[i].port;
  }

  return 0;
}
/*
 * Parse `str` into `url`.
 *
 * The string is consumed by a small state machine: a protocol made of letters
 * and digits terminated by ':', then exactly "//", then a host made of
 * letters, digits, '.' and ':', and optionally a '/' followed by a path made
 * of letters, digits and the characters in `valid_path`. When the string ends
 * right after the host the path defaults to "/". The stored path always
 * starts with '/'.
 *
 * After the split, parse_host() is run on the host in place and fills
 * url->port (presumably it strips an optional ":port" suffix - confirm against
 * its definition). If it leaves the port at 0, lm_url_default_port() is used.
 *
 * url: structure to fill; its fields are reset first, whatever `str` is.
 * str: NUL-terminated URL string, may be NULL (reported as LM_ERR_ArgNULL).
 *
 * returns: true on success; url->host and url->path are then heap-allocated
 *          and should be released with lm_url_free().
 *          false on failure; url->host and url->path are NULL and the reason
 *          is recorded with lm_error_set() (LM_ERR_ArgNULL, LM_ERR_URLTooLarge,
 *          LM_ERR_URLBadProtocol, LM_ERR_URLBadChar, LM_ERR_URLHostLarge,
 *          LM_ERR_URLBadHost, LM_ERR_URLPathLarge, LM_ERR_URLEnd,
 *          LM_ERR_URLPortUnknown, LM_ERR_URLBadPort, or whatever parse_host()
 *          set).
 *          NOTE(review): on allocation failure false is returned without
 *          setting an error, because no allocation error code is visible from
 *          this file.
 */
bool lm_url_init(lm_url_t *url, char *str) {
  // clear out every variable, the cleanup at `end` relies on NULL pointers
  bzero(url->protocol, sizeof(url->protocol));
  url->host = NULL;
  url->path = NULL;
  url->port = 0;

  if (NULL == str) {
    lm_error_set(LM_ERR_ArgNULL);
    return false;
  }

  // strl: length of str, index: write position in buffer, pos: read position in str
  size_t strl = 0, index = 0, pos = 0;

  // make sure the URL string size is not too large
  // extra 4 for "://" and ":"
  if ((strl = strlen(str)) > URL_PROTOCOL_MAX + URL_PATH_MAX + URL_HOST_MAX + 4) {
    lm_error_set(LM_ERR_URLTooLarge);
    return false;
  }

  lm_state_t state = URL_PROTOCOL_0;
  char       buffer[strl + 1]; // holds the component currently being read
  bool       ret = false;      // return value

  // clear out the temporary buffer
  bzero(buffer, strl + 1);

  // copy one character per iteration into buffer and validate it for the current state
  while ((buffer[index] = *(str + pos)) != 0) {
    switch (state) {
    case URL_PROTOCOL_0:
      if (index > URL_PROTOCOL_MAX) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != ':') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != ':')
        break;

      // ':' ends the protocol, which must not be empty
      if (0 == index) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      buffer[index] = 0;
      memcpy(url->protocol, buffer, index + 1);
      goto next;

    case URL_SPLIT_1:
      if (index > 1) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // wait for the second '/'
      if (index != 1)
        break;

      if (buffer[index - 1] != '/' || buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      goto next;

    case URL_HOST_2:
      if (index > URL_HOST_MAX) {
        lm_error_set(LM_ERR_URLHostLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != '.' && buffer[index] != ':' &&
          buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      if (buffer[index] != '/')
        break;

      // '/' ends the host, which must not be empty
      if (index == 0) {
        lm_error_set(LM_ERR_URLBadHost);
        goto end;
      }

      buffer[index] = 0;
      url->host     = malloc((index + 1) * sizeof(char));
      if (NULL == url->host)
        goto end; // out of memory

      memcpy(url->host, buffer, index + 1);
      goto next;

    case URL_PATH_3:
      if (index > URL_PATH_MAX) {
        lm_error_set(LM_ERR_URLPathLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && !contains(valid_path, buffer[index])) {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      break;

    default:
      assert(false);
    }

    // character accepted, keep reading the same component
    index++;
    pos++;
    continue;

  next:
    // component finished: drop the separator and start the next one with an empty buffer
    bzero(buffer, strl + 1);
    state++;
    index = 0;
    pos++;
  }

  // the string ended, store whatever component was still being read
  switch (state) {
  case URL_HOST_2:
    if (index == 0) {
      lm_error_set(LM_ERR_URLBadHost);
      goto end;
    }

    if (index > URL_HOST_MAX) {
      lm_error_set(LM_ERR_URLHostLarge);
      goto end;
    }

    url->host = malloc((index + 1) * sizeof(char));
    if (NULL == url->host)
      goto end; // out of memory

    memcpy(url->host, buffer, index + 1);

    // no path was given, default to "/"
    url->path = malloc(2 * sizeof(char));
    if (NULL == url->path)
      goto end; // out of memory

    url->path[0] = '/';
    url->path[1] = 0;
    break;

  case URL_PATH_3:
    // NOTE(review): unlike the host above, the final length is not re-checked
    // against URL_PATH_MAX here, so URL_PATH_MAX + 1 characters are accepted -
    // confirm whether that is intended.
    url->path = malloc((index + 2) * sizeof(char));
    if (NULL == url->path)
      goto end; // out of memory

    // the '/' separator was dropped while parsing, put it back in front
    url->path[0] = '/';
    memcpy(url->path + 1, buffer, index + 1);
    break;

  default:
    // the string ended before a host was reached
    lm_error_set(LM_ERR_URLEnd);
    goto end;
  }

  if (parse_host(url->host, url->host, &url->port)) {
    if (url->port != 0) {
      ret = true;
      goto end;
    }

    // port still 0 after parse_host(), fall back to the protocol's default
    url->port = lm_url_default_port(url->protocol);

    if (url->port == 0) {
      lm_error_set(LM_ERR_URLPortUnknown);
      goto end;
    }

    ret = true;
    goto end;
  }

  // translate the host parser's errors to their URL counterparts
  switch (lm_error()) {
  case LM_ERR_BadHost:
    lm_error_set(LM_ERR_URLBadHost);
    break;

  case LM_ERR_BadPort:
    lm_error_set(LM_ERR_URLBadPort);
    break;

  default:
    break;
  }

end:
  if (!ret) {
    // reset the pointers as well, so a later lm_url_free() on this
    // structure does not free them a second time
    free(url->host);
    url->host = NULL;
    free(url->path);
    url->path = NULL;
  }

  return ret;
}
/*
 * Release the memory owned by `url` and zero the whole structure.
 *
 * url: structure whose host and path are either NULL or heap pointers.
 *      Every field reads as zero afterwards.
 */
void lm_url_free(lm_url_t *url) {
  // free() ignores NULL, so unset fields need no special case
  free(url->host);
  free(url->path);

  memset(url, 0, sizeof(*url));
}