2024-06-20 22:36:56 +00:00
|
|
|
#include "../include/url.h"
|
2024-06-20 00:34:32 +00:00
|
|
|
#include "../include/error.h"
|
|
|
|
#include "../include/util.h"
|
2024-06-28 20:09:24 +00:00
|
|
|
|
2024-06-20 00:34:32 +00:00
|
|
|
#include <assert.h>
|
2024-06-22 04:03:17 +00:00
|
|
|
#include <stdio.h>
|
2024-06-20 00:34:32 +00:00
|
|
|
#include <stdlib.h>
|
2024-06-20 22:36:56 +00:00
|
|
|
#include <string.h>
|
2024-06-20 00:34:32 +00:00
|
|
|
|
2024-06-20 22:36:56 +00:00
|
|
|
// Punctuation accepted in the path part of a URL in addition to letters and
// digits; lm_url_init() rejects any path character that is not in this set.
char valid_path[] = "-._~:/?#[]@!$&'()*+,;%=";
|
2024-06-20 00:34:32 +00:00
|
|
|
|
|
|
|
/*
 * States of the URL parser in lm_url_init().
 *
 * The parser moves to the following state with `state++`, so the
 * enumerators must stay in parsing order with consecutive values.
 */
typedef enum lm_state {
  URL_PROTOCOL_0 = 0, // reading the protocol, terminated by ':'
  URL_SPLIT_1,        // reading the two '/' that follow the ':'
  URL_HOST_2,         // reading the host (and optional ":port"), terminated by '/'
  URL_PATH_3,         // reading the remainder of the string as the path
} lm_state_t;
|
|
|
|
|
2024-06-22 04:03:17 +00:00
|
|
|
/*
 * Look up the default port for a protocol name.
 *
 * protocol: protocol name to look up, compared with eq() against each
 *           known name in turn.
 *
 * Returns the port for "ftp", "ftps", "http", "https" or "mptp", and 0 when
 * the protocol is not one of those.
 */
uint16_t lm_url_default_port(char *protocol) {
  static const struct {
    char    *name;
    uint16_t port;
  } known[] = {
      {"ftp", 21},
      {"ftps", 990},
      {"http", 80},
      {"https", 443},
      {"mptp", 5858},
  };

  for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
    if (eq(protocol, known[i].name))
      return known[i].port;
  }

  // no match: 0 is used as the "unknown protocol" result
  return 0;
}
|
|
|
|
|
2024-06-22 04:03:17 +00:00
|
|
|
/*
 * Parse the string `str` ("protocol://host[:port][/path]") into `url`.
 *
 * The string is consumed one character at a time by a small state machine
 * (see lm_state_t): protocol up to ':', then exactly "//", then the host up
 * to the first '/', then everything else as the path.
 *
 * url: structure to fill. Its fields are reset before parsing starts.
 * str: NUL-terminated URL string.
 *
 * On success returns true and
 *   - url->protocol holds the protocol name,
 *   - url->host and url->path are heap allocated (release them with
 *     lm_url_free()); the path always starts with '/' and is "/" when the
 *     string ends right after the host,
 *   - url->port is the port reported by parse_host(), or, when that is 0,
 *     the value of lm_url_default_port() for the protocol.
 *
 * On failure returns false, url->host and url->path are NULL (nothing is
 * left for the caller to free) and the reason is stored with lm_error_set():
 * LM_ERR_ArgNULL, LM_ERR_URLTooLarge, LM_ERR_URLBadProtocol,
 * LM_ERR_URLBadChar, LM_ERR_URLHostLarge, LM_ERR_URLBadHost,
 * LM_ERR_URLPathLarge, LM_ERR_URLEnd, LM_ERR_URLPortUnknown or
 * LM_ERR_URLBadPort.
 *
 * NOTE(review): no allocation-failure error code is visible from this file,
 * so when malloc() fails the function returns false without changing the
 * current error; set a dedicated code on those paths if one exists.
 */
bool lm_url_init(lm_url_t *url, char *str) {
  if (NULL == url) {
    lm_error_set(LM_ERR_ArgNULL);
    return false;
  }

  // clear out every variable, so the cleanup at "end" can free unconditionally
  memset(url->protocol, 0, sizeof(url->protocol));
  url->host = NULL;
  url->path = NULL;
  url->port = 0;

  if (NULL == str) {
    lm_error_set(LM_ERR_ArgNULL);
    return false;
  }

  // strl: length of str, index: write position in buffer, pos: read position in str
  size_t strl = 0, index = 0, pos = 0;

  // make sure the URL string size is not too large
  // extra 4 for "://" and ":"
  if ((strl = strlen(str)) > URL_PROTOCOL_MAX + URL_PATH_MAX + URL_HOST_MAX + 4) {
    lm_error_set(LM_ERR_URLTooLarge);
    return false;
  }

  lm_state_t state = URL_PROTOCOL_0;
  char       buffer[strl + 1]; // holds the component currently being read
  bool       ret = false;      // return value

  // clear out the temporary buffer
  memset(buffer, 0, strl + 1);

  while ((buffer[index] = *(str + pos)) != 0) {
    switch (state) {
    case URL_PROTOCOL_0:
      if (index > URL_PROTOCOL_MAX) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != ':') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // keep collecting until the ':' that ends the protocol
      if (buffer[index] != ':')
        break;

      // ':' as the very first character means an empty protocol
      if (0 == index) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      // replace the ':' with a terminator and store the protocol
      buffer[index] = 0;
      memcpy(url->protocol, buffer, index + 1);
      goto next;

    case URL_SPLIT_1:
      if (index > 1) {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      if (buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // first '/' seen, wait for the second one
      if (index != 1)
        break;

      if (buffer[index - 1] != '/' || buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadProtocol);
        goto end;
      }

      goto next;

    case URL_HOST_2:
      if (index > URL_HOST_MAX) {
        lm_error_set(LM_ERR_URLHostLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && buffer[index] != '.' && buffer[index] != ':' &&
          buffer[index] != '/') {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      // keep collecting until the '/' that starts the path
      if (buffer[index] != '/')
        break;

      // '/' right after "://" means an empty host
      if (index == 0) {
        lm_error_set(LM_ERR_URLBadHost);
        goto end;
      }

      // replace the '/' with a terminator and store a copy of the host
      buffer[index] = 0;
      url->host     = malloc((index + 1) * sizeof(char));
      if (NULL == url->host)
        goto end;
      memcpy(url->host, buffer, index + 1);
      goto next;

    case URL_PATH_3:
      if (index > URL_PATH_MAX) {
        lm_error_set(LM_ERR_URLPathLarge);
        goto end;
      }

      if (!is_letter(buffer[index]) && !is_digit(buffer[index]) && !contains(valid_path, buffer[index])) {
        lm_error_set(LM_ERR_URLBadChar);
        goto end;
      }

      break;

    default:
      assert(false);
    }

    // character accepted as part of the current component
    index++;
    pos++;
    continue;

  next:
    // component finished: drop the delimiter and start the next component
    // with an empty buffer
    memset(buffer, 0, strl + 1);
    state++;
    index = 0;
    pos++;
  }

  // the string ended; what is left in the buffer depends on the state we stopped in
  switch (state) {
  case URL_HOST_2:
    // no '/' after the host: the buffer holds the host and the path is "/"
    if (index == 0) {
      lm_error_set(LM_ERR_URLBadHost);
      goto end;
    }

    if (index > URL_HOST_MAX) {
      lm_error_set(LM_ERR_URLHostLarge);
      goto end;
    }

    url->host = malloc((index + 1) * sizeof(char));
    if (NULL == url->host)
      goto end;
    memcpy(url->host, buffer, index + 1);

    url->path = malloc(2 * sizeof(char));
    if (NULL == url->path)
      goto end;
    url->path[0] = '/';
    url->path[1] = 0;
    break;

  case URL_PATH_3:
    // the '/' that ended the host was dropped, put it back in front of the path
    url->path = malloc((index + 2) * sizeof(char));
    if (NULL == url->path)
      goto end;
    url->path[0] = '/';
    memcpy(url->path + 1, buffer, index + 1);
    break;

  default:
    // the string ended before a host was reached
    lm_error_set(LM_ERR_URLEnd);
    goto end;
  }

  // NOTE(review): the host buffer is passed as both the first and second
  // argument; presumably parse_host() splits off ":port" in place — confirm
  // against its declaration
  if (parse_host(url->host, url->host, &url->port)) {
    if (url->port != 0) {
      ret = true;
      goto end;
    }

    // parse_host() left the port at 0, fall back to the protocol default
    url->port = lm_url_default_port(url->protocol);
    if (url->port == 0) {
      lm_error_set(LM_ERR_URLPortUnknown);
      goto end;
    }

    ret = true;
    goto end;
  }

  // translate the host parsing errors to their URL counterparts
  switch (lm_error()) {
  case LM_ERR_BadHost:
    lm_error_set(LM_ERR_URLBadHost);
    break;

  case LM_ERR_BadPort:
    lm_error_set(LM_ERR_URLBadPort);
    break;

  default:
    break;
  }

end:
  if (!ret) {
    // free(NULL) is a no-op; reset the pointers so that a later
    // lm_url_free() on this url cannot free them a second time
    free(url->host);
    free(url->path);
    url->host = NULL;
    url->path = NULL;
  }

  return ret;
}
|
|
|
|
|
2024-06-20 22:36:56 +00:00
|
|
|
/*
 * Release the heap memory owned by `url` and reset the whole structure to
 * zero bytes.
 *
 * url: structure whose host and path buffers are freed; must not be NULL.
 */
void lm_url_free(lm_url_t *url) {
  // free() ignores NULL pointers, so fields that were never set are fine
  free(url->host);
  free(url->path);

  // wipe every field, including the pointers that were just freed
  memset(url, 0, sizeof(*url));
}
|