Re-wrote HTTP request/response parsing

It's now much more robust and supports requests received over multiple recv() calls. It also supports POST variables and the 'content-length' header.
This commit is contained in:
Fred Nicolson 2017-04-10 17:24:57 +01:00
parent eaefd93096
commit 6b1ef27764
10 changed files with 336 additions and 172 deletions

View File

@ -79,6 +79,7 @@ namespace fr
};
Http();
virtual ~Http() = default;
/*!
* Parse a raw request or response from a string
@ -111,14 +112,6 @@ namespace fr
*/
void set_type(RequestType type);
/*!
* Access a header
*
* @param key The name of the header data to access/create
* @return The header data.
*/
std::string &operator[](const std::string &key);
/*!
* Sets the request body
*
@ -153,6 +146,18 @@ namespace fr
*/
std::string &post(const std::string &key);
/*!
* Returns a reference to a header.
* Can be used to either set/get the value.
* If the key does not exist, then it will be
* created and an empty value will be returned.
*
* @param key The name of the header
* @return A reference to the header
*/
std::string &header(const std::string &key);
/*!
* Checks to see if a given GET variable exists
*
@ -169,6 +174,15 @@ namespace fr
*/
bool post_exists(const std::string &key) const;
/*!
* Checks to see if a given header exists.
* Note: 'key' should be lower case.
*
* @param key The name of the header
* @return True if it does. False otherwise.
*/
bool header_exists(const std::string &key) const;
/*!
* Returns the requested URI
*
@ -248,9 +262,22 @@ namespace fr
return (int)strtol(&hex[0], 0, 16);
}
/*!
* Converts a parameter list to a vector pair.
* i.e: ?bob=10&fish=hey
* to: <bob, 10>, <fish, hey>
*
* @param str The string to parse
* @return The vector containing the results pairs
*/
std::vector<std::pair<std::string, std::string>> parse_argument_list(const std::string &str);
void parse_header_line(const std::string &str);
//Other request info
std::unordered_map<std::string, std::string> headers;
std::unordered_map<std::string, std::string> get_variables;
std::unordered_map<std::string, std::string> header_data;
std::unordered_map<std::string, std::string> post_data;
std::unordered_map<std::string, std::string> get_data;
std::string body;
RequestType request_type;
std::string uri;

View File

@ -16,8 +16,9 @@ namespace fr
{
public:
//Constructors
HttpRequest() = default;
HttpRequest();
HttpRequest(HttpRequest &&other) = default;
virtual ~HttpRequest() = default;
/*!
* Parse a HTTP response.
@ -33,6 +34,39 @@ namespace fr
* @return The constructed HTTP request.
*/
std::string construct(const std::string &host) const override;
private:
/*!
* Parses the request header.
*
* @param header_end_pos The position in 'body' of the end of the header
*/
void parse_header(ssize_t header_end_pos);
/*!
* Parses the POST data from the body
*/
void parse_post_body();
/*!
* Parses the header type (GET/POST) from the given string.
*
* @param str The first header line
*/
void parse_header_type(const std::string &str);
/*!
* Parses the header URI
*
* @param str The first header line
*/
void parse_header_uri(const std::string &str);
//State
bool header_ended;
ssize_t last_parsed_character;
size_t content_length;
};
}

View File

@ -18,6 +18,7 @@ namespace fr
//Constructors
HttpResponse() = default;
HttpResponse(HttpResponse &&other) = default;
virtual ~HttpResponse() = default;
/*!
* Parse a HTTP response.
@ -33,6 +34,18 @@ namespace fr
* @return The constructed HTTP response.
*/
std::string construct(const std::string &host) const override;
private:
/*!
* Parses the response header.
*
* @param header_end_pos The position in 'body' of the end of the header
*/
void parse_header(ssize_t header_end_pos);
//State
bool header_ended;
size_t content_length;
};
}

View File

@ -6,6 +6,7 @@
#define FRNETLIB_HTTPSOCKET_H
#include "Http.h"
#include "Socket.h"
namespace fr
{
@ -27,6 +28,8 @@ namespace fr
}
virtual ~HttpSocket() = default;
Socket::Status receive(Http &request)
{
size_t received = 0;

View File

@ -25,6 +25,8 @@ namespace fr
}
virtual ~Packet() = default;
//Nasty constructor to allow things like Packet{1, 2, 3, "bob"}.
template <typename T, typename ...Args>
Packet(T const &part, Args &&...args)

View File

@ -12,6 +12,8 @@ namespace fr
class Packetable
{
public:
virtual ~Packetable() = default;
/*!
* Called to pack class data into the 'destination'
* packet.

View File

@ -24,7 +24,7 @@ namespace fr
public:
SSLSocket(std::shared_ptr<SSLContext> ssl_context) noexcept;
~SSLSocket() noexcept;
virtual ~SSLSocket() noexcept;
SSLSocket(SSLSocket &&) noexcept = default;

View File

@ -4,6 +4,7 @@
#include <iostream>
#include <sstream>
#include <algorithm>
#include "frnetlib/Http.h"
namespace fr
@ -20,11 +21,6 @@ namespace fr
return request_type;
}
//Returns a mutable reference to the value stored under 'key' in 'headers'.
//The map's operator[] creates an empty entry when the key is not present.
std::string &Http::operator[](const std::string &key)
{
return headers[key];
}
std::vector<std::string> Http::split_string(const std::string &str)
{
char last_character = '\0';
@ -51,9 +47,10 @@ namespace fr
void Http::clear()
{
headers.clear();
post_data.clear();
get_data.clear();
post_data.clear();
body.clear();
get_variables.clear();
uri = "/";
status = Ok;
request_type = Unknown;
@ -61,22 +58,22 @@ namespace fr
std::string &Http::get(const std::string &key)
{
return get_variables[key];
return get_data[key];
}
std::string &Http::post(const std::string &key)
{
return headers[key];
return post_data[key];
}
bool Http::get_exists(const std::string &key) const
{
return get_variables.find(key) != get_variables.end();
return get_data.find(key) != get_data.end();
}
bool Http::post_exists(const std::string &key) const
{
return headers.find(key) != headers.end();
return post_data.find(key) != post_data.end();
}
const std::string &Http::get_uri() const
@ -153,4 +150,66 @@ namespace fr
}
return result;
}
/*!
 * Looks up a header by name and hands back a mutable reference to
 * its value. A header that is not yet present is created with an
 * empty value first.
 *
 * @param key The name of the header
 * @return A reference to the stored header value
 */
std::string &Http::header(const std::string &key)
{
    std::string &value = header_data[key];
    return value;
}
/*!
 * Reports whether a header with exactly the given name is stored.
 *
 * @param key The name of the header
 * @return True if an entry for 'key' exists, false otherwise
 */
bool Http::header_exists(const std::string &key) const
{
    return header_data.count(key) > 0;
}
/*!
 * Converts a parameter list to a vector of key/value pairs.
 * i.e: ?bob=10&fish=hey
 * to: <bob, 10>, <fish, hey>
 *
 * A single leading '?' is skipped. Arguments are separated by '&'.
 * Within an argument the first '=' separates key from value, so
 * "a=b=c" yields <a, b=c>. Arguments that contain no '=' (including
 * empty ones produced by "&&") are ignored.
 *
 * @param str The string to parse
 * @return The vector containing the resulting pairs, in input order
 */
std::vector<std::pair<std::string, std::string>> Http::parse_argument_list(const std::string &str)
{
    std::vector<std::pair<std::string, std::string>> list;

    //Skip the optional leading '?'
    size_t read_index = (!str.empty() && str.front() == '?') ? 1 : 0;

    while(read_index < str.size())
    {
        //Whichever of '=' / '&' comes first decides what this argument is.
        //Searching for both at once keeps the scan linear and guarantees
        //that the '=' we use belongs to the current argument.
        const auto key_end = str.find_first_of("=&", read_index);
        if(key_end == std::string::npos)
            break; //Trailing text without '=' is not a key/value pair

        if(str[key_end] == '&')
        {
            //Argument without '=': nothing to store, move to the next one
            read_index = key_end + 1;
            continue;
        }

        //The value runs up to the next '&', or to the end of the string
        auto value_end = str.find('&', key_end + 1);
        if(value_end == std::string::npos)
            value_end = str.size();

        list.emplace_back(str.substr(read_index, key_end - read_index),
                          str.substr(key_end + 1, value_end - key_end - 1));
        read_index = value_end + 1;
    }

    return list;
}
/*!
 * Parses a single "Name: value" header line and stores it in
 * 'header_data'. The header name is converted to lower case before
 * it is stored; spaces between the colon and the value are skipped.
 *
 * Lines without a colon, and lines with nothing but spaces after the
 * colon, are ignored. If a header of the same (lower-cased) name is
 * already stored, the existing value is kept.
 *
 * @param str The header line to parse
 */
void Http::parse_header_line(const std::string &str)
{
    const auto colon_pos = str.find(':');
    if(colon_pos == std::string::npos)
        return;

    const auto data_begin = str.find_first_not_of(' ', colon_pos + 1);
    if(data_begin == std::string::npos)
        return;

    //tolower() is only defined for values representable as unsigned char,
    //so convert explicitly rather than passing a possibly negative char.
    std::string header_name = str.substr(0, colon_pos);
    std::transform(header_name.begin(), header_name.end(), header_name.begin(),
                   [](unsigned char c) { return static_cast<char>(::tolower(c)); });

    header_data.emplace(std::move(header_name), str.substr(data_begin));
}
}

View File

@ -2,122 +2,79 @@
// Created by fred on 10/12/16.
//
#include <algorithm>
#include "frnetlib/HttpRequest.h"
namespace fr
{
bool HttpRequest::parse(const std::string &request_data)
HttpRequest::HttpRequest()
: header_ended(false),
last_parsed_character(0),
content_length(0)
{
//Warning: Horrible string parsing code
//Clear old headers/data
clear();
}
//Make sure there's actual request data to read
if(request_data.empty())
bool HttpRequest::parse(const std::string &request)
{
body += request;
//Ensure that the whole header has been parsed first
if(!header_ended)
{
//Check to see if this request data contains the end of the header
auto header_end = body.find("\r\n\r\n");
header_ended = header_end != std::string::npos;
//If the header end has not been found, return true, indicating that we need more data.
if(!header_ended)
{
return true;
}
else
{
parse_header(header_end);
body.clear();
}
content_length += 2; //The empty line between header and data
body += request.substr(header_end, request.size() - header_end);
}
//If we've got the whole request, parse the POST if it exists
if(body.size() >= content_length)
{
if(request_type == RequestType::Post)
parse_post_body();
return false;
//Split by new lines
std::vector<std::string> lines = split_string(request_data);
if(lines.empty())
return false;
//Extract request get_type
if(lines[0].find("GET") != std::string::npos)
request_type = RequestType::Get;
else if(lines[0].find("POST") != std::string::npos)
request_type = RequestType::Post;
else
request_type = RequestType::Unknown;
//Remove HTTP version
auto http_version = lines[0].find("HTTP");
if(http_version != std::string::npos && http_version > 0)
lines[0].erase(http_version - 1, lines[0].size() - http_version + 1);
//Extract URI & GET variables
auto uri_start = lines[0].find(" ");
auto uri_end = lines[0].find("?");
if(uri_start != std::string::npos)
{
if(uri_end == std::string::npos) //If no GET arguments
{
uri = url_decode(lines[0].substr(uri_start + 1, lines[0].size() - 1));
}
else //There's get arguments
{
uri = url_decode(lines[0].substr(uri_start + 1, uri_end - uri_start - 1));
std::string get_lines = lines[0].substr(uri_end + 1, lines[0].size());
std::string name_buffer, value_buffer;
bool state = false;
for(size_t a = 0; a < get_lines.size(); a++)
{
if(get_lines[a] == '&')
{
get_variables.emplace(name_buffer, url_decode(value_buffer));
name_buffer.clear();
value_buffer.clear();
state = false;
continue;
}
else if(get_lines[a] == '=')
{
state = true;
}
else if(state)
{
value_buffer += get_lines[a];
}
else
{
name_buffer += get_lines[a];
}
}
get_variables.emplace(name_buffer, url_decode(value_buffer));
}
}
//Extract headers
size_t a;
for(a = 1; a < lines.size(); a++)
return true;
}
void HttpRequest::parse_header(ssize_t header_end_pos)
{
//Split the header into lines
size_t line = 0;
std::vector<std::string> header_lines = split_string(body.substr(0, header_end_pos));
if(header_lines.empty())
return;
//Parse request type & uri
parse_header_type(header_lines[line]);
parse_header_uri(header_lines[line]);
line++;
//Read in headers
for(; line < header_lines.size(); line++)
{
//New line indicates headers have ended
if(lines[a].empty() || lines[a].size() <= 2)
break;
//Find the colon separating the header name and header data
auto colon_iter = lines[a].find(":");
if(colon_iter == std::string::npos)
continue;
//Store the header
std::string header_name = lines[a].substr(0, colon_iter);
std::string header_content = url_decode(lines[a].substr(colon_iter + 2, lines[a].size () - colon_iter - 3));
headers.emplace(header_name, header_content);
parse_header_line(header_lines[line]);
}
//Extract POST data if it's a post request
if(request_type == Post)
{
for(; a < lines.size(); a++)
{
size_t equals_pos = lines[a].find("=");
if(equals_pos != std::string::npos)
{
headers[lines[a].substr(0, equals_pos)] = url_decode(lines[a].substr(equals_pos + 1, (lines[a].size() - equals_pos) + 1));
}
}
}
else
{
//Store request body
for(; a < lines.size(); a++)
{
body += lines[a] + "\n";
}
}
return false;
//Store content length value if it exists
auto length_header_iter = header_data.find("content-length");
if(length_header_iter != header_data.end())
content_length = std::stoull(length_header_iter->second);
}
std::string HttpRequest::construct(const std::string &host) const
@ -126,26 +83,91 @@ namespace fr
std::string request = request_type_to_string(request_type == Http::Unknown ? Http::Get : request_type) + " " + uri + " HTTP/1.1\r\n";
//Add the headers to the request
for(const auto &header : headers)
for(const auto &header : header_data)
{
std::string data = header.first + ": " + header.second + "\r\n";
request += data;
}
//Generate post line
std::string post_string;
for(auto &post : post_data)
post_string += post.first + "=" + post.second + "&";
if(!post_string.empty())
{
post_string.erase(request.size() - 1, 1);
post_string += "\r\n";
}
//Add in required headers if they're missing
if(headers.find("Connection") == headers.end())
if(header_data.find("Connection") == header_data.end())
request += "Connection: keep-alive\n";
if(headers.find("Host") == headers.end())
if(header_data.find("Host") == header_data.end())
request += "Host: " + host + "\r\n";
if(!body.empty())
request += "Content-Length: " + std::to_string(body.size()) + "\r\n";
request += "Content-Length: " + std::to_string(body.size() + post_string.size()) + "\r\n";
//Add in space
request += "\r\n";
//Add in post
request += post_string;
//Add in the body
request += body + "\r\n";
return request;
}
/*!
 * Parses the POST data from the body.
 *
 * Leading '\r' and '\n' characters in 'body' are skipped, the
 * remainder is parsed as an argument list, and each resulting pair
 * is added to 'post_data'. Keys already present keep their value.
 */
void HttpRequest::parse_post_body()
{
    const auto first_data_char = body.find_first_not_of("\r\n");
    if(first_data_char == std::string::npos)
        return;

    for(auto &argument : parse_argument_list(body.substr(first_data_char)))
        post_data.emplace(std::move(argument.first), std::move(argument.second));
}
/*!
 * Parses the request method from the first header line and stores
 * it in 'request_type'.
 *
 * The method is the text before the first space. "GET" maps to
 * Http::Get and "POST" to Http::Post; any other method is recorded
 * as Http::Unknown rather than being mistaken for a POST.
 *
 * @param str The first header line
 * @throws std::invalid_argument If the line contains no space, and so no method token
 */
void HttpRequest::parse_header_type(const std::string &str)
{
    const auto type_end = str.find(' ');
    if(type_end == std::string::npos)
        throw std::invalid_argument("No known request type found in: " + str);

    if(str.compare(0, type_end, "GET") == 0)
        request_type = fr::Http::Get;
    else if(str.compare(0, type_end, "POST") == 0)
        request_type = fr::Http::Post;
    else
        request_type = fr::Http::Unknown;
}
/*!
 * Parses the URI and any GET variables from the first header line.
 *
 * The request target starts at the first '/' and ends at the space
 * preceding the protocol version (" HTTP/"), or at the end of the
 * line if no version is present. Anything from the first '?' onwards
 * is parsed as an argument list into 'get_data' and removed from the
 * URI before it is stored with set_uri().
 *
 * @param str The first header line
 * @throws std::invalid_argument If no '/' is found before the protocol version
 */
void HttpRequest::parse_header_uri(const std::string &str)
{
    //Locate the end of the request target. Searching from the back keeps
    //a target that itself contains "HTTP" from being cut short.
    const auto version_pos = str.rfind(" HTTP/");
    const auto uri_end = (version_pos == std::string::npos) ? str.size() : version_pos;

    //A '/' that only appears inside the protocol version is not a URI
    const auto uri_begin = str.find('/');
    if(uri_begin == std::string::npos || uri_begin >= uri_end)
        throw std::invalid_argument("No URI found in: " + str);

    std::string request_uri = str.substr(uri_begin, uri_end - uri_begin);

    //Split off the GET variables. The '?' position is taken relative to
    //the extracted target so it can be used directly to truncate it.
    const auto query_begin = request_uri.find('?');
    if(query_begin != std::string::npos)
    {
        auto get_vars = parse_argument_list(request_uri.substr(query_begin));
        for(auto &c : get_vars)
            get_data.emplace(std::move(c.first), std::move(c.second));
        request_uri.erase(query_begin);
    }

    set_uri(request_uri);
}
}

View File

@ -9,51 +9,32 @@ namespace fr
{
bool HttpResponse::parse(const std::string &response_data)
{
//Clear old headers/data
clear();
body += response_data;
//Make sure there's actual request data to read
if(response_data.empty())
return false;
//Split by new lines
std::vector<std::string> lines = split_string(response_data);
if(lines.empty())
return false;
//Extract request get_type
if(lines[0].find("GET") != std::string::npos)
request_type = RequestType::Get;
else if(lines[0].find("POST") != std::string::npos)
request_type = RequestType::Post;
else
request_type = RequestType::Unknown;
//Extract headers
size_t a;
for(a = 1; a < lines.size(); a++)
//Ensure that the whole header has been parsed first
if(!header_ended)
{
//New line indicates headers have ended
if(lines[a].empty() || lines[a].size() <= 2)
break;
//Check to see if this request data contains the end of the header
auto header_end = body.find("\r\n\r\n");
header_ended = header_end != std::string::npos;
//Find the colon separating the header name and header data
auto colon_iter = lines[a].find(":");
if(colon_iter == std::string::npos)
continue;
//If the header end has not been found, return true, indicating that we need more data.
if(!header_ended)
{
return true;
}
else
{
parse_header(header_end);
body.clear();
}
content_length += 2; //The empty line between header and data
//Store the header
std::string header_name = lines[a].substr(0, colon_iter);
std::string header_content = url_decode(lines[a].substr(colon_iter + 2, lines[a].size () - colon_iter - 3));
headers.emplace(header_name, header_content);
body += response_data.substr(header_end, response_data.size() - header_end);
}
//Store request body
for(; a < lines.size(); a++)
{
body += lines[a] + "\n";
}
return false;
return body.size() < content_length;
}
std::string HttpResponse::construct(const std::string &host) const
@ -62,16 +43,16 @@ namespace fr
std::string response = "HTTP/1.1 " + std::to_string(status) + " \r\n";
//Add the headers to the response
for(const auto &header : headers)
for(const auto &header : header_data)
{
std::string data = header.first + ": " + url_encode(header.second) + "\r\n";
response += data;
}
//Add in required headers if they're missing
if(headers.find("Connection") == headers.end())
if(header_data.find("Connection") == header_data.end())
response += "Connection: close_socket\r\n";
if(headers.find("Content-type") == headers.end())
if(header_data.find("Content-type") == header_data.end())
response += "Content-type: text/html\r\n";
//Add in space
@ -81,4 +62,25 @@ namespace fr
response += body + "\r\n";
return response;
}
/*!
 * Parses the header portion of the buffered data.
 *
 * The first 'header_end_pos' characters of 'body' are split into
 * lines. The first line is skipped (presumably the status line) and
 * every following line is handed to parse_header_line(). If a
 * 'content-length' header is then present, its value is stored in
 * 'content_length'.
 *
 * @param header_end_pos The position in 'body' of the end of the header
 */
void HttpResponse::parse_header(ssize_t header_end_pos)
{
    const std::vector<std::string> lines = split_string(body.substr(0, header_end_pos));
    if(lines.empty())
        return;

    //Everything after the first line is a header line
    for(size_t index = 1; index < lines.size(); ++index)
        parse_header_line(lines[index]);

    //Pick up the body length if one was advertised
    const auto content_length_entry = header_data.find("content-length");
    if(content_length_entry != header_data.end())
        content_length = std::stoull(content_length_entry->second);
}
}