matrix_thing/src/reddit.cpp
2019-03-25 17:11:40 -07:00

534 lines
18 KiB
C++

/**
This file is a part of rexy's matrix bot
Copyright (C) 2019 rexy712
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "reddit.hpp"
#include "raii/rjp_string.hpp"
#include "raii/string.hpp"
#include "raii/curler.hpp"
#include "raii/rjp_ptr.hpp"
#include "raii/static_string.hpp"
#include <algorithm> //search
#include <cstring>
#include <string> //std::string, bounds the chunks handed to the curl write callbacks
//no idea if this covers everything reddit might dish out at me
//there is no consistency in their content tagging. there are gifs marked as images, others as videos
//they separate audio and video streams for their hosted videos, there is no true way to tell
//what kind of content a post contains since the post_hint field might be completely nonexistent.
//it's just a game of hacking together solutions each time reddit throws me a new type of unexpected complication.
namespace reddit{
//Time windows accepted by the "top" and "controversial" listing requests.
//The string held by each period is appended to the request as the "t=" query parameter.
namespace time{
period hour = "hour";
period day = "day";
period week = "week";
period month = "month";
period year = "year";
period all = "all";
}
//Read the bot and account credentials out of a parsed json tree.
//root is searched for the members "bot" and "account"; each of those is searched for
//"username" and "password". Missing members are not checked for here.
auth_data parse_auth_data(RJP_value* root){
    static const char* section_names[2] = {"bot", "account"};
    static const char* credential_names[2] = {"username", "password"};
    constexpr size_t bot_section = 0, account_section = 1;
    constexpr size_t username = 0, password = 1;

    RJP_search_res sections[2];
    rjp_search_members(root, 2, section_names, sections, 0);

    auth_data auth;

    //credentials of the bot itself
    RJP_search_res bot_fields[2];
    rjp_search_members(sections[bot_section].value, 2, credential_names, bot_fields, 0);
    auth.bot_name = bot_fields[username].value;
    auth.bot_pass = bot_fields[password].value;

    //credentials of the account the bot acts as
    RJP_search_res account_fields[2];
    rjp_search_members(sections[account_section].value, 2, credential_names, account_fields, 0);
    auth.acc_name = account_fields[username].value;
    auth.acc_pass = account_fields[password].value;

    return auth;
}
//Look up media -> "reddit_video" -> "fallback_url".
//Returns an empty string when media is null or either member is missing.
static raii::rjp_string media_search(RJP_value* media){
    if(media){
        const RJP_search_res video = rjp_search_member(media, "reddit_video", 0);
        if(video.value){
            const RJP_search_res fallback = rjp_search_member(video.value, "fallback_url", 0);
            if(fallback.value)
                return raii::rjp_string(fallback.value);
        }
    }
    return raii::rjp_string{};
}
//Look up root -> "preview" -> "reddit_video_preview" -> "fallback_url".
//Returns an empty string as soon as one step of that path is missing.
static raii::rjp_string preview_search(RJP_value* root){
    static const char* const key_path[] = {"preview", "reddit_video_preview", "fallback_url"};
    RJP_value* current = root;
    for(const char* key : key_path){
        current = rjp_search_member(current, key, 0).value;
        if(!current)
            return raii::rjp_string{};
    }
    return raii::rjp_string(current);
}
//True only when root has an "is_reddit_media_domain" member whose boolean value is set.
static bool check_reddit_media_domain(RJP_value* root){
    const RJP_search_res flag = rjp_search_member(root, "is_reddit_media_domain", 0);
    if(!flag.value)
        return false;
    return rjp_value_boolean(flag.value) ? true : false;
}
//Find a video url for a post: first through its "media" member, then through its preview.
//Returns an empty string when neither lookup yields a url.
static raii::rjp_string find_video_url(RJP_value* root){
    raii::rjp_string from_media = media_search(rjp_search_member(root, "media", 0).value);
    if(from_media)
        return from_media;
    return preview_search(root);
}
//True when str ends in ".gifv" and has at least one character before that suffix.
static bool is_gifv(const raii::string_base& str){
    static constexpr char suffix[] = ".gifv";
    static constexpr size_t suffix_len = sizeof(suffix) - 1;
    const size_t len = str.length();
    if(len <= suffix_len)
        return false;
    return memcmp(str.get() + (len - suffix_len), suffix, suffix_len) == 0;
}
//Report whether str ends in something that looks like a file extension.
//Scans backwards over at most the last 6 characters: a '.' seen before any '/'
//means yes, a '/' (or running out of characters) means no.
//The scan never steps before the first character, so empty and short strings are safe.
static bool has_extension(const raii::string_base& str){
    static constexpr size_t max_scan = 6;
    const char* const begin = str.get();
    const size_t len = str.length();
    if(!begin || len == 0)
        return false;
    const char* p = begin + len;
    for(size_t i = 0; i < max_scan && p != begin; ++i){
        --p;
        if(*p == 0 || *p == '/')
            return false;
        if(*p == '.')
            return true;
    }
    return false;
}
//True when "gfycat.com" occurs anywhere in str.
//The search result is compared against the end pointer instead of being dereferenced,
//so an empty (null) string or a buffer without a terminator is never read out of bounds.
static bool is_gfycat_link(const raii::string_base& str){
    static const char gfycat[] = "gfycat.com";
    const char* const begin = str.get();
    if(!begin)
        return false;
    const char* const end = begin + str.length();
    return std::search(begin, end, gfycat, gfycat+sizeof(gfycat)-1) != end;
}
//True when "imgur.com" occurs anywhere in str.
//The search result is compared against the end pointer instead of being dereferenced,
//so an empty (null) string or a buffer without a terminator is never read out of bounds.
static bool is_imgur_link(const raii::string_base& str){
    static const char imgur[] = "imgur.com";
    const char* const begin = str.get();
    if(!begin)
        return false;
    const char* const end = begin + str.length();
    return std::search(begin, end, imgur, imgur+sizeof(imgur)-1) != end;
}
//An imgur link counts as direct when it also ends in a file extension.
static bool is_direct_imgur_link(const raii::string_base& str){
    if(!is_imgur_link(str))
        return false;
    return has_extension(str);
}
//Build a post from a copy of the raw listing response p and parse it.
//m_type starts as unrecognized, matching the rvalue overload, because _parse_post can
//return before assigning it and operator bool reads it afterwards.
post::post(const raii::string_base& p):
    m_post(p),
    m_type(post_type::unrecognized)
{
    _parse_post();
}
//Build a post by taking over the raw listing response p, then parse it.
//The type stays unrecognized if parsing stops early.
post::post(raii::string_base&& p):
    m_post(std::move(p)),
    m_type(post_type::unrecognized)
{
    _parse_post();
}
//Replace this post with one parsed from p.
//If p does not produce a valid post, this object is left as it was.
post& post::operator=(const raii::string_base& p){
    post parsed(p);
    if(parsed)
        *this = std::move(parsed);
    return *this;
}
//A post is valid when its url, author, title and name are all set.
//Anything other than a text post must have a media url as well.
post::operator bool() const{
    const bool has_common_fields = m_post_url && m_author && m_title && m_name;
    if(m_type == post_type::text)
        return has_common_fields;
    return has_common_fields && m_media_url;
}
//The unparsed response text this post was built from.
const raii::string& post::raw() const{
    return m_post;
}
//Url of the post's media; empty for text posts.
const raii::rjp_string& post::mediaurl() const{
    return m_media_url;
}
//Url of the separate audio stream worked out for reddit hosted videos.
const raii::string& post::hosted_video_audio() const{
    return m_hosted_video_audio;
}
//Short "https://redd.it/<id>" link to the post.
const raii::rjp_string& post::posturl() const{
    return m_post_url;
}
//Value of the post's "author" field.
const raii::rjp_string& post::author() const{
    return m_author;
}
//Value of the post's "post_hint" field; empty when reddit did not supply one.
const raii::rjp_string& post::post_hint() const{
    return m_post_hint;
}
//Value of the post's "title" field.
const raii::rjp_string& post::title() const{
    return m_title;
}
//The post's name in "<kind>_<id>" form.
const raii::rjp_string& post::name() const{
    return m_name;
}
//True when the crosspost flag was set while parsing.
bool post::is_crosspost() const{
    return (m_flags & POST_FLAGS_CROSSPOSTED) != 0;
}
//The kind of content this post was classified as.
post_type post::type() const{
    return m_type;
}
//Parse the raw listing response held in m_post and fill in the post's members.
//The response is walked as data -> children -> first element -> {kind, data}.
//If it does not parse, or any of those steps is missing, the function returns
//early without assigning any member.
//m_type is chosen from the "post_hint" field when there is one; without a hint it
//falls back to inspecting the url and the "is_reddit_media_domain" flag.
void post::_parse_post(void){
raii::rjp_ptr root(rjp_parse(m_post));
if(!root)
return;
//members looked up in the post's inner "data" object; results[i] belongs to search_items[i]
static const char* search_items[] = {"url", "author", "post_hint", "title", "id", "crosspost_parent_list"};
static constexpr size_t num_searches = sizeof(search_items)/sizeof(search_items[0]);
RJP_search_res results[num_searches];
//descend to the first child of the listing
RJP_search_res data = rjp_search_member(root.get(), "data", 0);
if(!data.value) return;
data = rjp_search_member(data.value, "children", 0);
if(!data.value) return;
data.value = rjp_get_element(data.value);
if(!data.value) return;
//"kind" sits next to the child's "data" object and is later used as the prefix of m_name
RJP_search_res kind = rjp_search_member(data.value, "kind", 0);
if(!kind.value) return;
data = rjp_search_member(data.value, "data", 0);
if(!data.value) return;
//alias for the "crosspost_parent_list" result, filled by the search below
RJP_search_res& crosspost = results[5];
rjp_search_members(data.value, num_searches, search_items, results, 0);
//reddit will *sometimes* make the url field point to the crosspost parent's comments page.
//so we just always assume that the true link is in the crosspost parent
if(crosspost.value){
m_flags |= POST_FLAGS_CROSSPOSTED;
//NOTE(review): the element fetched from the parent list is not checked before it is searched - confirm the list is never empty
crosspost.value = rjp_get_element(crosspost.value);
crosspost = rjp_search_member(crosspost.value, "url", 0);
if(crosspost.value)
m_media_url = crosspost.value;
}else{
m_media_url = results[0].value;
}
m_author = results[1].value;
m_post_hint = results[2].value;
m_title = results[3].value;
//name is "<kind>_<id>".
//NOTE(review): the "id" value is read here and used again on the next line; keep this order
//unless constructing an rjp_string from a value is confirmed to leave the value intact
m_name = raii::rjp_string(kind.value) + "_" + rjp_value_string(results[4].value);
m_post_url = "https://redd.it/" + raii::rjp_string(results[4].value);
if(m_post_hint){
//handle simple image
if(!strcmp(m_post_hint, "image")){
m_type = post_type::image;
}
//handle link
else if(!strcmp(m_post_hint, "link")){
m_type = post_type::link;
//imgur support
if(is_imgur_link(m_media_url)){
if(is_gifv(m_media_url)){ //gifv is a video
//only becomes a video when the preview supplies a url; otherwise it stays a link
if(raii::rjp_string tmp = preview_search(data.value)){
m_media_url = std::move(tmp);
m_type = post_type::video;
}
}else{
//imgur links don't lead to the image source. adding .jpg to the link leads to the source
//except when the link is to an album or to a gifv
m_media_url += ".jpg"; //imgur is dumb
m_type = post_type::image;
}
//gfycat support
}else if(is_gfycat_link(m_media_url)){
//only becomes a video when a video url is found; otherwise it stays a link
if(raii::rjp_string tmp = find_video_url(data.value)){
m_media_url = std::move(tmp);
m_type = post_type::video;
}
}
}
//handle hosted video
else if(!strcmp(m_post_hint, "hosted:video")){
m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio);
}
//embedded video: try the media member first, then the preview, else treat it as a plain link
else if(!strcmp(m_post_hint, "rich:video")){
RJP_search_res media = rjp_search_member(data.value, "media", 0);
raii::rjp_string res = media_search(media.value);
if(res){
m_type = post_type::video;
m_media_url = std::move(res);
return;
}
res = preview_search(data.value);
if(res){
m_type = post_type::video;
m_media_url = std::move(res);
return;
}
m_type = post_type::link;
}
else{
//assume text post for other
m_type = post_type::text;
}
//no post_hint at all: classify from the url and the media domain flag
}else if(is_direct_imgur_link(m_media_url)){
m_type = post_type::image;
return;
}else if(check_reddit_media_domain(data.value)){
m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio);
/*RJP_value* media = rjp_search_member(data.value, "media", 0).value;
if(media && (rjp_value_type(media) != json_null))
m_type = post_type::video;
else
m_type = post_type::image;
//*/
}else{
//nothing recognizable as media: drop the url and treat the post as text
m_media_url.reset();
m_type = post_type::text;
}
}
//Work out the media url, and the separate audio url, of a reddit hosted video.
//data: the post's inner "data" object
//media_url: overwritten with the video's fallback url when one is found
//audio_url: overwritten with "<video base url>/audio" when a video url is found
//Returns image for gifs, video when a url was found, link otherwise.
post_type post::_handle_reddit_hosted_video(RJP_value* data, raii::rjp_string& media_url, raii::string& audio_url){
    RJP_search_res media = rjp_search_member(data, "media", 0);
    //only look inside "media" when the post actually has one
    if(media.value){
        RJP_search_res gif = rjp_search_member(media.value, "reddit_video", 0);
        //treat gif as image even though reddit thinks they're videos
        //NOTE(review): "is_gif" is looked up on media, not on the reddit_video object found just above - confirm which is intended
        if(gif.value)
            gif = rjp_search_member(media.value, "is_gif", 0);
        if(gif.value && rjp_value_boolean(gif.value)){
            return post_type::image;
        }
    }
    raii::rjp_string res = media_search(media.value);
    if(!res){
        res = preview_search(data);
        if(!res){
            return post_type::link;
        }
    }
    media_url = std::move(res);
    //reddit hosts audio and video separately. Meaning I have to find a way to manually recombine them.
    //this sets up a link to the audio source of the video. the video might not actually have audio. when downloading
    //from the audio link, always make sure to check for 404 errors.
    static constexpr char url_base[] = "https://v.redd.it/";
    static constexpr size_t url_base_len = sizeof(url_base)-1;
    //a url shorter than the base cannot be searched past it; keep the video but skip the audio url
    if(media_url.length() < url_base_len)
        return post_type::video;
    //cut the url at the first '/' after the base, or use all of it when there is none
    char* end = strstr(media_url.get()+url_base_len, "/");
    if(!end)
        end = media_url.get()+media_url.length();
    size_t len = end - media_url.get();
    audio_url = raii::string(len + 6);
    memcpy(audio_url.get(), media_url.get(), len);
    memcpy(audio_url.get()+len, "/audio", 6);
    audio_url[len+6] = 0;
    return post_type::video;
}
//Create a bot with a copy of useragent and request an access token with the credentials in a.
bot::bot(const auth_data& a, const raii::string_base& useragent):
    m_curl(),
    m_useragent(useragent),
    m_access_token(_acquire_access_token(a))
{}
//Create a bot that takes over useragent and request an access token with the credentials in a.
bot::bot(const auth_data& a, raii::string_base&& useragent):
    m_curl(),
    m_useragent(std::move(useragent)),
    m_access_token(_acquire_access_token(a))
{}
//Copy the curl object, user agent and access token of b.
bot::bot(const bot& b):
    m_curl(b.m_curl),
    m_useragent(b.m_useragent),
    m_access_token(b.m_access_token)
{}
//Take over the curl object, user agent and access token of b.
bot::bot(bot&& b):
    m_curl(std::move(b.m_curl)),
    m_useragent(std::move(b.m_useragent)),
    m_access_token(std::move(b.m_access_token))
{}
//Take over the user agent and access token of b.
//NOTE(review): m_curl is not assigned here, so this bot keeps its own curl object - confirm that is intended
bot& bot::operator=(bot&& b){
    m_useragent = std::move(b.m_useragent);
    m_access_token = std::move(b.m_access_token);
    return *this;
}
//Copy assignment: make a full copy of b, then move-assign from that copy.
bot& bot::operator=(const bot& b){
    bot copy(b);
    *this = std::move(copy);
    return *this;
}
//The access token currently held; empty when the last token request failed.
const raii::rjp_string& bot::access_token() const{
    return m_access_token;
}
//The user agent string set on every request.
const raii::string& bot::useragent() const{
    return m_useragent;
}
//Replace the user agent with a copy of s.
void bot::set_useragent(const raii::string_base& s){
    m_useragent = s;
}
//Replace the user agent by taking over s.
void bot::set_useragent(raii::string_base&& s){
    m_useragent = std::move(s);
}
//Request a new access token with the credentials in a and store the result.
void bot::refresh_token(const auth_data& a){
    m_access_token = _acquire_access_token(a);
}
//Fetch one post from the "new" listing of subreddit.
post bot::get_new_post(const raii::string_base& subreddit){
    return _get_post(subreddit, "new"_ss, "limit=1"_ss);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_new_post(const raii::string_base& subreddit, const raii::string_base& after){
    const raii::string query("limit=1&after=" + after);
    return _get_post(subreddit, "new"_ss, query);
}
//Fetch one post from the "hot" listing of subreddit.
post bot::get_hot_post(const raii::string_base& subreddit){
    return _get_post(subreddit, "hot"_ss, "limit=1"_ss);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_hot_post(const raii::string_base& subreddit, const raii::string_base& after){
    const raii::string query("limit=1&after=" + after);
    return _get_post(subreddit, "hot"_ss, query);
}
//Fetch one post from the "rising" listing of subreddit.
post bot::get_rising_post(const raii::string_base& subreddit){
    return _get_post(subreddit, "rising"_ss, "limit=1"_ss);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_rising_post(const raii::string_base& subreddit, const raii::string_base& after){
    const raii::string query("limit=1&after=" + after);
    return _get_post(subreddit, "rising"_ss, query);
}
//Fetch one post from the "best" listing of subreddit.
post bot::get_best_post(const raii::string_base& subreddit){
    return _get_post(subreddit, "best"_ss, "limit=1"_ss);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_best_post(const raii::string_base& subreddit, const raii::string_base& after){
    const raii::string query("limit=1&after=" + after);
    return _get_post(subreddit, "best"_ss, query);
}
//Fetch one post from the "top" listing of subreddit, limited to the time window period.
post bot::get_top_post(const raii::string_base& subreddit, time::period period){
    raii::static_string window = period.get();
    const raii::string query("limit=1&t=" + window);
    return _get_post(subreddit, "top"_ss, query);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_top_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){
    raii::static_string window = period.get();
    const raii::string query("limit=1&t=" + window + "&after=" + after);
    return _get_post(subreddit, "top"_ss, query);
}
//Fetch one post from the "controversial" listing of subreddit, limited to the time window period.
post bot::get_controversial_post(const raii::string_base& subreddit, time::period period){
    raii::static_string window = period.get();
    const raii::string query("limit=1&t=" + window);
    return _get_post(subreddit, "controversial"_ss, query);
}
//Same, with after passed on as the request's "after=" parameter.
post bot::get_controversial_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){
    raii::static_string window = period.get();
    const raii::string query("limit=1&t=" + window + "&after=" + after);
    return _get_post(subreddit, "controversial"_ss, query);
}
//Request https://oauth.reddit.com/r/<subreddit>/<category>[?<extra>] and build a post from the reply.
//The result of the transfer is not inspected; whatever was received is handed to post.
post bot::_get_post(const raii::string_base& subreddit, const raii::string_base& category, const raii::string_base& extra){
    static constexpr char url_base[] = "https://oauth.reddit.com/r/";
    raii::string url;
    if(!extra)
        url = (url_base + subreddit) + "/" + category;
    else
        url = (url_base + subreddit) + "/" + category + "?" + extra;

    raii::curl_llist auth_header(_create_auth_header(m_access_token));
    raii::string response;
    m_curl.reset();
    _setup_subreddit_get_curl(auth_header, url, response);
    m_curl.perform();
    return post(response);
}
size_t bot::_get_response_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){
raii::rjp_string* reply = reinterpret_cast<raii::rjp_string*>(userdata);
(*reply) += ptr;
return size*nmemb;
}
//Build the header list holding the single line "Authorization: bearer <access_token>".
raii::curl_llist bot::_create_auth_header(const raii::string_base& access_token){
return raii::curl_llist(raii::string("Authorization: bearer " + access_token));
}
//Configure m_curl for a request to a subreddit listing.
//header: header list installed on the request
//url: full request url
//reply: object whose address is given to the write callback as its userdata
void bot::_setup_subreddit_get_curl(const raii::curl_llist& header, const raii::string_base& url, const raii::string_base& reply){
m_curl.seturl(url);
m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L);
m_curl.setopt(CURLOPT_NOPROGRESS, 1L);
//follow redirects, at most 50 of them
m_curl.setopt(CURLOPT_MAXREDIRS, 50L);
m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L);
m_curl.forcessl(CURL_SSLVERSION_TLSv1_2);
m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L);
m_curl.setheader(header);
m_curl.setuseragent(m_useragent);
m_curl.setopt(CURLOPT_WRITEFUNCTION, _get_response_curl_callback);
//NOTE(review): reply is a const reference, yet the write callback casts this pointer to a
//mutable string and appends to it - confirm callers always pass a mutable string of the type the callback expects
m_curl.setopt(CURLOPT_WRITEDATA, &reply);
//per libcurl, makes HTTP responses with a code of 400 or above fail the transfer
m_curl.setopt(CURLOPT_FAILONERROR, 1L);
}
//curl write callback for the token request: appends the received bytes to the reply string.
//ptr: received data, size*nmemb bytes long and NOT nul terminated
//userdata: address of the raii::string that collects the reply (set through CURLOPT_WRITEDATA)
//Returns the number of bytes consumed, which curl expects to equal size*nmemb.
size_t bot::_post_reply_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){
    const size_t total = size*nmemb;
    if(total == 0)
        return 0;
    raii::string* data = static_cast<raii::string*>(userdata);
    //copy exactly the bytes curl delivered so the append sees a terminated string.
    //a nul byte inside the data would still cut the append short.
    const std::string chunk(ptr, total);
    (*data) += chunk.c_str();
    return total;
}
//Create reddit login data
//Produces "grant_type=password&username=<account_name>&password=<account_pass>".
//The arguments are inserted as given; _request_access_token URL encodes them beforehand.
raii::string bot::_create_request_post_data(const raii::string_base& account_name, const raii::string_base& account_pass){
return raii::string("grant_type=password&username=" + account_name + "&password=" + account_pass);
}
//Setup login data for reddit bot
//Produces "<bot_name>:<bot_pass>", the form handed to setuserpwd for the token request.
raii::string bot::_create_request_userpwd(const raii::string_base& bot_name, const raii::string_base& bot_pass){
return raii::string(bot_name + ":" + bot_pass);
}
//Configure m_curl for the POST that requests an access token.
//userpwd: "<name>:<password>" pair set through setuserpwd
//postdata: body of the POST request
//result: passed to the write callback as its userdata; _request_access_token passes the address of a raii::string
void bot::_setup_token_request_curl(const raii::string_base& userpwd, const raii::string_base& postdata, void* result){
static constexpr char reddit_token_address[] = "https://www.reddit.com/api/v1/access_token";
m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L);
m_curl.seturl(reddit_token_address);
m_curl.setopt(CURLOPT_NOPROGRESS, 1L);
m_curl.setuserpwd(userpwd);
m_curl.setpostdata(postdata);
m_curl.setuseragent(m_useragent);
//follow redirects, at most 50 of them
m_curl.setopt(CURLOPT_MAXREDIRS, 50L);
m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L);
m_curl.forcessl(CURL_SSLVERSION_TLSv1_2);
m_curl.setopt(CURLOPT_CUSTOMREQUEST, "POST");
m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L);
m_curl.setopt(CURLOPT_WRITEFUNCTION, _post_reply_curl_callback);
m_curl.setopt(CURLOPT_WRITEDATA, result);
//per libcurl, makes HTTP responses with a code of 400 or above fail the transfer
m_curl.setopt(CURLOPT_FAILONERROR, 1L);
}
//Ask reddit for an access token using the credentials in auth.
//Returns the raw reply body, or an empty string when the transfer does not finish with CURLE_OK.
raii::string bot::_request_access_token(const auth_data& auth){
    //the account credentials travel in the POST body, so URL encode them first
    raii::curl_string encoded_name = m_curl.encode(auth.acc_name, auth.acc_name.length());
    raii::curl_string encoded_pass = m_curl.encode(auth.acc_pass, auth.acc_pass.length());
    raii::string postdata = _create_request_post_data(encoded_name, encoded_pass);
    //the encoded copies are not needed once they are part of postdata
    encoded_name.reset();
    encoded_pass.reset();

    //the bot's own credentials are sent as the name:password pair
    raii::string userpwd = _create_request_userpwd(auth.bot_name, auth.bot_pass);

    //load curl with the request and run the POST
    raii::string reply;
    _setup_token_request_curl(userpwd, postdata, &reply);
    if(m_curl.perform() != CURLE_OK)
        return {};
    return reply;
}
//Request a token with the credentials in a and pull "access_token" out of the json reply.
//Returns an empty string when the request fails or the reply does not parse.
raii::rjp_string bot::_acquire_access_token(const auth_data& a){
    raii::string response = _request_access_token(a);
    if(response){
        raii::rjp_ptr json(rjp_parse(response));
        if(json){
            RJP_search_res token = rjp_search_member(json.get(), "access_token", 0);
            return raii::rjp_string{token.value};
        }
    }
    return raii::rjp_string{};
}
}