/**
	This file is a part of rexy's matrix bot
	Copyright (C) 2019 rexy712

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as published by
	the Free Software Foundation, either version 3 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
|
|
|
|
#include "reddit.hpp"
|
|
#include "raii/rjp_string.hpp"
|
|
#include "raii/string.hpp"
|
|
#include "raii/curler.hpp"
|
|
#include "raii/rjp_ptr.hpp"
|
|
#include "raii/static_string.hpp"
|
|
|
|
#include <algorithm> //search
|
|
#include <cstring>
|
|
|
|
//No idea if this covers everything reddit might dish out at me.
//There is no consistency in their content tagging: there are gifs marked as images, others as videos.
//They separate audio and video streams for their hosted videos, and there is no true way to tell
//what kind of content a post contains since the post_hint field might be completely nonexistent.
//It's just a game of hacking together solutions each time reddit throws me a new type of unexpected complication.
|
|
|
|
namespace reddit{
|
|
|
|
namespace time{
|
|
period hour = "hour";
|
|
period day = "day";
|
|
period week = "week";
|
|
period month = "month";
|
|
period year = "year";
|
|
period all = "all";
|
|
}
|
|
|
|
//pull the login details out of a parsed json document.
//root is searched for the members "bot" and "account"; each of those is searched
//for "username" and "password". no validation happens here: whatever the search
//reports for a member (including a missing one) is assigned straight into the result.
auth_data parse_auth_data(RJP_value* root){
	static const char* section_keys[2] = {"bot", "account"};
	static const char* credential_keys[2] = {"username", "password"};

	RJP_search_res sections[2];
	rjp_search_members(root, 2, section_keys, sections, 0);

	auth_data ret;
	RJP_search_res creds[2];

	//"bot" section
	rjp_search_members(sections[0].value, 2, credential_keys, creds, 0);
	ret.bot_name = creds[0].value;
	ret.bot_pass = creds[1].value;

	//"account" section
	rjp_search_members(sections[1].value, 2, credential_keys, creds, 0);
	ret.acc_name = creds[0].value;
	ret.acc_pass = creds[1].value;

	return ret;
}
|
|
|
|
|
|
//look up media["reddit_video"]["fallback_url"].
//returns an empty string when media is null or when either member is missing.
static raii::rjp_string media_search(RJP_value* media){
	if(media){
		const RJP_search_res video = rjp_search_member(media, "reddit_video", 0);
		if(video.value){
			const RJP_search_res fallback = rjp_search_member(video.value, "fallback_url", 0);
			if(fallback.value)
				return raii::rjp_string(fallback.value);
		}
	}
	return raii::rjp_string{};
}
|
|
//look up root["preview"]["reddit_video_preview"]["fallback_url"].
//returns an empty string as soon as one of the members along the way is missing.
static raii::rjp_string preview_search(RJP_value* root){
	static const char* const path[] = {"preview", "reddit_video_preview", "fallback_url"};

	RJP_value* current = root;
	for(const char* key : path){
		current = rjp_search_member(current, key, 0).value;
		if(!current)
			return raii::rjp_string{};
	}
	return raii::rjp_string(current);
}
|
|
//true only when root has an "is_reddit_media_domain" member and its boolean value is set.
static bool check_reddit_media_domain(RJP_value* root){
	RJP_value* const flag = rjp_search_member(root, "is_reddit_media_domain", 0).value;
	if(!flag)
		return false;
	return rjp_value_boolean(flag);
}
|
|
|
|
//find a video url for a post: first choice is the "media" member's reddit_video
//fallback url, second choice is the preview's fallback url. empty if neither exists.
static raii::rjp_string find_video_url(RJP_value* root){
	raii::rjp_string from_media = media_search(rjp_search_member(root, "media", 0).value);
	if(from_media)
		return from_media;
	return preview_search(root);
}
|
|
//true when str ends in ".gifv" and has at least one character in front of that suffix.
static bool is_gifv(const raii::string_base& str){
	static const char suffix[] = ".gifv";
	static constexpr size_t suffix_len = sizeof(suffix)-1;

	const size_t len = str.length();
	if(len <= suffix_len)
		return false;
	return memcmp(str.get() + (len - suffix_len), suffix, suffix_len) == 0;
}
|
|
//decide whether str ends in something that looks like a file extension.
//at most the last 6 characters are inspected, scanning backwards from the end:
//hitting '.' first means there is an extension, hitting '/' first means there is not.
//the scan never leaves the string, so empty and very short strings are safe.
static bool has_extension(const raii::string_base& str){
	const char* const begin = str.get();
	const size_t len = str.length();
	if(!begin || !len)
		return false;

	const size_t window = (len < 6) ? len : 6;
	for(size_t i = 0;i < window;++i){
		const char c = begin[len - 1 - i];
		if(c == '/')
			return false;
		if(c == '.')
			return true;
	}
	return false;
}
|
|
//true when "gfycat.com" appears anywhere in str.
//the search result is compared against the end of the range instead of being
//dereferenced, so this works for empty strings and does not rely on a terminating 0.
static bool is_gfycat_link(const raii::string_base& str){
	static const char gfycat[] = "gfycat.com";
	const char* const begin = str.get();
	if(!begin)
		return false;
	const char* const end = begin + str.length();
	return std::search(begin, end, gfycat, gfycat+sizeof(gfycat)-1) != end;
}
|
|
//true when "imgur.com" appears anywhere in str.
//the search result is compared against the end of the range instead of being
//dereferenced, so this works for empty strings and does not rely on a terminating 0.
static bool is_imgur_link(const raii::string_base& str){
	static const char imgur[] = "imgur.com";
	const char* const begin = str.get();
	if(!begin)
		return false;
	const char* const end = begin + str.length();
	return std::search(begin, end, imgur, imgur+sizeof(imgur)-1) != end;
}
|
|
//an imgur link counts as direct when it also ends in a file extension.
static bool is_direct_imgur_link(const raii::string_base& str){
	if(!is_imgur_link(str))
		return false;
	return has_extension(str);
}
|
|
|
|
|
|
|
|
//build a post from a copy of the raw reply text p and parse it right away.
//m_type starts out as unrecognized (same as the move overload) so type() has a
//defined value even when parsing bails out early.
post::post(const raii::string_base& p):
	m_post(p),
	m_type(post_type::unrecognized)
{
	_parse_post();
}
|
|
//build a post by taking over the raw reply text p, then parse it right away.
//m_type starts out as unrecognized until _parse_post decides otherwise.
post::post(raii::string_base&& p):
	m_post(std::move(p)),
	m_type(post_type::unrecognized)
{
	_parse_post();
}
|
|
//replace this post with the one described by the raw reply text p.
//p is parsed into a temporary first; if that temporary is not a valid post,
//this object is left exactly as it was.
post& post::operator=(const raii::string_base& p){
	post parsed(p);
	if(parsed)
		*this = std::move(parsed);
	return *this;
}
|
|
//a post is usable when its url, author, title and name are all set.
//anything that isn't a text post additionally needs a media url.
post::operator bool()const{
	const bool has_common = (m_post_url && m_author && m_title && m_name);
	if(m_type == post_type::text)
		return has_common;
	return has_common && m_media_url;
}
|
|
|
|
const raii::string& post::raw(void)const{
|
|
return m_post;
|
|
}
|
|
const raii::rjp_string& post::mediaurl(void)const{
|
|
return m_media_url;
|
|
}
|
|
const raii::string& post::hosted_video_audio(void)const{
|
|
return m_hosted_video_audio;
|
|
}
|
|
const raii::rjp_string& post::posturl(void)const{
|
|
return m_post_url;
|
|
}
|
|
const raii::rjp_string& post::author(void)const{
|
|
return m_author;
|
|
}
|
|
const raii::rjp_string& post::post_hint(void)const{
|
|
return m_post_hint;
|
|
}
|
|
const raii::rjp_string& post::title(void)const{
|
|
return m_title;
|
|
}
|
|
const raii::rjp_string& post::name(void)const{
|
|
return m_name;
|
|
}
|
|
bool post::is_crosspost(void)const{
|
|
return (m_flags & POST_FLAGS_CROSSPOSTED);
|
|
}
|
|
post_type post::type(void)const{
|
|
return m_type;
|
|
}
|
|
void post::_parse_post(void){
|
|
raii::rjp_ptr root(rjp_parse(m_post));
|
|
if(!root)
|
|
return;
|
|
|
|
static const char* search_items[] = {"url", "author", "post_hint", "title", "id", "crosspost_parent_list"};
|
|
static constexpr size_t num_searches = sizeof(search_items)/sizeof(search_items[0]);
|
|
RJP_search_res results[num_searches];
|
|
RJP_search_res data = rjp_search_member(root.get(), "data", 0);
|
|
if(!data.value) return;
|
|
data = rjp_search_member(data.value, "children", 0);
|
|
if(!data.value) return;
|
|
data.value = rjp_get_element(data.value);
|
|
if(!data.value) return;
|
|
RJP_search_res kind = rjp_search_member(data.value, "kind", 0);
|
|
if(!kind.value) return;
|
|
data = rjp_search_member(data.value, "data", 0);
|
|
if(!data.value) return;
|
|
|
|
RJP_search_res& crosspost = results[5];
|
|
|
|
rjp_search_members(data.value, num_searches, search_items, results, 0);
|
|
|
|
//reddit will *sometimes* make the url field point to the crosspost parent's comments page.
|
|
//so we just always assume that the true link is in the crosspost parent
|
|
if(crosspost.value){
|
|
m_flags |= POST_FLAGS_CROSSPOSTED;
|
|
crosspost.value = rjp_get_element(crosspost.value);
|
|
crosspost = rjp_search_member(crosspost.value, "url", 0);
|
|
if(crosspost.value)
|
|
m_media_url = crosspost.value;
|
|
}else{
|
|
m_media_url = results[0].value;
|
|
}
|
|
m_author = results[1].value;
|
|
m_post_hint = results[2].value;
|
|
m_title = results[3].value;
|
|
m_name = raii::rjp_string(kind.value) + "_" + rjp_value_string(results[4].value);
|
|
m_post_url = "https://redd.it/" + raii::rjp_string(results[4].value);
|
|
|
|
if(m_post_hint){
|
|
//handle simple image
|
|
if(!strcmp(m_post_hint, "image")){
|
|
m_type = post_type::image;
|
|
}
|
|
//handle link
|
|
else if(!strcmp(m_post_hint, "link")){
|
|
m_type = post_type::link;
|
|
|
|
//imgur support
|
|
if(is_imgur_link(m_media_url)){
|
|
if(is_gifv(m_media_url)){ //gifv is a video
|
|
if(raii::rjp_string tmp = preview_search(data.value)){
|
|
m_media_url = std::move(tmp);
|
|
m_type = post_type::video;
|
|
}
|
|
}else{
|
|
//imgur links don't lead to the image source. adding .jpg to the link leads to the source
|
|
//except when the link is to an album or to a gifv
|
|
m_media_url += ".jpg"; //imgur is dumb
|
|
m_type = post_type::image;
|
|
}
|
|
//gfycat support
|
|
}else if(is_gfycat_link(m_media_url)){
|
|
if(raii::rjp_string tmp = find_video_url(data.value)){
|
|
m_media_url = std::move(tmp);
|
|
m_type = post_type::video;
|
|
}
|
|
}
|
|
}
|
|
//handle hosted video
|
|
else if(!strcmp(m_post_hint, "hosted:video")){
|
|
m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio);
|
|
}
|
|
else if(!strcmp(m_post_hint, "rich:video")){
|
|
RJP_search_res media = rjp_search_member(data.value, "media", 0);
|
|
raii::rjp_string res = media_search(media.value);
|
|
if(res){
|
|
m_type = post_type::video;
|
|
m_media_url = std::move(res);
|
|
return;
|
|
}
|
|
res = preview_search(data.value);
|
|
if(res){
|
|
m_type = post_type::video;
|
|
m_media_url = std::move(res);
|
|
return;
|
|
}
|
|
m_type = post_type::link;
|
|
}
|
|
else{
|
|
//assume text post for other
|
|
m_type = post_type::text;
|
|
}
|
|
}else if(is_direct_imgur_link(m_media_url)){
|
|
m_type = post_type::image;
|
|
return;
|
|
}else if(check_reddit_media_domain(data.value)){
|
|
m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio);
|
|
/*RJP_value* media = rjp_search_member(data.value, "media", 0).value;
|
|
if(media && (rjp_value_type(media) != json_null))
|
|
m_type = post_type::video;
|
|
else
|
|
m_type = post_type::image;
|
|
//*/
|
|
}else{
|
|
m_media_url.reset();
|
|
m_type = post_type::text;
|
|
}
|
|
}
|
|
//work out what a reddit hosted media post really is and where its streams live.
//data: the post's "data" object.
//media_url: replaced with the video's fallback url when one is found, otherwise untouched.
//audio_url: set to the matching audio link when the post is classified as a video.
//returns image for gifs, video when a fallback url was found, link otherwise.
post_type post::_handle_reddit_hosted_video(RJP_value* data, raii::rjp_string& media_url, raii::string& audio_url){
	RJP_search_res media = rjp_search_member(data, "media", 0);

	//treat gif as image even though reddit thinks they're videos
	//the "media" member can be missing, so only search it when it is there
	if(media.value){
		RJP_search_res gif = rjp_search_member(media.value, "reddit_video", 0);
		//NOTE(review): "is_gif" is looked up on the media object, not on the reddit_video
		//object found above. confirm which object carries the flag before changing this:
		//matching posts would then come back as image with media_url left untouched.
		if(gif.value)
			gif = rjp_search_member(media.value, "is_gif", 0);
		if(gif.value && rjp_value_boolean(gif.value)){
			return post_type::image;
		}
	}

	raii::rjp_string res = media_search(media.value);
	if(!res){
		res = preview_search(data);
		if(!res){
			return post_type::link;
		}
	}
	media_url = std::move(res);

	//reddit hosts audio and video separately. Meaning I have to find a way to manually recombine them.
	//this sets up a link to the audio source of the video. the video might not actually have audio. when downloading
	//from the audio link, always make sure to check for 404 errors.
	static constexpr char url_base[] = "https://v.redd.it/";
	static constexpr size_t url_base_len = sizeof(url_base)-1;

	//skip past the url base before looking for the '/' that ends the video id,
	//but never start the search beyond the end of a url shorter than the base
	const size_t url_len = media_url.length();
	const size_t skip = (url_len < url_base_len) ? url_len : url_base_len;
	const char* end = strstr(media_url.get()+skip, "/");
	if(!end)
		end = media_url.get()+url_len;

	//audio url = everything up to that '/' followed by "/audio"
	const size_t len = end - media_url.get();
	audio_url = raii::string(len + 6);
	memcpy(audio_url.get(), media_url.get(), len);
	memcpy(audio_url.get()+len, "/audio", 6);
	audio_url[len+6] = 0;
	return post_type::video;
}
|
|
|
|
|
|
|
|
//create a bot that identifies itself with a copy of useragent and immediately
//requests an access token for the credentials in a.
//the token request runs from the initializer list and uses m_curl and m_useragent.
bot::bot(const auth_data& a, const raii::string_base& useragent):
	m_curl(),
	m_useragent(useragent),
	m_access_token(_acquire_access_token(a))
{}

//same as above but takes over useragent instead of copying it
bot::bot(const auth_data& a, raii::string_base&& useragent):
	m_curl(),
	m_useragent(std::move(useragent)),
	m_access_token(_acquire_access_token(a))
{}

//copy every member of b; no new token is requested
bot::bot(const bot& b):
	m_curl(b.m_curl),
	m_useragent(b.m_useragent),
	m_access_token(b.m_access_token)
{}

//take over every member of b; no new token is requested
bot::bot(bot&& b):
	m_curl(std::move(b.m_curl)),
	m_useragent(std::move(b.m_useragent)),
	m_access_token(std::move(b.m_access_token))
{}
|
|
|
|
//take over b's user agent and access token.
//m_curl is not transferred; this bot keeps the handle it already has.
bot& bot::operator=(bot&& b){
	m_useragent    = std::move(b.m_useragent);
	m_access_token = std::move(b.m_access_token);
	return *this;
}
|
|
//copy assignment: make a full copy of b first, then move-assign from that copy
bot& bot::operator=(const bot& b){
	bot copy(b);
	*this = std::move(copy);
	return *this;
}
|
|
|
|
const raii::rjp_string& bot::access_token(void)const{
|
|
return m_access_token;
|
|
}
|
|
const raii::string& bot::useragent(void)const{
|
|
return m_useragent;
|
|
}
|
|
void bot::set_useragent(const raii::string_base& s){
|
|
m_useragent = s;
|
|
}
|
|
void bot::set_useragent(raii::string_base&& s){
|
|
m_useragent = std::move(s);
|
|
}
|
|
|
|
void bot::refresh_token(const auth_data& a){
|
|
m_access_token = _acquire_access_token(a);
|
|
}
|
|
|
|
//the getters below all fetch a single post (limit=1) from one listing of subreddit.
//overloads taking `after` pass it through as the "after" query parameter.
//overloads taking `period` pass its string through as the "t" query parameter.

//one post from the "new" listing
post bot::get_new_post(const raii::string_base& subreddit){
	return _get_post(subreddit, "new"_ss, "limit=1"_ss);
}
post bot::get_new_post(const raii::string_base& subreddit, const raii::string_base& after){
	const raii::string query("limit=1&after=" + after);
	return _get_post(subreddit, "new"_ss, query);
}

//one post from the "hot" listing
post bot::get_hot_post(const raii::string_base& subreddit){
	return _get_post(subreddit, "hot"_ss, "limit=1"_ss);
}
post bot::get_hot_post(const raii::string_base& subreddit, const raii::string_base& after){
	const raii::string query("limit=1&after=" + after);
	return _get_post(subreddit, "hot"_ss, query);
}

//one post from the "rising" listing
post bot::get_rising_post(const raii::string_base& subreddit){
	return _get_post(subreddit, "rising"_ss, "limit=1"_ss);
}
post bot::get_rising_post(const raii::string_base& subreddit, const raii::string_base& after){
	const raii::string query("limit=1&after=" + after);
	return _get_post(subreddit, "rising"_ss, query);
}

//one post from the "best" listing
post bot::get_best_post(const raii::string_base& subreddit){
	return _get_post(subreddit, "best"_ss, "limit=1"_ss);
}
post bot::get_best_post(const raii::string_base& subreddit, const raii::string_base& after){
	const raii::string query("limit=1&after=" + after);
	return _get_post(subreddit, "best"_ss, query);
}

//one post from the "top" listing, restricted to period
post bot::get_top_post(const raii::string_base& subreddit, time::period period){
	raii::static_string pstr = period.get();
	const raii::string query("limit=1&t=" + pstr);
	return _get_post(subreddit, "top"_ss, query);
}
post bot::get_top_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){
	raii::static_string pstr = period.get();
	const raii::string query("limit=1&t=" + pstr + "&after=" + after);
	return _get_post(subreddit, "top"_ss, query);
}

//one post from the "controversial" listing, restricted to period
post bot::get_controversial_post(const raii::string_base& subreddit, time::period period){
	raii::static_string pstr = period.get();
	const raii::string query("limit=1&t=" + pstr);
	return _get_post(subreddit, "controversial"_ss, query);
}
post bot::get_controversial_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){
	raii::static_string pstr = period.get();
	const raii::string query("limit=1&t=" + pstr + "&after=" + after);
	return _get_post(subreddit, "controversial"_ss, query);
}
|
|
|
|
|
|
//fetch one listing from reddit and wrap the reply in a post.
//the request goes to https://oauth.reddit.com/r/<subreddit>/<category>, with
//"?<extra>" appended when extra is not empty, and carries the bot's access token.
//the result of the transfer itself is not inspected here; whatever was received is
//handed to post, which reports through its operator bool whether it is usable.
post bot::_get_post(const raii::string_base& subreddit, const raii::string_base& category, const raii::string_base& extra){
	static constexpr char url_base[] = "https://oauth.reddit.com/r/";

	raii::string url;
	if(!extra)
		url = (url_base + subreddit) + "/" + category;
	else
		url = (url_base + subreddit) + "/" + category + "?" + extra;

	raii::string rep;
	raii::curl_llist header(_create_auth_header(m_access_token));

	//start from a clean handle, configure it for this request, then run it
	m_curl.reset();
	_setup_subreddit_get_curl(header, url, rep);
	m_curl.perform();

	return post(rep);
}
|
|
size_t bot::_get_response_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){
|
|
raii::rjp_string* reply = reinterpret_cast<raii::rjp_string*>(userdata);
|
|
(*reply) += ptr;
|
|
return size*nmemb;
|
|
}
|
|
//build the header list for an authorized request: a single
//"Authorization: bearer <access_token>" line.
raii::curl_llist bot::_create_auth_header(const raii::string_base& access_token){
	return raii::curl_llist(
		raii::string("Authorization: bearer " + access_token)
	);
}
|
|
//configure m_curl for a subreddit listing request.
//header: header list to send (carries the authorization line).
//url: full request url.
//reply: buffer the write callback appends the response to. its address is handed
//to curl, so it has to stay alive until the transfer has finished.
void bot::_setup_subreddit_get_curl(const raii::curl_llist& header, const raii::string_base& url, const raii::string_base& reply){
	//target and transfer behaviour
	m_curl.seturl(url);
	m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L);
	m_curl.setopt(CURLOPT_NOPROGRESS, 1L);
	m_curl.setopt(CURLOPT_MAXREDIRS, 50L);
	m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L);
	m_curl.forcessl(CURL_SSLVERSION_TLSv1_2);
	m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L);

	//identification
	m_curl.setheader(header);
	m_curl.setuseragent(m_useragent);

	//response handling
	m_curl.setopt(CURLOPT_WRITEFUNCTION, _get_response_curl_callback);
	m_curl.setopt(CURLOPT_WRITEDATA, &reply);
	m_curl.setopt(CURLOPT_FAILONERROR, 1L);
}
|
|
|
|
size_t bot::_post_reply_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){
|
|
raii::string* data = reinterpret_cast<raii::string*>(userdata);
|
|
(*data) += ptr;
|
|
return size*nmemb;
|
|
}
|
|
//Create reddit login data
|
|
raii::string bot::_create_request_post_data(const raii::string_base& account_name, const raii::string_base& account_pass){
|
|
return raii::string("grant_type=password&username=" + account_name + "&password=" + account_pass);
|
|
}
|
|
//Setup login data for reddit bot
|
|
raii::string bot::_create_request_userpwd(const raii::string_base& bot_name, const raii::string_base& bot_pass){
|
|
return raii::string(bot_name + ":" + bot_pass);
|
|
}
|
|
//configure m_curl for the access token request (a POST to reddit's token endpoint).
//userpwd: "<name>:<password>" of the bot, used as the request's user/password.
//postdata: request body.
//result: passed to the write callback as its userdata; the callback treats it as a
//raii::string to append the reply to, so it has to outlive the transfer.
void bot::_setup_token_request_curl(const raii::string_base& userpwd, const raii::string_base& postdata, void* result){
	static constexpr char reddit_token_address[] = "https://www.reddit.com/api/v1/access_token";

	//target, credentials and body
	m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L);
	m_curl.seturl(reddit_token_address);
	m_curl.setopt(CURLOPT_NOPROGRESS, 1L);
	m_curl.setuserpwd(userpwd);
	m_curl.setpostdata(postdata);
	m_curl.setuseragent(m_useragent);

	//transfer behaviour
	m_curl.setopt(CURLOPT_MAXREDIRS, 50L);
	m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L);
	m_curl.forcessl(CURL_SSLVERSION_TLSv1_2);
	m_curl.setopt(CURLOPT_CUSTOMREQUEST, "POST");
	m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L);

	//response handling
	m_curl.setopt(CURLOPT_WRITEFUNCTION, _post_reply_curl_callback);
	m_curl.setopt(CURLOPT_WRITEDATA, result);
	m_curl.setopt(CURLOPT_FAILONERROR, 1L);
}
|
|
|
|
//ask reddit for an access token and return the raw reply text.
//the account name and password from auth are url encoded into the request body,
//the bot name and password are sent as the request's user/password.
//returns an empty string when the transfer does not finish with CURLE_OK.
raii::string bot::_request_access_token(const auth_data& auth){
	//url encode the account credentials and merge them into the body;
	//the encoded copies are released as soon as the body exists
	raii::curl_string acc_name = m_curl.encode(auth.acc_name, auth.acc_name.length());
	raii::curl_string acc_pass = m_curl.encode(auth.acc_pass, auth.acc_pass.length());
	raii::string postdata = _create_request_post_data(acc_name, acc_pass);
	acc_name.reset();
	acc_pass.reset();

	//"<bot name>:<bot password>"
	raii::string userpwd = _create_request_userpwd(auth.bot_name, auth.bot_pass);

	//load curl with the data, then run the POST
	raii::string reply;
	_setup_token_request_curl(userpwd, postdata, &reply);
	const CURLcode status = m_curl.perform();
	if(status != CURLE_OK)
		return {};

	return reply;
}
|
|
|
|
//request an access token for the credentials in a and pull the "access_token"
//member out of the reply.
//returns an empty string when the request yields no reply or the reply can't be parsed.
raii::rjp_string bot::_acquire_access_token(const auth_data& a){
	raii::string reply = _request_access_token(a);
	if(reply){
		raii::rjp_ptr root(rjp_parse(reply));
		if(root){
			RJP_search_res token = rjp_search_member(root.get(), "access_token", 0);
			return raii::rjp_string{token.value};
		}
	}
	return raii::rjp_string{};
}
|
|
}
|