/**
	This file is a part of rexy's matrix bot
	Copyright (C) 2019 rexy712

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU Affero General Public License as
	published by the Free Software Foundation, either version 3 of the
	License, or (at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU Affero General Public License for more details.

	You should have received a copy of the GNU Affero General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

#include "reddit.hpp"
#include "raii/rjp_string.hpp"
#include "raii/string.hpp"
#include "raii/curler.hpp"
#include "raii/rjp_ptr.hpp"
#include "raii/static_string.hpp"

#include <algorithm> //search
#include <cstring> //strcmp, strstr, memcmp, memcpy
#include <string> //std::string
#include <utility> //std::move

//no idea if this covers everything reddit might dish out at me
//there is no consistency in their content tagging. there are gifs marked as images, others as videos
//they separate audio and video streams for their hosted videos, there is no true way to tell
//what kind of content a post contains since the post_hint field might be completely nonexistent.
//it's just a game of hacking together solutions each time reddit throws me a new type of unexpected complication.

namespace reddit{ namespace time{ period hour = "hour"; period day = "day"; period week = "week"; period month = "month"; period year = "year"; period all = "all"; } auth_data parse_auth_data(RJP_value* root){ static const char* account_names[2] = {"bot", "account"}; static const char* account_fields[2] = {"username", "password"}; auth_data ret; RJP_search_res accounts[2]; RJP_search_res details[2]; rjp_search_members(root, 2, account_names, accounts, 0); rjp_search_members(accounts[0].value, 2, account_fields, details, 0); ret.bot_name = details[0].value; ret.bot_pass = details[1].value; rjp_search_members(accounts[1].value, 2, account_fields, details, 0); ret.acc_name = details[0].value; ret.acc_pass = details[1].value; return ret; } static raii::rjp_string media_search(RJP_value* media){ if(!media) return {}; RJP_search_res res = rjp_search_member(media, "reddit_video", 0); if(!res.value) return raii::rjp_string{}; res = rjp_search_member(res.value, "fallback_url", 0); if(!res.value) return raii::rjp_string{}; return raii::rjp_string(res.value); } static raii::rjp_string preview_search(RJP_value* root){ RJP_search_res media = rjp_search_member(root, "preview", 0); if(!media.value) return raii::rjp_string{}; media = rjp_search_member(media.value, "reddit_video_preview", 0); if(!media.value) return raii::rjp_string{}; media = rjp_search_member(media.value, "fallback_url", 0); if(!media.value) return raii::rjp_string{}; return raii::rjp_string(media.value); } static bool check_reddit_media_domain(RJP_value* root){ RJP_search_res res = rjp_search_member(root, "is_reddit_media_domain", 0); return (res.value && rjp_value_boolean(res.value)); } static raii::rjp_string find_video_url(RJP_value* root){ RJP_search_res media = rjp_search_member(root, "media", 0); if(raii::rjp_string res = media_search(media.value)){ return res; } raii::rjp_string res = preview_search(root); return res; } static bool is_gifv(const raii::string_base& str){ const char* s = str.get(); size_t 
len = str.length(); if(len > 5 && *(s+len-1) == 'v' && *(s+len-2) == 'f' && *(s+len-3) == 'i' && *(s+len-4) == 'g' && *(s+len-5) == '.') { return true; } return false; } static bool has_extension(const raii::string_base& str){ size_t i = 0; for(const char* p = str.get() + str.length() - 1;*p && i < 6;--p,++i){ if(*p == '/') return false; else if(*p == '.') return true; } return false; } static bool is_gfycat_link(const raii::string_base& str){ static const char gfycat[] = "gfycat.com"; return *std::search(str.get(), str.get()+str.length(), gfycat, gfycat+sizeof(gfycat)-1) != 0; } static bool is_imgur_link(const raii::string_base& str){ static const char imgur[] = "imgur.com"; return *std::search(str.get(), str.get()+str.length(), imgur, imgur+sizeof(imgur)-1) != 0; } static bool is_direct_imgur_link(const raii::string_base& str){ return is_imgur_link(str) && has_extension(str); } post::post(const raii::string_base& p): m_post(p) { _parse_post(); } post::post(raii::string_base&& p): m_post(std::move(p)), m_type(post_type::unrecognized) { _parse_post(); } post& post::operator=(const raii::string_base& p){ post tmp(p); if(!tmp) return *this; return (*this = std::move(tmp)); } post::operator bool(void)const{ if(m_type == post_type::text) return (m_post_url && m_author && m_title && m_name); else return (m_post_url && m_media_url && m_author && m_title && m_name); } const raii::string& post::raw(void)const{ return m_post; } const raii::rjp_string& post::mediaurl(void)const{ return m_media_url; } const raii::string& post::hosted_video_audio(void)const{ return m_hosted_video_audio; } const raii::rjp_string& post::posturl(void)const{ return m_post_url; } const raii::rjp_string& post::author(void)const{ return m_author; } const raii::rjp_string& post::post_hint(void)const{ return m_post_hint; } const raii::rjp_string& post::title(void)const{ return m_title; } const raii::rjp_string& post::name(void)const{ return m_name; } bool post::is_crosspost(void)const{ return (m_flags 
& POST_FLAGS_CROSSPOSTED); } post_type post::type(void)const{ return m_type; } void post::_parse_post(void){ raii::rjp_ptr root(rjp_parse(m_post)); if(!root) return; static const char* search_items[] = {"url", "author", "post_hint", "title", "id", "crosspost_parent_list"}; static constexpr size_t num_searches = sizeof(search_items)/sizeof(search_items[0]); RJP_search_res results[num_searches]; RJP_search_res data = rjp_search_member(root.get(), "data", 0); if(!data.value) return; data = rjp_search_member(data.value, "children", 0); if(!data.value) return; data.value = rjp_get_element(data.value); if(!data.value) return; RJP_search_res kind = rjp_search_member(data.value, "kind", 0); if(!kind.value) return; data = rjp_search_member(data.value, "data", 0); if(!data.value) return; RJP_search_res& crosspost = results[5]; rjp_search_members(data.value, num_searches, search_items, results, 0); //reddit will *sometimes* make the url field point to the crosspost parent's comments page. //so we just always assume that the true link is in the crosspost parent if(crosspost.value){ m_flags |= POST_FLAGS_CROSSPOSTED; crosspost.value = rjp_get_element(crosspost.value); crosspost = rjp_search_member(crosspost.value, "url", 0); if(crosspost.value) m_media_url = crosspost.value; }else{ m_media_url = results[0].value; } m_author = results[1].value; m_post_hint = results[2].value; m_title = results[3].value; m_name = raii::rjp_string(kind.value) + "_" + rjp_value_string(results[4].value); m_post_url = "https://redd.it/" + raii::rjp_string(results[4].value); if(m_post_hint){ //handle simple image if(!strcmp(m_post_hint, "image")){ m_type = post_type::image; } //handle link else if(!strcmp(m_post_hint, "link")){ m_type = post_type::link; //imgur support if(is_imgur_link(m_media_url)){ if(is_gifv(m_media_url)){ //gifv is a video if(raii::rjp_string tmp = preview_search(data.value)){ m_media_url = std::move(tmp); m_type = post_type::video; } }else{ //imgur links don't lead to the image 
source. adding .jpg to the link leads to the source //except when the link is to an album or to a gifv m_media_url += ".jpg"; //imgur is dumb m_type = post_type::image; } //gfycat support }else if(is_gfycat_link(m_media_url)){ if(raii::rjp_string tmp = find_video_url(data.value)){ m_media_url = std::move(tmp); m_type = post_type::video; } } } //handle hosted video else if(!strcmp(m_post_hint, "hosted:video")){ m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio); } else if(!strcmp(m_post_hint, "rich:video")){ RJP_search_res media = rjp_search_member(data.value, "media", 0); raii::rjp_string res = media_search(media.value); if(res){ m_type = post_type::video; m_media_url = std::move(res); return; } res = preview_search(data.value); if(res){ m_type = post_type::video; m_media_url = std::move(res); return; } m_type = post_type::link; } else{ //assume text post for other m_type = post_type::text; } }else if(is_direct_imgur_link(m_media_url)){ m_type = post_type::image; return; }else if(check_reddit_media_domain(data.value)){ m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio); /*RJP_value* media = rjp_search_member(data.value, "media", 0).value; if(media && (rjp_value_type(media) != json_null)) m_type = post_type::video; else m_type = post_type::image; //*/ }else{ m_media_url.reset(); m_type = post_type::text; } } post_type post::_handle_reddit_hosted_video(RJP_value* data, raii::rjp_string& media_url, raii::string& audio_url){ RJP_search_res media = rjp_search_member(data, "media", 0); RJP_search_res gif = rjp_search_member(media.value, "reddit_video", 0); //treat gif as image even though reddit thinks they're videos if(gif.value) gif = rjp_search_member(media.value, "is_gif", 0); if(gif.value && rjp_value_boolean(gif.value)){ return post_type::image; } raii::rjp_string res = media_search(media.value); if(!res){ res = preview_search(data); if(!res){ return post_type::link; } } media_url = std::move(res); 
//reddit hosts audio and video separately. Meaning I have to find a way to manually recombine them. //this sets up a link to the audio source of the video. the video might not actually have audio. when downloading //from the audio link, always make sure to check for 404 errors. static constexpr char url_base[] = "https://v.redd.it/"; static constexpr size_t url_base_len = sizeof(url_base)-1; char* end = strstr(media_url.get()+url_base_len, "/"); if(!end) end = media_url.get()+media_url.length(); size_t len = end - media_url.get(); audio_url = raii::string(len + 6); memcpy(audio_url.get(), media_url.get(), len); memcpy(audio_url.get()+len, "/audio", 6); audio_url[len+6] = 0; return post_type::video; } bot::bot(const auth_data& a, const raii::string_base& useragent): m_curl(), m_useragent(useragent), m_access_token(_acquire_access_token(a)){} bot::bot(const auth_data& a, raii::string_base&& useragent): m_curl(), m_useragent(std::move(useragent)), m_access_token(_acquire_access_token(a)){} bot::bot(const bot& b): m_curl(b.m_curl), m_useragent(b.m_useragent), m_access_token(b.m_access_token){} bot::bot(bot&& b): m_curl(std::move(b.m_curl)), m_useragent(std::move(b.m_useragent)), m_access_token(std::move(b.m_access_token)){} bot& bot::operator=(bot&& b){ m_useragent = std::move(b.m_useragent); m_access_token = std::move(b.m_access_token); return *this; } bot& bot::operator=(const bot& b){ bot tmp(b); return *this = std::move(tmp); } const raii::rjp_string& bot::access_token(void)const{ return m_access_token; } const raii::string& bot::useragent(void)const{ return m_useragent; } void bot::set_useragent(const raii::string_base& s){ m_useragent = s; } void bot::set_useragent(raii::string_base&& s){ m_useragent = std::move(s); } void bot::refresh_token(const auth_data& a){ m_access_token = _acquire_access_token(a); } post bot::get_new_post(const raii::string_base& subreddit){ return _get_post(subreddit, "new"_ss, "limit=1"_ss); } post bot::get_new_post(const 
raii::string_base& subreddit, const raii::string_base& after){ return _get_post(subreddit, "new"_ss, raii::string("limit=1&after=" + after)); } post bot::get_hot_post(const raii::string_base& subreddit){ return _get_post(subreddit, "hot"_ss, "limit=1"_ss); } post bot::get_hot_post(const raii::string_base& subreddit, const raii::string_base& after){ return _get_post(subreddit, "hot"_ss, raii::string("limit=1&after=" + after)); } post bot::get_rising_post(const raii::string_base& subreddit){ return _get_post(subreddit, "rising"_ss, "limit=1"_ss); } post bot::get_rising_post(const raii::string_base& subreddit, const raii::string_base& after){ return _get_post(subreddit, "rising"_ss, raii::string("limit=1&after=" + after)); } post bot::get_best_post(const raii::string_base& subreddit){ return _get_post(subreddit, "best"_ss, "limit=1"_ss); } post bot::get_best_post(const raii::string_base& subreddit, const raii::string_base& after){ return _get_post(subreddit, "best"_ss, raii::string("limit=1&after=" + after)); } post bot::get_top_post(const raii::string_base& subreddit, time::period period){ raii::static_string pstr = period.get(); return _get_post(subreddit, "top"_ss, raii::string("limit=1&t=" + pstr)); } post bot::get_top_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){ raii::static_string pstr = period.get(); return _get_post(subreddit, "top"_ss, raii::string("limit=1&t=" + pstr + "&after=" + after)); } post bot::get_controversial_post(const raii::string_base& subreddit, time::period period){ raii::static_string pstr = period.get(); return _get_post(subreddit, "controversial"_ss, raii::string("limit=1&t=" + pstr)); } post bot::get_controversial_post(const raii::string_base& subreddit, const raii::string_base& after, time::period period){ raii::static_string pstr = period.get(); return _get_post(subreddit, "controversial"_ss, raii::string("limit=1&t=" + pstr + "&after=" + after)); } post bot::_get_post(const 
raii::string_base& subreddit, const raii::string_base& category, const raii::string_base& extra){ raii::string rep; static constexpr char url_base[] = "https://oauth.reddit.com/r/"; raii::string url; if(extra) url = (url_base + subreddit) + "/" + category + "?" + extra; else url = (url_base + subreddit) + "/" + category; raii::curl_llist header(_create_auth_header(m_access_token)); m_curl.reset(); _setup_subreddit_get_curl(header, url, rep); m_curl.perform(); return post(rep); } size_t bot::_get_response_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){ raii::rjp_string* reply = reinterpret_cast(userdata); (*reply) += ptr; return size*nmemb; } raii::curl_llist bot::_create_auth_header(const raii::string_base& access_token){ return raii::curl_llist(raii::string("Authorization: bearer " + access_token)); } void bot::_setup_subreddit_get_curl(const raii::curl_llist& header, const raii::string_base& url, const raii::string_base& reply){ m_curl.seturl(url); m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L); m_curl.setopt(CURLOPT_NOPROGRESS, 1L); m_curl.setopt(CURLOPT_MAXREDIRS, 50L); m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); m_curl.forcessl(CURL_SSLVERSION_TLSv1_2); m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L); m_curl.setheader(header); m_curl.setuseragent(m_useragent); m_curl.setopt(CURLOPT_WRITEFUNCTION, _get_response_curl_callback); m_curl.setopt(CURLOPT_WRITEDATA, &reply); m_curl.setopt(CURLOPT_FAILONERROR, 1L); } size_t bot::_post_reply_curl_callback(char* ptr, size_t size, size_t nmemb, void* userdata){ raii::string* data = reinterpret_cast(userdata); (*data) += ptr; return size*nmemb; } //Create reddit login data raii::string bot::_create_request_post_data(const raii::string_base& account_name, const raii::string_base& account_pass){ return raii::string("grant_type=password&username=" + account_name + "&password=" + account_pass); } //Setup login data for reddit bot raii::string bot::_create_request_userpwd(const raii::string_base& bot_name, const 
raii::string_base& bot_pass){ return raii::string(bot_name + ":" + bot_pass); } void bot::_setup_token_request_curl(const raii::string_base& userpwd, const raii::string_base& postdata, void* result){ static constexpr char reddit_token_address[] = "https://www.reddit.com/api/v1/access_token"; m_curl.setopt(CURLOPT_BUFFERSIZE, 102400L); m_curl.seturl(reddit_token_address); m_curl.setopt(CURLOPT_NOPROGRESS, 1L); m_curl.setuserpwd(userpwd); m_curl.setpostdata(postdata); m_curl.setuseragent(m_useragent); m_curl.setopt(CURLOPT_MAXREDIRS, 50L); m_curl.setopt(CURLOPT_FOLLOWLOCATION, 1L); m_curl.forcessl(CURL_SSLVERSION_TLSv1_2); m_curl.setopt(CURLOPT_CUSTOMREQUEST, "POST"); m_curl.setopt(CURLOPT_TCP_KEEPALIVE, 1L); m_curl.setopt(CURLOPT_WRITEFUNCTION, _post_reply_curl_callback); m_curl.setopt(CURLOPT_WRITEDATA, result); m_curl.setopt(CURLOPT_FAILONERROR, 1L); } raii::string bot::_request_access_token(const auth_data& auth){ CURLcode result; //URL encode the POST data raii::curl_string acc_name = m_curl.encode(auth.acc_name, auth.acc_name.length()); raii::curl_string acc_pass = m_curl.encode(auth.acc_pass, auth.acc_pass.length()); //unify the post data, clean up remnants raii::string postdata = _create_request_post_data(acc_name, acc_pass); acc_name.reset(); acc_pass.reset(); //Unify the username/password raii::string userpwd = _create_request_userpwd(auth.bot_name, auth.bot_pass); //Load curl with data then run POST operation raii::string reply; _setup_token_request_curl(userpwd, postdata, &reply); result = m_curl.perform(); if(result != CURLE_OK) return {}; return reply; } raii::rjp_string bot::_acquire_access_token(const auth_data& a){ raii::string reply = _request_access_token(a); if(!reply) return raii::rjp_string{}; raii::rjp_ptr root(rjp_parse(reply)); if(!root) return raii::rjp_string{}; RJP_search_res token = rjp_search_member(root.get(), "access_token", 0); return raii::rjp_string{token.value}; } }