From 0fb6e32fcba06a6a5a760530cf9dd98e6d1e2cbd Mon Sep 17 00:00:00 2001 From: Rexy712 Date: Fri, 8 Mar 2019 15:14:51 -0800 Subject: [PATCH] update ffmpeg calls to work with 4.0, but mess up the output framerate --- README.md | 8 ++ TODO | 3 +- include/reddit.hpp | 8 ++ src/raii/string_base.cpp | 13 +- src/raii/video_man.cpp | 9 +- src/reddit.cpp | 276 +++++++++++++++++++++------------------ src/test.cpp | 48 +++++-- 7 files changed, 220 insertions(+), 145 deletions(-) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..ccfa810 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# README + +## Dependencies + +FreeImagePlus >= 3.18.0 for image upload support (earlier versions have a buffer overflow issue) +libav/ffmpeg >= 3.4.5 for video/audio upload support (not tested below 3.4.5) +libcurl for network communication + diff --git a/TODO b/TODO index 6c41a3b..be9e3c5 100644 --- a/TODO +++ b/TODO @@ -1,9 +1,10 @@ use libmagic to determine file types? add av_freep cleanup to raii interface for libav* (probably replacing unique_ptr in that case) raii-ify the muxing function +fix muxing fps/bitrate issue scale video thumbnails to same size as image thumbnails (500 in either dimension, same aspect ratio) crosspost title thread pool for workers. rehurn std::promise? sync with matrix. add events to a queue that consumer can wait on or nonblocking check don't create thumbnail if image is already sufficiently small - +youtube video download diff --git a/include/reddit.hpp b/include/reddit.hpp index 3d4a863..f104743 100644 --- a/include/reddit.hpp +++ b/include/reddit.hpp @@ -62,6 +62,11 @@ namespace reddit{ class post { + private: + enum post_flags{ + POST_FLAGS_NONE = 0, + POST_FLAGS_CROSSPOSTED = 1 + }; private: raii::string m_post; raii::rjp_string m_media_url; @@ -72,6 +77,7 @@ namespace reddit{ raii::rjp_string m_name; raii::rjp_string m_post_url; post_type m_type = post_type::unrecognized; + int m_flags = POST_FLAGS_NONE; public: post(void) = default; @@ -95,9 +101,11 @@ namespace reddit{ const raii::rjp_string& post_hint(void)const; const raii::rjp_string& title(void)const; const raii::rjp_string& name(void)const; + bool is_crosspost(void)const; post_type type(void)const; private: void _parse_post(void); + static post_type _handle_reddit_hosted_video(RJP_value* data, raii::rjp_string& media_url, raii::string& audio_url); }; class bot diff --git a/src/raii/string_base.cpp b/src/raii/string_base.cpp index eb82d1b..cac98ee 100644 --- a/src/raii/string_base.cpp +++ b/src/raii/string_base.cpp @@ -22,7 +22,6 @@ #include //exchange, swap #include //memcpy #include //strcpy, strlen -#include //bad_alloc namespace raii{ string_base::string_base(size_t len): @@ -35,8 +34,10 @@ namespace raii{ }else{ _free(m_data); m_data = _copy(c, len+1); - if(!m_data) - throw std::bad_alloc{}; + if(!m_data){ + m_length = 0; + return *this; + } } m_length = len; return *this; @@ -47,8 +48,10 @@ namespace raii{ }else{ _free(m_data); m_data = _copy(s.m_data, s.m_length+1); - if(!m_data) - throw std::bad_alloc{}; + if(!m_data){ + m_length = 0; + return *this; + } } m_length = s.m_length; return *this; diff --git a/src/raii/video_man.cpp b/src/raii/video_man.cpp index 3ee4cac..c2865a2 100644 --- a/src/raii/video_man.cpp +++ b/src/raii/video_man.cpp @@ -21,10 +21,17 @@ #include //exchange +//av_register_all is required before ffmpeg 4.0 and is deprecated after that +#if LIBAVFORMAT_VERSION_MAJOR >= 58 && LIBAVFORMAT_VERSION_MINOR >= 7 +# define REGISTER_LIBAV() +#else +# define REGISTER_LIBAV() av_register_all() +#endif + namespace raii{ video_man::video_man(const raii::string_base& filename){ - av_register_all(); + REGISTER_LIBAV(); _init(filename); } video_man::video_man(video_man&& v): diff --git a/src/reddit.cpp b/src/reddit.cpp index 542f8a9..62137ff 100644 --- a/src/reddit.cpp +++ b/src/reddit.cpp @@ -149,134 +149,6 @@ namespace reddit{ { _parse_post(); } - void post::_parse_post(void){ - raii::rjp_ptr root(rjp_parse(m_post)); - if(!root) - return; - - static const char* search_items[] = {"url", "author", "post_hint", "title", "id"}; - static constexpr size_t num_searches = sizeof(search_items)/sizeof(search_items[0]); - RJP_search_res results[num_searches]; - RJP_search_res data = rjp_search_member(root.get(), "data", 0); - if(!data.value) return; - data = rjp_search_member(data.value, "children", 0); - if(!data.value) return; - data.value = rjp_get_element(data.value); - if(!data.value) return; - RJP_search_res kind = rjp_search_member(data.value, "kind", 0); - if(!kind.value) return; - data = rjp_search_member(data.value, "data", 0); - if(!data.value) return; - - RJP_search_res crosspost = rjp_search_member(data.value, "crosspost_parent_list", 0); - if(crosspost.value){ - crosspost.value = rjp_get_element(crosspost.value); - if(crosspost.value) - data = crosspost; - } - - rjp_search_members(data.value, num_searches, search_items, results, 0); - m_media_url = results[0].value; - m_author = results[1].value; - m_post_hint = results[2].value; - m_title = results[3].value; - m_name = raii::rjp_string(kind.value) + "_" + rjp_value_string(results[4].value); - m_post_url = "https://redd.it/" + raii::rjp_string(results[4].value); - - if(m_post_hint){ - //handle simple image - if(!strcmp(m_post_hint, "image")){ - m_type = post_type::image; - } - //handle link - else if(!strcmp(m_post_hint, "link")){ - m_type = post_type::link; - - //imgur support - if(is_imgur_link(m_media_url)){ - if(is_gifv(m_media_url)){ - if(raii::rjp_string tmp = preview_search(data.value)){ - m_media_url = std::move(tmp); - m_type = post_type::video; - } - }else{ - m_media_url += ".jpg"; //imgur is dumb - m_type = post_type::image; - } - }else if(is_gfycat_link(m_media_url)){ - if(raii::rjp_string tmp = find_video_url(data.value)){ - m_media_url = std::move(tmp); - m_type = post_type::video; - } - } - } - //handle hosted video - else if(!strcmp(m_post_hint, "hosted:video")){ - RJP_search_res media = rjp_search_member(data.value, "media", 0); - RJP_search_res gif = rjp_search_member(media.value, "reddit_video", 0); - if(gif.value) - gif = rjp_search_member(media.value, "is_gif", 0); - if(gif.value && rjp_value_boolean(gif.value)){ - m_type = post_type::image; - }else{ - raii::rjp_string res = media_search(media.value); - if(!res){ - res = preview_search(data.value); - if(!res){ - m_type = post_type::link; - return; - } - } - m_type = post_type::video; - m_media_url = std::move(res); - } - - //reddit hosts audio and video separately. Meaning I have to find a way to manually recombine them - static constexpr char url_base[] = "https://v.redd.it/"; - static constexpr size_t url_base_len = sizeof(url_base)-1; - char* end = strstr(m_media_url.get()+url_base_len, "/"); - if(!end) - end = m_media_url.get()+m_media_url.length(); - size_t len = end - m_media_url.get(); - m_hosted_video_audio = raii::string(len + 6); - memcpy(m_hosted_video_audio.get(), m_media_url.get(), len); - memcpy(m_hosted_video_audio.get()+len, "/audio", 6); - m_hosted_video_audio[len+6] = 0; - } - else if(!strcmp(m_post_hint, "rich:video")){ - RJP_search_res media = rjp_search_member(data.value, "media", 0); - raii::rjp_string res = media_search(media.value); - if(res){ - m_type = post_type::video; - m_media_url = std::move(res); - return; - } - res = preview_search(data.value); - if(res){ - m_type = post_type::video; - m_media_url = std::move(res); - return; - } - m_type = post_type::link; - } - //assume text post for other - else{ - m_type = post_type::text; - } - }else if(is_direct_imgur_link(m_media_url)){ - m_type = post_type::image; - return; - }else if(check_reddit_media_domain(data.value)){ - RJP_value* media = rjp_search_member(data.value, "media", 0).value; - if(media && (rjp_value_type(media) != json_null)) - m_type = post_type::video; - else - m_type = post_type::image; - }else{ - m_media_url.reset(); - m_type = post_type::text; - } - } post& post::operator=(const raii::string_base& p){ post tmp(p); if(!tmp) @@ -314,9 +186,157 @@ namespace reddit{ const raii::rjp_string& post::name(void)const{ return m_name; } + bool post::is_crosspost(void)const{ + return (m_flags & POST_FLAGS_CROSSPOSTED); + } post_type post::type(void)const{ return m_type; } + void post::_parse_post(void){ + raii::rjp_ptr root(rjp_parse(m_post)); + if(!root) + return; + + static const char* search_items[] = {"url", "author", "post_hint", "title", "id", "crosspost_parent_list"}; + static constexpr size_t num_searches = sizeof(search_items)/sizeof(search_items[0]); + RJP_search_res results[num_searches]; + RJP_search_res data = rjp_search_member(root.get(), "data", 0); + if(!data.value) return; + data = rjp_search_member(data.value, "children", 0); + if(!data.value) return; + data.value = rjp_get_element(data.value); + if(!data.value) return; + RJP_search_res kind = rjp_search_member(data.value, "kind", 0); + if(!kind.value) return; + data = rjp_search_member(data.value, "data", 0); + if(!data.value) return; + + RJP_search_res& crosspost = results[5]; + + rjp_search_members(data.value, num_searches, search_items, results, 0); + + //reddit will *sometimes* make the url field point to the crosspost parent's comments page. + //so we just always assume that the true link is in the crosspost parent + if(crosspost.value){ + m_flags |= POST_FLAGS_CROSSPOSTED; + crosspost.value = rjp_get_element(crosspost.value); + crosspost = rjp_search_member(crosspost.value, "url", 0); + if(crosspost.value) + m_media_url = crosspost.value; + }else{ + m_media_url = results[0].value; + } + m_author = results[1].value; + m_post_hint = results[2].value; + m_title = results[3].value; + m_name = raii::rjp_string(kind.value) + "_" + rjp_value_string(results[4].value); + m_post_url = "https://redd.it/" + raii::rjp_string(results[4].value); + + if(m_post_hint){ + //handle simple image + if(!strcmp(m_post_hint, "image")){ + m_type = post_type::image; + } + //handle link + else if(!strcmp(m_post_hint, "link")){ + m_type = post_type::link; + + //imgur support + if(is_imgur_link(m_media_url)){ + if(is_gifv(m_media_url)){ //gifv is a video + if(raii::rjp_string tmp = preview_search(data.value)){ + m_media_url = std::move(tmp); + m_type = post_type::video; + } + }else{ + //imgur links don't lead to the image source. adding .jpg to the link leads to the source + //except when the link is to an album or to a gifv + m_media_url += ".jpg"; //imgur is dumb + m_type = post_type::image; + } + //gfycat support + }else if(is_gfycat_link(m_media_url)){ + if(raii::rjp_string tmp = find_video_url(data.value)){ + m_media_url = std::move(tmp); + m_type = post_type::video; + } + } + } + //handle hosted video + else if(!strcmp(m_post_hint, "hosted:video")){ + m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio); + } + else if(!strcmp(m_post_hint, "rich:video")){ + RJP_search_res media = rjp_search_member(data.value, "media", 0); + raii::rjp_string res = media_search(media.value); + if(res){ + m_type = post_type::video; + m_media_url = std::move(res); + return; + } + res = preview_search(data.value); + if(res){ + m_type = post_type::video; + m_media_url = std::move(res); + return; + } + m_type = post_type::link; + } + else{ + //assume text post for other + m_type = post_type::text; + } + }else if(is_direct_imgur_link(m_media_url)){ + m_type = post_type::image; + return; + }else if(check_reddit_media_domain(data.value)){ + m_type = _handle_reddit_hosted_video(data.value, m_media_url, m_hosted_video_audio); + /*RJP_value* media = rjp_search_member(data.value, "media", 0).value; + if(media && (rjp_value_type(media) != json_null)) + m_type = post_type::video; + else + m_type = post_type::image; + //*/ + }else{ + m_media_url.reset(); + m_type = post_type::text; + } + } + post_type post::_handle_reddit_hosted_video(RJP_value* data, raii::rjp_string& media_url, raii::string& audio_url){ + + RJP_search_res media = rjp_search_member(data, "media", 0); + RJP_search_res gif = rjp_search_member(media.value, "reddit_video", 0); + + //treat gif as image even though reddit thinks they're videos + if(gif.value) + gif = rjp_search_member(media.value, "is_gif", 0); + if(gif.value && rjp_value_boolean(gif.value)){ + return post_type::image; + } + raii::rjp_string res = media_search(media.value); + if(!res){ + res = preview_search(data); + if(!res){ + return post_type::link; + } + } + media_url = std::move(res); + + //reddit hosts audio and video separately. Meaning I have to find a way to manually recombine them. + //this sets up a link to the audio source of the video. the video might not actually have audio. when downloading + //from the audio link, always make sure to check for 404 errors. + static constexpr char url_base[] = "https://v.redd.it/"; + static constexpr size_t url_base_len = sizeof(url_base)-1; + char* end = strstr(media_url.get()+url_base_len, "/"); + if(!end) + end = media_url.get()+media_url.length(); + size_t len = end - media_url.get(); + audio_url = raii::string(len + 6); + memcpy(audio_url.get(), media_url.get(), len); + memcpy(audio_url.get()+len, "/audio", 6); + audio_url[len+6] = 0; + return post_type::video; + } diff --git a/src/test.cpp b/src/test.cpp index 5d72686..5268e9d 100644 --- a/src/test.cpp +++ b/src/test.cpp @@ -45,11 +45,19 @@ extern "C"{ # include //av_image_alloc } + +//av_register_all is required before ffmpeg 4.0 and is deprecated after that +#if LIBAVFORMAT_VERSION_MAJOR >= 58 && LIBAVFORMAT_VERSION_MINOR >= 7 +# define REGISTER_LIBAV() +#else +# define REGISTER_LIBAV() av_register_all() +#endif + //a lot copied from a github repo, but with all deprecation warnings fixed. //no idea how the one guy managed to figure out all this with the minimal and conflicting documentation for ffmpeg and libav +//with the new code that fixes invalid input pts/dts, now the output framerate/bitrate is off bool mux_audio_video(const raii::string_base& audio_file, const raii::string_base& video_file, const raii::string_base& output_file){ - av_register_all(); - av_log_set_level(AV_LOG_FATAL); + REGISTER_LIBAV(); AVOutputFormat* out_format = NULL; AVFormatContext* audio_context = NULL, *video_context = NULL, *output_context = NULL; @@ -91,7 +99,7 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas AVStream* out_stream = avformat_new_stream(output_context, codec); audio_index_out = out_stream->index; if(output_context->oformat->flags & AVFMT_GLOBALHEADER){ - tmp->flags |= CODEC_FLAG_GLOBAL_HEADER; + tmp->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } tmp->codec_tag = 0; avcodec_parameters_from_context(out_stream->codecpar, tmp); @@ -111,7 +119,7 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas AVStream* out_stream = avformat_new_stream(output_context, codec); video_index_out = out_stream->index; if(output_context->oformat->flags & AVFMT_GLOBALHEADER){ - tmp->flags |= CODEC_FLAG_GLOBAL_HEADER; + tmp->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; } tmp->codec_tag = 0; avcodec_parameters_from_context(out_stream->codecpar, tmp); @@ -141,9 +149,14 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas } int64_t video_pts = 0, audio_pts = 0; - AVPacket packet; + int64_t last_video_dts = 0, last_audio_dts = 0; while(true){ + AVPacket packet; + av_init_packet(&packet); + packet.data = NULL; + packet.size = 0; + int64_t* last_dts; AVFormatContext* in_context; int stream_index = 0; AVStream* in_stream, *out_stream; @@ -152,6 +165,7 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas audio_pts, audio_context->streams[audio_index_in]->time_base) <= 0) { //video + last_dts = &last_video_dts; in_context = video_context; stream_index = video_index_out; @@ -168,6 +182,7 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas } }else{ //audio + last_dts = &last_audio_dts; in_context = audio_context; stream_index = audio_index_out; @@ -186,9 +201,20 @@ bool mux_audio_video(const raii::string_base& audio_file, const raii::string_bas in_stream = in_context->streams[packet.stream_index]; out_stream = output_context->streams[stream_index]; - packet.pts = av_rescale_q_rnd(packet.pts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX)); - packet.dts = av_rescale_q_rnd(packet.dts, in_stream->time_base, out_stream->time_base, (AVRounding)(AV_ROUND_NEAR_INF | AV_ROUND_PASS_MINMAX)); - packet.duration = av_rescale_q(packet.duration, in_stream->time_base, out_stream->time_base); + av_packet_rescale_ts(&packet, in_stream->time_base, out_stream->time_base); + + if(packet.dts < (*last_dts + !(output_context->oformat->flags & AVFMT_TS_NONSTRICT)) && packet.dts != AV_NOPTS_VALUE && (*last_dts) != AV_NOPTS_VALUE){ + int64_t next_dts = (*last_dts)+1; + if(packet.pts >= packet.dts && packet.pts != AV_NOPTS_VALUE){ + packet.pts = FFMAX(packet.pts, next_dts); + } + if(packet.pts == AV_NOPTS_VALUE){ + packet.pts = next_dts; + } + packet.dts = next_dts; + } + (*last_dts) = packet.dts; + packet.pos = -1; packet.stream_index = stream_index; @@ -296,11 +322,13 @@ int main(){ DEBUG_PRINT("matrix bot initialized\n"); reddit::post reply; + reply = mybot.get_top_post("WTF"_ss, reply.name(), reddit::time::hour); + reply = mybot.get_top_post("WTF"_ss, reply.name(), reddit::time::hour); { int retries = 5; do{ - reply = mybot.get_top_post("ProgrammerHumor"_ss, reply.name(), reddit::time::hour); + reply = mybot.get_top_post("WTF"_ss, reply.name(), reddit::time::hour); if(reply.type() != reddit::post_type::text && reply.type() != reddit::post_type::link) break; --retries; @@ -348,8 +376,8 @@ int main(){ }else{ DEBUG_PRINT("Remuxing audio and video\n"); mux_audio_video("audio"_ss, "video"_ss, "testout"_ss); - remove("video"); remove("audio"); + remove("video"); } }else{ file_output_curl(curl, target, reply.mediaurl());