From ae07b06c4b088fb275872cd3d6854a30842a4123 Mon Sep 17 00:00:00 2001 From: winlin Date: Sun, 16 Apr 2023 20:43:34 +0800 Subject: [PATCH 01/60] WHIP: Add WebRTC WHIP muxer. --- .gitignore | 1 + libavformat/Makefile | 1 + libavformat/allformats.c | 1 + libavformat/rtcenc.c | 84 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 87 insertions(+) create mode 100644 libavformat/rtcenc.c diff --git a/.gitignore b/.gitignore index e810d11107f26..08cc4bc5e19b4 100644 --- a/.gitignore +++ b/.gitignore @@ -41,3 +41,4 @@ /src /mapfile /tools/python/__pycache__/ +.idea diff --git a/libavformat/Makefile b/libavformat/Makefile index 329055ccfd9ba..49f32bc47aae1 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -499,6 +499,7 @@ OBJS-$(CONFIG_RSD_DEMUXER) += rsd.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RSO_DEMUXER) += rsodec.o rso.o pcm.o OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o rawenc.o +OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o OBJS-$(CONFIG_RTP_MPEGTS_MUXER) += rtpenc_mpegts.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ diff --git a/libavformat/allformats.c b/libavformat/allformats.c index d4b505a5a3253..ddaa4da0bfd63 100644 --- a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -398,6 +398,7 @@ extern const AVInputFormat ff_rpl_demuxer; extern const AVInputFormat ff_rsd_demuxer; extern const AVInputFormat ff_rso_demuxer; extern const FFOutputFormat ff_rso_muxer; +extern const FFOutputFormat ff_rtc_muxer; extern const AVInputFormat ff_rtp_demuxer; extern const FFOutputFormat ff_rtp_muxer; extern const FFOutputFormat ff_rtp_mpegts_muxer; diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c new file mode 100644 index 0000000000000..5e8374032dbd9 --- /dev/null +++ b/libavformat/rtcenc.c @@ -0,0 +1,84 @@ +/* + * FLV muxer + * Copyright (c) 2003 The FFmpeg Project + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/dict.h" +#include "libavutil/avassert.h" +#include "libavutil/mathematics.h" +#include "libavcodec/codec_desc.h" +#include "libavcodec/mpeg4audio.h" +#include "avformat.h" +#include "internal.h" +#include "mux.h" +#include "libavutil/opt.h" + +typedef struct RTCContext { + AVClass *av_class; +} RTCContext; + +static int rtc_init(struct AVFormatContext *s) +{ + return 0; +} + +static int rtc_write_header(struct AVFormatContext *s) +{ + return 0; +} + +static int rtc_write_packet(struct AVFormatContext *s, AVPacket *pkt) +{ + return 0; +} + +static int rtc_write_trailer(struct AVFormatContext *s) +{ + return 0; +} + +static void rtc_deinit(struct AVFormatContext *s) +{ +} + +static const AVOption options[] = { + { NULL }, +}; + +static const AVClass rtc_muxer_class = { + .class_name = "RTC WHIP muxer", + .item_name = av_default_item_name, + .option = NULL, + .version = LIBAVUTIL_VERSION_INT, +}; + +const FFOutputFormat ff_rtc_muxer = { + .p.name = "rtc", + .p.long_name = NULL_IF_CONFIG_SMALL("WebRTC WHIP muxer"), + .p.audio_codec = AV_CODEC_ID_OPUS, + .p.video_codec = AV_CODEC_ID_H264, + .p.flags = AVFMT_NOFILE, + .p.priv_class = &rtc_muxer_class, + .priv_data_size = sizeof(RTCContext), + .init = rtc_init, + 
.write_header = rtc_write_header, + .write_packet = rtc_write_packet, + .write_trailer = rtc_write_trailer, + .deinit = rtc_deinit, +}; From f090d6438fbc62fca5a37146308322681ece59c1 Mon Sep 17 00:00:00 2001 From: winlin Date: Sun, 16 Apr 2023 21:55:06 +0800 Subject: [PATCH 02/60] WHIP: Only support h264+opus codec. 1. Input stream codec should be h264 or opus. 2. For video codec profile, should be h264 baseline or constrained baseline. 3. For audio, should be 48000HZ and stereo. 4. Only support one video and audio stream. --- libavformat/rtcenc.c | 89 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 82 insertions(+), 7 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 5e8374032dbd9..3feb0fc3594b5 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1,6 +1,6 @@ /* - * FLV muxer - * Copyright (c) 2003 The FFmpeg Project + * WebRTC muxer + * Copyright (c) 2023 The FFmpeg Project * * This file is part of FFmpeg. * @@ -28,32 +28,107 @@ #include "internal.h" #include "mux.h" #include "libavutil/opt.h" +#include "libavcodec/avcodec.h" typedef struct RTCContext { AVClass *av_class; + + /* Input audio and video codec parameters */ + AVCodecParameters *audio_par; + AVCodecParameters *video_par; } RTCContext; -static int rtc_init(struct AVFormatContext *s) +/** + * Only support video(h264) and audio(opus) for now. Note that only baseline + * and constrained baseline of h264 are supported. 
+ */ +static int check_codec(AVFormatContext *s) +{ + int i; + RTCContext *rtc = s->priv_data; + + for (i = 0; i < s->nb_streams; i++) { + AVCodecParameters *par = s->streams[i]->codecpar; + const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + if (rtc->video_par) { + av_log(s, AV_LOG_ERROR, "Only one video stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + rtc->video_par = par; + + if (par->codec_id != AV_CODEC_ID_H264) { + av_log(s, AV_LOG_ERROR, "Unsupported video codec %s by RTC, choose h264\n", + desc ? desc->name : "unknown"); + return AVERROR(EINVAL); + } + if ((par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { + av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", + par->profile, i); + return AVERROR(EINVAL); + } + break; + case AVMEDIA_TYPE_AUDIO: + if (rtc->audio_par) { + av_log(s, AV_LOG_ERROR, "Only one audio stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + rtc->audio_par = par; + + if (par->codec_id != AV_CODEC_ID_OPUS) { + av_log(s, AV_LOG_ERROR, "Unsupported audio codec %s by RTC, choose opus\n", + desc ? 
desc->name : "unknown"); + return AVERROR(EINVAL); + } + + if (par->ch_layout.nb_channels != 2) { + av_log(s, AV_LOG_ERROR, "Unsupported audio channels %d by RTC, choose stereo\n", + par->ch_layout.nb_channels); + return AVERROR(EINVAL); + } + + if (par->sample_rate != 48000) { + av_log(s, AV_LOG_ERROR, "Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + return AVERROR(EINVAL); + } + break; + default: + av_log(s, AV_LOG_ERROR, "Codec type '%s' for stream %d is not supported by RTC\n", + av_get_media_type_string(par->codec_type), i); + return AVERROR(EINVAL); + } + } + + return 0; +} + +static int rtc_init(AVFormatContext *s) { + int ret; + + if ((ret = check_codec(s)) < 0) + return ret; + return 0; } -static int rtc_write_header(struct AVFormatContext *s) +static int rtc_write_header(AVFormatContext *s) { return 0; } -static int rtc_write_packet(struct AVFormatContext *s, AVPacket *pkt) +static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) { return 0; } -static int rtc_write_trailer(struct AVFormatContext *s) +static int rtc_write_trailer(AVFormatContext *s) { return 0; } -static void rtc_deinit(struct AVFormatContext *s) +static void rtc_deinit(AVFormatContext *s) { } From edaf213779aaeb741a20fc3bc8f4d499e724222f Mon Sep 17 00:00:00 2001 From: yangrtc Date: Sat, 22 Apr 2023 07:00:55 +0800 Subject: [PATCH 03/60] WHIP: Generate the offer and exchange it with the server to get the answer. 1. Generate random ICE ufrag, pwd and SSRC. 2. Use HTTP POST to send the offer to the server and read the answer. 3. Log the offer and answer at verbose level.
--- libavformat/rtcenc.c | 255 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 255 insertions(+) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 3feb0fc3594b5..29662d8716254 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -29,6 +29,9 @@ #include "mux.h" #include "libavutil/opt.h" #include "libavcodec/avcodec.h" +#include "libavutil/avstring.h" +#include "url.h" +#include "libavutil/random_seed.h" typedef struct RTCContext { AVClass *av_class; @@ -36,11 +39,32 @@ typedef struct RTCContext { /* Input audio and video codec parameters */ AVCodecParameters *audio_par; AVCodecParameters *video_par; + + /* The ICE username and pwd fragment generated by the muxer. */ + char ice_ufrag_local[9]; + char ice_pwd_local[33]; + /* The SSRC of the audio and video stream, generated by the muxer. */ + uint32_t audio_ssrc; + uint32_t video_ssrc; + /* The PT(Payload Type) of stream, generated by the muxer. */ + uint8_t audio_payload_type; + uint8_t video_payload_type; + /** + * The SDP offer generated by the muxer according to the codec parameters, + * DTLS and ICE information. + * */ + char *sdp_offer; + /* The SDP answer received from the WebRTC server. */ + char *sdp_answer; + /* The HTTP URL context is the transport layer for the WHIP protocol. */ + URLContext *whip_uc; } RTCContext; /** * Only support video(h264) and audio(opus) for now. Note that only baseline * and constrained baseline of h264 are supported. + * + * @return 0 if OK, AVERROR_xxx on error */ static int check_codec(AVFormatContext *s) { @@ -103,6 +127,227 @@ static int check_codec(AVFormatContext *s) return 0; } +/** + * Generate SDP offer according to the codec parameters, DTLS and ICE information. 
+ * The below is an example of SDP offer: + * + * v=0 + * o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1 + * s=FFmpegPublishSession + * t=0 0 + * a=group:BUNDLE 0 1 + * a=extmap-allow-mixed + * a=msid-semantic: WMS + * + * m=audio 9 UDP/TLS/RTP/SAVPF 111 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:a174B + * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy + * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 + * a=setup:actpass + * a=mid:0 + * a=sendonly + * a=msid:FFmpeg audio + * a=rtcp-mux + * a=rtpmap:111 opus/48000/2 + * a=ssrc:4267647086 cname:FFmpeg + * a=ssrc:4267647086 msid:FFmpeg audio + * + * m=video 9 UDP/TLS/RTP/SAVPF 106 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:a174B + * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy + * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 + * a=setup:actpass + * a=mid:1 + * a=sendonly + * a=msid:FFmpeg video + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:106 H264/90000 + * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01f + * a=ssrc:107169110 cname:FFmpeg + * a=ssrc:107169110 msid:FFmpeg video + * + * Note that we don't use av_sdp_create to generate SDP offer because it doesn't + * support DTLS and ICE information. 
+ * + * @return 0 if OK, AVERROR_xxx on error + */ +static int generate_sdp_offer(AVFormatContext *s) +{ + int profile_iop; + RTCContext *rtc = s->priv_data; + + if (rtc->sdp_offer) { + av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); + return AVERROR(EINVAL); + } + + snprintf(rtc->ice_ufrag_local, sizeof(rtc->ice_ufrag_local), "%08x", + av_get_random_seed()); + snprintf(rtc->ice_pwd_local, sizeof(rtc->ice_pwd_local), "%08x%08x%08x%08x", + av_get_random_seed(), av_get_random_seed(), av_get_random_seed(), + av_get_random_seed()); + + rtc->audio_ssrc = av_get_random_seed(); + rtc->video_ssrc = av_get_random_seed(); + + rtc->audio_payload_type = 111; + rtc->video_payload_type = 106; + + profile_iop = rtc->video_par->profile & FF_PROFILE_H264_CONSTRAINED ? 0xe0 : 0x00; + rtc->sdp_offer = av_asprintf( + "v=0\r\n" + "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" + "s=FFmpegPublishSession\r\n" + "t=0 0\r\n" + "a=group:BUNDLE 0 1\r\n" + "a=extmap-allow-mixed\r\n" + "a=msid-semantic: WMS\r\n" + "" + "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=setup:active\r\n" + "a=mid:0\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg audio\r\n" + "a=rtcp-mux\r\n" + "a=rtpmap:%u opus/%d/%d\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg audio\r\n" + "" + "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=setup:active\r\n" + "a=mid:1\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg video\r\n" + "a=rtcp-mux\r\n" + "a=rtcp-rsize\r\n" + "a=rtpmap:%u H264/90000\r\n" + "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u 
msid:FFmpeg video\r\n", + rtc->audio_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->audio_payload_type, + rtc->audio_par->sample_rate, + rtc->audio_par->ch_layout.nb_channels, + rtc->audio_ssrc, + rtc->audio_ssrc, + rtc->video_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->video_payload_type, + rtc->video_payload_type, + rtc->video_par->profile & (~FF_PROFILE_H264_CONSTRAINED), + profile_iop, + rtc->video_par->level, + rtc->video_ssrc, + rtc->video_ssrc + ); + av_log(s, AV_LOG_VERBOSE, "Generated offer: %s", rtc->sdp_offer); + + return 0; +} + +/** + * Exchange SDP offer with WebRTC peer to get the answer. + * The below is an example of SDP answer: + * + * v=0 + * o=SRS/6.0.42(Bee) 107408542208384 2 IN IP4 0.0.0.0 + * s=SRSPublishSession + * t=0 0 + * a=ice-lite + * a=group:BUNDLE 0 1 + * a=msid-semantic: WMS live/show + * + * m=audio 9 UDP/TLS/RTP/SAVPF 111 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:ex9061f9 + * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 + * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 + * a=setup:passive + * a=mid:0 + * a=recvonly + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:111 opus/48000/2 + * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 + * + * m=video 9 UDP/TLS/RTP/SAVPF 106 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:ex9061f9 + * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 + * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 + * a=setup:passive + * a=mid:1 + * a=recvonly + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:106 H264/90000 + * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01e + * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int exchange_sdp(AVFormatContext *s) +{ + int ret; + char headers[MAX_URL_SIZE], buf[MAX_URL_SIZE]; + char 
*p; + RTCContext *rtc = s->priv_data; + + ret = ffurl_alloc(&rtc->whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s", s->url); + return ret; + } + + snprintf(headers, sizeof(headers), + "Cache-Control: no-cache\r\n" + "Content-Type: application/sdp\r\n"); + av_opt_set(rtc->whip_uc->priv_data, "headers", headers, 0); + av_opt_set(rtc->whip_uc->priv_data, "chunked_post", "0", 0); + av_opt_set_bin(rtc->whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); + + ret = ffurl_connect(rtc->whip_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s", s->url, rtc->sdp_offer); + return ret; + } + + for (;;) { + ret = ffurl_read(rtc->whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. */ + ret = 0; + break; + } + if (ret <= 0) { + av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s", + s->url, rtc->sdp_offer, rtc->sdp_answer); + return ret; + } + + p = rtc->sdp_answer; + rtc->sdp_answer = av_asprintf("%s%.*s", p ? 
p : "", ret, buf); + av_free(p); + } + av_log(s, AV_LOG_VERBOSE, "Got answer: %s", rtc->sdp_answer); + + return ret; +} + static int rtc_init(AVFormatContext *s) { int ret; @@ -110,6 +355,12 @@ static int rtc_init(AVFormatContext *s) if ((ret = check_codec(s)) < 0) return ret; + if ((ret = generate_sdp_offer(s)) < 0) + return ret; + + if ((ret = exchange_sdp(s)) < 0) + return ret; + return 0; } @@ -130,6 +381,10 @@ static int rtc_write_trailer(AVFormatContext *s) static void rtc_deinit(AVFormatContext *s) { + RTCContext *rtc = s->priv_data; + av_freep(&rtc->sdp_offer); + av_freep(&rtc->sdp_answer); + ffurl_closep(&rtc->whip_uc); } static const AVOption options[] = { From 83202f7305ad2fa14b4a8cc79d47003d8b4d424a Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 22 Apr 2023 09:07:36 +0800 Subject: [PATCH 04/60] WHIP: Support parsing ICE from the answer. 1. Parse ICE username and pwd. 2. Parse ICE candidate, the priority and host. 3. Only UDP is supported. --- libavformat/rtcenc.c | 82 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 29662d8716254..3abc85f52a246 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -54,8 +54,18 @@ typedef struct RTCContext { * DTLS and ICE information. * */ char *sdp_offer; + + /* The ICE username and pwd from remote server. */ + char *ice_ufrag_remote; + char *ice_pwd_remote; + /* The ICE candidate protocol, priority, host and port. */ + char *ice_protocol; + int ice_priority; + char *ice_host; + int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; + /* The HTTP URL context is the transport layer for the WHIP protocol. */ URLContext *whip_uc; } RTCContext; @@ -348,6 +358,71 @@ static int exchange_sdp(AVFormatContext *s) return ret; } +/** + * Parse the ice ufrag, pwd and candidates from the answer.
+ * + * @return 0 if OK, AVERROR_xxx on error + */ +static int parse_answer(AVFormatContext *s) +{ + int ret = 0; + AVIOContext *pb; + char line[MAX_URL_SIZE]; + const char *ptr; + int i; + RTCContext *rtc = s->priv_data; + + pb = avio_alloc_context( + (unsigned char *)rtc->sdp_answer, (int)strlen(rtc->sdp_answer), + AVIO_FLAG_READ, NULL, NULL, NULL, NULL); + if (!pb) { + av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s", rtc->sdp_answer); + ret = AVERROR(ENOMEM); + goto end; + } + + for (i = 0; !avio_feof(pb); i++) { + ff_get_chomp_line(pb, line, sizeof(line)); + if (av_strstart(line, "a=ice-ufrag:", &ptr)) { + av_freep(&rtc->ice_ufrag_remote); + rtc->ice_ufrag_remote = av_strdup(ptr); + } else if (av_strstart(line, "a=ice-pwd:", &ptr)) { + av_freep(&rtc->ice_pwd_remote); + rtc->ice_pwd_remote = av_strdup(ptr); + } else if (av_strstart(line, "a=candidate:", &ptr)) { + ptr = av_stristr(ptr, "udp"); + if (ptr && av_stristr(ptr, "host")) { + char protocol[17], host[129]; + int priority, port; + ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); + if (ret != 4) { + av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s", + ret, i, line, rtc->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (av_strcasecmp(protocol, "udp")) { + av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp", protocol); + ret = AVERROR(EINVAL); + goto end; + } + + av_freep(&rtc->ice_protocol); + rtc->ice_protocol = av_strdup(protocol); + av_freep(&rtc->ice_host); + rtc->ice_host = av_strdup(host); + rtc->ice_priority = priority; + rtc->ice_port = port; + } + } + } + +end: + avio_context_free(&pb); + return ret; +} + static int rtc_init(AVFormatContext *s) { int ret; @@ -361,6 +436,9 @@ static int rtc_init(AVFormatContext *s) if ((ret = exchange_sdp(s)) < 0) return ret; + if ((ret = parse_answer(s)) < 0) + return ret; + return 0; } @@ -385,6 +463,10 @@ static void rtc_deinit(AVFormatContext *s) 
av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); ffurl_closep(&rtc->whip_uc); + av_freep(&rtc->ice_ufrag_remote); + av_freep(&rtc->ice_pwd_remote); + av_freep(&rtc->ice_protocol); + av_freep(&rtc->ice_host); } static const AVOption options[] = { From 8a63f74247dabc4c8598b0e8993ced51fe5d14b3 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 22 Apr 2023 11:19:01 +0800 Subject: [PATCH 05/60] WHIP: Refine code. --- .gitignore | 1 + libavformat/rtcenc.c | 111 +++++++++++++++++++++++++++---------------- 2 files changed, 71 insertions(+), 41 deletions(-) diff --git a/.gitignore b/.gitignore index 08cc4bc5e19b4..1b7086289db7f 100644 --- a/.gitignore +++ b/.gitignore @@ -42,3 +42,4 @@ /mapfile /tools/python/__pycache__/ .idea +patcheck.* diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 3abc85f52a246..ec8b6579aed47 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -33,6 +33,8 @@ #include "url.h" #include "libavutil/random_seed.h" +#define MAX_SDP_SIZE 8192 + typedef struct RTCContext { AVClass *av_class; @@ -58,16 +60,16 @@ typedef struct RTCContext { /* The ICE username and pwd from remote server. */ char *ice_ufrag_remote; char *ice_pwd_remote; - /* The ICE candidate protocol, priority, host and port. */ + /** + * The ICE candidate protocol, priority, host and port. Note that only + * support one candidate for now. We will choose the first udp candidate. + * We will support multiple candidates in the future. + */ char *ice_protocol; - int ice_priority; char *ice_host; int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; - - /* The HTTP URL context is the transport layer for the WHIP protocol. 
*/ - URLContext *whip_uc; } RTCContext; /** @@ -186,9 +188,15 @@ static int check_codec(AVFormatContext *s) */ static int generate_sdp_offer(AVFormatContext *s) { - int profile_iop; + int ret, profile_iop; RTCContext *rtc = s->priv_data; + char *tmp = av_mallocz(MAX_SDP_SIZE); + if (!tmp) { + av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); + return AVERROR(EINVAL); + } + if (rtc->sdp_offer) { av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); return AVERROR(EINVAL); @@ -207,7 +215,8 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->video_payload_type = 106; profile_iop = rtc->video_par->profile & FF_PROFILE_H264_CONSTRAINED ? 0xe0 : 0x00; - rtc->sdp_offer = av_asprintf( + + ret = av_strlcatf(tmp, MAX_SDP_SIZE, "v=0\r\n" "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" "s=FFmpegPublishSession\r\n" @@ -262,11 +271,19 @@ static int generate_sdp_offer(AVFormatContext *s) profile_iop, rtc->video_par->level, rtc->video_ssrc, - rtc->video_ssrc - ); + rtc->video_ssrc); + if (ret >= MAX_SDP_SIZE) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s", ret, MAX_SDP_SIZE, tmp); + ret = AVERROR(EINVAL); + goto end; + } + + rtc->sdp_offer = av_strdup(tmp); av_log(s, AV_LOG_VERBOSE, "Generated offer: %s", rtc->sdp_offer); - return 0; +end: + av_free(tmp); + return ret; } /** @@ -313,31 +330,38 @@ static int generate_sdp_offer(AVFormatContext *s) static int exchange_sdp(AVFormatContext *s) { int ret; - char headers[MAX_URL_SIZE], buf[MAX_URL_SIZE]; - char *p; + char buf[MAX_URL_SIZE]; RTCContext *rtc = s->priv_data; + /* The URL context is an HTTP transport layer for the WHIP protocol. 
*/ + URLContext *whip_uc = NULL; + + char *tmp = av_mallocz(MAX_SDP_SIZE); + if (!tmp) { + av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); + return AVERROR(EINVAL); + } - ret = ffurl_alloc(&rtc->whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s", s->url); - return ret; + goto end; } - snprintf(headers, sizeof(headers), + snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n" "Content-Type: application/sdp\r\n"); - av_opt_set(rtc->whip_uc->priv_data, "headers", headers, 0); - av_opt_set(rtc->whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set_bin(rtc->whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); + av_opt_set(whip_uc->priv_data, "headers", buf, 0); + av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); + av_opt_set_bin(whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); - ret = ffurl_connect(rtc->whip_uc, NULL); + ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s", s->url, rtc->sdp_offer); - return ret; + goto end; } - for (;;) { - ret = ffurl_read(rtc->whip_uc, buf, sizeof(buf)); + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); if (ret == AVERROR_EOF) { /* Reset the error because we read all response as answer util EOF. */ ret = 0; @@ -346,15 +370,23 @@ static int exchange_sdp(AVFormatContext *s) if (ret <= 0) { av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s", s->url, rtc->sdp_offer, rtc->sdp_answer); - return ret; + goto end; } - p = rtc->sdp_answer; - rtc->sdp_answer = av_asprintf("%s%.*s", p ? 
p : "", ret, buf); - av_free(p); + ret = av_strlcatf(tmp, MAX_SDP_SIZE, "%.*s", ret, buf); + if (ret >= MAX_SDP_SIZE) { + av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s", ret, MAX_SDP_SIZE, tmp); + ret = AVERROR(EINVAL); + goto end; + } } + + rtc->sdp_answer = av_strdup(tmp); av_log(s, AV_LOG_VERBOSE, "Got answer: %s", rtc->sdp_answer); +end: + ffurl_closep(&whip_uc); + av_free(tmp); return ret; } @@ -372,9 +404,7 @@ static int parse_answer(AVFormatContext *s) int i; RTCContext *rtc = s->priv_data; - pb = avio_alloc_context( - (unsigned char *)rtc->sdp_answer, (int)strlen(rtc->sdp_answer), - AVIO_FLAG_READ, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), AVIO_FLAG_READ, NULL, NULL, NULL, NULL); if (!pb) { av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s", rtc->sdp_answer); ret = AVERROR(ENOMEM); @@ -383,13 +413,11 @@ static int parse_answer(AVFormatContext *s) for (i = 0; !avio_feof(pb); i++) { ff_get_chomp_line(pb, line, sizeof(line)); - if (av_strstart(line, "a=ice-ufrag:", &ptr)) { - av_freep(&rtc->ice_ufrag_remote); + if (av_strstart(line, "a=ice-ufrag:", &ptr) && !rtc->ice_ufrag_remote) { rtc->ice_ufrag_remote = av_strdup(ptr); - } else if (av_strstart(line, "a=ice-pwd:", &ptr)) { - av_freep(&rtc->ice_pwd_remote); + } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !rtc->ice_pwd_remote) { rtc->ice_pwd_remote = av_strdup(ptr); - } else if (av_strstart(line, "a=candidate:", &ptr)) { + } else if (av_strstart(line, "a=candidate:", &ptr) && !rtc->ice_protocol) { ptr = av_stristr(ptr, "udp"); if (ptr && av_stristr(ptr, "host")) { char protocol[17], host[129]; @@ -403,27 +431,29 @@ static int parse_answer(AVFormatContext *s) } if (av_strcasecmp(protocol, "udp")) { - av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp", protocol); + av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s", + protocol, i, line, rtc->sdp_answer); ret = 
AVERROR(EINVAL); goto end; } - av_freep(&rtc->ice_protocol); rtc->ice_protocol = av_strdup(protocol); - av_freep(&rtc->ice_host); rtc->ice_host = av_strdup(host); - rtc->ice_priority = priority; rtc->ice_port = port; } } } + av_log(s, AV_LOG_VERBOSE, "SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", + strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), + rtc->ice_protocol, rtc->ice_host, rtc->ice_port); + end: avio_context_free(&pb); return ret; } -static int rtc_init(AVFormatContext *s) +static av_cold int rtc_init(AVFormatContext *s) { int ret; @@ -457,12 +487,11 @@ static int rtc_write_trailer(AVFormatContext *s) return 0; } -static void rtc_deinit(AVFormatContext *s) +static av_cold void rtc_deinit(AVFormatContext *s) { RTCContext *rtc = s->priv_data; av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); - ffurl_closep(&rtc->whip_uc); av_freep(&rtc->ice_ufrag_remote); av_freep(&rtc->ice_pwd_remote); av_freep(&rtc->ice_protocol); From 67bccbf0d0f5fdab5ce5dbe6ccbc9a45d4482990 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 2 May 2023 00:43:02 +0800 Subject: [PATCH 06/60] WHIP: Support ICE handshake by binding. 1. Create UDP URLContext for ICE, DTLS and RTP. 2. Connect to ice-lite server as UDP client. 3. Create, build and send ICE binding request. 4. Read and verify ICE binding response. 
--- libavformat/rtcenc.c | 144 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 132 insertions(+), 12 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index ec8b6579aed47..e51359ea827f2 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -32,8 +32,12 @@ #include "libavutil/avstring.h" #include "url.h" #include "libavutil/random_seed.h" +#include "avio_internal.h" +#include "libavutil/hmac.h" +#include "libavutil/crc.h" #define MAX_SDP_SIZE 8192 +#define MAX_UDP_SIZE 1500 typedef struct RTCContext { AVClass *av_class; @@ -70,6 +74,9 @@ typedef struct RTCContext { int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; + + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + URLContext *udp_uc; } RTCContext; /** @@ -97,12 +104,12 @@ static int check_codec(AVFormatContext *s) if (par->codec_id != AV_CODEC_ID_H264) { av_log(s, AV_LOG_ERROR, "Unsupported video codec %s by RTC, choose h264\n", desc ? desc->name : "unknown"); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } if ((par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", par->profile, i); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } break; case AVMEDIA_TYPE_AUDIO: @@ -115,24 +122,24 @@ static int check_codec(AVFormatContext *s) if (par->codec_id != AV_CODEC_ID_OPUS) { av_log(s, AV_LOG_ERROR, "Unsupported audio codec %s by RTC, choose opus\n", desc ? 
desc->name : "unknown"); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } if (par->ch_layout.nb_channels != 2) { av_log(s, AV_LOG_ERROR, "Unsupported audio channels %d by RTC, choose stereo\n", par->ch_layout.nb_channels); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } if (par->sample_rate != 48000) { av_log(s, AV_LOG_ERROR, "Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } break; default: av_log(s, AV_LOG_ERROR, "Codec type '%s' for stream %d is not supported by RTC\n", av_get_media_type_string(par->codec_type), i); - return AVERROR(EINVAL); + return AVERROR_PATCHWELCOME; } } @@ -194,7 +201,7 @@ static int generate_sdp_offer(AVFormatContext *s) char *tmp = av_mallocz(MAX_SDP_SIZE); if (!tmp) { av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); - return AVERROR(EINVAL); + return AVERROR(ENOMEM); } if (rtc->sdp_offer) { @@ -274,7 +281,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->video_ssrc); if (ret >= MAX_SDP_SIZE) { av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s", ret, MAX_SDP_SIZE, tmp); - ret = AVERROR(EINVAL); + ret = AVERROR(EIO); goto end; } @@ -338,7 +345,7 @@ static int exchange_sdp(AVFormatContext *s) char *tmp = av_mallocz(MAX_SDP_SIZE); if (!tmp) { av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); - return AVERROR(EINVAL); + return AVERROR(ENOMEM); } ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); @@ -376,7 +383,7 @@ static int exchange_sdp(AVFormatContext *s) ret = av_strlcatf(tmp, MAX_SDP_SIZE, "%.*s", ret, buf); if (ret >= MAX_SDP_SIZE) { av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s", ret, MAX_SDP_SIZE, tmp); - ret = AVERROR(EINVAL); + ret = AVERROR(EIO); goto end; } } @@ -426,14 +433,14 @@ static int parse_answer(AVFormatContext *s) if (ret != 4) { av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s", ret, i, line, rtc->sdp_answer); - ret = 
AVERROR(EINVAL); + ret = AVERROR(EIO); goto end; } if (av_strcasecmp(protocol, "udp")) { av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s", protocol, i, line, rtc->sdp_answer); - ret = AVERROR(EINVAL); + ret = AVERROR(EIO); goto end; } @@ -453,6 +460,115 @@ static int parse_answer(AVFormatContext *s) return ret; } +/** + * Open the UDP transport and complete the ICE handshake. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int ice_handshake(AVFormatContext *s) +{ + int ret, len, crc32; + char url[256], buf[MAX_UDP_SIZE]; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + RTCContext *rtc = s->priv_data; + + pb = avio_alloc_context(buf, sizeof(buf), AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); + if (!pb) { + av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE"); + ret = AVERROR(ENOMEM); + goto end; + } + + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + av_log(s, AV_LOG_ERROR, "Failed to alloc AVHMAC for ICE"); + ret = AVERROR(ENOMEM); + goto end; + } + + /* Build UDP URL and create the UDP context as transport. */ + ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); + ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE | AVIO_FLAG_NONBLOCK, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d", rtc->ice_host, rtc->ice_port); + goto end; + } + + av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); + av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); + + ret = ffurl_connect(rtc->udp_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d", rtc->ice_host, rtc->ice_port); + goto end; + } + + /* Set the transport as READ and WRITE after connected. */ + rtc->udp_uc->flags |= AVIO_FLAG_READ; + + /* Build and send the STUN binding request. 
*/ + /* Write 20 bytes header */ + avio_wb16(pb, 0x0001); /* STUN binding request */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, 0x2112A442); /* magic cookie */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + /* Write the username attribute */ + ret = snprintf(url, sizeof(url), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); + avio_wb16(pb, 0x0006); /* attribute type username */ + avio_wb16(pb, ret); /* size of username */ + avio_write(pb, url, ret); /* bytes of username */ + ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + /* Build and update message integrity */ + avio_wb16(pb, 0x0008); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + len = avio_tell(pb); + buf[2] = (len - 20) >> 8; + buf[3] = (len - 20) & 0xFF; + av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); + av_hmac_update(hmac, buf, len - 24); + av_hmac_final(hmac, buf + len - 20, 20); + /* Write the fingerprint attribute */ + avio_wb16(pb, 0x8028); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + len = avio_tell(pb); + buf[2] = (len - 20) >> 8; + buf[3] = (len - 20) & 0xFF; + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, len - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* hash message by CRC32 */ + + ret = ffurl_write(rtc->udp_uc, buf, len); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d", len); + goto end; + } + + /* Read the STUN binding response. 
*/ + ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response"); + goto end; + } + + if (ret < 2 || buf[0] != 0x01 || buf[1] != 0x01) { + av_log(s, AV_LOG_ERROR, "Invalid STUN binding response, size=%d, type=%02X%02X", ret, buf[0], buf[1]); + ret = AVERROR(EIO); + goto end; + } + av_log(s, AV_LOG_VERBOSE, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, request=%dB, response=%dB\n", + rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, len, ret); + +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} + static av_cold int rtc_init(AVFormatContext *s) { int ret; @@ -469,6 +585,9 @@ static av_cold int rtc_init(AVFormatContext *s) if ((ret = parse_answer(s)) < 0) return ret; + if ((ret = ice_handshake(s)) < 0) + return ret; + return 0; } @@ -496,6 +615,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) av_freep(&rtc->ice_pwd_remote); av_freep(&rtc->ice_protocol); av_freep(&rtc->ice_host); + ffurl_closep(&rtc->udp_uc); } static const AVOption options[] = { From 9db69613eefc8cc9d6dfe6174e1d438b024103d8 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 2 May 2023 07:26:13 +0800 Subject: [PATCH 07/60] WHIP: Support fast retransmit for binding request. --- libavformat/rtcenc.c | 207 ++++++++++++++++++++++++++++--------------- 1 file changed, 136 insertions(+), 71 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index e51359ea827f2..fbd64b856a351 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -35,47 +35,55 @@ #include "avio_internal.h" #include "libavutil/hmac.h" #include "libavutil/crc.h" +#include "network.h" +#include "libavutil/time.h" +/* The maximum size of an SDP, either offer or answer. */ #define MAX_SDP_SIZE 8192 +/* The maximum size of a UDP packet, should be smaller than the MTU. */ #define MAX_UDP_SIZE 1500 +/* The maximum number of retries for UDP transmission. 
*/ +#define UDP_FAST_RETRIES 6 +/* The startup timeout for UDP transmission. */ +#define UDP_START_TIMEOUT 21 typedef struct RTCContext { AVClass *av_class; - /* Input audio and video codec parameters */ + /* input audio and video codec parameters */ AVCodecParameters *audio_par; AVCodecParameters *video_par; - /* The ICE username and pwd fragment generated by the muxer. */ + /* the ICE username and pwd fragment generated by the muxer. */ char ice_ufrag_local[9]; char ice_pwd_local[33]; - /* The SSRC of the audio and video stream, generated by the muxer. */ + /* the SSRC of the audio and video stream, generated by the muxer. */ uint32_t audio_ssrc; uint32_t video_ssrc; - /* The PT(Payload Type) of stream, generated by the muxer. */ + /* the PT(Payload Type) of stream, generated by the muxer. */ uint8_t audio_payload_type; uint8_t video_payload_type; /** - * The SDP offer generated by the muxer according to the codec parameters, + * the SDP offer generated by the muxer according to the codec parameters, * DTLS and ICE information. * */ char *sdp_offer; - /* The ICE username and pwd from remote server. */ + /* the ICE username and pwd from remote server. */ char *ice_ufrag_remote; char *ice_pwd_remote; /** - * The ICE candidate protocol, priority, host and port. Note that only + * the ICE candidate protocol, priority, host and port. Note that only * support one candidate for now. We will choose the first udp candidate. * We will support multiple candidates in the future. */ char *ice_protocol; char *ice_host; int ice_port; - /* The SDP answer received from the WebRTC server. */ + /* the SDP answer received from the WebRTC server. */ char *sdp_answer; - /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + /* the UDP transport is used for delivering ICE, DTLS and SRTP packets. 
*/ URLContext *udp_uc; } RTCContext; @@ -200,7 +208,7 @@ static int generate_sdp_offer(AVFormatContext *s) char *tmp = av_mallocz(MAX_SDP_SIZE); if (!tmp) { - av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); + av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s\n", s->url); return AVERROR(ENOMEM); } @@ -280,13 +288,13 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->video_ssrc, rtc->video_ssrc); if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s", ret, MAX_SDP_SIZE, tmp); + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); ret = AVERROR(EIO); goto end; } rtc->sdp_offer = av_strdup(tmp); - av_log(s, AV_LOG_VERBOSE, "Generated offer: %s", rtc->sdp_offer); + av_log(s, AV_LOG_VERBOSE, "Generated offer: %s\n", rtc->sdp_offer); end: av_free(tmp); @@ -339,18 +347,18 @@ static int exchange_sdp(AVFormatContext *s) int ret; char buf[MAX_URL_SIZE]; RTCContext *rtc = s->priv_data; - /* The URL context is an HTTP transport layer for the WHIP protocol. */ + /* the URL context is an HTTP transport layer for the WHIP protocol. 
*/ URLContext *whip_uc = NULL; char *tmp = av_mallocz(MAX_SDP_SIZE); if (!tmp) { - av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s", s->url); + av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s\n", s->url); return AVERROR(ENOMEM); } ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s", s->url); + av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s\n", s->url); goto end; } @@ -363,33 +371,33 @@ static int exchange_sdp(AVFormatContext *s) ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s", s->url, rtc->sdp_offer); + av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); goto end; } while (1) { ret = ffurl_read(whip_uc, buf, sizeof(buf)); if (ret == AVERROR_EOF) { - /* Reset the error because we read all response as answer util EOF. */ + /* reset the error because we read all response as answer util EOF. 
*/ ret = 0; break; } if (ret <= 0) { - av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s", + av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s\n", s->url, rtc->sdp_offer, rtc->sdp_answer); goto end; } ret = av_strlcatf(tmp, MAX_SDP_SIZE, "%.*s", ret, buf); if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s", ret, MAX_SDP_SIZE, tmp); + av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, tmp); ret = AVERROR(EIO); goto end; } } rtc->sdp_answer = av_strdup(tmp); - av_log(s, AV_LOG_VERBOSE, "Got answer: %s", rtc->sdp_answer); + av_log(s, AV_LOG_VERBOSE, "Got answer: %s\n", rtc->sdp_answer); end: ffurl_closep(&whip_uc); @@ -413,7 +421,7 @@ static int parse_answer(AVFormatContext *s) pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), AVIO_FLAG_READ, NULL, NULL, NULL, NULL); if (!pb) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s", rtc->sdp_answer); + av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s\n", rtc->sdp_answer); ret = AVERROR(ENOMEM); goto end; } @@ -431,14 +439,14 @@ static int parse_answer(AVFormatContext *s) int priority, port; ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); if (ret != 4) { - av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s", + av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s\n", ret, i, line, rtc->sdp_answer); ret = AVERROR(EIO); goto end; } if (av_strcasecmp(protocol, "udp")) { - av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s", + av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", protocol, i, line, rtc->sdp_answer); ret = AVERROR(EIO); goto end; @@ -461,67 +469,57 @@ static int parse_answer(AVFormatContext *s) } /** - * Open the UDP transport and complete the ICE handshake. 
+ * Create and marshal ICE binding request packet. The size of the response is + * returned in size_of_response. * * @return 0 if OK, AVERROR_xxx on error */ -static int ice_handshake(AVFormatContext *s) +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size, int *size_of_response) { int ret, len, crc32; - char url[256], buf[MAX_UDP_SIZE]; + char username[128]; AVIOContext *pb = NULL; AVHMAC *hmac = NULL; RTCContext *rtc = s->priv_data; - pb = avio_alloc_context(buf, sizeof(buf), AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(buf, size, AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); if (!pb) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE"); + av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE\n"); ret = AVERROR(ENOMEM); goto end; } hmac = av_hmac_alloc(AV_HMAC_SHA1); if (!hmac) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVHMAC for ICE"); + av_log(s, AV_LOG_ERROR, "Failed to alloc AVHMAC for ICE\n"); ret = AVERROR(ENOMEM); goto end; } - /* Build UDP URL and create the UDP context as transport. */ - ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); - ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE | AVIO_FLAG_NONBLOCK, &s->interrupt_callback); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d", rtc->ice_host, rtc->ice_port); - goto end; - } - - av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); - av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); - - ret = ffurl_connect(rtc->udp_uc, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d", rtc->ice_host, rtc->ice_port); - goto end; - } - - /* Set the transport as READ and WRITE after connected. */ - rtc->udp_uc->flags |= AVIO_FLAG_READ; - - /* Build and send the STUN binding request. 
*/ - /* Write 20 bytes header */ + /* write 20 bytes header */ avio_wb16(pb, 0x0001); /* STUN binding request */ avio_wb16(pb, 0); /* length */ avio_wb32(pb, 0x2112A442); /* magic cookie */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - /* Write the username attribute */ - ret = snprintf(url, sizeof(url), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); + + /* the username is the concatenation of the two ICE ufrag */ + ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); + if (ret <= 0 || ret >= sizeof(username)) { + av_log(s, AV_LOG_ERROR, "Failed to build username %s:%s, max=%lu, ret=%d\n", + rtc->ice_ufrag_remote, rtc->ice_ufrag_local, sizeof(username), ret); + ret = AVERROR(EIO); + goto end; + } + + /* write the username attribute */ avio_wb16(pb, 0x0006); /* attribute type username */ avio_wb16(pb, ret); /* size of username */ - avio_write(pb, url, ret); /* bytes of username */ + avio_write(pb, username, ret); /* bytes of username */ ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ - /* Build and update message integrity */ + + /* build and update message integrity */ avio_wb16(pb, 0x0008); /* attribute type message integrity */ avio_wb16(pb, 20); /* size of message integrity */ ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ @@ -531,41 +529,106 @@ static int ice_handshake(AVFormatContext *s) av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); av_hmac_update(hmac, buf, len - 24); av_hmac_final(hmac, buf + len - 20, 20); - /* Write the fingerprint attribute */ + + /* write the fingerprint attribute */ avio_wb16(pb, 0x8028); /* attribute type fingerprint */ avio_wb16(pb, 4); /* size of fingerprint */ ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ len = avio_tell(pb); buf[2] = (len - 20) >> 8; buf[3] = (len - 20) & 
0xFF; + /* refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, len - 8) ^ 0xFFFFFFFF; avio_skip(pb, -4); - avio_wb32(pb, crc32 ^ 0x5354554E); /* hash message by CRC32 */ + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *size_of_response = len; + +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} - ret = ffurl_write(rtc->udp_uc, buf, len); +/** + * Open the UDP transport and complete the ICE handshake. Use fast retransmit to + * handle packet loss for the binding request. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int ice_handshake(AVFormatContext *s) +{ + int ret, len, fast_retries = UDP_FAST_RETRIES, timeout = UDP_START_TIMEOUT; + char url[256], buf[MAX_UDP_SIZE]; + RTCContext *rtc = s->priv_data; + + /* Build UDP URL and create the UDP context as transport. */ + ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); + ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + goto end; + } + + av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); + av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); + + ret = ffurl_connect(rtc->udp_uc, NULL); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d", len); + av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); goto end; } - /* Read the STUN binding response. */ - ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); + /* make the socket non-blocking, set to READ and WRITE mode after connected */ + ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); + rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + + /* Build the STUN binding request. 
*/ + ret = ice_create_request(s, buf, sizeof(buf), &len); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response"); + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, len=%d\n", len); goto end; } + /* Fast retransmit the STUN binding request. */ + do { + ret = ffurl_write(rtc->udp_uc, buf, len); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", len); + goto end; + } + + /* If max retries is 6 and start timeout is 21ms, the total timeout + * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. */ + if (fast_retries) { + av_usleep(timeout * 1000); + timeout *= 2; + } + + /* Read the STUN binding response. */ + ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); + if (ret < 0) { + if (ret == AVERROR(EAGAIN) && fast_retries) { + fast_retries--; + continue; + } + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", UDP_FAST_RETRIES); + goto end; + } + } while (ret < 0); + if (ret < 2 || buf[0] != 0x01 || buf[1] != 0x01) { - av_log(s, AV_LOG_ERROR, "Invalid STUN binding response, size=%d, type=%02X%02X", ret, buf[0], buf[1]); + av_log(s, AV_LOG_ERROR, "Invalid STUN binding response, size=%d, type=%02X%02X\n", ret, buf[0], buf[1]); ret = AVERROR(EIO); goto end; } - av_log(s, AV_LOG_VERBOSE, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, request=%dB, response=%dB\n", - rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, len, ret); + + av_log(s, AV_LOG_VERBOSE, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", + rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, len, ret, + UDP_FAST_RETRIES - fast_retries); + ret = 0; end: - avio_context_free(&pb); - av_hmac_free(hmac); return ret; } @@ -585,15 +648,17 @@ static av_cold int rtc_init(AVFormatContext *s) if ((ret = parse_answer(s)) < 0) return ret; - if ((ret = ice_handshake(s)) < 0) - return ret; - return 0; } static int rtc_write_header(AVFormatContext 
*s) { - return 0; + int ret; + + if ((ret = ice_handshake(s)) < 0) + return ret; + + return ret; } static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) From 149668479eed94aade5d8298d0dee388bf606cc3 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 2 May 2023 08:57:22 +0800 Subject: [PATCH 08/60] WHIP: Update the dependency, requires openssl. --- configure | 1 + libavformat/rtcenc.c | 81 +++++++++++++++++++++++++++++++++----------- 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/configure b/configure index d203177a74792..2ef0ff537e4cf 100755 --- a/configure +++ b/configure @@ -3532,6 +3532,7 @@ ogg_demuxer_select="dirac_parse" ogv_muxer_select="ogg_muxer" opus_muxer_select="ogg_muxer" psp_muxer_select="mov_muxer" +rtc_muxer_deps_any="openssl" rtp_demuxer_select="sdp_demuxer" rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer" rtpdec_select="asf_demuxer mov_demuxer mpegts_demuxer rm_demuxer rtp_protocol srtp" diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index fbd64b856a351..ccdcc8c4a75d0 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -19,6 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "config.h" + +#if CONFIG_OPENSSL +#include +#include +#endif + #include "libavutil/dict.h" #include "libavutil/avassert.h" #include "libavutil/mathematics.h" @@ -474,15 +481,15 @@ static int parse_answer(AVFormatContext *s) * * @return 0 if OK, AVERROR_xxx on error */ -static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size, int *size_of_response) +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, int *size_of_response) { - int ret, len, crc32; + int ret, size, crc32; char username[128]; AVIOContext *pb = NULL; AVHMAC *hmac = NULL; RTCContext *rtc = s->priv_data; - pb = avio_alloc_context(buf, size, AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(buf, size_of_buf, AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); if (!pb) 
{ av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE\n"); ret = AVERROR(ENOMEM); @@ -523,26 +530,26 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size, int *s avio_wb16(pb, 0x0008); /* attribute type message integrity */ avio_wb16(pb, 20); /* size of message integrity */ ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ - len = avio_tell(pb); - buf[2] = (len - 20) >> 8; - buf[3] = (len - 20) & 0xFF; + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); - av_hmac_update(hmac, buf, len - 24); - av_hmac_final(hmac, buf + len - 20, 20); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); /* write the fingerprint attribute */ avio_wb16(pb, 0x8028); /* attribute type fingerprint */ avio_wb16(pb, 4); /* size of fingerprint */ ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ - len = avio_tell(pb); - buf[2] = (len - 20) >> 8; - buf[3] = (len - 20) & 0xFF; + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; /* refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ - crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, len - 8) ^ 0xFFFFFFFF; + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; avio_skip(pb, -4); avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ - *size_of_response = len; + *size_of_response = size; end: avio_context_free(&pb); @@ -558,7 +565,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size, int *s */ static int ice_handshake(AVFormatContext *s) { - int ret, len, fast_retries = UDP_FAST_RETRIES, timeout = UDP_START_TIMEOUT; + int ret, size, fast_retries = UDP_FAST_RETRIES, timeout = UDP_START_TIMEOUT; char url[256], buf[MAX_UDP_SIZE]; RTCContext *rtc = s->priv_data; @@ -584,17 +591,17 @@ static int 
ice_handshake(AVFormatContext *s) rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; /* Build the STUN binding request. */ - ret = ice_create_request(s, buf, sizeof(buf), &len); + ret = ice_create_request(s, buf, sizeof(buf), &size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, len=%d\n", len); + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); goto end; } /* Fast retransmit the STUN binding request. */ do { - ret = ffurl_write(rtc->udp_uc, buf, len); + ret = ffurl_write(rtc->udp_uc, buf, size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", len); + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); goto end; } @@ -624,7 +631,7 @@ static int ice_handshake(AVFormatContext *s) } av_log(s, AV_LOG_VERBOSE, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", - rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, len, ret, + rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, UDP_FAST_RETRIES - fast_retries); ret = 0; @@ -632,6 +639,34 @@ static int ice_handshake(AVFormatContext *s) return ret; } +#if CONFIG_OPENSSL +/** + * DTLS handshake with server, as a client in active mode, using openssl. 
+ * + * @return 0 if OK, AVERROR_xxx on error + */ +static int dtls_handshake_openssl(AVFormatContext *s) +{ + int ret = 0; + SSL_CTX *dtls_ctx = NULL; + +#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 + dtls_ctx = SSL_CTX_new(DTLSv1_method()); +#else + dtls_ctx = SSL_CTX_new(DTLS_client_method()); +#endif + if (!dtls_ctx) { + av_log(s, AV_LOG_ERROR, "Failed to create DTLS context\n"); + ret = AVERROR(ENOMEM); + goto end; + } + +end: + SSL_CTX_free(dtls_ctx); + return ret; +} +#endif + static av_cold int rtc_init(AVFormatContext *s) { int ret; @@ -658,6 +693,14 @@ static int rtc_write_header(AVFormatContext *s) if ((ret = ice_handshake(s)) < 0) return ret; +#if CONFIG_OPENSSL + if ((ret = dtls_handshake_openssl(s)) < 0) + return ret; +#else + av_log(s, AV_LOG_ERROR, "DTLS is not supported, please enable openssl\n"); + return AVERROR(ENOSYS); +#endif + return ret; } From 0522db85f8c44f37f612db12d4f6985f37063902 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 2 May 2023 20:45:45 +0800 Subject: [PATCH 09/60] WHIP: Support DTLS handshake by openssl 1. Require openssl 1.1.1b or newer. 2. Enable ECDHE cipher suites by default. 3. Support retransmit by openssl. 4. Export SRTP material to build master key. 5. Do not verify the certificate of peer. --- libavformat/rtcenc.c | 373 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 356 insertions(+), 17 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index ccdcc8c4a75d0..3bbab3609619d 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1,5 +1,5 @@ /* - * WebRTC muxer + * WebRTC-HTTP ingestion protocol (WHIP) muxer * Copyright (c) 2023 The FFmpeg Project * * This file is part of FFmpeg. @@ -24,6 +24,9 @@ #if CONFIG_OPENSSL #include #include +#if OPENSSL_VERSION_NUMBER < 0x1010102fL +#error "OpenSSL version 1.1.1b or newer is required" +#endif #endif #include "libavutil/dict.h" @@ -53,6 +56,17 @@ #define UDP_FAST_RETRIES 6 /* The startup timeout for UDP transmission. 
*/ #define UDP_START_TIMEOUT 21 +/* Avoid dtls negotiate failed, set max fragment size 1200. */ +#define DTLS_FRAGMENT_MAX_SIZE 1200 +/* Supported DTLS cipher suites. */ +#define DTLS_CIPHER_SUTES "ECDHE-ECDSA-AES128-GCM-SHA256"\ + ":ECDHE-RSA-AES128-GCM-SHA256"\ + ":ECDHE-ECDSA-AES128-SHA"\ + ":ECDHE-RSA-AES128-SHA"\ + ":ECDHE-ECDSA-AES256-SHA"\ + ":ECDHE-RSA-AES256-SHA" +/* The SRTP key size, defined by SRTP_MASTER_KEY_LEN */ +#define DTLS_SRTP_MASTER_KEY_LEN 30 typedef struct RTCContext { AVClass *av_class; @@ -90,6 +104,15 @@ typedef struct RTCContext { /* the SDP answer received from the WebRTC server. */ char *sdp_answer; + /* whether the timer should be reset. */ + int dtls_should_reset_timer; + /* whether the DTLS is done at least for us. */ + int dtls_done_for_us; + /* the number of packets retransmitted for DTLS. */ + int dtls_arq_packets; + /* the material to build SRTP master key, generated by DTLS. */ + uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; + /* the UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; } RTCContext; @@ -466,7 +489,7 @@ static int parse_answer(AVFormatContext *s) } } - av_log(s, AV_LOG_VERBOSE, "SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", + av_log(s, AV_LOG_INFO, "SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), rtc->ice_protocol, rtc->ice_host, rtc->ice_port); @@ -590,14 +613,14 @@ static int ice_handshake(AVFormatContext *s) ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; - /* Build the STUN binding request. */ + /* build the STUN binding request. */ ret = ice_create_request(s, buf, sizeof(buf), &size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); goto end; } - /* Fast retransmit the STUN binding request. 
*/ + /* fast retransmit the STUN binding request. */ do { ret = ffurl_write(rtc->udp_uc, buf, size); if (ret < 0) { @@ -605,14 +628,14 @@ static int ice_handshake(AVFormatContext *s) goto end; } - /* If max retries is 6 and start timeout is 21ms, the total timeout + /* if max retries is 6 and start timeout is 21ms, the total timeout * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. */ if (fast_retries) { av_usleep(timeout * 1000); timeout *= 2; } - /* Read the STUN binding response. */ + /* read the STUN binding response. */ ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); if (ret < 0) { if (ret == AVERROR(EAGAIN) && fast_retries) { @@ -630,7 +653,7 @@ static int ice_handshake(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_VERBOSE, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", + av_log(s, AV_LOG_INFO, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, UDP_FAST_RETRIES - fast_retries); ret = 0; @@ -640,29 +663,345 @@ static int ice_handshake(AVFormatContext *s) } #if CONFIG_OPENSSL +static void openssl_on_info(const SSL *dtls, int where, int ret) +{ + int w, r1; + const char *method, *alert_type, *alert_desc; + AVFormatContext *s = (AVFormatContext*)SSL_get_ex_data(dtls, 0); + + w = where & ~SSL_ST_MASK; + if (w & SSL_ST_CONNECT) { + method = "SSL_connect"; + } else if (w & SSL_ST_ACCEPT) { + method = "SSL_accept"; + } else { + method = "undefined"; + } + + r1 = SSL_get_error(dtls, ret); + if (where & SSL_CB_LOOP) { + av_log(s, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? 
"read":"write"; + + alert_type = SSL_alert_type_string_long(ret); + alert_desc = SSL_alert_desc_string(ret); + + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) { + av_log(s, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); + } else { + av_log(s, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); + } + } else if (where & SSL_CB_EXIT) { + if (!ret) { + av_log(s, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else if (ret < 0) { + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) { + av_log(s, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else { + av_log(s, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } + } + } +} + +static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) +{ + AVFormatContext *s = (AVFormatContext*)SSL_get_ex_data(dtls, 0); + RTCContext *rtc = s->priv_data; + + /* double the timeout, note that it may be 0. */ + unsigned int timeout_us = previous_us * 2; + + /* if previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. + * when got ServerHello, we should reset the timer. */ + if (!previous_us || rtc->dtls_should_reset_timer) { + timeout_us = 50 * 1000; /* in us */ + } + + // never exceed the max timeout. 
+ timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); // in us + + av_log(s, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", + timeout_us / 1000, previous_us / 1000); + + return timeout_us; +} + +static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, int incoming, int r0, int r1) +{ + uint8_t content_type = 0; + uint16_t size = 0; + uint8_t handshake_type = 0; + RTCContext *rtc = s->priv_data; + + /* change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + if (length >= 1) { + content_type = (uint8_t)data[0]; + } + + if (length >= 13) { + size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; + } + + if (length >= 14) { + handshake_type = (uint8_t)data[13]; + } + + av_log(s, AV_LOG_INFO, "DTLS: State %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", + "Active", (incoming? "RECV":"SEND"), rtc->dtls_done_for_us, rtc->dtls_arq_packets, r0, r1, length, + content_type, size, handshake_type); +} + +/** + * The return value of verify_callback controls the strategy of the further verification process. If verify_callback + * returns 0, the verification process is immediately stopped with "verification failed" state. If SSL_VERIFY_PEER is + * set, a verification failure alert is sent to the peer and the TLS/SSL handshake is terminated. If verify_callback + * returns 1, the verification process is continued. If verify_callback always returns 1, the TLS/SSL handshake will + * not be terminated with respect to verification failures and the connection will be established. The calling process + * can however retrieve the error code of the last verification error using SSL_get_verify_result(3) or by maintaining + * its own error storage managed by verify_callback. + */ +static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ + /* always OK, we don't check the certificate of client, because we allow client self-sign certificate. 
*/ + return 1; +} + +/** + * Initialize DTLS context. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtls_ctx, EVP_PKEY *dtls_pkey, EC_KEY *eckey) +{ + int ret; + + /* should use the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 + */ + EC_GROUP *ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + + if (EC_KEY_set_group(eckey, ecgroup) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (EC_KEY_generate_key(eckey) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (EVP_PKEY_set1_EC_KEY(dtls_pkey, eckey) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + + /* for ECDSA, we could set the curves list. */ + SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256"); + + /* we use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ + if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + /* server will send Certificate Request. */ + SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_verify_callback); + /* the depth count is "level 0:peer certificate", "level 1: CA certificate", + * "level 2: higher level CA certificate", and so on. */ + SSL_CTX_set_verify_depth(dtls_ctx, 4); + /* whether we should read as many input bytes as possible (for non-blocking reads) or not. 
*/ + SSL_CTX_set_read_ahead(dtls_ctx, 1); + /* only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ + ret = SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80"); + if (ret) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed, ret=%d\n", ret); + ret = AVERROR(EINVAL); + goto end; + } + + /* setup the callback for logging. */ + SSL_set_ex_data(dtls, 0, s); + SSL_set_info_callback(dtls, openssl_on_info); + + /* set dtls fragment size */ + SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); + SSL_set_mtu(dtls, DTLS_FRAGMENT_MAX_SIZE); + + /* set the callback for ARQ timer. */ + DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); + + /* setup DTLS as active, which is client role. */ + SSL_set_connect_state(dtls); + SSL_set_max_send_fragment(dtls, DTLS_FRAGMENT_MAX_SIZE); + +end: + EC_GROUP_free(ecgroup); + return ret; +} + +static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) +{ + int ret, i, j, r0, r1, req_size, res_size = 0; + uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; + char buf[MAX_UDP_SIZE]; + RTCContext *rtc = s->priv_data; + + /* drive the SSL context by state change, arq or response messages. */ + r0 = SSL_do_handshake(dtls); + r1 = SSL_get_error(dtls, r0); + + /* handshake successfully done */ + if (r0 == 1) { + rtc->dtls_done_for_us = 1; + return 0; + } + + /* handshake failed with fatal error */ + if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { + av_log(s, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); + return AVERROR(EIO); + } + + /* fast retransmit the request util got response. */ + for (i = 0; i < UDP_FAST_RETRIES && !res_size; i++) { + req_size = BIO_get_mem_data(bio_out, (char**)&data); + openssl_state_trace(s, data, req_size, 0, r0, r1); + ret = ffurl_write(rtc->udp_uc, data, req_size); + BIO_reset(bio_out); + req_ct = req_size > 0 ? data[0] : 0; + req_ht = req_size > 13 ? 
data[13] : 0; + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", + loop, req_ct, req_ht, req_size); + return ret; + } + + for (j = 0; j < UDP_FAST_RETRIES && !res_size; j++) { + ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); + /* got response successfully. */ + if (ret > 0) { + res_size = ret; + rtc->dtls_should_reset_timer = 1; + break; + } + + /* fatal error or timeout. */ + if (ret != AVERROR(EAGAIN)) { + av_log(s, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", + loop, req_ct, req_ht); + return ret; + } + + /* DTLSv1_handle_timeout is called when a DTLS handshake timeout expires. If no timeout + * had expired, it returns 0. Otherwise, it retransmits the previous flight of handshake + * messages and returns 1. If too many timeouts had expired without progress or an error + * occurs, it returns -1. */ + r0 = DTLSv1_handle_timeout(dtls); + if (!r0) { + av_usleep(UDP_START_TIMEOUT * 1000); + continue; /* no timeout had expired. */ + } + if (r0 != 1) { + r1 = SSL_get_error(dtls, r0); + av_log(s, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", + loop, req_ct, req_ht, r0, r1); + return AVERROR(EIO); + } + + rtc->dtls_arq_packets++; + break; + } + } + + /* trace the response packet, feed to SSL. */ + BIO_reset(bio_in); + openssl_state_trace(s, buf, res_size, 1, r0, SSL_ERROR_NONE); + res_ct = res_size > 0 ? buf[0]: 0; + res_ht = res_size > 13 ? buf[13] : 0; + if ((r0 = BIO_write(bio_in, buf, res_size)) <= 0) { + av_log(s, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", + loop, res_ct, res_ht, res_size, r0); + return AVERROR(EIO); + } + + return 0; +} + /** * DTLS handshake with server, as a client in active mode, using openssl. 
* * @return 0 if OK, AVERROR_xxx on error */ -static int dtls_handshake_openssl(AVFormatContext *s) +static int openssl_dtls_handshake(AVFormatContext *s) { - int ret = 0; + int ret, loop; SSL_CTX *dtls_ctx = NULL; + SSL *dtls = NULL; + BIO *bio_in = NULL, *bio_out = NULL; + EC_KEY *eckey = NULL; + EVP_PKEY *dtls_pkey = NULL; + const char* dst = "EXTRACTOR-dtls_srtp"; + RTCContext *rtc = s->priv_data; + + /* create and initialize SSL context. */ + dtls_pkey = EVP_PKEY_new(); + eckey = EC_KEY_new(); -#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 - dtls_ctx = SSL_CTX_new(DTLSv1_method()); -#else dtls_ctx = SSL_CTX_new(DTLS_client_method()); -#endif - if (!dtls_ctx) { - av_log(s, AV_LOG_ERROR, "Failed to create DTLS context\n"); - ret = AVERROR(ENOMEM); + dtls = SSL_new(dtls_ctx); + + bio_in = BIO_new(BIO_s_mem()); + bio_out = BIO_new(BIO_s_mem()); + SSL_set_bio(dtls, bio_in, bio_out); + + ret = openssl_init_dtls(s, dtls, dtls_ctx, dtls_pkey, eckey); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to initialize SSL context\n"); + goto end; + } + + for (loop = 0; loop < 64 && !rtc->dtls_done_for_us; loop++) { + ret = openssl_drive_context(s, dtls, bio_in, bio_out, loop); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to drive SSL context\n"); + goto end; + } + } + if (!rtc->dtls_done_for_us) { + av_log(s, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); + ret = AVERROR(EIO); goto end; } + /* export SRTP master key after DTLS done */ + ret = SSL_export_keying_material(dtls, rtc->dtls_srtp_material, sizeof(rtc->dtls_srtp_material), + dst, strlen(dst), NULL, 0, 0); + if (!ret) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); + ret = AVERROR(EIO); + goto end; + } + + av_log(s, AV_LOG_INFO, "DTLS: Handshake done=%d, arq=%d, srtp=%luB\n", + rtc->dtls_done_for_us, rtc->dtls_arq_packets, sizeof(rtc->dtls_srtp_material)); + end: + SSL_free(dtls); SSL_CTX_free(dtls_ctx); + EC_KEY_free(eckey); + 
EVP_PKEY_free(dtls_pkey); return ret; } #endif @@ -694,7 +1033,7 @@ static int rtc_write_header(AVFormatContext *s) return ret; #if CONFIG_OPENSSL - if ((ret = dtls_handshake_openssl(s)) < 0) + if ((ret = openssl_dtls_handshake(s)) < 0) return ret; #else av_log(s, AV_LOG_ERROR, "DTLS is not supported, please enable openssl\n"); From 4ee4ca56079f9e790a0ff10ea033e7a4662e28da Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 3 May 2023 08:22:52 +0800 Subject: [PATCH 10/60] WHIP: Setup SRTP with key material generated by DTLS 1. As DTLS client, the send key is client master key plus salt. 2. As DTLS client, the recv key is server master key plus salt. 3. Use cipher suite AES_CM_128_HMAC_SHA1_80. --- libavformat/rtcenc.c | 78 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 76 insertions(+), 2 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 3bbab3609619d..e06923fe90959 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -47,6 +47,8 @@ #include "libavutil/crc.h" #include "network.h" #include "libavutil/time.h" +#include "libavutil/base64.h" +#include "srtp.h" /* The maximum size of an SDP, either offer or answer. */ #define MAX_SDP_SIZE 8192 @@ -110,9 +112,18 @@ typedef struct RTCContext { int dtls_done_for_us; /* the number of packets retransmitted for DTLS. */ int dtls_arq_packets; - /* the material to build SRTP master key, generated by DTLS. */ + /** + * the material to build SRTP master key, generated by DTLS, the layout is: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; + /* the SRTP send context, to encrypt outgoing packets. */ + struct SRTPContext srtp_send; + /* the SRTP receive context, to decrypt incoming packets. */ + struct SRTPContext srtp_recv; + /* the UDP transport is used for delivering ICE, DTLS and SRTP packets. 
*/ URLContext *udp_uc; } RTCContext; @@ -663,6 +674,9 @@ static int ice_handshake(AVFormatContext *s) } #if CONFIG_OPENSSL +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + static void openssl_on_info(const SSL *dtls, int where, int ret) { int w, r1; @@ -994,7 +1008,7 @@ static int openssl_dtls_handshake(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "DTLS: Handshake done=%d, arq=%d, srtp=%luB\n", + av_log(s, AV_LOG_INFO, "DTLS: Handshake done=%d, arq=%d, srtp_material=%luB\n", rtc->dtls_done_for_us, rtc->dtls_arq_packets, sizeof(rtc->dtls_srtp_material)); end: @@ -1004,8 +1018,63 @@ static int openssl_dtls_handshake(AVFormatContext *s) EVP_PKEY_free(dtls_pkey); return ret; } + +#pragma GCC diagnostic pop #endif +/** + * Setup the SRTP context by the exported keying material using DTLS. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int setup_srtp(AVFormatContext *s) +{ + int ret; + char recv_key[DTLS_SRTP_MASTER_KEY_LEN], send_key[DTLS_SRTP_MASTER_KEY_LEN]; + char buf[AV_BASE64_SIZE(DTLS_SRTP_MASTER_KEY_LEN)]; + const char* suite = "AES_CM_128_HMAC_SHA1_80"; + RTCContext *rtc = s->priv_data; + + /* as DTLS client, the send key is client master key plus salt. */ + memcpy(send_key, rtc->dtls_srtp_material, 16); + memcpy(send_key + 16, rtc->dtls_srtp_material + 32, 14); + + /* as DTLS client, the recv key is server master key plus salt. 
*/ + memcpy(recv_key, rtc->dtls_srtp_material + 16, 16); + memcpy(recv_key + 16, rtc->dtls_srtp_material + 46, 14); + + /* setup SRTP context for outgoing packets */ + if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { + av_log(s, AV_LOG_ERROR, "Failed to encode send key\n"); + ret = AVERROR(EIO); + goto end; + } + + ret = ff_srtp_set_crypto(&rtc->srtp_send, suite, buf); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to set crypto for send\n"); + goto end; + } + + /* setup SRTP context for incoming packets */ + if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { + av_log(s, AV_LOG_ERROR, "Failed to encode recv key\n"); + ret = AVERROR(EIO); + goto end; + } + + ret = ff_srtp_set_crypto(&rtc->srtp_recv, suite, buf); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to set crypto for recv\n"); + goto end; + } + + av_log(s, AV_LOG_INFO, "SRTP: Setup done, suite=%s, key=%luB\n", suite, sizeof(send_key)); + +end: + return ret; +} + static av_cold int rtc_init(AVFormatContext *s) { int ret; @@ -1040,6 +1109,9 @@ static int rtc_write_header(AVFormatContext *s) return AVERROR(ENOSYS); #endif + if ((ret = setup_srtp(s)) < 0) + return ret; + return ret; } @@ -1063,6 +1135,8 @@ static av_cold void rtc_deinit(AVFormatContext *s) av_freep(&rtc->ice_protocol); av_freep(&rtc->ice_host); ffurl_closep(&rtc->udp_uc); + ff_srtp_free(&rtc->srtp_send); + ff_srtp_free(&rtc->srtp_recv); } static const AVOption options[] = { From 67f99427dd495b9b9b18e71444f2fa89ed218049 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 3 May 2023 18:38:57 +0800 Subject: [PATCH 11/60] WHIP: Refine code. 
--- libavformat/rtcenc.c | 112 +++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 56 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index e06923fe90959..92475cfc899db 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -73,58 +73,58 @@ typedef struct RTCContext { AVClass *av_class; - /* input audio and video codec parameters */ + /* Input audio and video codec parameters */ AVCodecParameters *audio_par; AVCodecParameters *video_par; - /* the ICE username and pwd fragment generated by the muxer. */ + /* The ICE username and pwd fragment generated by the muxer. */ char ice_ufrag_local[9]; char ice_pwd_local[33]; - /* the SSRC of the audio and video stream, generated by the muxer. */ + /* The SSRC of the audio and video stream, generated by the muxer. */ uint32_t audio_ssrc; uint32_t video_ssrc; - /* the PT(Payload Type) of stream, generated by the muxer. */ + /* The PT(Payload Type) of stream, generated by the muxer. */ uint8_t audio_payload_type; uint8_t video_payload_type; /** - * the SDP offer generated by the muxer according to the codec parameters, + * The SDP offer generated by the muxer according to the codec parameters, * DTLS and ICE information. * */ char *sdp_offer; - /* the ICE username and pwd from remote server. */ + /* The ICE username and pwd from remote server. */ char *ice_ufrag_remote; char *ice_pwd_remote; /** - * the ICE candidate protocol, priority, host and port. Note that only + * The ICE candidate protocol, priority, host and port. Note that only * support one candidate for now. We will choose the first udp candidate. * We will support multiple candidates in the future. */ char *ice_protocol; char *ice_host; int ice_port; - /* the SDP answer received from the WebRTC server. */ + /* The SDP answer received from the WebRTC server. */ char *sdp_answer; - /* whether the timer should be reset. */ + /* Whether the timer should be reset. 
*/ int dtls_should_reset_timer; - /* whether the DTLS is done at least for us. */ + /* Whether the DTLS is done at least for us. */ int dtls_done_for_us; - /* the number of packets retransmitted for DTLS. */ + /* The number of packets retransmitted for DTLS. */ int dtls_arq_packets; /** - * the material to build SRTP master key, generated by DTLS, the layout is: + * The material to build SRTP master key, generated by DTLS, the layout is: * 16B 16B 14B 14B * client_key | server_key | client_salt | server_salt */ uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; - /* the SRTP send context, to encrypt outgoing packets. */ + /* The SRTP send context, to encrypt outgoing packets. */ struct SRTPContext srtp_send; - /* the SRTP receive context, to decrypt incoming packets. */ + /* The SRTP receive context, to decrypt incoming packets. */ struct SRTPContext srtp_recv; - /* the UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; } RTCContext; @@ -388,7 +388,7 @@ static int exchange_sdp(AVFormatContext *s) int ret; char buf[MAX_URL_SIZE]; RTCContext *rtc = s->priv_data; - /* the URL context is an HTTP transport layer for the WHIP protocol. */ + /* The URL context is an HTTP transport layer for the WHIP protocol. */ URLContext *whip_uc = NULL; char *tmp = av_mallocz(MAX_SDP_SIZE); @@ -419,7 +419,7 @@ static int exchange_sdp(AVFormatContext *s) while (1) { ret = ffurl_read(whip_uc, buf, sizeof(buf)); if (ret == AVERROR_EOF) { - /* reset the error because we read all response as answer util EOF. */ + /* Reset the error because we read all response as answer util EOF. 
*/ ret = 0; break; } @@ -537,7 +537,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, goto end; } - /* write 20 bytes header */ + /* Write 20 bytes header */ avio_wb16(pb, 0x0001); /* STUN binding request */ avio_wb16(pb, 0); /* length */ avio_wb32(pb, 0x2112A442); /* magic cookie */ @@ -545,7 +545,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, avio_wb32(pb, av_get_random_seed()); /* transaction ID */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - /* the username is the concatenation of the two ICE ufrag */ + /* The username is the concatenation of the two ICE ufrag */ ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); if (ret <= 0 || ret >= sizeof(username)) { av_log(s, AV_LOG_ERROR, "Failed to build username %s:%s, max=%lu, ret=%d\n", @@ -554,13 +554,13 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, goto end; } - /* write the username attribute */ + /* Write the username attribute */ avio_wb16(pb, 0x0006); /* attribute type username */ avio_wb16(pb, ret); /* size of username */ avio_write(pb, username, ret); /* bytes of username */ ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ - /* build and update message integrity */ + /* Build and update message integrity */ avio_wb16(pb, 0x0008); /* attribute type message integrity */ avio_wb16(pb, 20); /* size of message integrity */ ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ @@ -571,14 +571,14 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, av_hmac_update(hmac, buf, size - 24); av_hmac_final(hmac, buf + size - 20, 20); - /* write the fingerprint attribute */ + /* Write the fingerprint attribute */ avio_wb16(pb, 0x8028); /* attribute type fingerprint */ avio_wb16(pb, 4); /* size of fingerprint */ ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ size = 
avio_tell(pb); buf[2] = (size - 20) >> 8; buf[3] = (size - 20) & 0xFF; - /* refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; avio_skip(pb, -4); avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ @@ -620,18 +620,18 @@ static int ice_handshake(AVFormatContext *s) goto end; } - /* make the socket non-blocking, set to READ and WRITE mode after connected */ + /* Make the socket non-blocking, set to READ and WRITE mode after connected */ ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; - /* build the STUN binding request. */ + /* Build the STUN binding request. */ ret = ice_create_request(s, buf, sizeof(buf), &size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); goto end; } - /* fast retransmit the STUN binding request. */ + /* Fast retransmit the STUN binding request. */ do { ret = ffurl_write(rtc->udp_uc, buf, size); if (ret < 0) { @@ -639,14 +639,14 @@ static int ice_handshake(AVFormatContext *s) goto end; } - /* if max retries is 6 and start timeout is 21ms, the total timeout + /* If max retries is 6 and start timeout is 21ms, the total timeout * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. */ if (fast_retries) { av_usleep(timeout * 1000); timeout *= 2; } - /* read the STUN binding response. */ + /* Read the STUN binding response. */ ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); if (ret < 0) { if (ret == AVERROR(EAGAIN) && fast_retries) { @@ -730,10 +730,10 @@ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) AVFormatContext *s = (AVFormatContext*)SSL_get_ex_data(dtls, 0); RTCContext *rtc = s->priv_data; - /* double the timeout, note that it may be 0. */ + /* Double the timeout, note that it may be 0. 
*/ unsigned int timeout_us = previous_us * 2; - /* if previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. + /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. * when got ServerHello, we should reset the timer. */ if (!previous_us || rtc->dtls_should_reset_timer) { timeout_us = 50 * 1000; /* in us */ @@ -755,7 +755,7 @@ static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, i uint8_t handshake_type = 0; RTCContext *rtc = s->priv_data; - /* change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ if (length >= 1) { content_type = (uint8_t)data[0]; } @@ -784,7 +784,7 @@ static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, i */ static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) { - /* always OK, we don't check the certificate of client, because we allow client self-sign certificate. */ + /* Always OK, we don't check the certificate of client, because we allow client self-sign certificate. */ return 1; } @@ -797,7 +797,7 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl { int ret; - /* should use the curves in ClientHello.supported_groups, for example: + /* Should use the curves in ClientHello.supported_groups, for example: * Supported Group: x25519 (0x001d) * Supported Group: secp256r1 (0x0017) * Supported Group: secp384r1 (0x0018) @@ -821,23 +821,23 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl goto end; } - /* for ECDSA, we could set the curves list. */ + /* For ECDSA, we could set the curves list. 
*/ SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256"); - /* we use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ + /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); ret = AVERROR(EINVAL); goto end; } - /* server will send Certificate Request. */ + /* Server will send Certificate Request. */ SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_verify_callback); - /* the depth count is "level 0:peer certificate", "level 1: CA certificate", + /* The depth count is "level 0:peer certificate", "level 1: CA certificate", * "level 2: higher level CA certificate", and so on. */ SSL_CTX_set_verify_depth(dtls_ctx, 4); - /* whether we should read as many input bytes as possible (for non-blocking reads) or not. */ + /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ SSL_CTX_set_read_ahead(dtls_ctx, 1); - /* only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ + /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ ret = SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80"); if (ret) { av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed, ret=%d\n", ret); @@ -845,18 +845,18 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl goto end; } - /* setup the callback for logging. */ + /* Setup the callback for logging. */ SSL_set_ex_data(dtls, 0, s); SSL_set_info_callback(dtls, openssl_on_info); - /* set dtls fragment size */ + /* Set dtls fragment size */ SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); SSL_set_mtu(dtls, DTLS_FRAGMENT_MAX_SIZE); - /* set the callback for ARQ timer. */ + /* Set the callback for ARQ timer. */ DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); - /* setup DTLS as active, which is client role. 
*/ + /* Setup DTLS as active, which is client role. */ SSL_set_connect_state(dtls); SSL_set_max_send_fragment(dtls, DTLS_FRAGMENT_MAX_SIZE); @@ -872,23 +872,23 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO char buf[MAX_UDP_SIZE]; RTCContext *rtc = s->priv_data; - /* drive the SSL context by state change, arq or response messages. */ + /* Drive the SSL context by state change, arq or response messages. */ r0 = SSL_do_handshake(dtls); r1 = SSL_get_error(dtls, r0); - /* handshake successfully done */ + /* Handshake successfully done */ if (r0 == 1) { rtc->dtls_done_for_us = 1; return 0; } - /* handshake failed with fatal error */ + /* Handshake failed with fatal error */ if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { av_log(s, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); return AVERROR(EIO); } - /* fast retransmit the request util got response. */ + /* Fast retransmit the request util got response. */ for (i = 0; i < UDP_FAST_RETRIES && !res_size; i++) { req_size = BIO_get_mem_data(bio_out, (char**)&data); openssl_state_trace(s, data, req_size, 0, r0, r1); @@ -904,14 +904,14 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO for (j = 0; j < UDP_FAST_RETRIES && !res_size; j++) { ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); - /* got response successfully. */ + /* Got response successfully. */ if (ret > 0) { res_size = ret; rtc->dtls_should_reset_timer = 1; break; } - /* fatal error or timeout. */ + /* Fatal error or timeout. */ if (ret != AVERROR(EAGAIN)) { av_log(s, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", loop, req_ct, req_ht); @@ -939,7 +939,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO } } - /* trace the response packet, feed to SSL. */ + /* Trace the response packet, feed to SSL. 
*/ BIO_reset(bio_in); openssl_state_trace(s, buf, res_size, 1, r0, SSL_ERROR_NONE); res_ct = res_size > 0 ? buf[0]: 0; @@ -969,7 +969,7 @@ static int openssl_dtls_handshake(AVFormatContext *s) const char* dst = "EXTRACTOR-dtls_srtp"; RTCContext *rtc = s->priv_data; - /* create and initialize SSL context. */ + /* Create and initialize SSL context. */ dtls_pkey = EVP_PKEY_new(); eckey = EC_KEY_new(); @@ -999,7 +999,7 @@ static int openssl_dtls_handshake(AVFormatContext *s) goto end; } - /* export SRTP master key after DTLS done */ + /* Export SRTP master key after DTLS done */ ret = SSL_export_keying_material(dtls, rtc->dtls_srtp_material, sizeof(rtc->dtls_srtp_material), dst, strlen(dst), NULL, 0, 0); if (!ret) { @@ -1035,15 +1035,15 @@ static int setup_srtp(AVFormatContext *s) const char* suite = "AES_CM_128_HMAC_SHA1_80"; RTCContext *rtc = s->priv_data; - /* as DTLS client, the send key is client master key plus salt. */ + /* As DTLS client, the send key is client master key plus salt. */ memcpy(send_key, rtc->dtls_srtp_material, 16); memcpy(send_key + 16, rtc->dtls_srtp_material + 32, 14); - /* as DTLS client, the recv key is server master key plus salt. */ + /* As DTLS client, the recv key is server master key plus salt. 
*/ memcpy(recv_key, rtc->dtls_srtp_material + 16, 16); memcpy(recv_key + 16, rtc->dtls_srtp_material + 46, 14); - /* setup SRTP context for outgoing packets */ + /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { av_log(s, AV_LOG_ERROR, "Failed to encode send key\n"); ret = AVERROR(EIO); @@ -1056,7 +1056,7 @@ static int setup_srtp(AVFormatContext *s) goto end; } - /* setup SRTP context for incoming packets */ + /* Setup SRTP context for incoming packets */ if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { av_log(s, AV_LOG_ERROR, "Failed to encode recv key\n"); ret = AVERROR(EIO); From ff5ef9665d729c858f2b91a4fc4f09d0bbeb4e3a Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 4 May 2023 07:39:03 +0800 Subject: [PATCH 12/60] WHIP: Support mux and send audio by RTP 1. Use RTP muxer to mux audio packet. 2. Encrypt RTP packet to SRTP. 3. Send RTP over UDP transport. --- libavformat/rtcenc.c | 135 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 134 insertions(+), 1 deletion(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 92475cfc899db..06b8830c88883 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -54,6 +54,17 @@ #define MAX_SDP_SIZE 8192 /* The maximum size of a UDP packet, should be smaller than the MTU. */ #define MAX_UDP_SIZE 1500 +/** + * The RTP payload max size, reserved some paddings for SRTP as such: + * kRtpPacketSize = kRtpMaxPayloadSize + paddings + * For example, if kRtpPacketSize is 1500, recommend to set kRtpMaxPayloadSize to 1400, + * which reserves 100 bytes for SRTP or paddings. + * otherwise, the kRtpPacketSize must less than MTU, in webrtc source code, + * the rtp max size is assigned by kVideoMtu = 1200. + * so we set kRtpMaxPayloadSize = 1200. + * see @doc https://groups.google.com/g/discuss-webrtc/c/gH5ysR3SoZI + */ +#define MAX_UDP_PAYLOAD_SIZE (MAX_UDP_SIZE - 300) /* The maximum number of retries for UDP transmission. 
*/ #define UDP_FAST_RETRIES 6 /* The startup timeout for UDP transmission. */ @@ -1075,6 +1086,107 @@ static int setup_srtp(AVFormatContext *s) return ret; } +static int write_packet(void *opaque, uint8_t *buf, int buf_size) +{ + AVFormatContext *s = opaque; + RTCContext *rtc = s->priv_data; + + char cipher[MAX_UDP_SIZE]; + int len = ff_srtp_encrypt(&rtc->srtp_send, buf, buf_size, cipher, sizeof(cipher)); + if (len <= 0) { + av_log(s, AV_LOG_ERROR, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, len); + return AVERROR(EIO); + } + return ffurl_write(rtc->udp_uc, cipher, len); +} + +/** + * Create a RTP muxer to build RTP packets from the encoded frames. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int create_rtp_muxer(AVFormatContext *s) +{ + int ret, i, is_video; + AVFormatContext *rtp_ctx = NULL; + AVDictionary *opts = NULL; + uint8_t *buffer = NULL; + char buf[64]; + RTCContext *rtc = s->priv_data; + + const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); + if (!rtp_format) { + av_log(s, AV_LOG_ERROR, "Failed to guess rtp muxer\n"); + ret = AVERROR(ENOSYS); + goto end; + } + + for (i = 0; i < s->nb_streams; i++) { + rtp_ctx = avformat_alloc_context(); + if (!rtp_ctx) { + av_log(s, AV_LOG_ERROR, "Failed to allocate rtp muxer\n"); + ret = AVERROR(ENOMEM); + goto end; + } + + rtp_ctx->oformat = rtp_format; + if (!avformat_new_stream(rtp_ctx, NULL)) { + av_log(s, AV_LOG_ERROR, "Failed to create rtp stream\n"); + ret = AVERROR(ENOMEM); + goto end; + } + /* Pass the interrupt callback on */ + rtp_ctx->interrupt_callback = s->interrupt_callback; + /* Copy the max delay setting; the rtp muxer reads this. */ + rtp_ctx->max_delay = s->max_delay; + /* Copy other stream parameters. */ + rtp_ctx->streams[0]->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio; + rtp_ctx->flags |= s->flags & AVFMT_FLAG_BITEXACT; + rtp_ctx->strict_std_compliance = s->strict_std_compliance; + + /* Set the synchronized start time. 
*/ + rtp_ctx->start_time_realtime = s->start_time_realtime; + + avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); + rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; + + buffer = av_malloc(MAX_UDP_SIZE); + rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_SIZE, AVIO_FLAG_WRITE, s, NULL, write_packet, NULL); + if (!rtp_ctx->pb) { + av_log(s, AV_LOG_ERROR, "Failed to allocate rtp pb\n"); + ret = AVERROR(ENOMEM); + goto end; + } + rtp_ctx->pb->max_packet_size = MAX_UDP_PAYLOAD_SIZE; + rtp_ctx->pb->av_class = &ff_avio_class; + + is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; + snprintf(buf, sizeof(buf), "%d", is_video? rtc->video_payload_type : rtc->audio_payload_type); + av_dict_set(&opts, "payload_type", buf, 0); + snprintf(buf, sizeof(buf), "%d", is_video? rtc->video_ssrc : rtc->audio_ssrc); + av_dict_set(&opts, "ssrc", buf, 0); + av_dict_set(&opts, "rtpflags", "4", 0); /* FF_RTP_FLAG_SKIP_RTCP */ + + ret = avformat_write_header(rtp_ctx, &opts); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to write rtp header\n"); + goto end; + } + + ff_format_set_url(rtp_ctx, av_strdup(s->url)); + s->streams[i]->time_base = rtp_ctx->streams[0]->time_base; + s->streams[i]->priv_data = rtp_ctx; + rtp_ctx = NULL; + } + +end: + if (rtp_ctx) + avio_context_free(&rtp_ctx->pb); + avformat_free_context(rtp_ctx); + av_dict_free(&opts); + return ret; +} + static av_cold int rtc_init(AVFormatContext *s) { int ret; @@ -1112,12 +1224,20 @@ static int rtc_write_header(AVFormatContext *s) if ((ret = setup_srtp(s)) < 0) return ret; + if ((ret = create_rtp_muxer(s)) < 0) + return ret; + return ret; } static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) { - return 0; + AVStream *st = s->streams[pkt->stream_index]; + AVFormatContext *rtp_ctx = st->priv_data; + if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO) { + return 0; + } + return ff_write_chained(rtp_ctx, 0, pkt, s, 0); } static int rtc_write_trailer(AVFormatContext 
*s) @@ -1127,7 +1247,20 @@ static int rtc_write_trailer(AVFormatContext *s) static av_cold void rtc_deinit(AVFormatContext *s) { + int i; RTCContext *rtc = s->priv_data; + + for (i = 0; i < s->nb_streams; i++) { + AVFormatContext* rtp_ctx = s->streams[i]->priv_data; + if (!rtp_ctx) + continue; + + av_write_trailer(rtp_ctx); + avio_context_free(&rtp_ctx->pb); + avformat_free_context(rtp_ctx); + s->streams[i]->priv_data = NULL; + } + av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); av_freep(&rtc->ice_ufrag_remote); From 98fca7422b9bd98b186205f636ce467d6fe7b6d0 Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 4 May 2023 16:49:12 +0800 Subject: [PATCH 13/60] WHIP: Support mux and send video by RTP 1. Parse AVC SPS/PPS from extradata. 2. Use dedicated SRTP context for each stream. 3. For STAP-A, set marker to zero, set NRI to the first NALU's. 4. Insert SPS/PPS before each IDR frame. 5. Set AVFMT_GLOBALHEADER flag. --- libavformat/rtcenc.c | 249 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 226 insertions(+), 23 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 06b8830c88883..70231dff931a0 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -80,6 +80,8 @@ ":ECDHE-RSA-AES256-SHA" /* The SRTP key size, defined by SRTP_MASTER_KEY_LEN */ #define DTLS_SRTP_MASTER_KEY_LEN 30 +/* The NALU type for STAP-A */ +#define NALU_TYPE_STAP_A 24 typedef struct RTCContext { AVClass *av_class; @@ -88,6 +90,14 @@ typedef struct RTCContext { AVCodecParameters *audio_par; AVCodecParameters *video_par; + /* The SPS/PPS of AVC video */ + uint8_t *avc_sps; + int avc_sps_size; + uint8_t *avc_pps; + int avc_pps_size; + /* The size of NALU in ISOM format. */ + int avc_nal_length_size; + /* The ICE username and pwd fragment generated by the muxer. 
*/ char ice_ufrag_local[9]; char ice_pwd_local[33]; @@ -131,7 +141,8 @@ typedef struct RTCContext { uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; /* The SRTP send context, to encrypt outgoing packets. */ - struct SRTPContext srtp_send; + struct SRTPContext srtp_audio_send; + struct SRTPContext srtp_video_send; /* The SRTP receive context, to decrypt incoming packets. */ struct SRTPContext srtp_recv; @@ -139,15 +150,100 @@ typedef struct RTCContext { URLContext *udp_uc; } RTCContext; +/* Parse SPS/PPS from ISOM AVCC, see ff_isom_write_avcc */ +static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) +{ + int ret = 0; + uint8_t version, nal_length_size, nb_sps, nb_pps; + AVIOContext *pb; + RTCContext *rtc = s->priv_data; + + pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); + if (!pb) { + av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext, size=%d\n", extradata_size); + ret = AVERROR(ENOMEM); + goto end; + } + + version = avio_r8(pb); /* version */ + avio_r8(pb); /* avc profile */ + avio_r8(pb); /* avc profile compat */ + avio_r8(pb); /* avc level */ + nal_length_size = avio_r8(pb); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */ + nb_sps = avio_r8(pb); /* 3 bits reserved (111) + 5 bits number of sps */ + + if (version != 1) { + av_log(s, AV_LOG_ERROR, "Invalid version=%d\n", version); + ret = AVERROR_INVALIDDATA; + goto end; + } + + rtc->avc_nal_length_size = (nal_length_size & 0x03) + 1; + if (rtc->avc_nal_length_size == 3) { + av_log(s, AV_LOG_ERROR, "Invalid nal length size=%d\n", rtc->avc_nal_length_size); + ret = AVERROR_INVALIDDATA; + goto end; + } + + /* Read SPS */ + nb_sps &= 0x1f; + if (nb_sps != 1 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid number of sps=%d, eof=%d\n", nb_sps, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } + + rtc->avc_sps_size = avio_rb16(pb); /* sps size */ + if (rtc->avc_sps_size <= 0 || avio_feof(pb)) { + av_log(s, 
AV_LOG_ERROR, "Invalid sps size=%d, eof=%d\n", rtc->avc_sps_size, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } + + rtc->avc_sps = av_malloc(rtc->avc_sps_size); + ret = avio_read(pb, rtc->avc_sps, rtc->avc_sps_size); /* sps */ + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read sps, size=%d\n", rtc->avc_sps_size); + goto end; + } + + /* Read PPS */ + nb_pps = avio_r8(pb); /* number of pps */ + if (nb_pps != 1 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid number of pps=%d, eof=%d\n", nb_pps, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } + + rtc->avc_pps_size = avio_rb16(pb); /* pps size */ + if (rtc->avc_pps_size <= 0 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid pps size=%d, eof=%d\n", rtc->avc_pps_size, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } + + rtc->avc_pps = av_malloc(rtc->avc_pps_size); + ret = avio_read(pb, rtc->avc_pps, rtc->avc_pps_size); /* pps */ + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read pps, size=%d\n", rtc->avc_pps_size); + goto end; + } + +end: + avio_context_free(&pb); + return ret; +} + /** + * Parse video SPS/PPS from extradata of codecpar, then check the codec. * Only support video(h264) and audio(opus) for now. Note that only baseline * and constrained baseline of h264 are supported. 
* * @return 0 if OK, AVERROR_xxx on error */ -static int check_codec(AVFormatContext *s) +static int parse_codec(AVFormatContext *s) { - int i; + int i, ret; RTCContext *rtc = s->priv_data; for (i = 0; i < s->nb_streams; i++) { @@ -171,6 +267,12 @@ static int check_codec(AVFormatContext *s) par->profile, i); return AVERROR_PATCHWELCOME; } + + ret = isom_read_avcc(s, par->extradata, par->extradata_size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to parse SPS/PPS from extradata\n"); + return ret; + } break; case AVMEDIA_TYPE_AUDIO: if (rtc->audio_par) { @@ -471,7 +573,7 @@ static int parse_answer(AVFormatContext *s) int i; RTCContext *rtc = s->priv_data; - pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), AVIO_FLAG_READ, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); if (!pb) { av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s\n", rtc->sdp_answer); ret = AVERROR(ENOMEM); @@ -522,11 +624,11 @@ static int parse_answer(AVFormatContext *s) /** * Create and marshal ICE binding request packet. The size of the response is - * returned in size_of_response. + * returned in request_size. 
* * @return 0 if OK, AVERROR_xxx on error */ -static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, int *size_of_response) +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) { int ret, size, crc32; char username[128]; @@ -534,7 +636,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, AVHMAC *hmac = NULL; RTCContext *rtc = s->priv_data; - pb = avio_alloc_context(buf, size_of_buf, AVIO_FLAG_WRITE, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); if (!pb) { av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE\n"); ret = AVERROR(ENOMEM); @@ -594,7 +696,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int size_of_buf, avio_skip(pb, -4); avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ - *size_of_response = size; + *request_size = size; end: avio_context_free(&pb); @@ -1061,9 +1163,15 @@ static int setup_srtp(AVFormatContext *s) goto end; } - ret = ff_srtp_set_crypto(&rtc->srtp_send, suite, buf); + ret = ff_srtp_set_crypto(&rtc->srtp_audio_send, suite, buf); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to set crypto for send\n"); + av_log(s, AV_LOG_ERROR, "Failed to set crypto for audio send\n"); + goto end; + } + + ret = ff_srtp_set_crypto(&rtc->srtp_video_send, suite, buf); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to set crypto for video send\n"); goto end; } @@ -1088,16 +1196,63 @@ static int setup_srtp(AVFormatContext *s) static int write_packet(void *opaque, uint8_t *buf, int buf_size) { + int ret, cipher_size, is_rtcp; + uint8_t payload_type, nalu_header; + char cipher[MAX_UDP_SIZE]; AVFormatContext *s = opaque; RTCContext *rtc = s->priv_data; + struct SRTPContext *srtp; - char cipher[MAX_UDP_SIZE]; - int len = ff_srtp_encrypt(&rtc->srtp_send, buf, buf_size, cipher, sizeof(cipher)); - if (len <= 0) { - av_log(s, AV_LOG_ERROR, "Failed to encrypt packet=%dB, 
cipher=%dB\n", buf_size, len); - return AVERROR(EIO); + /* Ignore if not RTP or RTCP packet. */ + if (buf_size < 12 || (buf[0] & 0xC0) != 0x80) + return 0; + + /* RTCP is not supported yet. */ + is_rtcp = buf[1] >= 192 && buf[1] <= 223; + if (is_rtcp) + return 0; + + /* Only support audio and video. */ + payload_type = buf[1] & 0x7f; + if (payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { + return 0; } - return ffurl_write(rtc->udp_uc, cipher, len); + + /** + * For video, the STAP-A with SPS/PPS should: + * 1. The marker bit should be 0, never be 1. + * 2. The NRI should equal to the first NALU's. + */ + if (payload_type == rtc->video_payload_type && buf_size > 12) { + nalu_header = buf[12] & 0x1f; + if (nalu_header == NALU_TYPE_STAP_A) { + /* Reset the marker bit to 0. */ + if (buf[1] & 0x80) { + buf[1] &= 0x7f; + } + + /* Reset the NRI to the first NALU's NRI. */ + if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) { + buf[12] = (buf[12]&0x80) | (buf[15]&0x60) | (buf[12]&0x1f); + } + } + } + + /* Encrypt by SRTP and send out. */ + srtp = payload_type == rtc->video_payload_type ? 
&rtc->srtp_video_send : &rtc->srtp_audio_send; + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); + if (cipher_size <= 0 || cipher_size < buf_size) { + av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + return 0; + } + + ret = ffurl_write(rtc->udp_uc, cipher, cipher_size); + if (ret < 0) { + av_log(s, AV_LOG_WARNING, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + return ret; + } + + return ret; } /** @@ -1151,7 +1306,7 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; buffer = av_malloc(MAX_UDP_SIZE); - rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_SIZE, AVIO_FLAG_WRITE, s, NULL, write_packet, NULL); + rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_SIZE, 1, s, NULL, write_packet, NULL); if (!rtp_ctx->pb) { av_log(s, AV_LOG_ERROR, "Failed to allocate rtp pb\n"); ret = AVERROR(ENOMEM); @@ -1191,7 +1346,7 @@ static av_cold int rtc_init(AVFormatContext *s) { int ret; - if ((ret = check_codec(s)) < 0) + if ((ret = parse_codec(s)) < 0) return ret; if ((ret = generate_sdp_offer(s)) < 0) @@ -1232,12 +1387,57 @@ static int rtc_write_header(AVFormatContext *s) static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) { + int ret, size, is_idr, i; + AVPacket* extra = NULL; + uint8_t *p; + RTCContext *rtc = s->priv_data; AVStream *st = s->streams[pkt->stream_index]; AVFormatContext *rtp_ctx = st->priv_data; - if (st->codecpar->codec_type != AVMEDIA_TYPE_AUDIO) { - return 0; + + /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + + /* Insert a packet with SPS/PPS before each IDR frame. 
*/ + is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; + if (is_idr && rtc->avc_sps && rtc->avc_pps) { + extra = av_packet_alloc(); + size = rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; + ret = av_new_packet(extra, size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to allocate extra packet\n"); + return ret; + } + + /* Encode SPS in ISOM format. */ + p = extra->data; + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_sps, rtc->avc_sps_size); + p += rtc->avc_sps_size; + + /* Encode PPS in ISOM format. */ + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_pps, rtc->avc_pps_size); + p += rtc->avc_pps_size; + + /* Setup packet and feed it to chain. */ + extra->pts = pkt->pts; + extra->dts = pkt->dts; + extra->stream_index = pkt->stream_index; + extra->time_base = pkt->time_base; + + ret = ff_write_chained(rtp_ctx, 0, extra, s, 0); + if (ret < 0) + goto end; } - return ff_write_chained(rtp_ctx, 0, pkt, s, 0); + + ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); + +end: + av_packet_free(&extra); + return ret; } static int rtc_write_trailer(AVFormatContext *s) @@ -1261,6 +1461,8 @@ static av_cold void rtc_deinit(AVFormatContext *s) s->streams[i]->priv_data = NULL; } + av_freep(&rtc->avc_sps); + av_freep(&rtc->avc_pps); av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); av_freep(&rtc->ice_ufrag_remote); @@ -1268,7 +1470,8 @@ static av_cold void rtc_deinit(AVFormatContext *s) av_freep(&rtc->ice_protocol); av_freep(&rtc->ice_host); ffurl_closep(&rtc->udp_uc); - ff_srtp_free(&rtc->srtp_send); + ff_srtp_free(&rtc->srtp_audio_send); + ff_srtp_free(&rtc->srtp_video_send); ff_srtp_free(&rtc->srtp_recv); } @@ -1288,7 +1491,7 @@ const FFOutputFormat ff_rtc_muxer = { .p.long_name = NULL_IF_CONFIG_SMALL("WebRTC WHIP 
muxer"), .p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, - .p.flags = AVFMT_NOFILE, + .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, .p.priv_class = &rtc_muxer_class, .priv_data_size = sizeof(RTCContext), .init = rtc_init, From 9ecbef009d5ab3d590e74230a4a59538a5a5340d Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 4 May 2023 17:32:57 +0800 Subject: [PATCH 14/60] WHIP: Support audio or video only 1. Support audio or video only stream. 2. Fix audio corrupt issue, rebuild the dts. --- libavformat/rtcenc.c | 126 +++++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 51 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 70231dff931a0..7216c23145701 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -146,6 +146,9 @@ typedef struct RTCContext { /* The SRTP receive context, to decrypt incoming packets. */ struct SRTPContext srtp_recv; + /* The time jitter base for audio OPUS stream. */ + int64_t audio_jitter_base; + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; } RTCContext; @@ -383,8 +386,6 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_payload_type = 111; rtc->video_payload_type = 106; - profile_iop = rtc->video_par->profile & FF_PROFILE_H264_CONSTRAINED ? 
0xe0 : 0x00; - ret = av_strlcatf(tmp, MAX_SDP_SIZE, "v=0\r\n" "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" @@ -392,61 +393,78 @@ static int generate_sdp_offer(AVFormatContext *s) "t=0 0\r\n" "a=group:BUNDLE 0 1\r\n" "a=extmap-allow-mixed\r\n" - "a=msid-semantic: WMS\r\n" - "" - "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" - "a=setup:active\r\n" - "a=mid:0\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg audio\r\n" - "a=rtcp-mux\r\n" - "a=rtpmap:%u opus/%d/%d\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg audio\r\n" - "" - "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" - "a=setup:active\r\n" - "a=mid:1\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg video\r\n" - "a=rtcp-mux\r\n" - "a=rtcp-rsize\r\n" - "a=rtpmap:%u H264/90000\r\n" - "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg video\r\n", - rtc->audio_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->audio_payload_type, - rtc->audio_par->sample_rate, - rtc->audio_par->ch_layout.nb_channels, - rtc->audio_ssrc, - rtc->audio_ssrc, - rtc->video_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->video_payload_type, - rtc->video_payload_type, - rtc->video_par->profile & (~FF_PROFILE_H264_CONSTRAINED), - profile_iop, - rtc->video_par->level, - rtc->video_ssrc, - rtc->video_ssrc); + "a=msid-semantic: WMS\r\n"); if (ret >= MAX_SDP_SIZE) { av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); ret = AVERROR(EIO); goto end; } + if (rtc->audio_par) { + ret = av_strlcatf(tmp, MAX_SDP_SIZE, "" + 
"m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=setup:active\r\n" + "a=mid:0\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg audio\r\n" + "a=rtcp-mux\r\n" + "a=rtpmap:%u opus/%d/%d\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg audio\r\n", + rtc->audio_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->audio_payload_type, + rtc->audio_par->sample_rate, + rtc->audio_par->ch_layout.nb_channels, + rtc->audio_ssrc, + rtc->audio_ssrc); + if (ret >= MAX_SDP_SIZE) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); + ret = AVERROR(EIO); + goto end; + } + } + + if (rtc->video_par) { + profile_iop = rtc->video_par->profile & FF_PROFILE_H264_CONSTRAINED ? 0xe0 : 0x00; + ret = av_strlcatf(tmp, MAX_SDP_SIZE, "" + "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=setup:active\r\n" + "a=mid:1\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg video\r\n" + "a=rtcp-mux\r\n" + "a=rtcp-rsize\r\n" + "a=rtpmap:%u H264/90000\r\n" + "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg video\r\n", + rtc->video_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->video_payload_type, + rtc->video_payload_type, + rtc->video_par->profile & (~FF_PROFILE_H264_CONSTRAINED), + profile_iop, + rtc->video_par->level, + rtc->video_ssrc, + rtc->video_ssrc); + if (ret >= MAX_SDP_SIZE) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); + ret = AVERROR(EIO); + goto end; + } + } + rtc->sdp_offer = av_strdup(tmp); av_log(s, AV_LOG_VERBOSE, "Generated 
offer: %s\n", rtc->sdp_offer); @@ -1396,6 +1414,12 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + /* For audio OPUS stream, correct the timestamp. */ + if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { + pkt->dts = pkt->pts = rtc->audio_jitter_base; + rtc->audio_jitter_base += 960; + } + /* Insert a packet with SPS/PPS before each IDR frame. */ is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; if (is_idr && rtc->avc_sps && rtc->avc_pps) { From 40c385a30415763b4e2b670e47427ca1b4187228 Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 4 May 2023 19:41:57 +0800 Subject: [PATCH 15/60] WHIP: Support video annexb format. --- libavformat/rtcenc.c | 62 ++++++++++++++++++++++++++++++-------------- 1 file changed, 42 insertions(+), 20 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 7216c23145701..1d9a1444b6c8d 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -49,6 +49,7 @@ #include "libavutil/time.h" #include "libavutil/base64.h" #include "srtp.h" +#include "avc.h" /* The maximum size of an SDP, either offer or answer. */ #define MAX_SDP_SIZE 8192 @@ -161,6 +162,19 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata AVIOContext *pb; RTCContext *rtc = s->priv_data; + if (!extradata || !extradata_size) + return 0; + + /* Not H.264 IOSM format, may be annexb etc. */ + if (extradata_size < 4 || extradata[0] != 1) { + if (!ff_avc_find_startcode(extradata, extradata + extradata_size)) { + av_log(s, AV_LOG_ERROR, "Format must be ISOM or annexb\n"); + return AVERROR_INVALIDDATA; + } + return 0; + } + + /* Parse the SPS/PPS in ISOM format in extradata. 
*/ pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); if (!pb) { av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext, size=%d\n", extradata_size); @@ -265,7 +279,7 @@ static int parse_codec(AVFormatContext *s) desc ? desc->name : "unknown"); return AVERROR_PATCHWELCOME; } - if ((par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { + if (par->profile > 0 && (par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", par->profile, i); return AVERROR_PATCHWELCOME; @@ -360,7 +374,7 @@ static int parse_codec(AVFormatContext *s) */ static int generate_sdp_offer(AVFormatContext *s) { - int ret, profile_iop; + int ret, profile, level, profile_iop; RTCContext *rtc = s->priv_data; char *tmp = av_mallocz(MAX_SDP_SIZE); @@ -431,7 +445,9 @@ static int generate_sdp_offer(AVFormatContext *s) } if (rtc->video_par) { - profile_iop = rtc->video_par->profile & FF_PROFILE_H264_CONSTRAINED ? 0xe0 : 0x00; + profile = rtc->video_par->profile < 0 ? 0xe0 : rtc->video_par->profile; + level = rtc->video_par->level < 0 ? 
30 : rtc->video_par->level; + profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; ret = av_strlcatf(tmp, MAX_SDP_SIZE, "" "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" "c=IN IP4 0.0.0.0\r\n" @@ -453,9 +469,9 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->ice_pwd_local, rtc->video_payload_type, rtc->video_payload_type, - rtc->video_par->profile & (~FF_PROFILE_H264_CONSTRAINED), + profile & (~FF_PROFILE_H264_CONSTRAINED), profile_iop, - rtc->video_par->level, + level, rtc->video_ssrc, rtc->video_ssrc); if (ret >= MAX_SDP_SIZE) { @@ -1266,7 +1282,7 @@ static int write_packet(void *opaque, uint8_t *buf, int buf_size) ret = ffurl_write(rtc->udp_uc, cipher, cipher_size); if (ret < 0) { - av_log(s, AV_LOG_WARNING, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + av_log(s, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); return ret; } @@ -1422,29 +1438,35 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) /* Insert a packet with SPS/PPS before each IDR frame. */ is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - if (is_idr && rtc->avc_sps && rtc->avc_pps) { + if (is_idr && st->codecpar->extradata) { extra = av_packet_alloc(); - size = rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; + size = !rtc->avc_nal_length_size ? st->codecpar->extradata_size : + rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; ret = av_new_packet(extra, size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to allocate extra packet\n"); return ret; } - /* Encode SPS in ISOM format. */ - p = extra->data; - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); - } - memcpy(p, rtc->avc_sps, rtc->avc_sps_size); - p += rtc->avc_sps_size; + /* Encode SPS/PPS in annexb format. 
*/ + if (!rtc->avc_nal_length_size) { + memcpy(extra->data, st->codecpar->extradata, size); + } else { + /* Encode SPS/PPS in ISOM format. */ + p = extra->data; + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_sps, rtc->avc_sps_size); + p += rtc->avc_sps_size; - /* Encode PPS in ISOM format. */ - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + /* Encode PPS in ISOM format. */ + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_pps, rtc->avc_pps_size); + p += rtc->avc_pps_size; } - memcpy(p, rtc->avc_pps, rtc->avc_pps_size); - p += rtc->avc_pps_size; /* Setup packet and feed it to chain. */ extra->pts = pkt->pts; From fc0974aab29a96062733988927b70ccdc8de0570 Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 5 May 2023 20:58:16 +0800 Subject: [PATCH 16/60] WHIP: Refine code. 
--- libavformat/rtcenc.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 1d9a1444b6c8d..613428b1b5516 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -482,7 +482,7 @@ static int generate_sdp_offer(AVFormatContext *s) } rtc->sdp_offer = av_strdup(tmp); - av_log(s, AV_LOG_VERBOSE, "Generated offer: %s\n", rtc->sdp_offer); + av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); end: av_free(tmp); @@ -585,7 +585,7 @@ static int exchange_sdp(AVFormatContext *s) } rtc->sdp_answer = av_strdup(tmp); - av_log(s, AV_LOG_VERBOSE, "Got answer: %s\n", rtc->sdp_answer); + av_log(s, AV_LOG_VERBOSE, "WHIP: Got answer: %s\n", rtc->sdp_answer); end: ffurl_closep(&whip_uc); @@ -647,7 +647,7 @@ static int parse_answer(AVFormatContext *s) } } - av_log(s, AV_LOG_INFO, "SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", + av_log(s, AV_LOG_INFO, "WHIP: SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), rtc->ice_protocol, rtc->ice_host, rtc->ice_port); @@ -811,7 +811,7 @@ static int ice_handshake(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", + av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, UDP_FAST_RETRIES - fast_retries); ret = 0; @@ -915,7 +915,7 @@ static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, i handshake_type = (uint8_t)data[13]; } - av_log(s, AV_LOG_INFO, "DTLS: State %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", + av_log(s, AV_LOG_INFO, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", "Active", 
(incoming? "RECV":"SEND"), rtc->dtls_done_for_us, rtc->dtls_arq_packets, r0, r1, length, content_type, size, handshake_type); } @@ -1155,7 +1155,7 @@ static int openssl_dtls_handshake(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "DTLS: Handshake done=%d, arq=%d, srtp_material=%luB\n", + av_log(s, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", rtc->dtls_done_for_us, rtc->dtls_arq_packets, sizeof(rtc->dtls_srtp_material)); end: @@ -1222,7 +1222,7 @@ static int setup_srtp(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "SRTP: Setup done, suite=%s, key=%luB\n", suite, sizeof(send_key)); + av_log(s, AV_LOG_INFO, "WHIP: SRTP setup done, suite=%s, key=%luB\n", suite, sizeof(send_key)); end: return ret; From fae7b40acacdd0d1b54a779baed912e93bbea17a Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 5 May 2023 20:58:56 +0800 Subject: [PATCH 17/60] WHIP: Dispose resource when write trailer. --- libavformat/http.c | 6 ++++++ libavformat/http.h | 2 ++ libavformat/rtcenc.c | 49 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 56 insertions(+), 1 deletion(-) diff --git a/libavformat/http.c b/libavformat/http.c index c0fe7c36d91d6..24b31c70d111c 100644 --- a/libavformat/http.c +++ b/libavformat/http.c @@ -531,6 +531,12 @@ int ff_http_averror(int status_code, int default_averror) return default_averror; } +const char* ff_http_get_new_location(URLContext *h) +{ + HTTPContext *s = h->priv_data; + return s->new_location; +} + static int http_write_reply(URLContext* h, int status_code) { int ret, body = 0, reply_code, message_len; diff --git a/libavformat/http.h b/libavformat/http.h index 5f650ef143f77..d1b691826bf3b 100644 --- a/libavformat/http.h +++ b/libavformat/http.h @@ -62,4 +62,6 @@ int ff_http_do_new_request2(URLContext *h, const char *uri, AVDictionary **optio int ff_http_averror(int status_code, int default_averror); +const char* ff_http_get_new_location(URLContext *h); + #endif /* AVFORMAT_HTTP_H */ diff --git 
a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 613428b1b5516..fe865d6598bc8 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -50,6 +50,7 @@ #include "libavutil/base64.h" #include "srtp.h" #include "avc.h" +#include "http.h" /* The maximum size of an SDP, either offer or answer. */ #define MAX_SDP_SIZE 8192 @@ -127,6 +128,8 @@ typedef struct RTCContext { int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; + /* The resource URL returned in the Location header of WHIP HTTP response. */ + char *whip_resource_url; /* Whether the timer should be reset. */ int dtls_should_reset_timer; @@ -563,6 +566,14 @@ static int exchange_sdp(AVFormatContext *s) goto end; } + if (ff_http_get_new_location(whip_uc)) { + rtc->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); + if (!rtc->whip_resource_url) { + ret = AVERROR(ENOMEM); + goto end; + } + } + while (1) { ret = ffurl_read(whip_uc, buf, sizeof(buf)); if (ret == AVERROR_EOF) { @@ -1488,7 +1499,42 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) static int rtc_write_trailer(AVFormatContext *s) { - return 0; + int ret; + char buf[MAX_URL_SIZE]; + URLContext *whip_uc = NULL; + RTCContext *rtc = s->priv_data; + + ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to alloc WHIP delete context: %s\n", s->url); + goto end; + } + + av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); + av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); + ret = ffurl_connect(whip_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to DELETE url=%s\n", rtc->whip_resource_url); + goto end; + } + + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + ret = 0; + break; + } + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); + goto end; + } + } + + av_log(s, 
AV_LOG_INFO, "WHIP: Dispose resource %s\n", rtc->whip_resource_url); + +end: + ffurl_closep(&whip_uc); + return ret; } static av_cold void rtc_deinit(AVFormatContext *s) @@ -1511,6 +1557,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) av_freep(&rtc->avc_pps); av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); + av_freep(&rtc->whip_resource_url); av_freep(&rtc->ice_ufrag_remote); av_freep(&rtc->ice_pwd_remote); av_freep(&rtc->ice_protocol); From 5e400cd5e4acf994e45464484792ae4743f581b5 Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 5 May 2023 21:09:35 +0800 Subject: [PATCH 18/60] WHIP: Check alloc fail and return ENOMEM. --- libavformat/rtcenc.c | 53 +++++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index fe865d6598bc8..7fd29477603c1 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -179,11 +179,8 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata /* Parse the SPS/PPS in ISOM format in extradata. 
*/ pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); - if (!pb) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext, size=%d\n", extradata_size); - ret = AVERROR(ENOMEM); - goto end; - } + if (!pb) + return AVERROR(ENOMEM); version = avio_r8(pb); /* version */ avio_r8(pb); /* avc profile */ @@ -381,10 +378,8 @@ static int generate_sdp_offer(AVFormatContext *s) RTCContext *rtc = s->priv_data; char *tmp = av_mallocz(MAX_SDP_SIZE); - if (!tmp) { - av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s\n", s->url); + if (!tmp) return AVERROR(ENOMEM); - } if (rtc->sdp_offer) { av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); @@ -485,6 +480,10 @@ static int generate_sdp_offer(AVFormatContext *s) } rtc->sdp_offer = av_strdup(tmp); + if (!rtc->sdp_offer) { + ret = AVERROR(ENOMEM); + goto end; + } av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); end: @@ -542,10 +541,8 @@ static int exchange_sdp(AVFormatContext *s) URLContext *whip_uc = NULL; char *tmp = av_mallocz(MAX_SDP_SIZE); - if (!tmp) { - av_log(s, AV_LOG_ERROR, "Failed to alloc answer: %s\n", s->url); + if (!tmp) return AVERROR(ENOMEM); - } ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { @@ -596,6 +593,10 @@ static int exchange_sdp(AVFormatContext *s) } rtc->sdp_answer = av_strdup(tmp); + if (!rtc->sdp_answer) { + ret = AVERROR(ENOMEM); + goto end; + } av_log(s, AV_LOG_VERBOSE, "WHIP: Got answer: %s\n", rtc->sdp_answer); end: @@ -619,18 +620,23 @@ static int parse_answer(AVFormatContext *s) RTCContext *rtc = s->priv_data; pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); - if (!pb) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for answer: %s\n", rtc->sdp_answer); - ret = AVERROR(ENOMEM); - goto end; - } + if (!pb) + return AVERROR(ENOMEM); for (i = 0; !avio_feof(pb); i++) { ff_get_chomp_line(pb, line, sizeof(line)); if (av_strstart(line, "a=ice-ufrag:", 
&ptr) && !rtc->ice_ufrag_remote) { rtc->ice_ufrag_remote = av_strdup(ptr); + if (!rtc->ice_ufrag_remote) { + ret = AVERROR(ENOMEM); + goto end; + } } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !rtc->ice_pwd_remote) { rtc->ice_pwd_remote = av_strdup(ptr); + if (!rtc->ice_pwd_remote) { + ret = AVERROR(ENOMEM); + goto end; + } } else if (av_strstart(line, "a=candidate:", &ptr) && !rtc->ice_protocol) { ptr = av_stristr(ptr, "udp"); if (ptr && av_stristr(ptr, "host")) { @@ -654,6 +660,10 @@ static int parse_answer(AVFormatContext *s) rtc->ice_protocol = av_strdup(protocol); rtc->ice_host = av_strdup(host); rtc->ice_port = port; + if (!rtc->ice_protocol || !rtc->ice_host) { + ret = AVERROR(ENOMEM); + goto end; + } } } } @@ -682,15 +692,11 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in RTCContext *rtc = s->priv_data; pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); - if (!pb) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVIOContext for ICE\n"); - ret = AVERROR(ENOMEM); - goto end; - } + if (!pb) + return AVERROR(ENOMEM); hmac = av_hmac_alloc(AV_HMAC_SHA1); if (!hmac) { - av_log(s, AV_LOG_ERROR, "Failed to alloc AVHMAC for ICE\n"); ret = AVERROR(ENOMEM); goto end; } @@ -1324,14 +1330,12 @@ static int create_rtp_muxer(AVFormatContext *s) for (i = 0; i < s->nb_streams; i++) { rtp_ctx = avformat_alloc_context(); if (!rtp_ctx) { - av_log(s, AV_LOG_ERROR, "Failed to allocate rtp muxer\n"); ret = AVERROR(ENOMEM); goto end; } rtp_ctx->oformat = rtp_format; if (!avformat_new_stream(rtp_ctx, NULL)) { - av_log(s, AV_LOG_ERROR, "Failed to create rtp stream\n"); ret = AVERROR(ENOMEM); goto end; } @@ -1353,7 +1357,6 @@ static int create_rtp_muxer(AVFormatContext *s) buffer = av_malloc(MAX_UDP_SIZE); rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_SIZE, 1, s, NULL, write_packet, NULL); if (!rtp_ctx->pb) { - av_log(s, AV_LOG_ERROR, "Failed to allocate rtp pb\n"); ret = AVERROR(ENOMEM); goto end; } From 
d9c7dc7a2db616233c6feca50dd25637ed2312a8 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 6 May 2023 07:35:16 +0800 Subject: [PATCH 19/60] WHIP: Always dispose resource by WHIP. --- libavformat/rtcenc.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 7fd29477603c1..71d125cb972e7 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -128,8 +128,11 @@ typedef struct RTCContext { int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; + /* The resource URL returned in the Location header of WHIP HTTP response. */ char *whip_resource_url; + /* Whether resource already disposed. */ + int whip_disposed; /* Whether the timer should be reset. */ int dtls_should_reset_timer; @@ -1500,13 +1503,16 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) return ret; } -static int rtc_write_trailer(AVFormatContext *s) +static int whip_dispose(AVFormatContext *s) { int ret; char buf[MAX_URL_SIZE]; URLContext *whip_uc = NULL; RTCContext *rtc = s->priv_data; + if (!rtc->whip_resource_url || rtc->whip_disposed) + return 0; + ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to alloc WHIP delete context: %s\n", s->url); @@ -1533,18 +1539,26 @@ static int rtc_write_trailer(AVFormatContext *s) } } - av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s\n", rtc->whip_resource_url); + av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s, disposed=%d\n", rtc->whip_resource_url, rtc->whip_disposed); + rtc->whip_disposed = 1; end: ffurl_closep(&whip_uc); return ret; } +static int rtc_write_trailer(AVFormatContext *s) +{ + return whip_dispose(s); +} + static av_cold void rtc_deinit(AVFormatContext *s) { int i; RTCContext *rtc = s->priv_data; + whip_dispose(s); + for (i = 0; i < s->nb_streams; i++) { AVFormatContext* rtp_ctx = s->streams[i]->priv_data; if (!rtp_ctx) 
From 235aaa0049978cb8c701c3dbdf0e85ca7a42f0b6 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 6 May 2023 08:34:27 +0800 Subject: [PATCH 20/60] WHIP: Refine macros. --- libavformat/rtcenc.c | 58 ++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 71d125cb972e7..3c02a4a76feab 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -54,25 +54,12 @@ /* The maximum size of an SDP, either offer or answer. */ #define MAX_SDP_SIZE 8192 -/* The maximum size of a UDP packet, should be smaller than the MTU. */ -#define MAX_UDP_SIZE 1500 -/** - * The RTP payload max size, reserved some paddings for SRTP as such: - * kRtpPacketSize = kRtpMaxPayloadSize + paddings - * For example, if kRtpPacketSize is 1500, recommend to set kRtpMaxPayloadSize to 1400, - * which reserves 100 bytes for SRTP or paddings. - * otherwise, the kRtpPacketSize must less than MTU, in webrtc source code, - * the rtp max size is assigned by kVideoMtu = 1200. - * so we set kRtpMaxPayloadSize = 1200. - * see @doc https://groups.google.com/g/discuss-webrtc/c/gH5ysR3SoZI - */ -#define MAX_UDP_PAYLOAD_SIZE (MAX_UDP_SIZE - 300) -/* The maximum number of retries for UDP transmission. */ -#define UDP_FAST_RETRIES 6 -/* The startup timeout for UDP transmission. */ -#define UDP_START_TIMEOUT 21 -/* Avoid dtls negotiate failed, set max fragment size 1200. */ -#define DTLS_FRAGMENT_MAX_SIZE 1200 +/* The maximum size of the buffer for sending or receiving a UDP packet. */ +#define MAX_UDP_BUFFER_SIZE 1500 +/* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. */ +#define MAX_UDP_PAYLOAD_SIZE 1200 +/* Avoid dtls negotiate failed, limit the max size of DTLS fragment. */ +#define MAX_DTLS_FRAGMENT_SIZE 1200 /* Supported DTLS cipher suites. 
*/ #define DTLS_CIPHER_SUTES "ECDHE-ECDSA-AES128-GCM-SHA256"\ ":ECDHE-RSA-AES128-GCM-SHA256"\ @@ -85,6 +72,13 @@ /* The NALU type for STAP-A */ #define NALU_TYPE_STAP_A 24 +/* The maximum number of retries and step start timeout in ms for ICE transmission. */ +#define ICE_ARQ_MAX 6 +#define ICE_ARQ_STEP_TIMEOUT 21 +/* The maximum number of retries and interval in ms for DTLS transmission. */ +#define DTLS_ARQ_MAX 6 +#define DTLS_ARQ_INTERVAL 21 + typedef struct RTCContext { AVClass *av_class; @@ -766,8 +760,8 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in */ static int ice_handshake(AVFormatContext *s) { - int ret, size, fast_retries = UDP_FAST_RETRIES, timeout = UDP_START_TIMEOUT; - char url[256], buf[MAX_UDP_SIZE]; + int ret, size, fast_retries = ICE_ARQ_MAX, timeout = ICE_ARQ_STEP_TIMEOUT; + char url[256], buf[MAX_UDP_BUFFER_SIZE]; RTCContext *rtc = s->priv_data; /* Build UDP URL and create the UDP context as transport. */ @@ -820,7 +814,7 @@ static int ice_handshake(AVFormatContext *s) fast_retries--; continue; } - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", UDP_FAST_RETRIES); + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", ICE_ARQ_MAX); goto end; } } while (ret < 0); @@ -833,7 +827,7 @@ static int ice_handshake(AVFormatContext *s) av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, - UDP_FAST_RETRIES - fast_retries); + ICE_ARQ_MAX - fast_retries); ret = 0; end: @@ -1018,14 +1012,14 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl /* Set dtls fragment size */ SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - SSL_set_mtu(dtls, DTLS_FRAGMENT_MAX_SIZE); + SSL_set_mtu(dtls, MAX_DTLS_FRAGMENT_SIZE); /* Set the callback for ARQ timer. 
*/ DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); /* Setup DTLS as active, which is client role. */ SSL_set_connect_state(dtls); - SSL_set_max_send_fragment(dtls, DTLS_FRAGMENT_MAX_SIZE); + SSL_set_max_send_fragment(dtls, MAX_DTLS_FRAGMENT_SIZE); end: EC_GROUP_free(ecgroup); @@ -1036,7 +1030,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO { int ret, i, j, r0, r1, req_size, res_size = 0; uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; - char buf[MAX_UDP_SIZE]; + char buf[MAX_UDP_BUFFER_SIZE]; RTCContext *rtc = s->priv_data; /* Drive the SSL context by state change, arq or response messages. */ @@ -1056,7 +1050,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO } /* Fast retransmit the request util got response. */ - for (i = 0; i < UDP_FAST_RETRIES && !res_size; i++) { + for (i = 0; i < DTLS_ARQ_MAX && !res_size; i++) { req_size = BIO_get_mem_data(bio_out, (char**)&data); openssl_state_trace(s, data, req_size, 0, r0, r1); ret = ffurl_write(rtc->udp_uc, data, req_size); @@ -1069,7 +1063,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO return ret; } - for (j = 0; j < UDP_FAST_RETRIES && !res_size; j++) { + for (j = 0; j < DTLS_ARQ_MAX && !res_size; j++) { ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); /* Got response successfully. */ if (ret > 0) { @@ -1091,7 +1085,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO * occurs, it returns -1. */ r0 = DTLSv1_handle_timeout(dtls); if (!r0) { - av_usleep(UDP_START_TIMEOUT * 1000); + av_usleep(DTLS_ARQ_INTERVAL * 1000); continue; /* no timeout had expired. 
*/ } if (r0 != 1) { @@ -1252,7 +1246,7 @@ static int write_packet(void *opaque, uint8_t *buf, int buf_size) { int ret, cipher_size, is_rtcp; uint8_t payload_type, nalu_header; - char cipher[MAX_UDP_SIZE]; + char cipher[MAX_UDP_BUFFER_SIZE]; AVFormatContext *s = opaque; RTCContext *rtc = s->priv_data; struct SRTPContext *srtp; @@ -1357,8 +1351,8 @@ static int create_rtp_muxer(AVFormatContext *s) avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; - buffer = av_malloc(MAX_UDP_SIZE); - rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_SIZE, 1, s, NULL, write_packet, NULL); + buffer = av_malloc(MAX_UDP_BUFFER_SIZE); + rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_BUFFER_SIZE, 1, s, NULL, write_packet, NULL); if (!rtp_ctx->pb) { ret = AVERROR(ENOMEM); goto end; From 34fd9f968f0e996e987bab76fc7e4c58f6d619ba Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 6 May 2023 09:22:15 +0800 Subject: [PATCH 21/60] WHIP: Support set parameters by options. 1. Set ICE and DTLS ARQ by options. 2. Set pkt_size(priv_data; + int fast_retries = rtc->ice_arq_max, timeout = rtc->ice_arq_timeout; /* Build UDP URL and create the UDP context as transport. */ ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); @@ -774,6 +784,9 @@ static int ice_handshake(AVFormatContext *s) av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); + /* Set the max packet size to the buffer size. */ + snprintf(buf, sizeof(buf), "%d", rtc->pkt_size); + av_opt_set(rtc->udp_uc->priv_data, "pkt_size", buf, 0); ret = ffurl_connect(rtc->udp_uc, NULL); if (ret < 0) { @@ -800,21 +813,24 @@ static int ice_handshake(AVFormatContext *s) goto end; } - /* If max retries is 6 and start timeout is 21ms, the total timeout - * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. 
*/ - if (fast_retries) { - av_usleep(timeout * 1000); - timeout *= 2; - } + /* Wait so that the server can process the request and no need ARQ then. */ +#if ICE_PROCESSING_TIMEOUT > 0 + av_usleep(ICE_PROCESSING_TIMEOUT * 10000); +#endif /* Read the STUN binding response. */ ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); if (ret < 0) { + /* If max retries is 6 and start timeout is 21ms, the total timeout + * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. */ + av_usleep(timeout * 1000); + timeout *= 2; + if (ret == AVERROR(EAGAIN) && fast_retries) { fast_retries--; continue; } - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", ICE_ARQ_MAX); + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", rtc->ice_arq_max); goto end; } } while (ret < 0); @@ -827,7 +843,7 @@ static int ice_handshake(AVFormatContext *s) av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, - ICE_ARQ_MAX - fast_retries); + rtc->ice_arq_max - fast_retries); ret = 0; end: @@ -897,11 +913,11 @@ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. * when got ServerHello, we should reset the timer. */ if (!previous_us || rtc->dtls_should_reset_timer) { - timeout_us = 50 * 1000; /* in us */ + timeout_us = rtc->dtls_arq_timeout * 1000; /* in us */ } - // never exceed the max timeout. - timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); // in us + /* never exceed the max timeout. 
*/ + timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ av_log(s, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", timeout_us / 1000, previous_us / 1000); @@ -957,6 +973,7 @@ static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtls_ctx, EVP_PKEY *dtls_pkey, EC_KEY *eckey) { int ret; + RTCContext *rtc = s->priv_data; /* Should use the curves in ClientHello.supported_groups, for example: * Supported Group: x25519 (0x001d) @@ -1012,14 +1029,15 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl /* Set dtls fragment size */ SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - SSL_set_mtu(dtls, MAX_DTLS_FRAGMENT_SIZE); + /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. */ + SSL_set_mtu(dtls, rtc->pkt_size); /* Set the callback for ARQ timer. */ DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); /* Setup DTLS as active, which is client role. */ SSL_set_connect_state(dtls); - SSL_set_max_send_fragment(dtls, MAX_DTLS_FRAGMENT_SIZE); + SSL_set_max_send_fragment(dtls, rtc->pkt_size); end: EC_GROUP_free(ecgroup); @@ -1050,7 +1068,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO } /* Fast retransmit the request util got response. */ - for (i = 0; i < DTLS_ARQ_MAX && !res_size; i++) { + for (i = 0; i <= rtc->dtls_arq_max && !res_size; i++) { req_size = BIO_get_mem_data(bio_out, (char**)&data); openssl_state_trace(s, data, req_size, 0, r0, r1); ret = ffurl_write(rtc->udp_uc, data, req_size); @@ -1063,7 +1081,12 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO return ret; } - for (j = 0; j < DTLS_ARQ_MAX && !res_size; j++) { + /* Wait so that the server can process the request and no need ARQ then. 
*/ +#if DTLS_PROCESSING_TIMEOUT > 0 + av_usleep(DTLS_PROCESSING_TIMEOUT * 10000); +#endif + + for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); /* Got response successfully. */ if (ret > 0) { @@ -1085,7 +1108,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO * occurs, it returns -1. */ r0 = DTLSv1_handle_timeout(dtls); if (!r0) { - av_usleep(DTLS_ARQ_INTERVAL * 1000); + av_usleep(rtc->dtls_arq_timeout * 1000); continue; /* no timeout had expired. */ } if (r0 != 1) { @@ -1310,7 +1333,7 @@ static int write_packet(void *opaque, uint8_t *buf, int buf_size) */ static int create_rtp_muxer(AVFormatContext *s) { - int ret, i, is_video; + int ret, i, is_video, buffer_size, max_packet_size; AVFormatContext *rtp_ctx = NULL; AVDictionary *opts = NULL; uint8_t *buffer = NULL; @@ -1324,6 +1347,11 @@ static int create_rtp_muxer(AVFormatContext *s) goto end; } + /* The UDP buffer size, may greater than MTU. */ + buffer_size = MAX_UDP_BUFFER_SIZE; + /* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. 
*/ + max_packet_size = rtc->pkt_size - DTLS_SRTP_CHECKSUM_LEN; + for (i = 0; i < s->nb_streams; i++) { rtp_ctx = avformat_alloc_context(); if (!rtp_ctx) { @@ -1351,13 +1379,13 @@ static int create_rtp_muxer(AVFormatContext *s) avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; - buffer = av_malloc(MAX_UDP_BUFFER_SIZE); - rtp_ctx->pb = avio_alloc_context(buffer, MAX_UDP_BUFFER_SIZE, 1, s, NULL, write_packet, NULL); + buffer = av_malloc(buffer_size); + rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, write_packet, NULL); if (!rtp_ctx->pb) { ret = AVERROR(ENOMEM); goto end; } - rtp_ctx->pb->max_packet_size = MAX_UDP_PAYLOAD_SIZE; + rtp_ctx->pb->max_packet_size = max_packet_size; rtp_ctx->pb->av_class = &ff_avio_class; is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; @@ -1379,6 +1407,9 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx = NULL; } + av_log(s, AV_LOG_INFO, "WHIP: Create RTP muxer OK, buffer_size=%d, max_packet_size=%d\n", + buffer_size, max_packet_size); + end: if (rtp_ctx) avio_context_free(&rtp_ctx->pb); @@ -1389,7 +1420,16 @@ static int create_rtp_muxer(AVFormatContext *s) static av_cold int rtc_init(AVFormatContext *s) { - int ret; + int ret, ideal_pkt_size = 532; + RTCContext *rtc = s->priv_data; + + av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", + rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); + + if (rtc->pkt_size < ideal_pkt_size) { + av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + rtc->pkt_size, ideal_pkt_size); + } if ((ret = parse_codec(s)) < 0) return ret; @@ -1491,6 +1531,15 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) } ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); + if (ret < 0) { + if (ret == AVERROR(EINVAL)) { + 
av_log(s, AV_LOG_WARNING, "Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); + ret = 0; + } else { + av_log(s, AV_LOG_ERROR, "Failed to write packet, size=%d\n", pkt->size); + } + goto end; + } end: av_packet_free(&extra); @@ -1579,14 +1628,21 @@ static av_cold void rtc_deinit(AVFormatContext *s) ff_srtp_free(&rtc->srtp_recv); } +#define OFFSET(x) offsetof(RTCContext, x) +#define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { + { "ice_arq_max", "Maximum retransmissions for ICE ARQ", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, + { "ice_arq_timeout", "Start timeout in ms for ICE ARQ", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, + { "dtls_arq_max", "Maximum retransmissions for DTLS ARQ", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, + { "dtls_arq_timeout", "Start timeout in ms for DTLS ARQ", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, + { "pkt_size", "Maximum RTP packet size", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1500 }, -1, INT_MAX, DEC }, { NULL }, }; static const AVClass rtc_muxer_class = { .class_name = "RTC WHIP muxer", .item_name = av_default_item_name, - .option = NULL, + .option = options, .version = LIBAVUTIL_VERSION_INT, }; From 2bfdb67ad2c423d100a24e2e646f6546bdb5a734 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 6 May 2023 18:25:11 +0800 Subject: [PATCH 22/60] WHIP: Eliminate the unused write_trailer. 
--- libavformat/rtcenc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index ea630cf8a9879..d706b723d7527 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1590,17 +1590,14 @@ static int whip_dispose(AVFormatContext *s) return ret; } -static int rtc_write_trailer(AVFormatContext *s) -{ - return whip_dispose(s); -} - static av_cold void rtc_deinit(AVFormatContext *s) { - int i; + int i, ret; RTCContext *rtc = s->priv_data; - whip_dispose(s); + ret = whip_dispose(s); + if (ret < 0) + av_log(s, AV_LOG_WARNING, "Failed to dispose resource, ret=%d\n", ret); for (i = 0; i < s->nb_streams; i++) { AVFormatContext* rtp_ctx = s->streams[i]->priv_data; @@ -1657,6 +1654,5 @@ const FFOutputFormat ff_rtc_muxer = { .init = rtc_init, .write_header = rtc_write_header, .write_packet = rtc_write_packet, - .write_trailer = rtc_write_trailer, .deinit = rtc_deinit, }; From e3fdedde81c0f052094b4f55073416a05001dbc7 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 6 May 2023 18:50:23 +0800 Subject: [PATCH 23/60] WHIP: Refine code. --- libavformat/rtcenc.c | 349 +++++++++++++++++++++++-------------------- 1 file changed, 190 insertions(+), 159 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d706b723d7527..d1ec7d3546e4d 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -120,11 +120,8 @@ typedef struct RTCContext { int ice_port; /* The SDP answer received from the WebRTC server. */ char *sdp_answer; - /* The resource URL returned in the Location header of WHIP HTTP response. */ char *whip_resource_url; - /* Whether resource already disposed. */ - int whip_disposed; /* Whether the timer should be reset. 
*/ int dtls_should_reset_timer; @@ -163,6 +160,8 @@ typedef struct RTCContext { int pkt_size; } RTCContext; +static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); + /* Parse SPS/PPS from ISOM AVCC, see ff_isom_write_avcc */ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) { @@ -945,7 +944,7 @@ static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, i handshake_type = (uint8_t)data[13]; } - av_log(s, AV_LOG_INFO, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", + av_log(s, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", "Active", (incoming? "RECV":"SEND"), rtc->dtls_done_for_us, rtc->dtls_arq_packets, r0, r1, length, content_type, size, handshake_type); } @@ -1265,67 +1264,6 @@ static int setup_srtp(AVFormatContext *s) return ret; } -static int write_packet(void *opaque, uint8_t *buf, int buf_size) -{ - int ret, cipher_size, is_rtcp; - uint8_t payload_type, nalu_header; - char cipher[MAX_UDP_BUFFER_SIZE]; - AVFormatContext *s = opaque; - RTCContext *rtc = s->priv_data; - struct SRTPContext *srtp; - - /* Ignore if not RTP or RTCP packet. */ - if (buf_size < 12 || (buf[0] & 0xC0) != 0x80) - return 0; - - /* RTCP is not supported yet. */ - is_rtcp = buf[1] >= 192 && buf[1] <= 223; - if (is_rtcp) - return 0; - - /* Only support audio and video. */ - payload_type = buf[1] & 0x7f; - if (payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { - return 0; - } - - /** - * For video, the STAP-A with SPS/PPS should: - * 1. The marker bit should be 0, never be 1. - * 2. The NRI should equal to the first NALU's. - */ - if (payload_type == rtc->video_payload_type && buf_size > 12) { - nalu_header = buf[12] & 0x1f; - if (nalu_header == NALU_TYPE_STAP_A) { - /* Reset the marker bit to 0. 
*/ - if (buf[1] & 0x80) { - buf[1] &= 0x7f; - } - - /* Reset the NRI to the first NALU's NRI. */ - if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) { - buf[12] = (buf[12]&0x80) | (buf[15]&0x60) | (buf[12]&0x1f); - } - } - } - - /* Encrypt by SRTP and send out. */ - srtp = payload_type == rtc->video_payload_type ? &rtc->srtp_video_send : &rtc->srtp_audio_send; - cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); - if (cipher_size <= 0 || cipher_size < buf_size) { - av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); - return 0; - } - - ret = ffurl_write(rtc->udp_uc, cipher, cipher_size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); - return ret; - } - - return ret; -} - /** * Create a RTP muxer to build RTP packets from the encoded frames. * @@ -1380,7 +1318,7 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; buffer = av_malloc(buffer_size); - rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, write_packet, NULL); + rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, on_rtp_write_packet, NULL); if (!rtp_ctx->pb) { ret = AVERROR(ENOMEM); goto end; @@ -1418,6 +1356,184 @@ static int create_rtp_muxer(AVFormatContext *s) return ret; } +/** + * When RTP muxer builds and outputs a RTP packet, this callback will be called. + */ +static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) +{ + int ret, cipher_size, is_rtcp; + uint8_t payload_type, nalu_header; + char cipher[MAX_UDP_BUFFER_SIZE]; + AVFormatContext *s = opaque; + RTCContext *rtc = s->priv_data; + struct SRTPContext *srtp; + + /* Ignore if not RTP or RTCP packet. */ + if (buf_size < 12 || (buf[0] & 0xC0) != 0x80) + return 0; + + /* RTCP is not supported yet. */ + is_rtcp = buf[1] >= 192 && buf[1] <= 223; + if (is_rtcp) + return 0; + + /* Only support audio and video. 
*/ + payload_type = buf[1] & 0x7f; + if (payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { + return 0; + } + + /** + * For video, the STAP-A with SPS/PPS should: + * 1. The marker bit should be 0, never be 1. + * 2. The NRI should equal to the first NALU's. + */ + if (payload_type == rtc->video_payload_type && buf_size > 12) { + nalu_header = buf[12] & 0x1f; + if (nalu_header == NALU_TYPE_STAP_A) { + /* Reset the marker bit to 0. */ + if (buf[1] & 0x80) { + buf[1] &= 0x7f; + } + + /* Reset the NRI to the first NALU's NRI. */ + if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) { + buf[12] = (buf[12]&0x80) | (buf[15]&0x60) | (buf[12]&0x1f); + } + } + } + + /* Encrypt by SRTP and send out. */ + srtp = payload_type == rtc->video_payload_type ? &rtc->srtp_video_send : &rtc->srtp_audio_send; + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); + if (cipher_size <= 0 || cipher_size < buf_size) { + av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + return 0; + } + + ret = ffurl_write(rtc->udp_uc, cipher, cipher_size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + return ret; + } + + return ret; +} + +/** + * Insert the SPS/PPS before each IDR frame. + */ +static int insert_sps_pps_packet(AVFormatContext *s, AVPacket *pkt) +{ + int ret, is_idr, size, i; + uint8_t *p; + AVPacket* extra = NULL; + AVStream *st = s->streams[pkt->stream_index]; + AVFormatContext *rtp_ctx = st->priv_data; + RTCContext *rtc = s->priv_data; + + is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; + if (!is_idr || !st->codecpar->extradata) + return 0; + + extra = av_packet_alloc(); + if (!extra) + return AVERROR(ENOMEM); + + size = !rtc->avc_nal_length_size ? 
st->codecpar->extradata_size : + rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; + ret = av_new_packet(extra, size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to allocate extra packet\n"); + goto end; + } + + /* Encode SPS/PPS in annexb format. */ + if (!rtc->avc_nal_length_size) { + memcpy(extra->data, st->codecpar->extradata, size); + } else { + /* Encode SPS/PPS in ISOM format. */ + p = extra->data; + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_sps, rtc->avc_sps_size); + p += rtc->avc_sps_size; + + /* Encode PPS in ISOM format. */ + for (i = 0; i < rtc->avc_nal_length_size; i++) { + *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); + } + memcpy(p, rtc->avc_pps, rtc->avc_pps_size); + p += rtc->avc_pps_size; + } + + /* Setup packet and feed it to chain. */ + extra->pts = pkt->pts; + extra->dts = pkt->dts; + extra->stream_index = pkt->stream_index; + extra->time_base = pkt->time_base; + + ret = ff_write_chained(rtp_ctx, 0, extra, s, 0); + if (ret < 0) + goto end; + +end: + av_packet_free(&extra); + return ret; +} + +/** + * RTC is connectionless, for it's based on UDP, so it check whether sesison is + * timeout. In such case, publishers can't republish the stream util the session + * is timeout. + * This function is called to notify the server that the stream is ended, server + * should expire and close the session immediately, so that publishers can republish + * the stream quickly. 
+ */ +static int whip_dispose(AVFormatContext *s) +{ + int ret; + char buf[MAX_URL_SIZE]; + URLContext *whip_uc = NULL; + RTCContext *rtc = s->priv_data; + + if (!rtc->whip_resource_url) + return 0; + + ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to alloc WHIP delete context: %s\n", s->url); + goto end; + } + + av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); + av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); + ret = ffurl_connect(whip_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to DELETE url=%s\n", rtc->whip_resource_url); + goto end; + } + + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + ret = 0; + break; + } + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); + goto end; + } + } + + av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s\n", rtc->whip_resource_url); + +end: + ffurl_closep(&whip_uc); + return ret; +} + static av_cold int rtc_init(AVFormatContext *s) { int ret, ideal_pkt_size = 532; @@ -1472,9 +1588,7 @@ static int rtc_write_header(AVFormatContext *s) static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) { - int ret, size, is_idr, i; - AVPacket* extra = NULL; - uint8_t *p; + int ret; RTCContext *rtc = s->priv_data; AVStream *st = s->streams[pkt->stream_index]; AVFormatContext *rtp_ctx = st->priv_data; @@ -1487,47 +1601,10 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) rtc->audio_jitter_base += 960; } - /* Insert a packet with SPS/PPS before each IDR frame. */ - is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - if (is_idr && st->codecpar->extradata) { - extra = av_packet_alloc(); - size = !rtc->avc_nal_length_size ? 
st->codecpar->extradata_size : - rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; - ret = av_new_packet(extra, size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to allocate extra packet\n"); - return ret; - } - - /* Encode SPS/PPS in annexb format. */ - if (!rtc->avc_nal_length_size) { - memcpy(extra->data, st->codecpar->extradata, size); - } else { - /* Encode SPS/PPS in ISOM format. */ - p = extra->data; - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); - } - memcpy(p, rtc->avc_sps, rtc->avc_sps_size); - p += rtc->avc_sps_size; - - /* Encode PPS in ISOM format. */ - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); - } - memcpy(p, rtc->avc_pps, rtc->avc_pps_size); - p += rtc->avc_pps_size; - } - - /* Setup packet and feed it to chain. */ - extra->pts = pkt->pts; - extra->dts = pkt->dts; - extra->stream_index = pkt->stream_index; - extra->time_base = pkt->time_base; - - ret = ff_write_chained(rtp_ctx, 0, extra, s, 0); - if (ret < 0) - goto end; + ret = insert_sps_pps_packet(s, pkt); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to insert SPS/PPS packet\n"); + return ret; } ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); @@ -1538,55 +1615,9 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) } else { av_log(s, AV_LOG_ERROR, "Failed to write packet, size=%d\n", pkt->size); } - goto end; - } - -end: - av_packet_free(&extra); - return ret; -} - -static int whip_dispose(AVFormatContext *s) -{ - int ret; - char buf[MAX_URL_SIZE]; - URLContext *whip_uc = NULL; - RTCContext *rtc = s->priv_data; - - if (!rtc->whip_resource_url || rtc->whip_disposed) - return 0; - - ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to alloc WHIP delete context: %s\n", s->url); - goto end; - } - - 
av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); - ret = ffurl_connect(whip_uc, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to DELETE url=%s\n", rtc->whip_resource_url); - goto end; - } - - while (1) { - ret = ffurl_read(whip_uc, buf, sizeof(buf)); - if (ret == AVERROR_EOF) { - ret = 0; - break; - } - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); - goto end; - } + return ret; } - av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s, disposed=%d\n", rtc->whip_resource_url, rtc->whip_disposed); - rtc->whip_disposed = 1; - -end: - ffurl_closep(&whip_uc); return ret; } @@ -1637,7 +1668,7 @@ static const AVOption options[] = { }; static const AVClass rtc_muxer_class = { - .class_name = "RTC WHIP muxer", + .class_name = "WebRTC muxer", .item_name = av_default_item_name, .option = options, .version = LIBAVUTIL_VERSION_INT, @@ -1645,7 +1676,7 @@ static const AVClass rtc_muxer_class = { const FFOutputFormat ff_rtc_muxer = { .p.name = "rtc", - .p.long_name = NULL_IF_CONFIG_SMALL("WebRTC WHIP muxer"), + .p.long_name = NULL_IF_CONFIG_SMALL("WHIP WebRTC muxer"), .p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, From b4574d6c66d452f447079938ca3966de8690cc7f Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 11 May 2023 19:44:41 +0800 Subject: [PATCH 24/60] WHIP: Check for null string to avoid strlen crash, scanned by ASAN --- libavformat/rtcenc.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d1ec7d3546e4d..2bc1c84c41a3b 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -555,6 +555,12 @@ static int exchange_sdp(AVFormatContext *s) goto end; } + if (!rtc->sdp_offer || !strlen(rtc->sdp_offer)) { + av_log(s, AV_LOG_ERROR, "No offer to exchange\n"); + ret = AVERROR(EINVAL); + goto end; + 
} + snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n" "Content-Type: application/sdp\r\n"); @@ -624,6 +630,12 @@ static int parse_answer(AVFormatContext *s) int i; RTCContext *rtc = s->priv_data; + if (!rtc->sdp_answer || !strlen(rtc->sdp_answer)) { + av_log(s, AV_LOG_ERROR, "No answer to parse\n"); + ret = AVERROR(EINVAL); + goto end; + } + pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); if (!pb) return AVERROR(ENOMEM); @@ -673,6 +685,18 @@ static int parse_answer(AVFormatContext *s) } } + if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote) || !rtc->ice_ufrag_remote || !rtc->ice_ufrag_remote) { + av_log(s, AV_LOG_ERROR, "No ice pwd or ufrag parsed from %s\n", rtc->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (!rtc->ice_protocol || !rtc->ice_host || !rtc->ice_port) { + av_log(s, AV_LOG_ERROR, "No ice candidate parsed from %s\n", rtc->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + av_log(s, AV_LOG_INFO, "WHIP: SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), rtc->ice_protocol, rtc->ice_host, rtc->ice_port); From 248243a7f764bd66dbbeeb0d54d4971642197909 Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 11 May 2023 21:08:02 +0800 Subject: [PATCH 25/60] WHIP: Improve macro and options comments with more background details --- libavformat/rtcenc.c | 80 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 65 insertions(+), 15 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 2bc1c84c41a3b..0a13ce3810caf 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -52,35 +52,85 @@ #include "avc.h" #include "http.h" -/* The maximum size of an SDP, either offer or answer. */ +/** + * Maximum size limit of a Session Description Protocol (SDP), + * be it an offer or answer. 
+ */ #define MAX_SDP_SIZE 8192 -/* The maximum size of the buffer for sending or receiving a UDP packet. */ +/** + * Maximum size of the buffer for sending and receiving UDP packets. + * Please note that this size does not limit the size of the UDP packet that can be sent. + * To set the limit for packet size, modify the `pkt_size` parameter. + * For instance, it is possible to set the UDP buffer to 4096 to send or receive packets, + * but please keep in mind that the `pkt_size` option limits the packet size to 1400. + */ #define MAX_UDP_BUFFER_SIZE 4096 -/* Supported DTLS cipher suites. */ +/* + * Supported DTLS cipher suites for FFmpeg as a DTLS client. + * These cipher suites are used to negotiate with DTLS servers. + * + * It is advisable to use a limited number of cipher suites to reduce + * the size of DTLS UDP packets. + */ #define DTLS_CIPHER_SUTES "ECDHE-ECDSA-AES128-GCM-SHA256"\ ":ECDHE-RSA-AES128-GCM-SHA256"\ ":ECDHE-ECDSA-AES128-SHA"\ ":ECDHE-RSA-AES128-SHA"\ ":ECDHE-ECDSA-AES256-SHA"\ ":ECDHE-RSA-AES256-SHA" -/* The SRTP key size, defined by SRTP_MASTER_KEY_LEN */ +/** + * The size of the Secure Real-time Transport Protocol (SRTP) master key material + * that is exported by Secure Sockets Layer (SSL) after a successful Datagram + * Transport Layer Security (DTLS) handshake. This material consists of a key + * of 16 bytes and a salt of 14 bytes. + * + * The material is exported by SSL in the following format: client_key (16 bytes) | + * server_key (16 bytes) | client_salt (14 bytes) | server_salt (14 bytes). + */ #define DTLS_SRTP_MASTER_KEY_LEN 30 -/* The maximum size of SRTP hmac checksum and padding. */ +/** + * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum + * and padding that is appended to the end of the packet. To calculate the maximum + * size of the User Datagram Protocol (UDP) packet that can be sent out, subtract + * this size from the `pkt_size`. 
+ */ #define DTLS_SRTP_CHECKSUM_LEN 16 -/* The NALU type for STAP-A */ +/** + * STAP-A stands for Single-Time Aggregation Packet. + * The NALU type for STAP-A is 24 (0x18). + */ #define NALU_TYPE_STAP_A 24 -/* Wait for a small timeout in ms to let server processing the ICE request. */ +/** + * Wait for a small timeout in milliseconds to allow for the server to process + * the Interactive Connectivity Establishment (ICE) request. If we immediately + * read the response after sending the request, we may receive nothing and need + * to immediately retry. To lessen the likelihood of retries, we can send the + * request and wait for a small amount of time for the server to process it + * before reading the response. + */ #define ICE_PROCESSING_TIMEOUT 10 -/* Wait for a small timeout in ms to let server processing the DTLS request. */ +/** + * Wait for a short timeout in milliseconds to allow the server to process + * the Datagram Transport Layer Security (DTLS) request. If we immediately + * read the response after sending the request, we may receive nothing and + * need to immediately retry. To reduce the likelihood of retries, we can + * send the request and wait a short amount of time for the server to + * process it before attempting to read the response. + */ #define DTLS_PROCESSING_TIMEOUT 30 -/* The maximum number of retries for DTLS EGAIN. */ +/** + * The maximum number of retries for Datagram Transport Layer Security (DTLS) EAGAIN errors. + * When we send a DTLS request and receive no response, we may encounter an EAGAIN error. + * In this situation, we wait briefly and attempt to read the response again. + * We limit the maximum number of times we retry this loop. + */ #define DTLS_EAGAIN_RETRIES_MAX 5 typedef struct RTCContext { AVClass *av_class; - /* Input audio and video codec parameters */ + /* Parameters for the input audio and video codecs. 
*/ AVCodecParameters *audio_par; AVCodecParameters *video_par; @@ -1683,11 +1733,11 @@ static av_cold void rtc_deinit(AVFormatContext *s) #define OFFSET(x) offsetof(RTCContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { - { "ice_arq_max", "Maximum retransmissions for ICE ARQ", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "ice_arq_timeout", "Start timeout in ms for ICE ARQ", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, - { "dtls_arq_max", "Maximum retransmissions for DTLS ARQ", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "dtls_arq_timeout", "Start timeout in ms for DTLS ARQ", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, - { "pkt_size", "Maximum RTP packet size", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1500 }, -1, INT_MAX, DEC }, + { "ice_arq_max", "Maximum number of retransmissions for the ICE ARQ mechanism", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, + { "ice_arq_timeout", "Start timeout in milliseconds for the ICE ARQ mechanism", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, + { "dtls_arq_max", "Maximum number of retransmissions for the DTLS ARQ mechanism", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, + { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, + { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1500 }, -1, INT_MAX, DEC }, { NULL }, }; From d801061f050bbc15426456a2076421dfffc29c57 Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 12 May 2023 18:56:08 +0800 Subject: [PATCH 26/60] WHIP: Remote password should be used to generate the message integrity. 
--- libavformat/rtcenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 0a13ce3810caf..92176413cd5ff 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -810,7 +810,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in size = avio_tell(pb); buf[2] = (size - 20) >> 8; buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); + av_hmac_init(hmac, rtc->ice_pwd_remote, strlen(rtc->ice_pwd_remote)); av_hmac_update(hmac, buf, size - 24); av_hmac_final(hmac, buf + size - 20, 20); From a1a8335d935819f226ab6bbc158e77917c86fbaa Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 12 May 2023 22:25:20 +0800 Subject: [PATCH 27/60] Use AVPrint to generate the offer and answer for WebRTC SDP. --- libavformat/Makefile | 2 +- libavformat/rtcenc.c | 176 +++++++++++++++++++++++++++++++------------ 2 files changed, 128 insertions(+), 50 deletions(-) diff --git a/libavformat/Makefile b/libavformat/Makefile index 49f32bc47aae1..b980b1232126b 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -499,7 +499,7 @@ OBJS-$(CONFIG_RSD_DEMUXER) += rsd.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RSO_DEMUXER) += rsodec.o rso.o pcm.o OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o rawenc.o -OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o +OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o http.o srtp.o OBJS-$(CONFIG_RTP_MPEGTS_MUXER) += rtpenc_mpegts.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 92176413cd5ff..fdc57bd8d50c9 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -51,6 +51,7 @@ #include "srtp.h" #include "avc.h" #include "http.h" +#include "libavutil/bprint.h" /** * Maximum size limit of a Session Description Protocol (SDP), @@ -152,18 +153,18 @@ typedef struct RTCContext { uint8_t audio_payload_type; uint8_t video_payload_type; /** - * The SDP offer 
generated by the muxer according to the codec parameters, - * DTLS and ICE information. - * */ + * This is the SDP offer generated by the muxer based on the codec parameters, + * DTLS, and ICE information. + */ char *sdp_offer; /* The ICE username and pwd from remote server. */ char *ice_ufrag_remote; char *ice_pwd_remote; /** - * The ICE candidate protocol, priority, host and port. Note that only - * support one candidate for now. We will choose the first udp candidate. - * We will support multiple candidates in the future. + * This represents the ICE candidate protocol, priority, host and port. + * Currently, we only support one candidate and choose the first UDP candidate. + * However, we plan to support multiple candidates in the future. */ char *ice_protocol; char *ice_host; @@ -180,7 +181,8 @@ typedef struct RTCContext { /* The number of packets retransmitted for DTLS. */ int dtls_arq_packets; /** - * The material to build SRTP master key, generated by DTLS, the layout is: + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: * 16B 16B 14B 14B * client_key | server_key | client_salt | server_salt */ @@ -212,7 +214,19 @@ typedef struct RTCContext { static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); -/* Parse SPS/PPS from ISOM AVCC, see ff_isom_write_avcc */ +/** + * Parses the ISOM AVCC format of extradata and extracts SPS/PPS. + * + * This function is used to parse SPS/PPS from the extradata in ISOM AVCC format. + * It can handle both ISOM and annexb formats but only parses data in ISOM format. + * If the extradata is in annexb format, this function ignores it, and uses the entire + * extradata as a sequence header with SPS/PPS. Refer to ff_isom_write_avcc. 
+ * + * @param s Pointer to the AVFormatContext + * @param extradata Pointer to the extradata + * @param extradata_size Size of the extradata + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + */ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) { int ret = 0; @@ -223,7 +237,7 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata if (!extradata || !extradata_size) return 0; - /* Not H.264 IOSM format, may be annexb etc. */ + /* Not H.264 ISOM format, may be annexb etc. */ if (extradata_size < 4 || extradata[0] != 1) { if (!ff_avc_find_startcode(extradata, extradata + extradata_size)) { av_log(s, AV_LOG_ERROR, "Format must be ISOM or annexb\n"); @@ -273,6 +287,11 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata } rtc->avc_sps = av_malloc(rtc->avc_sps_size); + if (!rtc->avc_sps) { + ret = AVERROR(ENOMEM); + goto end; + } + ret = avio_read(pb, rtc->avc_sps, rtc->avc_sps_size); /* sps */ if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to read sps, size=%d\n", rtc->avc_sps_size); @@ -295,6 +314,11 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata } rtc->avc_pps = av_malloc(rtc->avc_pps_size); + if (!rtc->avc_pps) { + ret = AVERROR(ENOMEM); + goto end; + } + ret = avio_read(pb, rtc->avc_pps, rtc->avc_pps_size); /* pps */ if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to read pps, size=%d\n", rtc->avc_pps_size); @@ -307,11 +331,16 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata } /** - * Parse video SPS/PPS from extradata of codecpar, then check the codec. - * Only support video(h264) and audio(opus) for now. Note that only baseline - * and constrained baseline of h264 are supported. + * Parses video SPS/PPS from the extradata of codecpar and checks the codec. + * Currently only supports video(h264) and audio(opus). 
Note that only baseline + * and constrained baseline profiles of h264 are supported. * - * @return 0 if OK, AVERROR_xxx on error + * If the profile is less than 0, the function considers the profile as baseline. + * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting + * desktop and transcoding. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. */ static int parse_codec(AVFormatContext *s) { @@ -429,16 +458,17 @@ static int parse_codec(AVFormatContext *s) */ static int generate_sdp_offer(AVFormatContext *s) { - int ret, profile, level, profile_iop; + int ret = 0, profile, level, profile_iop; + AVBPrint bp; RTCContext *rtc = s->priv_data; - char *tmp = av_mallocz(MAX_SDP_SIZE); - if (!tmp) - return AVERROR(ENOMEM); + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); if (rtc->sdp_offer) { av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + goto end; } snprintf(rtc->ice_ufrag_local, sizeof(rtc->ice_ufrag_local), "%08x", @@ -453,7 +483,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_payload_type = 111; rtc->video_payload_type = 106; - ret = av_strlcatf(tmp, MAX_SDP_SIZE, + av_bprintf(&bp, "" "v=0\r\n" "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" "s=FFmpegPublishSession\r\n" @@ -461,14 +491,14 @@ static int generate_sdp_offer(AVFormatContext *s) "a=group:BUNDLE 0 1\r\n" "a=extmap-allow-mixed\r\n" "a=msid-semantic: WMS\r\n"); - if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); ret = AVERROR(EIO); goto end; } if (rtc->audio_par) { - ret = av_strlcatf(tmp, MAX_SDP_SIZE, "" + av_bprintf(&bp, "" "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" "c=IN IP4 0.0.0.0\r\n" 
"a=ice-ufrag:%s\r\n" @@ -490,8 +520,8 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_par->ch_layout.nb_channels, rtc->audio_ssrc, rtc->audio_ssrc); - if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); ret = AVERROR(EIO); goto end; } @@ -501,7 +531,7 @@ static int generate_sdp_offer(AVFormatContext *s) profile = rtc->video_par->profile < 0 ? 0xe0 : rtc->video_par->profile; level = rtc->video_par->level < 0 ? 30 : rtc->video_par->level; profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; - ret = av_strlcatf(tmp, MAX_SDP_SIZE, "" + av_bprintf(&bp, "" "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" "c=IN IP4 0.0.0.0\r\n" "a=ice-ufrag:%s\r\n" @@ -527,14 +557,14 @@ static int generate_sdp_offer(AVFormatContext *s) level, rtc->video_ssrc, rtc->video_ssrc); - if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, tmp); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); ret = AVERROR(EIO); goto end; } } - rtc->sdp_offer = av_strdup(tmp); + rtc->sdp_offer = av_strdup(bp.str); if (!rtc->sdp_offer) { ret = AVERROR(ENOMEM); goto end; @@ -542,7 +572,7 @@ static int generate_sdp_offer(AVFormatContext *s) av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); end: - av_free(tmp); + av_bprint_finalize(&bp, NULL); return ret; } @@ -591,13 +621,13 @@ static int exchange_sdp(AVFormatContext *s) { int ret; char buf[MAX_URL_SIZE]; + AVBPrint bp; RTCContext *rtc = s->priv_data; /* The URL context is an HTTP transport layer for the WHIP protocol. */ URLContext *whip_uc = NULL; - char *tmp = av_mallocz(MAX_SDP_SIZE); - if (!tmp) - return AVERROR(ENOMEM); + /* To prevent a crash during cleanup, always initialize it. 
*/ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { @@ -645,15 +675,15 @@ static int exchange_sdp(AVFormatContext *s) goto end; } - ret = av_strlcatf(tmp, MAX_SDP_SIZE, "%.*s", ret, buf); - if (ret >= MAX_SDP_SIZE) { - av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, tmp); + av_bprintf(&bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, bp.str); ret = AVERROR(EIO); goto end; } } - rtc->sdp_answer = av_strdup(tmp); + rtc->sdp_answer = av_strdup(bp.str); if (!rtc->sdp_answer) { ret = AVERROR(ENOMEM); goto end; @@ -662,14 +692,20 @@ static int exchange_sdp(AVFormatContext *s) end: ffurl_closep(&whip_uc); - av_free(tmp); + av_bprint_finalize(&bp, NULL); return ret; } /** - * Parse the ice ufrag, pwd and candidates from the answer. + * Parses the ICE ufrag, pwd, and candidates from the SDP answer. * - * @return 0 if OK, AVERROR_xxx on error + * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. + * It returns an error if any of these fields is NULL. The function only uses the first + * candidate if there are multiple candidates. However, support for multiple candidates + * will be added in the future. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. */ static int parse_answer(AVFormatContext *s) { @@ -757,10 +793,17 @@ static int parse_answer(AVFormatContext *s) } /** - * Create and marshal ICE binding request packet. The size of the response is - * returned in request_size. + * Creates and marshals an ICE binding request packet. * - * @return 0 if OK, AVERROR_xxx on error + * This function creates and marshals an ICE binding request packet. 
The function only + * generates the username attribute and does not include goog-network-info, ice-controlling, + * use-candidate, and priority. However, some of these attributes may be added in the future. + * + * @param s Pointer to the AVFormatContext + * @param buf Pointer to memory buffer to store the request packet + * @param buf_size Size of the memory buffer + * @param request_size Pointer to an integer that receives the size of the request packet + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. */ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) { @@ -835,10 +878,11 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in } /** - * Open the UDP transport and complete the ICE handshake. Use fast retransmit to + * Opens the UDP transport and completes the ICE handshake, using fast retransmit to * handle packet loss for the binding request. * - * @return 0 if OK, AVERROR_xxx on error + * @param s Pointer to the AVFormatContext + * @return Returns 0 if the handshake was successful or AVERROR_xxx in case of an error */ static int ice_handshake(AVFormatContext *s) { @@ -927,6 +971,9 @@ static int ice_handshake(AVFormatContext *s) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +/** + * Callback function to print the OpenSSL SSL status. + */ static void openssl_on_info(const SSL *dtls, int where, int ret) { int w, r1; @@ -1039,7 +1086,7 @@ static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) } /** - * Initialize DTLS context. + * Initializes DTLS context for client role using ECDHE. * * @return 0 if OK, AVERROR_xxx on error */ @@ -1117,6 +1164,10 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl return ret; } +/** + * Drives the SSL context by attempting to read packets to send from SSL, sending them + * over UDP, and then reading packets from UDP to feed back to SSL. 
+ */ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) { int ret, i, j, r0, r1, req_size, res_size = 0; @@ -1213,6 +1264,10 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO /** * DTLS handshake with server, as a client in active mode, using openssl. * + * This function initializes the SSL context as the client role using OpenSSL and + * then performs the DTLS handshake until success. Upon successful completion, it + * exports the SRTP material key. + * * @return 0 if OK, AVERROR_xxx on error */ static int openssl_dtls_handshake(AVFormatContext *s) @@ -1280,7 +1335,11 @@ static int openssl_dtls_handshake(AVFormatContext *s) #endif /** - * Setup the SRTP context by the exported keying material using DTLS. + * Establish the SRTP context using the keying material exported from DTLS. + * + * Create separate SRTP contexts for sending video and audio, as their sequences differ + * and should not share a single context. Generate a single SRTP context for receiving + * RTCP only. * * @return 0 if OK, AVERROR_xxx on error */ @@ -1339,7 +1398,13 @@ static int setup_srtp(AVFormatContext *s) } /** - * Create a RTP muxer to build RTP packets from the encoded frames. + * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP + * packets from the encoded frames. + * + * The corresponding SRTP context is utilized to encrypt each stream's RTP packets. For + * example, a video SRTP context is used for the video stream. Additionally, the + * "on_rtp_write_packet" callback function is set as the write function for each RTP + * muxer to send out encrypted RTP packets. 
* * @return 0 if OK, AVERROR_xxx on error */ @@ -1392,6 +1457,11 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; buffer = av_malloc(buffer_size); + if (!buffer) { + ret = AVERROR(ENOMEM); + goto end; + } + rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, on_rtp_write_packet, NULL); if (!rtp_ctx->pb) { ret = AVERROR(ENOMEM); @@ -1431,7 +1501,11 @@ static int create_rtp_muxer(AVFormatContext *s) } /** - * When RTP muxer builds and outputs a RTP packet, this callback will be called. + * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. + * + * This function modifies the video STAP packet, removing the markers, and updating the + * NRI of the first NALU. Additionally, it uses the corresponding SRTP context to encrypt + * the RTP packet, where the video packet is handled by the video SRTP context. */ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) { @@ -1495,7 +1569,11 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) } /** - * Insert the SPS/PPS before each IDR frame. + * Inserts the SPS/PPS data before each IDR (Instantaneous Decoder Refresh) frame. + * + * The SPS/PPS is parsed from the extradata. If it's in ISOM format, the SPS/PPS is + * multiplexed to the data field of the packet. If it's in annexb format, then the entire + * extradata is set to the data field of the packet. 
*/ static int insert_sps_pps_packet(AVFormatContext *s, AVPacket *pkt) { From ff7292004e6c3118aff71fa29b5b9669d6407cdc Mon Sep 17 00:00:00 2001 From: Haibo Chen <495810242@qq.com> Date: Fri, 12 May 2023 16:25:58 +0800 Subject: [PATCH 28/60] WHIP:Support send sctp --- libavformat/rtcenc.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index fdc57bd8d50c9..27a5dbfdf87e1 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -191,6 +191,7 @@ typedef struct RTCContext { /* The SRTP send context, to encrypt outgoing packets. */ struct SRTPContext srtp_audio_send; struct SRTPContext srtp_video_send; + struct SRTPContext srtp_rtcp_send; /* The SRTP receive context, to decrypt incoming packets. */ struct SRTPContext srtp_recv; @@ -1378,6 +1379,12 @@ static int setup_srtp(AVFormatContext *s) goto end; } + ret = ff_srtp_set_crypto(&rtc->srtp_rtcp_send, suite, buf); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + goto end; + } + /* Setup SRTP context for incoming packets */ if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { av_log(s, AV_LOG_ERROR, "Failed to encode recv key\n"); @@ -1475,7 +1482,6 @@ static int create_rtp_muxer(AVFormatContext *s) av_dict_set(&opts, "payload_type", buf, 0); snprintf(buf, sizeof(buf), "%d", is_video? rtc->video_ssrc : rtc->audio_ssrc); av_dict_set(&opts, "ssrc", buf, 0); - av_dict_set(&opts, "rtpflags", "4", 0); /* FF_RTP_FLAG_SKIP_RTCP */ ret = avformat_write_header(rtp_ctx, &opts); if (ret < 0) { @@ -1520,14 +1526,10 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) if (buf_size < 12 || (buf[0] & 0xC0) != 0x80) return 0; - /* RTCP is not supported yet. */ + /* Only support audio, video and rtcp. */ is_rtcp = buf[1] >= 192 && buf[1] <= 223; - if (is_rtcp) - return 0; - - /* Only support audio and video. 
*/ payload_type = buf[1] & 0x7f; - if (payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { + if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { return 0; } @@ -1552,7 +1554,8 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) } /* Encrypt by SRTP and send out. */ - srtp = payload_type == rtc->video_payload_type ? &rtc->srtp_video_send : &rtc->srtp_audio_send; + srtp = is_rtcp ? &rtc->srtp_rtcp_send + : payload_type == rtc->video_payload_type ? &rtc->srtp_video_send : &rtc->srtp_audio_send; cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); if (cipher_size <= 0 || cipher_size < buf_size) { av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); @@ -1805,6 +1808,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) ffurl_closep(&rtc->udp_uc); ff_srtp_free(&rtc->srtp_audio_send); ff_srtp_free(&rtc->srtp_video_send); + ff_srtp_free(&rtc->srtp_rtcp_send); ff_srtp_free(&rtc->srtp_recv); } From fc3a4a253e8e8ba7d8076c3b34830c5693b90361 Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 12 May 2023 23:29:15 +0800 Subject: [PATCH 29/60] WHIP: Refining SRTP context selection. --- libavformat/rtcenc.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 27a5dbfdf87e1..04b00b8f910f7 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1515,7 +1515,7 @@ static int create_rtp_muxer(AVFormatContext *s) */ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) { - int ret, cipher_size, is_rtcp; + int ret, cipher_size, is_rtcp, is_video; uint8_t payload_type, nalu_header; char cipher[MAX_UDP_BUFFER_SIZE]; AVFormatContext *s = opaque; @@ -1529,6 +1529,7 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) /* Only support audio, video and rtcp. 
*/ is_rtcp = buf[1] >= 192 && buf[1] <= 223; payload_type = buf[1] & 0x7f; + is_video = payload_type == rtc->video_payload_type; if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { return 0; } @@ -1538,7 +1539,7 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) * 1. The marker bit should be 0, never be 1. * 2. The NRI should equal to the first NALU's. */ - if (payload_type == rtc->video_payload_type && buf_size > 12) { + if (is_video && buf_size > 12) { nalu_header = buf[12] & 0x1f; if (nalu_header == NALU_TYPE_STAP_A) { /* Reset the marker bit to 0. */ @@ -1553,9 +1554,10 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) } } + /* Get the corresponding SRTP context. */ + srtp = is_rtcp ? &rtc->srtp_rtcp_send : (is_video? &rtc->srtp_video_send : &rtc->srtp_audio_send); + /* Encrypt by SRTP and send out. */ - srtp = is_rtcp ? &rtc->srtp_rtcp_send - : payload_type == rtc->video_payload_type ? &rtc->srtp_video_send : &rtc->srtp_audio_send; cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); if (cipher_size <= 0 || cipher_size < buf_size) { av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); From 45e6ece5c1c597a8cab255f36b6d08a831ed63f0 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 13 May 2023 11:18:49 +0800 Subject: [PATCH 30/60] WHIP: Refine code. --- libavformat/rtcenc.c | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 04b00b8f910f7..06bd690fecd37 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -215,6 +215,25 @@ typedef struct RTCContext { static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); +/** + * Initialize the WHIP muxer context, setup WHIP url, check packet size. 
+ */ +static int whip_init(AVFormatContext *s) +{ + int ideal_pkt_size = 532; + RTCContext *rtc = s->priv_data; + + av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", + rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); + + if (rtc->pkt_size < ideal_pkt_size) { + av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + rtc->pkt_size, ideal_pkt_size); + } + + return 0; +} + /** * Parses the ISOM AVCC format of extradata and extracts SPS/PPS. * @@ -1693,16 +1712,10 @@ static int whip_dispose(AVFormatContext *s) static av_cold int rtc_init(AVFormatContext *s) { - int ret, ideal_pkt_size = 532; - RTCContext *rtc = s->priv_data; - - av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", - rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); + int ret; - if (rtc->pkt_size < ideal_pkt_size) { - av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", - rtc->pkt_size, ideal_pkt_size); - } + if ((ret = whip_init(s)) < 0) + return ret; if ((ret = parse_codec(s)) < 0) return ret; From 6f965d6562b8c2e3c9aa73567f356efb95d7e7fc Mon Sep 17 00:00:00 2001 From: cloudwebrtc Date: Sat, 13 May 2023 20:10:01 +0800 Subject: [PATCH 31/60] Add bidirectional ICE binding, DTLS with ECDSA, and compatibility improvements 1. Implement bidirectional ICE binding support, allowing FFmpeg to send ICE binding requests to the server and vice versa 2. Add DTLS support with certificate initialization using ECDSA mode 3. Remove the mapped address attribute from binding responses in ice-lite mode, as server-reflexive addresses are not needed 4. Generate SDP offer fingerprint from the DTLS certificate 5. Set the use-candidate attribute of ICE binding requests to ensure compatibility with Janus 6.
Use ICE local password to sign ICE binding responses, as ICE always uses the responder's password for signing binding requests and responses 7. Ignore other messages, such as ICE indications, during DTLS handshaking --- libavformat/rtcenc.c | 518 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 434 insertions(+), 84 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 06bd690fecd37..d8ca30b769c6c 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -21,6 +21,10 @@ #include "config.h" +#ifndef CONFIG_OPENSSL +#error "DTLS is not supported, please enable openssl +#endif + #if CONFIG_OPENSSL #include #include @@ -128,6 +132,17 @@ */ #define DTLS_EAGAIN_RETRIES_MAX 5 +/* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ +#define STUN_MAGIC_COOKIE 0x2112A442 + +/* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ +enum StunAddr { + STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request + STUN_ATTR_USE_CANDIDATE = 0x0025, /// bind request + STUN_ATTR_MESSAGE_INTEGRITY = 0x0008, /// bind request/response + STUN_ATTR_FINGERPRINT = 0x8028, /// rfc5389 +}; + typedef struct RTCContext { AVClass *av_class; @@ -169,11 +184,15 @@ typedef struct RTCContext { char *ice_protocol; char *ice_host; int ice_port; + /* The SDP answer received from the WebRTC server. */ char *sdp_answer; /* The resource URL returned in the Location header of WHIP HTTP response. */ char *whip_resource_url; + /* The fingerprint of certificate, used in SDP offer. */ + char *dtls_fingerprint; + /* Whether the timer should be reset. */ int dtls_should_reset_timer; /* Whether the DTLS is done at least for us. */ @@ -216,9 +235,9 @@ typedef struct RTCContext { static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); /** - * Initialize the WHIP muxer context, setup WHIP url, check packet size. + * Check the options for the WebRTC muxer. 
*/ -static int whip_init(AVFormatContext *s) +static int whip_check_options(AVFormatContext *s) { int ideal_pkt_size = 532; RTCContext *rtc = s->priv_data; @@ -523,7 +542,7 @@ static int generate_sdp_offer(AVFormatContext *s) "c=IN IP4 0.0.0.0\r\n" "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=fingerprint:sha-256 %s\r\n" "a=setup:active\r\n" "a=mid:0\r\n" "a=sendonly\r\n" @@ -535,6 +554,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_payload_type, rtc->ice_ufrag_local, rtc->ice_pwd_local, + rtc->dtls_fingerprint, rtc->audio_payload_type, rtc->audio_par->sample_rate, rtc->audio_par->ch_layout.nb_channels, @@ -548,7 +568,7 @@ static int generate_sdp_offer(AVFormatContext *s) } if (rtc->video_par) { - profile = rtc->video_par->profile < 0 ? 0xe0 : rtc->video_par->profile; + profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; level = rtc->video_par->level < 0 ? 
30 : rtc->video_par->level; profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; av_bprintf(&bp, "" @@ -556,7 +576,7 @@ static int generate_sdp_offer(AVFormatContext *s) "c=IN IP4 0.0.0.0\r\n" "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54\r\n" + "a=fingerprint:sha-256 %s\r\n" "a=setup:active\r\n" "a=mid:1\r\n" "a=sendonly\r\n" @@ -570,6 +590,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->video_payload_type, rtc->ice_ufrag_local, rtc->ice_pwd_local, + rtc->dtls_fingerprint, rtc->video_payload_type, rtc->video_payload_type, profile & (~FF_PROFILE_H264_CONSTRAINED), @@ -846,7 +867,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in /* Write 20 bytes header */ avio_wb16(pb, 0x0001); /* STUN binding request */ avio_wb16(pb, 0); /* length */ - avio_wb32(pb, 0x2112A442); /* magic cookie */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ avio_wb32(pb, av_get_random_seed()); /* transaction ID */ @@ -861,13 +882,17 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in } /* Write the username attribute */ - avio_wb16(pb, 0x0006); /* attribute type username */ + avio_wb16(pb, STUN_ATTR_USERNAME); /* attribute type username */ avio_wb16(pb, ret); /* size of username */ avio_write(pb, username, ret); /* bytes of username */ ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + /* Write the use-candidate attribute */ + avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ + avio_wb16(pb, 0); /* size of use-candidate */ + /* Build and update message integrity */ - avio_wb16(pb, 0x0008); /* attribute type message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ avio_wb16(pb, 20); /* size of message 
integrity */ ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ size = avio_tell(pb); @@ -878,7 +903,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in av_hmac_final(hmac, buf + size - 20, 20); /* Write the fingerprint attribute */ - avio_wb16(pb, 0x8028); /* attribute type fingerprint */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ avio_wb16(pb, 4); /* size of fingerprint */ ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ size = avio_tell(pb); @@ -897,17 +922,142 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in return ret; } +/** + * Create an ICE binding response. + * + * This function generates an ICE binding response and writes it to the provided + * buffer. The response is signed using the local password for message integrity. + * + * @param s Pointer to the AVFormatContext structure. + * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. + * @param tid_size The size of the transaction ID, should be 12. + * @param buf Pointer to the buffer where the response will be written. + * @param buf_size The size of the buffer provided for the response. + * @param response_size Pointer to an integer that will store the size of the generated response. + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. + */ +static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) { + int ret = 0, size, crc32; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + RTCContext *rtc = s->priv_data; + + if (tid_size != 12) { + av_log(s, AV_LOG_ERROR, "Invalid transaction ID size. 
Expected 12, got %d\n", tid_size); + return AVERROR(EINVAL); + } + + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; + } + + /* Write 20 bytes header */ + avio_wb16(pb, 0x0101); /* STUN binding response */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_write(pb, tid, tid_size); /* transaction ID */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *response_size = size; + +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} + +static int ice_is_binding_request(char *buf, int buf_size) { + return buf_size > 1 && buf[0] == 0x00 && buf[1] == 0x01; +} + +static int ice_is_binding_response(char *buf, int buf_size) { + return buf_size > 1 && buf[0] == 0x01 && buf[1] == 0x01; +} + +/** + * This function handles incoming binding request messages by responding to 
them. + * If the message is not a binding request, it will be ignored. + */ +static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) { + int ret = 0, size; + char tid[12]; + uint8_t res_buf[MAX_UDP_BUFFER_SIZE]; + RTCContext *rtc = s->priv_data; + + /* Ignore if not a binding request. */ + if (!ice_is_binding_request(buf, buf_size)) + return ret; + + if (buf_size < 20) { + av_log(s, AV_LOG_ERROR, "Invalid STUN message size. Expected at least 20, got %d\n", buf_size); + return AVERROR(EINVAL); + } + + /* Parse transaction id from binding request in buf. */ + memcpy(tid, buf + 8, 12); + + /* Build the STUN binding response. */ + ret = ice_create_response(s, tid, sizeof(tid), res_buf, sizeof(res_buf), &size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding response, size=%d\n", size); + return ret; + } + + ret = ffurl_write(rtc->udp_uc, res_buf, size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding response, size=%d\n", size); + return ret; + } + + return 0; +} + /** * Opens the UDP transport and completes the ICE handshake, using fast retransmit to * handle packet loss for the binding request. * + * To initiate a fast retransmission of the STUN binding request during ICE, we wait only + * for a successful local ICE process i.e., when a binding response is received from the + * server. Since the server's binding request may not arrive, we do not always wait for it. + * However, we will always respond to the server's binding request during ICE, DTLS or + * RTP streaming. 
+ * * @param s Pointer to the AVFormatContext * @return Returns 0 if the handshake was successful or AVERROR_xxx in case of an error */ static int ice_handshake(AVFormatContext *s) { int ret, size; - char url[256], buf[MAX_UDP_BUFFER_SIZE]; + char url[256], tmp[16]; + char req_buf[MAX_UDP_BUFFER_SIZE], res_buf[MAX_UDP_BUFFER_SIZE]; RTCContext *rtc = s->priv_data; int fast_retries = rtc->ice_arq_max, timeout = rtc->ice_arq_timeout; @@ -922,8 +1072,8 @@ static int ice_handshake(AVFormatContext *s) av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); /* Set the max packet size to the buffer size. */ - snprintf(buf, sizeof(buf), "%d", rtc->pkt_size); - av_opt_set(rtc->udp_uc->priv_data, "pkt_size", buf, 0); + snprintf(tmp, sizeof(tmp), "%d", rtc->pkt_size); + av_opt_set(rtc->udp_uc->priv_data, "pkt_size", tmp, 0); ret = ffurl_connect(rtc->udp_uc, NULL); if (ret < 0) { @@ -936,15 +1086,15 @@ static int ice_handshake(AVFormatContext *s) rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; /* Build the STUN binding request. */ - ret = ice_create_request(s, buf, sizeof(buf), &size); + ret = ice_create_request(s, req_buf, sizeof(req_buf), &size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); goto end; } /* Fast retransmit the STUN binding request. */ - do { - ret = ffurl_write(rtc->udp_uc, buf, size); + while (1) { + ret = ffurl_write(rtc->udp_uc, req_buf, size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); goto end; @@ -956,7 +1106,7 @@ static int ice_handshake(AVFormatContext *s) #endif /* Read the STUN binding response. */ - ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); + ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); if (ret < 0) { /* If max retries is 6 and start timeout is 21ms, the total timeout * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. 
*/ @@ -967,15 +1117,50 @@ static int ice_handshake(AVFormatContext *s) fast_retries--; continue; } + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", rtc->ice_arq_max); goto end; } - } while (ret < 0); - if (ret < 2 || buf[0] != 0x01 || buf[1] != 0x01) { - av_log(s, AV_LOG_ERROR, "Invalid STUN binding response, size=%d, type=%02X%02X\n", ret, buf[0], buf[1]); - ret = AVERROR(EIO); - goto end; + /* If got any binding response, the fast retransmission is done. */ + if (ice_is_binding_response(res_buf, ret)) + break; + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(res_buf, ret)) { + if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { + goto end; + } + } + } + + /* Wait just for a small while to get the possible binding request from server. */ + fast_retries = rtc->ice_arq_max / 2; + timeout = rtc->ice_arq_timeout; + while (fast_retries) { + ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); + if (ret < 0) { + /* If max retries is 6 and start timeout is 21ms, the total timeout + * is about 21 + 42 + 84 = 147ms. */ + av_usleep(timeout * 1000); + timeout *= 2; + + if (ret == AVERROR(EAGAIN)) { + fast_retries--; + continue; + } + + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding request, retries=%d\n", rtc->ice_arq_max); + goto end; + } + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(res_buf, ret)) { + if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { + goto end; + } + break; + } } av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", @@ -991,6 +1176,142 @@ static int ice_handshake(AVFormatContext *s) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +/** + * Generate a self-signed certificate and private key for DTLS. 
+ */ +static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert) +{ + int ret = 0, serial, expire_day, i, n = 0; + AVBPrint fingerprint; + unsigned char md[EVP_MAX_MD_SIZE]; + const char *aor = "ffmpeg.org"; + X509_NAME* subject = NULL; + EC_GROUP *ecgroup = NULL; + EC_KEY* dtls_eckey = NULL; + RTCContext *rtc = s->priv_data; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); + + dtls_eckey = EC_KEY_new(); + + /* Should use the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 + */ + ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + + if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (EC_KEY_generate_key(dtls_eckey) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (EVP_PKEY_set1_EC_KEY(dtls_pkey, dtls_eckey) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + + /* Generate a self-signed certificate. 
*/ + subject = X509_NAME_new(); + + serial = (int)av_get_random_seed(); + if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (X509_set_issuer_name(dtls_cert, subject) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (X509_set_subject_name(dtls_cert, subject) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set subject name\n"); + ret = AVERROR(EINVAL); + goto end; + } + + expire_day = 365; + if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (X509_set_version(dtls_cert, 2) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set version\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (X509_set_pubkey(dtls_cert, dtls_pkey) != 1) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (!X509_sign(dtls_cert, dtls_pkey, EVP_sha1())) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); + ret = AVERROR(EINVAL); + goto end; + } + + /* Generate the fingerpint of certficate. 
*/ + if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { + av_log(s, AV_LOG_ERROR, "Failed to generate fingerprint\n"); + ret = AVERROR(EIO); + goto end; + } + for (i = 0; i < n; i++) { + av_bprintf(&fingerprint, "%02X", md[i]); + if (i < n - 1) + av_bprintf(&fingerprint, ":"); + } + if (!av_bprint_is_complete(&fingerprint)) { + av_log(s, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); + ret = AVERROR(EIO); + goto end; + } + if (!fingerprint.str || !strlen(fingerprint.str)) { + av_log(s, AV_LOG_ERROR, "Fingerprint is empty\n"); + ret = AVERROR(EINVAL); + goto end; + } + + rtc->dtls_fingerprint = av_strdup(fingerprint.str); + if (!rtc->dtls_fingerprint) { + ret = AVERROR(ENOMEM); + goto end; + } + +end: + EC_KEY_free(dtls_eckey); + EC_GROUP_free(ecgroup); + X509_NAME_free(subject); + av_bprint_finalize(&fingerprint, NULL); + return ret; +} + /** * Callback function to print the OpenSSL SSL status. */ @@ -1107,44 +1428,32 @@ static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) /** * Initializes DTLS context for client role using ECDHE. - * - * @return 0 if OK, AVERROR_xxx on error */ -static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtls_ctx, EVP_PKEY *dtls_pkey, EC_KEY *eckey) +static av_cold int openssl_init_dtls_context(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert, SSL_CTX *dtls_ctx) { - int ret; - RTCContext *rtc = s->priv_data; - - /* Should use the curves in ClientHello.supported_groups, for example: - * Supported Group: x25519 (0x001d) - * Supported Group: secp256r1 (0x0017) - * Supported Group: secp384r1 (0x0018) - * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 - */ - EC_GROUP *ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + int ret = 0; - if (EC_KEY_set_group(eckey, ecgroup) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + /* For ECDSA, we could set the curves list. 
*/ + if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); ret = AVERROR(EINVAL); goto end; } - if (EC_KEY_generate_key(eckey) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + + /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ + if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); ret = AVERROR(EINVAL); goto end; } - if (EVP_PKEY_set1_EC_KEY(dtls_pkey, eckey) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + /* Setup the certificate. */ + if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); ret = AVERROR(EINVAL); goto end; } - - /* For ECDSA, we could set the curves list. */ - SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256"); - - /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ - if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); + if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); ret = AVERROR(EINVAL); goto end; } @@ -1156,13 +1465,24 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. 
*/ SSL_CTX_set_read_ahead(dtls_ctx, 1); /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ - ret = SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80"); - if (ret) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed, ret=%d\n", ret); + if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { + av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); ret = AVERROR(EINVAL); goto end; } +end: + return ret; +} + +/** + * After creating a DTLS context, initialize the DTLS SSL object. + */ +static av_cold int openssl_init_dtls_ssl(AVFormatContext *s, SSL *dtls) +{ + int ret = 0; + RTCContext* rtc = s->priv_data; + /* Setup the callback for logging. */ SSL_set_ex_data(dtls, 0, s); SSL_set_info_callback(dtls, openssl_on_info); @@ -1179,8 +1499,6 @@ static av_cold int openssl_init_dtls(AVFormatContext *s, SSL *dtls, SSL_CTX *dtl SSL_set_connect_state(dtls); SSL_set_max_send_fragment(dtls, rtc->pkt_size); -end: - EC_GROUP_free(ecgroup); return ret; } @@ -1232,7 +1550,19 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); - /* Got response successfully. */ + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(buf, ret)) { + if ((ret = ice_handle_binding_request(s, buf, ret)) < 0) { + return ret; + } + } + + /* Ignore other packets, such as ICE indication, except DTLS. */ + if (ret < 13 || buf[0] <= 19 || buf[0] >= 64) + continue; + + /* Got DTLS response successfully. 
*/ if (ret > 0) { res_size = ret; rtc->dtls_should_reset_timer = 1; @@ -1278,7 +1608,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO return AVERROR(EIO); } - return 0; + return ret; } /** @@ -1290,29 +1620,31 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO * * @return 0 if OK, AVERROR_xxx on error */ -static int openssl_dtls_handshake(AVFormatContext *s) +static int openssl_dtls_handshake(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert) { int ret, loop; SSL_CTX *dtls_ctx = NULL; SSL *dtls = NULL; - BIO *bio_in = NULL, *bio_out = NULL; - EC_KEY *eckey = NULL; - EVP_PKEY *dtls_pkey = NULL; const char* dst = "EXTRACTOR-dtls_srtp"; + BIO *bio_in = NULL, *bio_out = NULL; RTCContext *rtc = s->priv_data; - /* Create and initialize SSL context. */ - dtls_pkey = EVP_PKEY_new(); - eckey = EC_KEY_new(); - dtls_ctx = SSL_CTX_new(DTLS_client_method()); + + ret = openssl_init_dtls_context(s, dtls_pkey, dtls_cert, dtls_ctx); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); + goto end; + } + + /* The dtls should not be created unless the dtls_ctx has been initialized. */ dtls = SSL_new(dtls_ctx); bio_in = BIO_new(BIO_s_mem()); bio_out = BIO_new(BIO_s_mem()); SSL_set_bio(dtls, bio_in, bio_out); - ret = openssl_init_dtls(s, dtls, dtls_ctx, dtls_pkey, eckey); + ret = openssl_init_dtls_ssl(s, dtls); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to initialize SSL context\n"); goto end; @@ -1346,8 +1678,6 @@ static int openssl_dtls_handshake(AVFormatContext *s) end: SSL_free(dtls); SSL_CTX_free(dtls_ctx); - EC_KEY_free(eckey); - EVP_PKEY_free(dtls_pkey); return ret; } @@ -1714,41 +2044,58 @@ static av_cold int rtc_init(AVFormatContext *s) { int ret; - if ((ret = whip_init(s)) < 0) +#if CONFIG_OPENSSL + /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ + X509 *dtls_cert = X509_new(); + /* The private key for DTLS handshake. 
*/ + EVP_PKEY *dtls_pkey = EVP_PKEY_new(); + + if ((ret = openssl_init_cert(s, dtls_pkey, dtls_cert)) < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to init openssl cert, ret=%d\n", ret); return ret; + } +#endif + + if ((ret = whip_check_options(s)) < 0) + goto end; if ((ret = parse_codec(s)) < 0) - return ret; + goto end; if ((ret = generate_sdp_offer(s)) < 0) - return ret; + goto end; if ((ret = exchange_sdp(s)) < 0) - return ret; + goto end; if ((ret = parse_answer(s)) < 0) - return ret; - - return 0; -} - -static int rtc_write_header(AVFormatContext *s) -{ - int ret; + goto end; if ((ret = ice_handshake(s)) < 0) - return ret; + goto end; #if CONFIG_OPENSSL - if ((ret = openssl_dtls_handshake(s)) < 0) - return ret; -#else - av_log(s, AV_LOG_ERROR, "DTLS is not supported, please enable openssl\n"); - return AVERROR(ENOSYS); + if ((ret = openssl_dtls_handshake(s, dtls_pkey, dtls_cert)) < 0) + goto end; #endif if ((ret = setup_srtp(s)) < 0) - return ret; + goto end; + +end: +#if CONFIG_OPENSSL +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" + X509_free(dtls_cert); + EVP_PKEY_free(dtls_pkey); +#pragma GCC diagnostic pop +#endif + return ret; +} + +static int rtc_write_header(AVFormatContext *s) +{ + int ret; if ((ret = create_rtp_muxer(s)) < 0) return ret; @@ -1764,6 +2111,8 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) AVFormatContext *rtp_ctx = st->priv_data; /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + /* TODO: Receive packets from the server such as ICE binding requests, DTLS messages, + * and RTCP like PLI requests, then respond to them.*/ /* For audio OPUS stream, correct the timestamp. 
*/ if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { @@ -1825,6 +2174,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) ff_srtp_free(&rtc->srtp_video_send); ff_srtp_free(&rtc->srtp_rtcp_send); ff_srtp_free(&rtc->srtp_recv); + av_freep(&rtc->dtls_fingerprint); } #define OFFSET(x) offsetof(RTCContext, x) From 3fd676778b048224998a02ecd3d766f925c8dc61 Mon Sep 17 00:00:00 2001 From: winlin Date: Mon, 15 May 2023 18:09:18 +0800 Subject: [PATCH 32/60] Add missing closing quote to error message --- libavformat/rtcenc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d8ca30b769c6c..44fbfde5c7a41 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -22,7 +22,7 @@ #include "config.h" #ifndef CONFIG_OPENSSL -#error "DTLS is not supported, please enable openssl +#error "DTLS is not supported, please enable openssl" #endif #if CONFIG_OPENSSL From 68ef7ad27bc0984485f61d108585c0c25fee86e3 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 17 May 2023 08:40:10 +0800 Subject: [PATCH 33/60] WHIP: Extract DTLSContext from RTCContext. --- libavformat/rtcenc.c | 329 ++++++++++++++++++++++++------------------- 1 file changed, 185 insertions(+), 144 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 44fbfde5c7a41..40ba4cbbd0648 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -136,13 +136,60 @@ #define STUN_MAGIC_COOKIE 0x2112A442 /* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ -enum StunAddr { +enum StunAttr { STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request STUN_ATTR_USE_CANDIDATE = 0x0025, /// bind request STUN_ATTR_MESSAGE_INTEGRITY = 0x0008, /// bind request/response STUN_ATTR_FINGERPRINT = 0x8028, /// rfc5389 }; +#if CONFIG_OPENSSL +typedef struct DTLSContext { + /* For av_log to write log to this category. */ + void *log_avcl; + + /* The private key for DTLS handshake.
*/ + EVP_PKEY *dtls_pkey; + /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ + X509 *dtls_cert; + /* The fingerprint of certificate, used in SDP offer. */ + char *dtls_fingerprint; + + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; + + /* Whether the timer should be reset. */ + int dtls_should_reset_timer; + /* Whether the DTLS is done at least for us. */ + int dtls_done_for_us; + /* The number of packets retransmitted for DTLS. */ + int dtls_arq_packets; + + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + URLContext *udp_uc; + + /* The maximum number of retries for ICE transmission. */ + int ice_arq_max; + /* The step start timeout in ms for ICE transmission. */ + int ice_arq_timeout; + /* The maximum number of retries for DTLS transmission. */ + int dtls_arq_max; + /* The step start timeout in ms for DTLS transmission. */ + int dtls_arq_timeout; + /* The size of RTP packet, should generally be set to MTU. */ + int pkt_size; +} DTLSContext; + +static av_cold int dtls_context_init(DTLSContext *ctx); +static int dtls_context_handshake(DTLSContext *ctx); +static av_cold void dtls_context_deinit(DTLSContext *ctx); +#endif + typedef struct RTCContext { AVClass *av_class; @@ -190,22 +237,8 @@ typedef struct RTCContext { /* The resource URL returned in the Location header of WHIP HTTP response. */ char *whip_resource_url; - /* The fingerprint of certificate, used in SDP offer. */ - char *dtls_fingerprint; - - /* Whether the timer should be reset. */ - int dtls_should_reset_timer; - /* Whether the DTLS is done at least for us. */ - int dtls_done_for_us; - /* The number of packets retransmitted for DTLS. 
*/ - int dtls_arq_packets; - /** - * This represents the material used to build the SRTP master key. It is - * generated by DTLS and has the following layout: - * 16B 16B 14B 14B - * client_key | server_key | client_salt | server_salt - */ - uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; + /* The DTLS context. */ + DTLSContext dtls_ctx; /* The SRTP send context, to encrypt outgoing packets. */ struct SRTPContext srtp_audio_send; @@ -235,13 +268,27 @@ typedef struct RTCContext { static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); /** - * Check the options for the WebRTC muxer. + * Initialize and check the options for the WebRTC muxer. */ -static int whip_check_options(AVFormatContext *s) +static av_cold int whip_init(AVFormatContext *s) { - int ideal_pkt_size = 532; + int ret, ideal_pkt_size = 532; RTCContext *rtc = s->priv_data; + /* Use the same logging context as AV format. */ + rtc->dtls_ctx.log_avcl = s; + rtc->dtls_ctx.udp_uc = rtc->udp_uc; + rtc->dtls_ctx.ice_arq_max = rtc->ice_arq_max; + rtc->dtls_ctx.ice_arq_timeout = rtc->ice_arq_timeout; + rtc->dtls_ctx.dtls_arq_max = rtc->dtls_arq_max; + rtc->dtls_ctx.dtls_arq_timeout = rtc->dtls_arq_timeout; + rtc->dtls_ctx.pkt_size = rtc->pkt_size; + + if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); + return ret; + } + av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); @@ -554,7 +601,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_payload_type, rtc->ice_ufrag_local, rtc->ice_pwd_local, - rtc->dtls_fingerprint, + rtc->dtls_ctx.dtls_fingerprint, rtc->audio_payload_type, rtc->audio_par->sample_rate, rtc->audio_par->ch_layout.nb_channels, @@ -590,7 +637,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->video_payload_type, 
rtc->ice_ufrag_local, rtc->ice_pwd_local, - rtc->dtls_fingerprint, + rtc->dtls_ctx.dtls_fingerprint, rtc->video_payload_type, rtc->video_payload_type, profile & (~FF_PROFILE_H264_CONSTRAINED), @@ -1179,7 +1226,7 @@ static int ice_handshake(AVFormatContext *s) /** * Generate a self-signed certificate and private key for DTLS. */ -static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert) +static av_cold int dtls_context_init(DTLSContext *ctx) { int ret = 0, serial, expire_day, i, n = 0; AVBPrint fingerprint; @@ -1188,13 +1235,17 @@ static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls X509_NAME* subject = NULL; EC_GROUP *ecgroup = NULL; EC_KEY* dtls_eckey = NULL; - RTCContext *rtc = s->priv_data; + EVP_PKEY *dtls_pkey = NULL; + X509 *dtls_cert = NULL; + void *s1 = ctx->log_avcl; + + ctx->dtls_cert = dtls_cert = X509_new(); + ctx->dtls_pkey = dtls_pkey = EVP_PKEY_new(); + dtls_eckey = EC_KEY_new(); /* To prevent a crash during cleanup, always initialize it. 
*/ av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); - dtls_eckey = EC_KEY_new(); - /* Should use the curves in ClientHello.supported_groups, for example: * Supported Group: x25519 (0x001d) * Supported Group: secp256r1 (0x0017) @@ -1204,17 +1255,17 @@ static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); ret = AVERROR(EINVAL); goto end; } if (EC_KEY_generate_key(dtls_eckey) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); ret = AVERROR(EINVAL); goto end; } if (EVP_PKEY_set1_EC_KEY(dtls_pkey, dtls_eckey) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); ret = AVERROR(EINVAL); goto end; } @@ -1224,61 +1275,61 @@ static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls serial = (int)av_get_random_seed(); if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); ret = AVERROR(EINVAL); goto end; } if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); ret = AVERROR(EINVAL); goto end; } if (X509_set_issuer_name(dtls_cert, subject) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); ret = AVERROR(EINVAL); goto end; } if (X509_set_subject_name(dtls_cert, subject) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set subject name\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: 
Failed to set subject name\n"); ret = AVERROR(EINVAL); goto end; } expire_day = 365; if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); ret = AVERROR(EINVAL); goto end; } if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); ret = AVERROR(EINVAL); goto end; } if (X509_set_version(dtls_cert, 2) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set version\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set version\n"); ret = AVERROR(EINVAL); goto end; } if (X509_set_pubkey(dtls_cert, dtls_pkey) != 1) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); ret = AVERROR(EINVAL); goto end; } if (!X509_sign(dtls_cert, dtls_pkey, EVP_sha1())) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); ret = AVERROR(EINVAL); goto end; } /* Generate the fingerpint of certficate. 
*/ if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { - av_log(s, AV_LOG_ERROR, "Failed to generate fingerprint\n"); + av_log(s1, AV_LOG_ERROR, "Failed to generate fingerprint\n"); ret = AVERROR(EIO); goto end; } @@ -1288,22 +1339,24 @@ static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls av_bprintf(&fingerprint, ":"); } if (!av_bprint_is_complete(&fingerprint)) { - av_log(s, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); + av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); ret = AVERROR(EIO); goto end; } if (!fingerprint.str || !strlen(fingerprint.str)) { - av_log(s, AV_LOG_ERROR, "Fingerprint is empty\n"); + av_log(s1, AV_LOG_ERROR, "Fingerprint is empty\n"); ret = AVERROR(EINVAL); goto end; } - rtc->dtls_fingerprint = av_strdup(fingerprint.str); - if (!rtc->dtls_fingerprint) { + ctx->dtls_fingerprint = av_strdup(fingerprint.str); + if (!ctx->dtls_fingerprint) { ret = AVERROR(ENOMEM); goto end; } + av_log(s1, AV_LOG_INFO, "DTLS: Fingerprint %s\n", ctx->dtls_fingerprint); + end: EC_KEY_free(dtls_eckey); EC_GROUP_free(ecgroup); @@ -1312,6 +1365,16 @@ static int openssl_init_cert(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls return ret; } +/** + * Cleanup the DTLS context. + */ +static av_cold void dtls_context_deinit(DTLSContext *ctx) +{ + X509_free(ctx->dtls_cert); + EVP_PKEY_free(ctx->dtls_pkey); + av_freep(&ctx->dtls_fingerprint); +} + /** * Callback function to print the OpenSSL SSL status. 
*/ @@ -1319,7 +1382,8 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) { int w, r1; const char *method, *alert_type, *alert_desc; - AVFormatContext *s = (AVFormatContext*)SSL_get_ex_data(dtls, 0); + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + void *s1 = ctx->log_avcl; w = where & ~SSL_ST_MASK; if (w & SSL_ST_CONNECT) { @@ -1332,7 +1396,7 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) r1 = SSL_get_error(dtls, ret); if (where & SSL_CB_LOOP) { - av_log(s, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); } else if (where & SSL_CB_ALERT) { method = (where & SSL_CB_READ) ? "read":"write"; @@ -1341,22 +1405,22 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) alert_desc = SSL_alert_desc_string(ret); if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) { - av_log(s, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); } else { - av_log(s, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); } } else if (where & SSL_CB_EXIT) { if (!ret) { - av_log(s, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); } else if (ret < 0) { if (r1 != 
SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) { - av_log(s, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); } else { - av_log(s, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); } } @@ -1365,33 +1429,33 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) { - AVFormatContext *s = (AVFormatContext*)SSL_get_ex_data(dtls, 0); - RTCContext *rtc = s->priv_data; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + void *s1 = ctx->log_avcl; /* Double the timeout, note that it may be 0. */ unsigned int timeout_us = previous_us * 2; /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. * when got ServerHello, we should reset the timer. */ - if (!previous_us || rtc->dtls_should_reset_timer) { - timeout_us = rtc->dtls_arq_timeout * 1000; /* in us */ + if (!previous_us || ctx->dtls_should_reset_timer) { + timeout_us = ctx->dtls_arq_timeout * 1000; /* in us */ } /* never exceed the max timeout. 
*/ timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ - av_log(s, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", + av_log(s1, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", timeout_us / 1000, previous_us / 1000); return timeout_us; } -static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, int incoming, int r0, int r1) +static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming, int r0, int r1) { uint8_t content_type = 0; uint16_t size = 0; uint8_t handshake_type = 0; - RTCContext *rtc = s->priv_data; + void *s1 = ctx->log_avcl; /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ if (length >= 1) { @@ -1406,8 +1470,8 @@ static void openssl_state_trace(AVFormatContext *s, uint8_t *data, int length, i handshake_type = (uint8_t)data[13]; } - av_log(s, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", - "Active", (incoming? "RECV":"SEND"), rtc->dtls_done_for_us, rtc->dtls_arq_packets, r0, r1, length, + av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", + "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, r0, r1, length, content_type, size, handshake_type); } @@ -1429,31 +1493,34 @@ static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) /** * Initializes DTLS context for client role using ECDHE. */ -static av_cold int openssl_init_dtls_context(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert, SSL_CTX *dtls_ctx) +static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx) { int ret = 0; + void *s1 = ctx->log_avcl; + EVP_PKEY *dtls_pkey = ctx->dtls_pkey; + X509 *dtls_cert = ctx->dtls_cert; /* For ECDSA, we could set the curves list. 
*/ if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); ret = AVERROR(EINVAL); goto end; } /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); ret = AVERROR(EINVAL); goto end; } /* Setup the certificate. */ if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); ret = AVERROR(EINVAL); goto end; } if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); ret = AVERROR(EINVAL); goto end; } @@ -1466,7 +1533,7 @@ static av_cold int openssl_init_dtls_context(AVFormatContext *s, EVP_PKEY *dtls_ SSL_CTX_set_read_ahead(dtls_ctx, 1); /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); ret = AVERROR(EINVAL); goto end; } @@ -1478,26 +1545,25 @@ static av_cold int openssl_init_dtls_context(AVFormatContext *s, EVP_PKEY *dtls_ /** * After creating a DTLS context, initialize the DTLS SSL object. */ -static av_cold int openssl_init_dtls_ssl(AVFormatContext *s, SSL *dtls) +static av_cold int openssl_init_dtls_ssl(DTLSContext *ctx, SSL *dtls) { int ret = 0; - RTCContext* rtc = s->priv_data; /* Setup the callback for logging. 
*/ - SSL_set_ex_data(dtls, 0, s); + SSL_set_ex_data(dtls, 0, ctx); SSL_set_info_callback(dtls, openssl_on_info); /* Set dtls fragment size */ SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. */ - SSL_set_mtu(dtls, rtc->pkt_size); + SSL_set_mtu(dtls, ctx->pkt_size); /* Set the callback for ARQ timer. */ DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); /* Setup DTLS as active, which is client role. */ SSL_set_connect_state(dtls); - SSL_set_max_send_fragment(dtls, rtc->pkt_size); + SSL_set_max_send_fragment(dtls, ctx->pkt_size); return ret; } @@ -1506,12 +1572,12 @@ static av_cold int openssl_init_dtls_ssl(AVFormatContext *s, SSL *dtls) * Drives the SSL context by attempting to read packets to send from SSL, sending them * over UDP, and then reading packets from UDP to feed back to SSL. */ -static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) +static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) { int ret, i, j, r0, r1, req_size, res_size = 0; uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; char buf[MAX_UDP_BUFFER_SIZE]; - RTCContext *rtc = s->priv_data; + void *s1 = ctx->log_avcl; /* Drive the SSL context by state change, arq or response messages. */ r0 = SSL_do_handshake(dtls); @@ -1519,26 +1585,26 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO /* Handshake successfully done */ if (r0 == 1) { - rtc->dtls_done_for_us = 1; + ctx->dtls_done_for_us = 1; return 0; } /* Handshake failed with fatal error */ if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { - av_log(s, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); + av_log(s1, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); return AVERROR(EIO); } /* Fast retransmit the request util got response. 
*/ - for (i = 0; i <= rtc->dtls_arq_max && !res_size; i++) { + for (i = 0; i <= ctx->dtls_arq_max && !res_size; i++) { req_size = BIO_get_mem_data(bio_out, (char**)&data); - openssl_state_trace(s, data, req_size, 0, r0, r1); - ret = ffurl_write(rtc->udp_uc, data, req_size); + openssl_state_trace(ctx, data, req_size, 0, r0, r1); + ret = ffurl_write(ctx->udp_uc, data, req_size); BIO_reset(bio_out); req_ct = req_size > 0 ? data[0] : 0; req_ht = req_size > 13 ? data[13] : 0; if (ret < 0) { - av_log(s, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", loop, req_ct, req_ht, req_size); return ret; } @@ -1549,14 +1615,7 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO #endif for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { - ret = ffurl_read(rtc->udp_uc, buf, sizeof(buf)); - - /* When a binding request is received, it is necessary to respond immediately. */ - if (ice_is_binding_request(buf, ret)) { - if ((ret = ice_handle_binding_request(s, buf, ret)) < 0) { - return ret; - } - } + ret = ffurl_read(ctx->udp_uc, buf, sizeof(buf)); /* Ignore other packets, such as ICE indication, except DTLS. */ if (ret < 13 || buf[0] <= 19 || buf[0] >= 64) @@ -1565,13 +1624,13 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO /* Got DTLS response successfully. */ if (ret > 0) { res_size = ret; - rtc->dtls_should_reset_timer = 1; + ctx->dtls_should_reset_timer = 1; break; } /* Fatal error or timeout. 
*/ if (ret != AVERROR(EAGAIN)) { - av_log(s, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", loop, req_ct, req_ht); return ret; } @@ -1582,28 +1641,28 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO * occurs, it returns -1. */ r0 = DTLSv1_handle_timeout(dtls); if (!r0) { - av_usleep(rtc->dtls_arq_timeout * 1000); + av_usleep(ctx->dtls_arq_timeout * 1000); continue; /* no timeout had expired. */ } if (r0 != 1) { r1 = SSL_get_error(dtls, r0); - av_log(s, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", loop, req_ct, req_ht, r0, r1); return AVERROR(EIO); } - rtc->dtls_arq_packets++; + ctx->dtls_arq_packets++; break; } } /* Trace the response packet, feed to SSL. */ BIO_reset(bio_in); - openssl_state_trace(s, buf, res_size, 1, r0, SSL_ERROR_NONE); + openssl_state_trace(ctx, buf, res_size, 1, r0, SSL_ERROR_NONE); res_ct = res_size > 0 ? buf[0]: 0; res_ht = res_size > 13 ? 
buf[13] : 0; if ((r0 = BIO_write(bio_in, buf, res_size)) <= 0) { - av_log(s, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", + av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", loop, res_ct, res_ht, res_size, r0); return AVERROR(EIO); } @@ -1620,20 +1679,20 @@ static int openssl_drive_context(AVFormatContext *s, SSL *dtls, BIO *bio_in, BIO * * @return 0 if OK, AVERROR_xxx on error */ -static int openssl_dtls_handshake(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 *dtls_cert) +static int dtls_context_handshake(DTLSContext *ctx) { int ret, loop; SSL_CTX *dtls_ctx = NULL; SSL *dtls = NULL; const char* dst = "EXTRACTOR-dtls_srtp"; BIO *bio_in = NULL, *bio_out = NULL; - RTCContext *rtc = s->priv_data; + void *s1 = ctx->log_avcl; dtls_ctx = SSL_CTX_new(DTLS_client_method()); - ret = openssl_init_dtls_context(s, dtls_pkey, dtls_cert, dtls_ctx); + ret = openssl_init_dtls_context(ctx, dtls_ctx); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); + av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); goto end; } @@ -1644,36 +1703,36 @@ static int openssl_dtls_handshake(AVFormatContext *s, EVP_PKEY *dtls_pkey, X509 bio_out = BIO_new(BIO_s_mem()); SSL_set_bio(dtls, bio_in, bio_out); - ret = openssl_init_dtls_ssl(s, dtls); + ret = openssl_init_dtls_ssl(ctx, dtls); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to initialize SSL context\n"); + av_log(s1, AV_LOG_ERROR, "Failed to initialize SSL context\n"); goto end; } - for (loop = 0; loop < 64 && !rtc->dtls_done_for_us; loop++) { - ret = openssl_drive_context(s, dtls, bio_in, bio_out, loop); + for (loop = 0; loop < 64 && !ctx->dtls_done_for_us; loop++) { + ret = openssl_drive_context(ctx, dtls, bio_in, bio_out, loop); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to drive SSL context\n"); + av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context\n"); goto end; } } - if 
(!rtc->dtls_done_for_us) { - av_log(s, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); + if (!ctx->dtls_done_for_us) { + av_log(s1, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); ret = AVERROR(EIO); goto end; } /* Export SRTP master key after DTLS done */ - ret = SSL_export_keying_material(dtls, rtc->dtls_srtp_material, sizeof(rtc->dtls_srtp_material), + ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), dst, strlen(dst), NULL, 0, 0); if (!ret) { - av_log(s, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); + av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); ret = AVERROR(EIO); goto end; } - av_log(s, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", - rtc->dtls_done_for_us, rtc->dtls_arq_packets, sizeof(rtc->dtls_srtp_material)); + av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", + ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material)); end: SSL_free(dtls); @@ -1702,12 +1761,12 @@ static int setup_srtp(AVFormatContext *s) RTCContext *rtc = s->priv_data; /* As DTLS client, the send key is client master key plus salt. */ - memcpy(send_key, rtc->dtls_srtp_material, 16); - memcpy(send_key + 16, rtc->dtls_srtp_material + 32, 14); + memcpy(send_key, rtc->dtls_ctx.dtls_srtp_material, 16); + memcpy(send_key + 16, rtc->dtls_ctx.dtls_srtp_material + 32, 14); /* As DTLS client, the recv key is server master key plus salt. 
*/ - memcpy(recv_key, rtc->dtls_srtp_material + 16, 16); - memcpy(recv_key + 16, rtc->dtls_srtp_material + 46, 14); + memcpy(recv_key, rtc->dtls_ctx.dtls_srtp_material + 16, 16); + memcpy(recv_key + 16, rtc->dtls_ctx.dtls_srtp_material + 46, 14); /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { @@ -2043,53 +2102,35 @@ static int whip_dispose(AVFormatContext *s) static av_cold int rtc_init(AVFormatContext *s) { int ret; + RTCContext *rtc = s->priv_data; -#if CONFIG_OPENSSL - /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ - X509 *dtls_cert = X509_new(); - /* The private key for DTLS handshake. */ - EVP_PKEY *dtls_pkey = EVP_PKEY_new(); - - if ((ret = openssl_init_cert(s, dtls_pkey, dtls_cert)) < 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to init openssl cert, ret=%d\n", ret); + if ((ret = whip_init(s)) < 0) return ret; - } -#endif - - if ((ret = whip_check_options(s)) < 0) - goto end; if ((ret = parse_codec(s)) < 0) - goto end; + return ret; if ((ret = generate_sdp_offer(s)) < 0) - goto end; + return ret; if ((ret = exchange_sdp(s)) < 0) - goto end; + return ret; if ((ret = parse_answer(s)) < 0) - goto end; + return ret; if ((ret = ice_handshake(s)) < 0) - goto end; + return ret; -#if CONFIG_OPENSSL - if ((ret = openssl_dtls_handshake(s, dtls_pkey, dtls_cert)) < 0) - goto end; -#endif + /* Now UDP URL context is ready, setup the DTLS transport. 
*/ + rtc->dtls_ctx.udp_uc = rtc->udp_uc; + + if ((ret = dtls_context_handshake(&rtc->dtls_ctx)) < 0) + return ret; if ((ret = setup_srtp(s)) < 0) - goto end; + return ret; -end: -#if CONFIG_OPENSSL -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - X509_free(dtls_cert); - EVP_PKEY_free(dtls_pkey); -#pragma GCC diagnostic pop -#endif return ret; } @@ -2174,7 +2215,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) ff_srtp_free(&rtc->srtp_video_send); ff_srtp_free(&rtc->srtp_rtcp_send); ff_srtp_free(&rtc->srtp_recv); - av_freep(&rtc->dtls_fingerprint); + dtls_context_deinit(&rtc->dtls_ctx); } #define OFFSET(x) offsetof(RTCContext, x) From c7b146b7b670e12a894e6a22b5c4736c38541497 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 17 May 2023 08:56:41 +0800 Subject: [PATCH 34/60] WHIP: Reorder functions, nothing changed. --- libavformat/rtcenc.c | 2477 +++++++++++++++++++++--------------------- 1 file changed, 1235 insertions(+), 1242 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 40ba4cbbd0648..6e57e720276a4 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -173,10 +173,6 @@ typedef struct DTLSContext { /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; - /* The maximum number of retries for ICE transmission. */ - int ice_arq_max; - /* The step start timeout in ms for ICE transmission. */ - int ice_arq_timeout; /* The maximum number of retries for DTLS transmission. */ int dtls_arq_max; /* The step start timeout in ms for DTLS transmission. */ @@ -185,1564 +181,1561 @@ typedef struct DTLSContext { int pkt_size; } DTLSContext; -static av_cold int dtls_context_init(DTLSContext *ctx); -static int dtls_context_handshake(DTLSContext *ctx); -static av_cold void dtls_context_deinit(DTLSContext *ctx); -#endif - -typedef struct RTCContext { - AVClass *av_class; - - /* Parameters for the input audio and video codecs. 
*/ - AVCodecParameters *audio_par; - AVCodecParameters *video_par; - - /* The SPS/PPS of AVC video */ - uint8_t *avc_sps; - int avc_sps_size; - uint8_t *avc_pps; - int avc_pps_size; - /* The size of NALU in ISOM format. */ - int avc_nal_length_size; - - /* The ICE username and pwd fragment generated by the muxer. */ - char ice_ufrag_local[9]; - char ice_pwd_local[33]; - /* The SSRC of the audio and video stream, generated by the muxer. */ - uint32_t audio_ssrc; - uint32_t video_ssrc; - /* The PT(Payload Type) of stream, generated by the muxer. */ - uint8_t audio_payload_type; - uint8_t video_payload_type; - /** - * This is the SDP offer generated by the muxer based on the codec parameters, - * DTLS, and ICE information. - */ - char *sdp_offer; - - /* The ICE username and pwd from remote server. */ - char *ice_ufrag_remote; - char *ice_pwd_remote; - /** - * This represents the ICE candidate protocol, priority, host and port. - * Currently, we only support one candidate and choose the first UDP candidate. - * However, we plan to support multiple candidates in the future. - */ - char *ice_protocol; - char *ice_host; - int ice_port; - - /* The SDP answer received from the WebRTC server. */ - char *sdp_answer; - /* The resource URL returned in the Location header of WHIP HTTP response. */ - char *whip_resource_url; - - /* The DTLS context. */ - DTLSContext dtls_ctx; - - /* The SRTP send context, to encrypt outgoing packets. */ - struct SRTPContext srtp_audio_send; - struct SRTPContext srtp_video_send; - struct SRTPContext srtp_rtcp_send; - /* The SRTP receive context, to decrypt incoming packets. */ - struct SRTPContext srtp_recv; - - /* The time jitter base for audio OPUS stream. */ - int64_t audio_jitter_base; - - /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ - URLContext *udp_uc; - - /* The maximum number of retries for ICE transmission. */ - int ice_arq_max; - /* The step start timeout in ms for ICE transmission. 
*/ - int ice_arq_timeout; - /* The maximum number of retries for DTLS transmission. */ - int dtls_arq_max; - /* The step start timeout in ms for DTLS transmission. */ - int dtls_arq_timeout; - /* The size of RTP packet, should generally be set to MTU. */ - int pkt_size; -} RTCContext; - -static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" /** - * Initialize and check the options for the WebRTC muxer. + * Generate a self-signed certificate and private key for DTLS. */ -static av_cold int whip_init(AVFormatContext *s) +static av_cold int dtls_context_init(DTLSContext *ctx) { - int ret, ideal_pkt_size = 532; - RTCContext *rtc = s->priv_data; + int ret = 0, serial, expire_day, i, n = 0; + AVBPrint fingerprint; + unsigned char md[EVP_MAX_MD_SIZE]; + const char *aor = "ffmpeg.org"; + X509_NAME* subject = NULL; + EC_GROUP *ecgroup = NULL; + EC_KEY* dtls_eckey = NULL; + EVP_PKEY *dtls_pkey = NULL; + X509 *dtls_cert = NULL; + void *s1 = ctx->log_avcl; - /* Use the same logging context as AV format. */ - rtc->dtls_ctx.log_avcl = s; - rtc->dtls_ctx.udp_uc = rtc->udp_uc; - rtc->dtls_ctx.ice_arq_max = rtc->ice_arq_max; - rtc->dtls_ctx.ice_arq_timeout = rtc->ice_arq_timeout; - rtc->dtls_ctx.dtls_arq_max = rtc->dtls_arq_max; - rtc->dtls_ctx.dtls_arq_timeout = rtc->dtls_arq_timeout; - rtc->dtls_ctx.pkt_size = rtc->pkt_size; + ctx->dtls_cert = dtls_cert = X509_new(); + ctx->dtls_pkey = dtls_pkey = EVP_PKEY_new(); + dtls_eckey = EC_KEY_new(); - if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); - return ret; - } + /* To prevent a crash during cleanup, always initialize it. 
*/ + av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); - av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", - rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); + /* Should use the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 + */ + ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); - if (rtc->pkt_size < ideal_pkt_size) { - av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", - rtc->pkt_size, ideal_pkt_size); + if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + ret = AVERROR(EINVAL); + goto end; } - - return 0; -} - -/** - * Parses the ISOM AVCC format of extradata and extracts SPS/PPS. - * - * This function is used to parse SPS/PPS from the extradata in ISOM AVCC format. - * It can handle both ISOM and annexb formats but only parses data in ISOM format. - * If the extradata is in annexb format, this function ignores it, and uses the entire - * extradata as a sequence header with SPS/PPS. Refer to ff_isom_write_avcc. - * - * @param s Pointer to the AVFormatContext - * @param extradata Pointer to the extradata - * @param extradata_size Size of the extradata - * @returns Returns 0 if successful or AVERROR_xxx in case of an error. - */ -static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) -{ - int ret = 0; - uint8_t version, nal_length_size, nb_sps, nb_pps; - AVIOContext *pb; - RTCContext *rtc = s->priv_data; - - if (!extradata || !extradata_size) - return 0; - - /* Not H.264 ISOM format, may be annexb etc. 
*/ - if (extradata_size < 4 || extradata[0] != 1) { - if (!ff_avc_find_startcode(extradata, extradata + extradata_size)) { - av_log(s, AV_LOG_ERROR, "Format must be ISOM or annexb\n"); - return AVERROR_INVALIDDATA; - } - return 0; + if (EC_KEY_generate_key(dtls_eckey) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + ret = AVERROR(EINVAL); + goto end; } - - /* Parse the SPS/PPS in ISOM format in extradata. */ - pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); - - version = avio_r8(pb); /* version */ - avio_r8(pb); /* avc profile */ - avio_r8(pb); /* avc profile compat */ - avio_r8(pb); /* avc level */ - nal_length_size = avio_r8(pb); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */ - nb_sps = avio_r8(pb); /* 3 bits reserved (111) + 5 bits number of sps */ - - if (version != 1) { - av_log(s, AV_LOG_ERROR, "Invalid version=%d\n", version); - ret = AVERROR_INVALIDDATA; + if (EVP_PKEY_set1_EC_KEY(dtls_pkey, dtls_eckey) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + ret = AVERROR(EINVAL); goto end; } - rtc->avc_nal_length_size = (nal_length_size & 0x03) + 1; - if (rtc->avc_nal_length_size == 3) { - av_log(s, AV_LOG_ERROR, "Invalid nal length size=%d\n", rtc->avc_nal_length_size); - ret = AVERROR_INVALIDDATA; + /* Generate a self-signed certificate. 
*/ + subject = X509_NAME_new(); + + serial = (int)av_get_random_seed(); + if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); + ret = AVERROR(EINVAL); goto end; } - /* Read SPS */ - nb_sps &= 0x1f; - if (nb_sps != 1 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid number of sps=%d, eof=%d\n", nb_sps, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; + if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); + ret = AVERROR(EINVAL); goto end; } - rtc->avc_sps_size = avio_rb16(pb); /* sps size */ - if (rtc->avc_sps_size <= 0 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid sps size=%d, eof=%d\n", rtc->avc_sps_size, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; + if (X509_set_issuer_name(dtls_cert, subject) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); + ret = AVERROR(EINVAL); goto end; } - - rtc->avc_sps = av_malloc(rtc->avc_sps_size); - if (!rtc->avc_sps) { - ret = AVERROR(ENOMEM); + if (X509_set_subject_name(dtls_cert, subject) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set subject name\n"); + ret = AVERROR(EINVAL); goto end; } - ret = avio_read(pb, rtc->avc_sps, rtc->avc_sps_size); /* sps */ - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read sps, size=%d\n", rtc->avc_sps_size); + expire_day = 365; + if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); + ret = AVERROR(EINVAL); goto end; } - - /* Read PPS */ - nb_pps = avio_r8(pb); /* number of pps */ - if (nb_pps != 1 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid number of pps=%d, eof=%d\n", nb_pps, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; + if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); + ret = AVERROR(EINVAL); goto end; } - 
rtc->avc_pps_size = avio_rb16(pb); /* pps size */ - if (rtc->avc_pps_size <= 0 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid pps size=%d, eof=%d\n", rtc->avc_pps_size, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; + if (X509_set_version(dtls_cert, 2) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set version\n"); + ret = AVERROR(EINVAL); goto end; } - rtc->avc_pps = av_malloc(rtc->avc_pps_size); - if (!rtc->avc_pps) { - ret = AVERROR(ENOMEM); + if (X509_set_pubkey(dtls_cert, dtls_pkey) != 1) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); + ret = AVERROR(EINVAL); goto end; } - ret = avio_read(pb, rtc->avc_pps, rtc->avc_pps_size); /* pps */ - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read pps, size=%d\n", rtc->avc_pps_size); + if (!X509_sign(dtls_cert, dtls_pkey, EVP_sha1())) { + av_log(s1, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); + ret = AVERROR(EINVAL); goto end; } + /* Generate the fingerpint of certficate. */ + if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { + av_log(s1, AV_LOG_ERROR, "Failed to generate fingerprint\n"); + ret = AVERROR(EIO); + goto end; + } + for (i = 0; i < n; i++) { + av_bprintf(&fingerprint, "%02X", md[i]); + if (i < n - 1) + av_bprintf(&fingerprint, ":"); + } + if (!av_bprint_is_complete(&fingerprint)) { + av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); + ret = AVERROR(EIO); + goto end; + } + if (!fingerprint.str || !strlen(fingerprint.str)) { + av_log(s1, AV_LOG_ERROR, "Fingerprint is empty\n"); + ret = AVERROR(EINVAL); + goto end; + } + + ctx->dtls_fingerprint = av_strdup(fingerprint.str); + if (!ctx->dtls_fingerprint) { + ret = AVERROR(ENOMEM); + goto end; + } + + av_log(s1, AV_LOG_INFO, "DTLS: Fingerprint %s\n", ctx->dtls_fingerprint); + end: - avio_context_free(&pb); + EC_KEY_free(dtls_eckey); + EC_GROUP_free(ecgroup); + X509_NAME_free(subject); + av_bprint_finalize(&fingerprint, NULL); return ret; } /** - * Parses video 
SPS/PPS from the extradata of codecpar and checks the codec. - * Currently only supports video(h264) and audio(opus). Note that only baseline - * and constrained baseline profiles of h264 are supported. - * - * If the profile is less than 0, the function considers the profile as baseline. - * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting - * desktop and transcoding. - * - * @param s Pointer to the AVFormatContext - * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + * Cleanup the DTLS context. */ -static int parse_codec(AVFormatContext *s) +static av_cold void dtls_context_deinit(DTLSContext *ctx) { - int i, ret; - RTCContext *rtc = s->priv_data; - - for (i = 0; i < s->nb_streams; i++) { - AVCodecParameters *par = s->streams[i]->codecpar; - const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); - switch (par->codec_type) { - case AVMEDIA_TYPE_VIDEO: - if (rtc->video_par) { - av_log(s, AV_LOG_ERROR, "Only one video stream is supported by RTC\n"); - return AVERROR(EINVAL); - } - rtc->video_par = par; + X509_free(ctx->dtls_cert); + EVP_PKEY_free(ctx->dtls_pkey); + av_freep(&ctx->dtls_fingerprint); +} - if (par->codec_id != AV_CODEC_ID_H264) { - av_log(s, AV_LOG_ERROR, "Unsupported video codec %s by RTC, choose h264\n", - desc ? desc->name : "unknown"); - return AVERROR_PATCHWELCOME; - } - if (par->profile > 0 && (par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { - av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", - par->profile, i); - return AVERROR_PATCHWELCOME; - } +/** + * Callback function to print the OpenSSL SSL status. 
+ */ +static void openssl_on_info(const SSL *dtls, int where, int ret) +{ + int w, r1; + const char *method, *alert_type, *alert_desc; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + void *s1 = ctx->log_avcl; - ret = isom_read_avcc(s, par->extradata, par->extradata_size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to parse SPS/PPS from extradata\n"); - return ret; - } - break; - case AVMEDIA_TYPE_AUDIO: - if (rtc->audio_par) { - av_log(s, AV_LOG_ERROR, "Only one audio stream is supported by RTC\n"); - return AVERROR(EINVAL); - } - rtc->audio_par = par; + w = where & ~SSL_ST_MASK; + if (w & SSL_ST_CONNECT) { + method = "SSL_connect"; + } else if (w & SSL_ST_ACCEPT) { + method = "SSL_accept"; + } else { + method = "undefined"; + } - if (par->codec_id != AV_CODEC_ID_OPUS) { - av_log(s, AV_LOG_ERROR, "Unsupported audio codec %s by RTC, choose opus\n", - desc ? desc->name : "unknown"); - return AVERROR_PATCHWELCOME; - } + r1 = SSL_get_error(dtls, ret); + if (where & SSL_CB_LOOP) { + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? 
"read":"write"; - if (par->ch_layout.nb_channels != 2) { - av_log(s, AV_LOG_ERROR, "Unsupported audio channels %d by RTC, choose stereo\n", - par->ch_layout.nb_channels); - return AVERROR_PATCHWELCOME; - } + alert_type = SSL_alert_type_string_long(ret); + alert_desc = SSL_alert_desc_string(ret); - if (par->sample_rate != 48000) { - av_log(s, AV_LOG_ERROR, "Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); - return AVERROR_PATCHWELCOME; + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) { + av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); + } else { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); + } + } else if (where & SSL_CB_EXIT) { + if (!ret) { + av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else if (ret < 0) { + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) { + av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); + } else { + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); } - break; - default: - av_log(s, AV_LOG_ERROR, "Codec type '%s' for stream %d is not supported by RTC\n", - av_get_media_type_string(par->codec_type), i); - return AVERROR_PATCHWELCOME; } } - - return 0; } -/** - * Generate SDP offer according to the codec parameters, DTLS and ICE information. 
- * The below is an example of SDP offer: - * - * v=0 - * o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1 - * s=FFmpegPublishSession - * t=0 0 - * a=group:BUNDLE 0 1 - * a=extmap-allow-mixed - * a=msid-semantic: WMS - * - * m=audio 9 UDP/TLS/RTP/SAVPF 111 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:a174B - * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy - * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 - * a=setup:actpass - * a=mid:0 - * a=sendonly - * a=msid:FFmpeg audio - * a=rtcp-mux - * a=rtpmap:111 opus/48000/2 - * a=ssrc:4267647086 cname:FFmpeg - * a=ssrc:4267647086 msid:FFmpeg audio - * - * m=video 9 UDP/TLS/RTP/SAVPF 106 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:a174B - * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy - * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 - * a=setup:actpass - * a=mid:1 - * a=sendonly - * a=msid:FFmpeg video - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:106 H264/90000 - * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01f - * a=ssrc:107169110 cname:FFmpeg - * a=ssrc:107169110 msid:FFmpeg video - * - * Note that we don't use av_sdp_create to generate SDP offer because it doesn't - * support DTLS and ICE information. - * - * @return 0 if OK, AVERROR_xxx on error - */ -static int generate_sdp_offer(AVFormatContext *s) +static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) { - int ret = 0, profile, level, profile_iop; - AVBPrint bp; - RTCContext *rtc = s->priv_data; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + void *s1 = ctx->log_avcl; - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&bp, 1, MAX_SDP_SIZE); + /* Double the timeout, note that it may be 0. 
*/ + unsigned int timeout_us = previous_us * 2; - if (rtc->sdp_offer) { - av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); - ret = AVERROR(EINVAL); - goto end; + /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. + * when got ServerHello, we should reset the timer. */ + if (!previous_us || ctx->dtls_should_reset_timer) { + timeout_us = ctx->dtls_arq_timeout * 1000; /* in us */ } - snprintf(rtc->ice_ufrag_local, sizeof(rtc->ice_ufrag_local), "%08x", - av_get_random_seed()); - snprintf(rtc->ice_pwd_local, sizeof(rtc->ice_pwd_local), "%08x%08x%08x%08x", - av_get_random_seed(), av_get_random_seed(), av_get_random_seed(), - av_get_random_seed()); + /* never exceed the max timeout. */ + timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ - rtc->audio_ssrc = av_get_random_seed(); - rtc->video_ssrc = av_get_random_seed(); + av_log(s1, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", + timeout_us / 1000, previous_us / 1000); - rtc->audio_payload_type = 111; - rtc->video_payload_type = 106; + return timeout_us; +} - av_bprintf(&bp, "" - "v=0\r\n" - "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" - "s=FFmpegPublishSession\r\n" - "t=0 0\r\n" - "a=group:BUNDLE 0 1\r\n" - "a=extmap-allow-mixed\r\n" - "a=msid-semantic: WMS\r\n"); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } +static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming, int r0, int r1) +{ + uint8_t content_type = 0; + uint16_t size = 0; + uint8_t handshake_type = 0; + void *s1 = ctx->log_avcl; - if (rtc->audio_par) { - av_bprintf(&bp, "" - "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 %s\r\n" - "a=setup:active\r\n" - "a=mid:0\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg audio\r\n" - "a=rtcp-mux\r\n" - "a=rtpmap:%u 
opus/%d/%d\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg audio\r\n", - rtc->audio_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->dtls_ctx.dtls_fingerprint, - rtc->audio_payload_type, - rtc->audio_par->sample_rate, - rtc->audio_par->ch_layout.nb_channels, - rtc->audio_ssrc, - rtc->audio_ssrc); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } + /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + if (length >= 1) { + content_type = (uint8_t)data[0]; } - if (rtc->video_par) { - profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; - level = rtc->video_par->level < 0 ? 30 : rtc->video_par->level; - profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; - av_bprintf(&bp, "" - "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 %s\r\n" - "a=setup:active\r\n" - "a=mid:1\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg video\r\n" - "a=rtcp-mux\r\n" - "a=rtcp-rsize\r\n" - "a=rtpmap:%u H264/90000\r\n" - "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg video\r\n", - rtc->video_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->dtls_ctx.dtls_fingerprint, - rtc->video_payload_type, - rtc->video_payload_type, - profile & (~FF_PROFILE_H264_CONSTRAINED), - profile_iop, - level, - rtc->video_ssrc, - rtc->video_ssrc); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } + if (length >= 13) { + size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; } - rtc->sdp_offer = av_strdup(bp.str); - if (!rtc->sdp_offer) { - ret = AVERROR(ENOMEM); - goto end; + if (length >= 14) { + handshake_type = (uint8_t)data[13]; } - 
av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); -end: - av_bprint_finalize(&bp, NULL); - return ret; + av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", + "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, r0, r1, length, + content_type, size, handshake_type); } /** - * Exchange SDP offer with WebRTC peer to get the answer. - * The below is an example of SDP answer: - * - * v=0 - * o=SRS/6.0.42(Bee) 107408542208384 2 IN IP4 0.0.0.0 - * s=SRSPublishSession - * t=0 0 - * a=ice-lite - * a=group:BUNDLE 0 1 - * a=msid-semantic: WMS live/show - * - * m=audio 9 UDP/TLS/RTP/SAVPF 111 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:ex9061f9 - * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 - * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 - * a=setup:passive - * a=mid:0 - * a=recvonly - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:111 opus/48000/2 - * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 - * - * m=video 9 UDP/TLS/RTP/SAVPF 106 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:ex9061f9 - * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 - * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 - * a=setup:passive - * a=mid:1 - * a=recvonly - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:106 H264/90000 - * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01e - * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 - * - * @return 0 if OK, AVERROR_xxx on error + * The return value of verify_callback controls the strategy of the further verification process. If verify_callback + * returns 0, the verification process is immediately stopped with "verification failed" state. 
If SSL_VERIFY_PEER is + * set, a verification failure alert is sent to the peer and the TLS/SSL handshake is terminated. If verify_callback + * returns 1, the verification process is continued. If verify_callback always returns 1, the TLS/SSL handshake will + * not be terminated with respect to verification failures and the connection will be established. The calling process + * can however retrieve the error code of the last verification error using SSL_get_verify_result(3) or by maintaining + * its own error storage managed by verify_callback. */ -static int exchange_sdp(AVFormatContext *s) +static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) { - int ret; - char buf[MAX_URL_SIZE]; - AVBPrint bp; - RTCContext *rtc = s->priv_data; - /* The URL context is an HTTP transport layer for the WHIP protocol. */ - URLContext *whip_uc = NULL; + /* Always OK, we don't check the certificate of client, because we allow client self-sign certificate. */ + return 1; +} - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&bp, 1, MAX_SDP_SIZE); +/** + * Initializes DTLS context for client role using ECDHE. + */ +static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx) +{ + int ret = 0; + void *s1 = ctx->log_avcl; + EVP_PKEY *dtls_pkey = ctx->dtls_pkey; + X509 *dtls_cert = ctx->dtls_cert; - ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s\n", s->url); + /* For ECDSA, we could set the curves list. 
*/ + if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + + /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ + if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + /* Setup the certificate. */ + if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); + ret = AVERROR(EINVAL); goto end; } - - if (!rtc->sdp_offer || !strlen(rtc->sdp_offer)) { - av_log(s, AV_LOG_ERROR, "No offer to exchange\n"); + /* Server will send Certificate Request. */ + SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_verify_callback); + /* The depth count is "level 0:peer certificate", "level 1: CA certificate", + * "level 2: higher level CA certificate", and so on. */ + SSL_CTX_set_verify_depth(dtls_ctx, 4); + /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. 
*/ + SSL_CTX_set_read_ahead(dtls_ctx, 1); + /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ + if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); ret = AVERROR(EINVAL); goto end; } - snprintf(buf, sizeof(buf), - "Cache-Control: no-cache\r\n" - "Content-Type: application/sdp\r\n"); - av_opt_set(whip_uc->priv_data, "headers", buf, 0); - av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set_bin(whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); +end: + return ret; +} - ret = ffurl_connect(whip_uc, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); - goto end; - } +/** + * After creating a DTLS context, initialize the DTLS SSL object. + */ +static av_cold int openssl_init_dtls_ssl(DTLSContext *ctx, SSL *dtls) +{ + int ret = 0; - if (ff_http_get_new_location(whip_uc)) { - rtc->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); - if (!rtc->whip_resource_url) { - ret = AVERROR(ENOMEM); - goto end; - } - } + /* Setup the callback for logging. */ + SSL_set_ex_data(dtls, 0, ctx); + SSL_set_info_callback(dtls, openssl_on_info); - while (1) { - ret = ffurl_read(whip_uc, buf, sizeof(buf)); - if (ret == AVERROR_EOF) { - /* Reset the error because we read all response as answer util EOF. */ - ret = 0; - break; - } - if (ret <= 0) { - av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s\n", - s->url, rtc->sdp_offer, rtc->sdp_answer); - goto end; - } + /* Set dtls fragment size */ + SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); + /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. 
*/ + SSL_set_mtu(dtls, ctx->pkt_size); - av_bprintf(&bp, "%.*s", ret, buf); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } - } + /* Set the callback for ARQ timer. */ + DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); - rtc->sdp_answer = av_strdup(bp.str); - if (!rtc->sdp_answer) { - ret = AVERROR(ENOMEM); - goto end; - } - av_log(s, AV_LOG_VERBOSE, "WHIP: Got answer: %s\n", rtc->sdp_answer); + /* Setup DTLS as active, which is client role. */ + SSL_set_connect_state(dtls); + SSL_set_max_send_fragment(dtls, ctx->pkt_size); -end: - ffurl_closep(&whip_uc); - av_bprint_finalize(&bp, NULL); return ret; } /** - * Parses the ICE ufrag, pwd, and candidates from the SDP answer. - * - * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. - * It returns an error if any of these fields is NULL. The function only uses the first - * candidate if there are multiple candidates. However, support for multiple candidates - * will be added in the future. - * - * @param s Pointer to the AVFormatContext - * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. + * Drives the SSL context by attempting to read packets to send from SSL, sending them + * over UDP, and then reading packets from UDP to feed back to SSL. 
*/ -static int parse_answer(AVFormatContext *s) +static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) { - int ret = 0; - AVIOContext *pb; - char line[MAX_URL_SIZE]; - const char *ptr; - int i; - RTCContext *rtc = s->priv_data; + int ret, i, j, r0, r1, req_size, res_size = 0; + uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; + char buf[MAX_UDP_BUFFER_SIZE]; + void *s1 = ctx->log_avcl; - if (!rtc->sdp_answer || !strlen(rtc->sdp_answer)) { - av_log(s, AV_LOG_ERROR, "No answer to parse\n"); - ret = AVERROR(EINVAL); - goto end; + /* Drive the SSL context by state change, arq or response messages. */ + r0 = SSL_do_handshake(dtls); + r1 = SSL_get_error(dtls, r0); + + /* Handshake successfully done */ + if (r0 == 1) { + ctx->dtls_done_for_us = 1; + return 0; } - pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); + /* Handshake failed with fatal error */ + if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { + av_log(s1, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); + return AVERROR(EIO); + } - for (i = 0; !avio_feof(pb); i++) { - ff_get_chomp_line(pb, line, sizeof(line)); - if (av_strstart(line, "a=ice-ufrag:", &ptr) && !rtc->ice_ufrag_remote) { - rtc->ice_ufrag_remote = av_strdup(ptr); - if (!rtc->ice_ufrag_remote) { - ret = AVERROR(ENOMEM); - goto end; - } - } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !rtc->ice_pwd_remote) { - rtc->ice_pwd_remote = av_strdup(ptr); - if (!rtc->ice_pwd_remote) { - ret = AVERROR(ENOMEM); - goto end; + /* Fast retransmit the request util got response. */ + for (i = 0; i <= ctx->dtls_arq_max && !res_size; i++) { + req_size = BIO_get_mem_data(bio_out, (char**)&data); + openssl_state_trace(ctx, data, req_size, 0, r0, r1); + ret = ffurl_write(ctx->udp_uc, data, req_size); + BIO_reset(bio_out); + req_ct = req_size > 0 ? data[0] : 0; + req_ht = req_size > 13 ? 
data[13] : 0; + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", + loop, req_ct, req_ht, req_size); + return ret; + } + + /* Wait so that the server can process the request and no need ARQ then. */ +#if DTLS_PROCESSING_TIMEOUT > 0 + av_usleep(DTLS_PROCESSING_TIMEOUT * 10000); +#endif + + for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { + ret = ffurl_read(ctx->udp_uc, buf, sizeof(buf)); + + /* Ignore other packets, such as ICE indication, except DTLS. */ + if (ret < 13 || buf[0] <= 19 || buf[0] >= 64) + continue; + + /* Got DTLS response successfully. */ + if (ret > 0) { + res_size = ret; + ctx->dtls_should_reset_timer = 1; + break; } - } else if (av_strstart(line, "a=candidate:", &ptr) && !rtc->ice_protocol) { - ptr = av_stristr(ptr, "udp"); - if (ptr && av_stristr(ptr, "host")) { - char protocol[17], host[129]; - int priority, port; - ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); - if (ret != 4) { - av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s\n", - ret, i, line, rtc->sdp_answer); - ret = AVERROR(EIO); - goto end; - } - if (av_strcasecmp(protocol, "udp")) { - av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", - protocol, i, line, rtc->sdp_answer); - ret = AVERROR(EIO); - goto end; - } + /* Fatal error or timeout. */ + if (ret != AVERROR(EAGAIN)) { + av_log(s1, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", + loop, req_ct, req_ht); + return ret; + } - rtc->ice_protocol = av_strdup(protocol); - rtc->ice_host = av_strdup(host); - rtc->ice_port = port; - if (!rtc->ice_protocol || !rtc->ice_host) { - ret = AVERROR(ENOMEM); - goto end; - } + /* DTLSv1_handle_timeout is called when a DTLS handshake timeout expires. If no timeout + * had expired, it returns 0. Otherwise, it retransmits the previous flight of handshake + * messages and returns 1. 
If too many timeouts had expired without progress or an error + * occurs, it returns -1. */ + r0 = DTLSv1_handle_timeout(dtls); + if (!r0) { + av_usleep(ctx->dtls_arq_timeout * 1000); + continue; /* no timeout had expired. */ + } + if (r0 != 1) { + r1 = SSL_get_error(dtls, r0); + av_log(s1, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", + loop, req_ct, req_ht, r0, r1); + return AVERROR(EIO); } - } - } - if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote) || !rtc->ice_ufrag_remote || !rtc->ice_ufrag_remote) { - av_log(s, AV_LOG_ERROR, "No ice pwd or ufrag parsed from %s\n", rtc->sdp_answer); - ret = AVERROR(EINVAL); - goto end; + ctx->dtls_arq_packets++; + break; + } } - if (!rtc->ice_protocol || !rtc->ice_host || !rtc->ice_port) { - av_log(s, AV_LOG_ERROR, "No ice candidate parsed from %s\n", rtc->sdp_answer); - ret = AVERROR(EINVAL); - goto end; + /* Trace the response packet, feed to SSL. */ + BIO_reset(bio_in); + openssl_state_trace(ctx, buf, res_size, 1, r0, SSL_ERROR_NONE); + res_ct = res_size > 0 ? buf[0]: 0; + res_ht = res_size > 13 ? buf[13] : 0; + if ((r0 = BIO_write(bio_in, buf, res_size)) <= 0) { + av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", + loop, res_ct, res_ht, res_size, r0); + return AVERROR(EIO); } - av_log(s, AV_LOG_INFO, "WHIP: SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", - strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), - rtc->ice_protocol, rtc->ice_host, rtc->ice_port); - -end: - avio_context_free(&pb); return ret; } /** - * Creates and marshals an ICE binding request packet. - * - * This function creates and marshals an ICE binding request packet. The function only - * generates the username attribute and does not include goog-network-info, ice-controlling, - * use-candidate, and priority. However, some of these attributes may be added in the future. 
+ * DTLS handshake with server, as a client in active mode, using openssl. * - * @param s Pointer to the AVFormatContext - * @param buf Pointer to memory buffer to store the request packet - * @param buf_size Size of the memory buffer - * @param request_size Pointer to an integer that receives the size of the request packet - * @return Returns 0 if successful or AVERROR_xxx if an error occurs. + * This function initializes the SSL context as the client role using OpenSSL and + * then performs the DTLS handshake until success. Upon successful completion, it + * exports the SRTP material key. + * + * @return 0 if OK, AVERROR_xxx on error */ -static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) +static int dtls_context_handshake(DTLSContext *ctx) { - int ret, size, crc32; - char username[128]; - AVIOContext *pb = NULL; - AVHMAC *hmac = NULL; - RTCContext *rtc = s->priv_data; + int ret, loop; + SSL_CTX *dtls_ctx = NULL; + SSL *dtls = NULL; + const char* dst = "EXTRACTOR-dtls_srtp"; + BIO *bio_in = NULL, *bio_out = NULL; + void *s1 = ctx->log_avcl; - pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); + dtls_ctx = SSL_CTX_new(DTLS_client_method()); - hmac = av_hmac_alloc(AV_HMAC_SHA1); - if (!hmac) { - ret = AVERROR(ENOMEM); + if (!ctx->udp_uc) { + av_log(s1, AV_LOG_ERROR, "DTLS: No UDP context\n"); + ret = AVERROR(EIO); goto end; } - /* Write 20 bytes header */ - avio_wb16(pb, 0x0001); /* STUN binding request */ - avio_wb16(pb, 0); /* length */ - avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - - /* The username is the concatenation of the two ICE ufrag */ - ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); - if (ret <= 0 || ret >= 
sizeof(username)) { - av_log(s, AV_LOG_ERROR, "Failed to build username %s:%s, max=%lu, ret=%d\n", - rtc->ice_ufrag_remote, rtc->ice_ufrag_local, sizeof(username), ret); - ret = AVERROR(EIO); + ret = openssl_init_dtls_context(ctx, dtls_ctx); + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); goto end; } - /* Write the username attribute */ - avio_wb16(pb, STUN_ATTR_USERNAME); /* attribute type username */ - avio_wb16(pb, ret); /* size of username */ - avio_write(pb, username, ret); /* bytes of username */ - ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + /* The dtls should not be created unless the dtls_ctx has been initialized. */ + dtls = SSL_new(dtls_ctx); - /* Write the use-candidate attribute */ - avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ - avio_wb16(pb, 0); /* size of use-candidate */ + bio_in = BIO_new(BIO_s_mem()); + bio_out = BIO_new(BIO_s_mem()); + SSL_set_bio(dtls, bio_in, bio_out); - /* Build and update message integrity */ - avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ - avio_wb16(pb, 20); /* size of message integrity */ - ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, rtc->ice_pwd_remote, strlen(rtc->ice_pwd_remote)); - av_hmac_update(hmac, buf, size - 24); - av_hmac_final(hmac, buf + size - 20, 20); + ret = openssl_init_dtls_ssl(ctx, dtls); + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to initialize SSL context\n"); + goto end; + } - /* Write the fingerprint attribute */ - avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ - avio_wb16(pb, 4); /* size of fingerprint */ - ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - /* Refer to the av_hash_alloc("CRC32"), av_hash_init and 
av_hash_final */ - crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; - avio_skip(pb, -4); - avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + for (loop = 0; loop < 64 && !ctx->dtls_done_for_us; loop++) { + ret = openssl_drive_context(ctx, dtls, bio_in, bio_out, loop); + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context\n"); + goto end; + } + } + if (!ctx->dtls_done_for_us) { + av_log(s1, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); + ret = AVERROR(EIO); + goto end; + } - *request_size = size; + /* Export SRTP master key after DTLS done */ + ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), + dst, strlen(dst), NULL, 0, 0); + if (!ret) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); + ret = AVERROR(EIO); + goto end; + } + + av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", + ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material)); end: - avio_context_free(&pb); - av_hmac_free(hmac); + SSL_free(dtls); + SSL_CTX_free(dtls_ctx); return ret; } -/** - * Create an ICE binding response. - * - * This function generates an ICE binding response and writes it to the provided - * buffer. The response is signed using the local password for message integrity. - * - * @param s Pointer to the AVFormatContext structure. - * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. - * @param tid_size The size of the transaction ID, should be 12. - * @param buf Pointer to the buffer where the response will be written. - * @param buf_size The size of the buffer provided for the response. - * @param response_size Pointer to an integer that will store the size of the generated response. - * @return Returns 0 if successful or AVERROR_xxx if an error occurs. 
- */ -static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) { - int ret = 0, size, crc32; - AVIOContext *pb = NULL; - AVHMAC *hmac = NULL; - RTCContext *rtc = s->priv_data; - - if (tid_size != 12) { - av_log(s, AV_LOG_ERROR, "Invalid transaction ID size. Expected 12, got %d\n", tid_size); - return AVERROR(EINVAL); - } +#pragma GCC diagnostic pop +#endif - pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); +typedef struct RTCContext { + AVClass *av_class; - hmac = av_hmac_alloc(AV_HMAC_SHA1); - if (!hmac) { - ret = AVERROR(ENOMEM); - goto end; - } + /* Parameters for the input audio and video codecs. */ + AVCodecParameters *audio_par; + AVCodecParameters *video_par; - /* Write 20 bytes header */ - avio_wb16(pb, 0x0101); /* STUN binding response */ - avio_wb16(pb, 0); /* length */ - avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ - avio_write(pb, tid, tid_size); /* transaction ID */ + /* The SPS/PPS of AVC video */ + uint8_t *avc_sps; + int avc_sps_size; + uint8_t *avc_pps; + int avc_pps_size; + /* The size of NALU in ISOM format. */ + int avc_nal_length_size; - /* Build and update message integrity */ - avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ - avio_wb16(pb, 20); /* size of message integrity */ - ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); - av_hmac_update(hmac, buf, size - 24); - av_hmac_final(hmac, buf + size - 20, 20); + /* The ICE username and pwd fragment generated by the muxer. */ + char ice_ufrag_local[9]; + char ice_pwd_local[33]; + /* The SSRC of the audio and video stream, generated by the muxer. */ + uint32_t audio_ssrc; + uint32_t video_ssrc; + /* The PT(Payload Type) of stream, generated by the muxer. 
*/ + uint8_t audio_payload_type; + uint8_t video_payload_type; + /** + * This is the SDP offer generated by the muxer based on the codec parameters, + * DTLS, and ICE information. + */ + char *sdp_offer; - /* Write the fingerprint attribute */ - avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ - avio_wb16(pb, 4); /* size of fingerprint */ - ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ - crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; - avio_skip(pb, -4); - avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + /* The ICE username and pwd from remote server. */ + char *ice_ufrag_remote; + char *ice_pwd_remote; + /** + * This represents the ICE candidate protocol, priority, host and port. + * Currently, we only support one candidate and choose the first UDP candidate. + * However, we plan to support multiple candidates in the future. + */ + char *ice_protocol; + char *ice_host; + int ice_port; - *response_size = size; + /* The SDP answer received from the WebRTC server. */ + char *sdp_answer; + /* The resource URL returned in the Location header of WHIP HTTP response. */ + char *whip_resource_url; -end: - avio_context_free(&pb); - av_hmac_free(hmac); - return ret; -} + /* The DTLS context. */ + DTLSContext dtls_ctx; -static int ice_is_binding_request(char *buf, int buf_size) { - return buf_size > 1 && buf[0] == 0x00 && buf[1] == 0x01; -} + /* The SRTP send context, to encrypt outgoing packets. */ + struct SRTPContext srtp_audio_send; + struct SRTPContext srtp_video_send; + struct SRTPContext srtp_rtcp_send; + /* The SRTP receive context, to decrypt incoming packets. 
*/ + struct SRTPContext srtp_recv; -static int ice_is_binding_response(char *buf, int buf_size) { - return buf_size > 1 && buf[0] == 0x01 && buf[1] == 0x01; -} + /* The time jitter base for audio OPUS stream. */ + int64_t audio_jitter_base; -/** - * This function handles incoming binding request messages by responding to them. - * If the message is not a binding request, it will be ignored. - */ -static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) { - int ret = 0, size; - char tid[12]; - uint8_t res_buf[MAX_UDP_BUFFER_SIZE]; - RTCContext *rtc = s->priv_data; + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + URLContext *udp_uc; - /* Ignore if not a binding request. */ - if (!ice_is_binding_request(buf, buf_size)) - return ret; + /* The maximum number of retries for ICE transmission. */ + int ice_arq_max; + /* The step start timeout in ms for ICE transmission. */ + int ice_arq_timeout; + /* The maximum number of retries for DTLS transmission. */ + int dtls_arq_max; + /* The step start timeout in ms for DTLS transmission. */ + int dtls_arq_timeout; + /* The size of RTP packet, should generally be set to MTU. */ + int pkt_size; +} RTCContext; - if (buf_size < 20) { - av_log(s, AV_LOG_ERROR, "Invalid STUN message size. Expected at least 20, got %d\n", buf_size); - return AVERROR(EINVAL); - } +static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); - /* Parse transaction id from binding request in buf. */ - memcpy(tid, buf + 8, 12); +/** + * Initialize and check the options for the WebRTC muxer. + */ +static av_cold int whip_init(AVFormatContext *s) +{ + int ret, ideal_pkt_size = 532; + RTCContext *rtc = s->priv_data; - /* Build the STUN binding response. */ - ret = ice_create_response(s, tid, sizeof(tid), res_buf, sizeof(res_buf), &size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding response, size=%d\n", size); + /* Use the same logging context as AV format. 
*/ + rtc->dtls_ctx.log_avcl = s; + rtc->dtls_ctx.dtls_arq_max = rtc->dtls_arq_max; + rtc->dtls_ctx.dtls_arq_timeout = rtc->dtls_arq_timeout; + rtc->dtls_ctx.pkt_size = rtc->pkt_size; + + if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); return ret; } - ret = ffurl_write(rtc->udp_uc, res_buf, size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding response, size=%d\n", size); - return ret; + av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", + rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); + + if (rtc->pkt_size < ideal_pkt_size) { + av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + rtc->pkt_size, ideal_pkt_size); } return 0; } /** - * Opens the UDP transport and completes the ICE handshake, using fast retransmit to - * handle packet loss for the binding request. + * Parses the ISOM AVCC format of extradata and extracts SPS/PPS. * - * To initiate a fast retransmission of the STUN binding request during ICE, we wait only - * for a successful local ICE process i.e., when a binding response is received from the - * server. Since the server's binding request may not arrive, we do not always wait for it. - * However, we will always respond to the server's binding request during ICE, DTLS or - * RTP streaming. + * This function is used to parse SPS/PPS from the extradata in ISOM AVCC format. + * It can handle both ISOM and annexb formats but only parses data in ISOM format. + * If the extradata is in annexb format, this function ignores it, and uses the entire + * extradata as a sequence header with SPS/PPS. Refer to ff_isom_write_avcc. 
* - * @param s Pointer to the AVFormatContext - * @return Returns 0 if the handshake was successful or AVERROR_xxx in case of an error + * @param s Pointer to the AVFormatContext + * @param extradata Pointer to the extradata + * @param extradata_size Size of the extradata + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. */ -static int ice_handshake(AVFormatContext *s) +static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) { - int ret, size; - char url[256], tmp[16]; - char req_buf[MAX_UDP_BUFFER_SIZE], res_buf[MAX_UDP_BUFFER_SIZE]; + int ret = 0; + uint8_t version, nal_length_size, nb_sps, nb_pps; + AVIOContext *pb; RTCContext *rtc = s->priv_data; - int fast_retries = rtc->ice_arq_max, timeout = rtc->ice_arq_timeout; - /* Build UDP URL and create the UDP context as transport. */ - ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); - ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + if (!extradata || !extradata_size) + return 0; + + /* Not H.264 ISOM format, may be annexb etc. */ + if (extradata_size < 4 || extradata[0] != 1) { + if (!ff_avc_find_startcode(extradata, extradata + extradata_size)) { + av_log(s, AV_LOG_ERROR, "Format must be ISOM or annexb\n"); + return AVERROR_INVALIDDATA; + } + return 0; + } + + /* Parse the SPS/PPS in ISOM format in extradata. 
*/ + pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + version = avio_r8(pb); /* version */ + avio_r8(pb); /* avc profile */ + avio_r8(pb); /* avc profile compat */ + avio_r8(pb); /* avc level */ + nal_length_size = avio_r8(pb); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */ + nb_sps = avio_r8(pb); /* 3 bits reserved (111) + 5 bits number of sps */ + + if (version != 1) { + av_log(s, AV_LOG_ERROR, "Invalid version=%d\n", version); + ret = AVERROR_INVALIDDATA; goto end; } - av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); - av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); - /* Set the max packet size to the buffer size. */ - snprintf(tmp, sizeof(tmp), "%d", rtc->pkt_size); - av_opt_set(rtc->udp_uc->priv_data, "pkt_size", tmp, 0); + rtc->avc_nal_length_size = (nal_length_size & 0x03) + 1; + if (rtc->avc_nal_length_size == 3) { + av_log(s, AV_LOG_ERROR, "Invalid nal length size=%d\n", rtc->avc_nal_length_size); + ret = AVERROR_INVALIDDATA; + goto end; + } - ret = ffurl_connect(rtc->udp_uc, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + /* Read SPS */ + nb_sps &= 0x1f; + if (nb_sps != 1 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid number of sps=%d, eof=%d\n", nb_sps, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; goto end; } - /* Make the socket non-blocking, set to READ and WRITE mode after connected */ - ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); - rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + rtc->avc_sps_size = avio_rb16(pb); /* sps size */ + if (rtc->avc_sps_size <= 0 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid sps size=%d, eof=%d\n", rtc->avc_sps_size, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } - /* Build the STUN binding request. 
*/ - ret = ice_create_request(s, req_buf, sizeof(req_buf), &size); + rtc->avc_sps = av_malloc(rtc->avc_sps_size); + if (!rtc->avc_sps) { + ret = AVERROR(ENOMEM); + goto end; + } + + ret = avio_read(pb, rtc->avc_sps, rtc->avc_sps_size); /* sps */ if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); + av_log(s, AV_LOG_ERROR, "Failed to read sps, size=%d\n", rtc->avc_sps_size); goto end; } - /* Fast retransmit the STUN binding request. */ - while (1) { - ret = ffurl_write(rtc->udp_uc, req_buf, size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); - goto end; - } + /* Read PPS */ + nb_pps = avio_r8(pb); /* number of pps */ + if (nb_pps != 1 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid number of pps=%d, eof=%d\n", nb_pps, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } - /* Wait so that the server can process the request and no need ARQ then. */ -#if ICE_PROCESSING_TIMEOUT > 0 - av_usleep(ICE_PROCESSING_TIMEOUT * 10000); -#endif + rtc->avc_pps_size = avio_rb16(pb); /* pps size */ + if (rtc->avc_pps_size <= 0 || avio_feof(pb)) { + av_log(s, AV_LOG_ERROR, "Invalid pps size=%d, eof=%d\n", rtc->avc_pps_size, avio_feof(pb)); + ret = AVERROR_INVALIDDATA; + goto end; + } - /* Read the STUN binding response. */ - ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); - if (ret < 0) { - /* If max retries is 6 and start timeout is 21ms, the total timeout - * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. 
*/ - av_usleep(timeout * 1000); - timeout *= 2; + rtc->avc_pps = av_malloc(rtc->avc_pps_size); + if (!rtc->avc_pps) { + ret = AVERROR(ENOMEM); + goto end; + } - if (ret == AVERROR(EAGAIN) && fast_retries) { - fast_retries--; - continue; - } + ret = avio_read(pb, rtc->avc_pps, rtc->avc_pps_size); /* pps */ + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to read pps, size=%d\n", rtc->avc_pps_size); + goto end; + } - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", rtc->ice_arq_max); - goto end; - } +end: + avio_context_free(&pb); + return ret; +} - /* If got any binding response, the fast retransmission is done. */ - if (ice_is_binding_response(res_buf, ret)) - break; +/** + * Parses video SPS/PPS from the extradata of codecpar and checks the codec. + * Currently only supports video(h264) and audio(opus). Note that only baseline + * and constrained baseline profiles of h264 are supported. + * + * If the profile is less than 0, the function considers the profile as baseline. + * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting + * desktop and transcoding. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + */ +static int parse_codec(AVFormatContext *s) +{ + int i, ret; + RTCContext *rtc = s->priv_data; - /* When a binding request is received, it is necessary to respond immediately. 
*/ - if (ice_is_binding_request(res_buf, ret)) { - if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { - goto end; + for (i = 0; i < s->nb_streams; i++) { + AVCodecParameters *par = s->streams[i]->codecpar; + const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + if (rtc->video_par) { + av_log(s, AV_LOG_ERROR, "Only one video stream is supported by RTC\n"); + return AVERROR(EINVAL); } - } - } + rtc->video_par = par; - /* Wait just for a small while to get the possible binding request from server. */ - fast_retries = rtc->ice_arq_max / 2; - timeout = rtc->ice_arq_timeout; - while (fast_retries) { - ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); - if (ret < 0) { - /* If max retries is 6 and start timeout is 21ms, the total timeout - * is about 21 + 42 + 84 = 147ms. */ - av_usleep(timeout * 1000); - timeout *= 2; + if (par->codec_id != AV_CODEC_ID_H264) { + av_log(s, AV_LOG_ERROR, "Unsupported video codec %s by RTC, choose h264\n", + desc ? desc->name : "unknown"); + return AVERROR_PATCHWELCOME; + } + if (par->profile > 0 && (par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { + av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", + par->profile, i); + return AVERROR_PATCHWELCOME; + } - if (ret == AVERROR(EAGAIN)) { - fast_retries--; - continue; + ret = isom_read_avcc(s, par->extradata, par->extradata_size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to parse SPS/PPS from extradata\n"); + return ret; + } + break; + case AVMEDIA_TYPE_AUDIO: + if (rtc->audio_par) { + av_log(s, AV_LOG_ERROR, "Only one audio stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + rtc->audio_par = par; + + if (par->codec_id != AV_CODEC_ID_OPUS) { + av_log(s, AV_LOG_ERROR, "Unsupported audio codec %s by RTC, choose opus\n", + desc ? 
desc->name : "unknown"); + return AVERROR_PATCHWELCOME; } - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding request, retries=%d\n", rtc->ice_arq_max); - goto end; - } + if (par->ch_layout.nb_channels != 2) { + av_log(s, AV_LOG_ERROR, "Unsupported audio channels %d by RTC, choose stereo\n", + par->ch_layout.nb_channels); + return AVERROR_PATCHWELCOME; + } - /* When a binding request is received, it is necessary to respond immediately. */ - if (ice_is_binding_request(res_buf, ret)) { - if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { - goto end; + if (par->sample_rate != 48000) { + av_log(s, AV_LOG_ERROR, "Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + return AVERROR_PATCHWELCOME; } break; + default: + av_log(s, AV_LOG_ERROR, "Codec type '%s' for stream %d is not supported by RTC\n", + av_get_media_type_string(par->codec_type), i); + return AVERROR_PATCHWELCOME; } } - av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", - rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, - rtc->ice_arq_max - fast_retries); - ret = 0; - -end: - return ret; + return 0; } -#if CONFIG_OPENSSL -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - /** - * Generate a self-signed certificate and private key for DTLS. + * Generate SDP offer according to the codec parameters, DTLS and ICE information. 
+ * The below is an example of SDP offer: + * + * v=0 + * o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1 + * s=FFmpegPublishSession + * t=0 0 + * a=group:BUNDLE 0 1 + * a=extmap-allow-mixed + * a=msid-semantic: WMS + * + * m=audio 9 UDP/TLS/RTP/SAVPF 111 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:a174B + * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy + * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 + * a=setup:actpass + * a=mid:0 + * a=sendonly + * a=msid:FFmpeg audio + * a=rtcp-mux + * a=rtpmap:111 opus/48000/2 + * a=ssrc:4267647086 cname:FFmpeg + * a=ssrc:4267647086 msid:FFmpeg audio + * + * m=video 9 UDP/TLS/RTP/SAVPF 106 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:a174B + * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy + * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 + * a=setup:actpass + * a=mid:1 + * a=sendonly + * a=msid:FFmpeg video + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:106 H264/90000 + * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01f + * a=ssrc:107169110 cname:FFmpeg + * a=ssrc:107169110 msid:FFmpeg video + * + * Note that we don't use av_sdp_create to generate SDP offer because it doesn't + * support DTLS and ICE information. 
+ * + * @return 0 if OK, AVERROR_xxx on error */ -static av_cold int dtls_context_init(DTLSContext *ctx) +static int generate_sdp_offer(AVFormatContext *s) { - int ret = 0, serial, expire_day, i, n = 0; - AVBPrint fingerprint; - unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "ffmpeg.org"; - X509_NAME* subject = NULL; - EC_GROUP *ecgroup = NULL; - EC_KEY* dtls_eckey = NULL; - EVP_PKEY *dtls_pkey = NULL; - X509 *dtls_cert = NULL; - void *s1 = ctx->log_avcl; - - ctx->dtls_cert = dtls_cert = X509_new(); - ctx->dtls_pkey = dtls_pkey = EVP_PKEY_new(); - dtls_eckey = EC_KEY_new(); + int ret = 0, profile, level, profile_iop; + AVBPrint bp; + RTCContext *rtc = s->priv_data; /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); - - /* Should use the curves in ClientHello.supported_groups, for example: - * Supported Group: x25519 (0x001d) - * Supported Group: secp256r1 (0x0017) - * Supported Group: secp384r1 (0x0018) - * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 - */ - ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); - - if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (EC_KEY_generate_key(dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (EVP_PKEY_set1_EC_KEY(dtls_pkey, dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - - /* Generate a self-signed certificate. 
*/ - subject = X509_NAME_new(); - - serial = (int)av_get_random_seed(); - if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); - ret = AVERROR(EINVAL); - goto end; - } + av_bprint_init(&bp, 1, MAX_SDP_SIZE); - if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); + if (rtc->sdp_offer) { + av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); ret = AVERROR(EINVAL); goto end; } - if (X509_set_issuer_name(dtls_cert, subject) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (X509_set_subject_name(dtls_cert, subject) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set subject name\n"); - ret = AVERROR(EINVAL); - goto end; - } + snprintf(rtc->ice_ufrag_local, sizeof(rtc->ice_ufrag_local), "%08x", + av_get_random_seed()); + snprintf(rtc->ice_pwd_local, sizeof(rtc->ice_pwd_local), "%08x%08x%08x%08x", + av_get_random_seed(), av_get_random_seed(), av_get_random_seed(), + av_get_random_seed()); - expire_day = 365; - if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); - ret = AVERROR(EINVAL); - goto end; - } + rtc->audio_ssrc = av_get_random_seed(); + rtc->video_ssrc = av_get_random_seed(); - if (X509_set_version(dtls_cert, 2) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set version\n"); - ret = AVERROR(EINVAL); - goto end; - } + rtc->audio_payload_type = 111; + rtc->video_payload_type = 106; - if (X509_set_pubkey(dtls_cert, dtls_pkey) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); - ret = AVERROR(EINVAL); + av_bprintf(&bp, "" + "v=0\r\n" + "o=FFmpeg 
4489045141692799359 2 IN IP4 127.0.0.1\r\n" + "s=FFmpegPublishSession\r\n" + "t=0 0\r\n" + "a=group:BUNDLE 0 1\r\n" + "a=extmap-allow-mixed\r\n" + "a=msid-semantic: WMS\r\n"); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); goto end; } - if (!X509_sign(dtls_cert, dtls_pkey, EVP_sha1())) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); - ret = AVERROR(EINVAL); - goto end; + if (rtc->audio_par) { + av_bprintf(&bp, "" + "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:active\r\n" + "a=mid:0\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg audio\r\n" + "a=rtcp-mux\r\n" + "a=rtpmap:%u opus/%d/%d\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg audio\r\n", + rtc->audio_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->dtls_ctx.dtls_fingerprint, + rtc->audio_payload_type, + rtc->audio_par->sample_rate, + rtc->audio_par->ch_layout.nb_channels, + rtc->audio_ssrc, + rtc->audio_ssrc); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; + } } - /* Generate the fingerpint of certficate. 
*/ - if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { - av_log(s1, AV_LOG_ERROR, "Failed to generate fingerprint\n"); - ret = AVERROR(EIO); - goto end; - } - for (i = 0; i < n; i++) { - av_bprintf(&fingerprint, "%02X", md[i]); - if (i < n - 1) - av_bprintf(&fingerprint, ":"); - } - if (!av_bprint_is_complete(&fingerprint)) { - av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); - ret = AVERROR(EIO); - goto end; - } - if (!fingerprint.str || !strlen(fingerprint.str)) { - av_log(s1, AV_LOG_ERROR, "Fingerprint is empty\n"); - ret = AVERROR(EINVAL); - goto end; + if (rtc->video_par) { + profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; + level = rtc->video_par->level < 0 ? 30 : rtc->video_par->level; + profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; + av_bprintf(&bp, "" + "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:active\r\n" + "a=mid:1\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg video\r\n" + "a=rtcp-mux\r\n" + "a=rtcp-rsize\r\n" + "a=rtpmap:%u H264/90000\r\n" + "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg video\r\n", + rtc->video_payload_type, + rtc->ice_ufrag_local, + rtc->ice_pwd_local, + rtc->dtls_ctx.dtls_fingerprint, + rtc->video_payload_type, + rtc->video_payload_type, + profile & (~FF_PROFILE_H264_CONSTRAINED), + profile_iop, + level, + rtc->video_ssrc, + rtc->video_ssrc); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; + } } - ctx->dtls_fingerprint = av_strdup(fingerprint.str); - if (!ctx->dtls_fingerprint) { + rtc->sdp_offer = av_strdup(bp.str); + if (!rtc->sdp_offer) { ret = AVERROR(ENOMEM); goto end; } - - av_log(s1, AV_LOG_INFO, "DTLS: Fingerprint %s\n", 
ctx->dtls_fingerprint); + av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); end: - EC_KEY_free(dtls_eckey); - EC_GROUP_free(ecgroup); - X509_NAME_free(subject); - av_bprint_finalize(&fingerprint, NULL); + av_bprint_finalize(&bp, NULL); return ret; } /** - * Cleanup the DTLS context. + * Exchange SDP offer with WebRTC peer to get the answer. + * The below is an example of SDP answer: + * + * v=0 + * o=SRS/6.0.42(Bee) 107408542208384 2 IN IP4 0.0.0.0 + * s=SRSPublishSession + * t=0 0 + * a=ice-lite + * a=group:BUNDLE 0 1 + * a=msid-semantic: WMS live/show + * + * m=audio 9 UDP/TLS/RTP/SAVPF 111 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:ex9061f9 + * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 + * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 + * a=setup:passive + * a=mid:0 + * a=recvonly + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:111 opus/48000/2 + * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 + * + * m=video 9 UDP/TLS/RTP/SAVPF 106 + * c=IN IP4 0.0.0.0 + * a=ice-ufrag:ex9061f9 + * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 + * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 + * a=setup:passive + * a=mid:1 + * a=recvonly + * a=rtcp-mux + * a=rtcp-rsize + * a=rtpmap:106 H264/90000 + * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01e + * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 + * + * @return 0 if OK, AVERROR_xxx on error */ -static av_cold void dtls_context_deinit(DTLSContext *ctx) +static int exchange_sdp(AVFormatContext *s) { - X509_free(ctx->dtls_cert); - EVP_PKEY_free(ctx->dtls_pkey); - av_freep(&ctx->dtls_fingerprint); -} + int ret; + char buf[MAX_URL_SIZE]; + AVBPrint bp; + RTCContext *rtc = s->priv_data; + /* The URL context is an HTTP transport layer for the WHIP protocol. 
*/ + URLContext *whip_uc = NULL; -/** - * Callback function to print the OpenSSL SSL status. - */ -static void openssl_on_info(const SSL *dtls, int where, int ret) -{ - int w, r1; - const char *method, *alert_type, *alert_desc; - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - void *s1 = ctx->log_avcl; + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); - w = where & ~SSL_ST_MASK; - if (w & SSL_ST_CONNECT) { - method = "SSL_connect"; - } else if (w & SSL_ST_ACCEPT) { - method = "SSL_accept"; - } else { - method = "undefined"; + ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s\n", s->url); + goto end; } - r1 = SSL_get_error(dtls, ret); - if (where & SSL_CB_LOOP) { - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else if (where & SSL_CB_ALERT) { - method = (where & SSL_CB_READ) ? 
"read":"write"; - - alert_type = SSL_alert_type_string_long(ret); - alert_desc = SSL_alert_desc_string(ret); - - if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) { - av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - } else { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - } - } else if (where & SSL_CB_EXIT) { - if (!ret) { - av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else if (ret < 0) { - if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) { - av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else { - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } - } + if (!rtc->sdp_offer || !strlen(rtc->sdp_offer)) { + av_log(s, AV_LOG_ERROR, "No offer to exchange\n"); + ret = AVERROR(EINVAL); + goto end; } -} -static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) -{ - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - void *s1 = ctx->log_avcl; - - /* Double the timeout, note that it may be 0. 
*/ - unsigned int timeout_us = previous_us * 2; + snprintf(buf, sizeof(buf), + "Cache-Control: no-cache\r\n" + "Content-Type: application/sdp\r\n"); + av_opt_set(whip_uc->priv_data, "headers", buf, 0); + av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); + av_opt_set_bin(whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); - /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. - * when got ServerHello, we should reset the timer. */ - if (!previous_us || ctx->dtls_should_reset_timer) { - timeout_us = ctx->dtls_arq_timeout * 1000; /* in us */ + ret = ffurl_connect(whip_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); + goto end; } - /* never exceed the max timeout. */ - timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ - - av_log(s1, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", - timeout_us / 1000, previous_us / 1000); - - return timeout_us; -} - -static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming, int r0, int r1) -{ - uint8_t content_type = 0; - uint16_t size = 0; - uint8_t handshake_type = 0; - void *s1 = ctx->log_avcl; - - /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ - if (length >= 1) { - content_type = (uint8_t)data[0]; + if (ff_http_get_new_location(whip_uc)) { + rtc->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); + if (!rtc->whip_resource_url) { + ret = AVERROR(ENOMEM); + goto end; + } } - if (length >= 13) { - size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; - } + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. 
*/ + ret = 0; + break; + } + if (ret <= 0) { + av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s\n", + s->url, rtc->sdp_offer, rtc->sdp_answer); + goto end; + } - if (length >= 14) { - handshake_type = (uint8_t)data[13]; + av_bprintf(&bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(&bp)) { + av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; + } } - av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", - "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, r0, r1, length, - content_type, size, handshake_type); -} + rtc->sdp_answer = av_strdup(bp.str); + if (!rtc->sdp_answer) { + ret = AVERROR(ENOMEM); + goto end; + } + av_log(s, AV_LOG_VERBOSE, "WHIP: Got answer: %s\n", rtc->sdp_answer); -/** - * The return value of verify_callback controls the strategy of the further verification process. If verify_callback - * returns 0, the verification process is immediately stopped with "verification failed" state. If SSL_VERIFY_PEER is - * set, a verification failure alert is sent to the peer and the TLS/SSL handshake is terminated. If verify_callback - * returns 1, the verification process is continued. If verify_callback always returns 1, the TLS/SSL handshake will - * not be terminated with respect to verification failures and the connection will be established. The calling process - * can however retrieve the error code of the last verification error using SSL_get_verify_result(3) or by maintaining - * its own error storage managed by verify_callback. - */ -static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) -{ - /* Always OK, we don't check the certificate of client, because we allow client self-sign certificate. 
*/ - return 1; +end: + ffurl_closep(&whip_uc); + av_bprint_finalize(&bp, NULL); + return ret; } /** - * Initializes DTLS context for client role using ECDHE. + * Parses the ICE ufrag, pwd, and candidates from the SDP answer. + * + * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. + * It returns an error if any of these fields is NULL. The function only uses the first + * candidate if there are multiple candidates. However, support for multiple candidates + * will be added in the future. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. */ -static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx) +static int parse_answer(AVFormatContext *s) { int ret = 0; - void *s1 = ctx->log_avcl; - EVP_PKEY *dtls_pkey = ctx->dtls_pkey; - X509 *dtls_cert = ctx->dtls_cert; + AVIOContext *pb; + char line[MAX_URL_SIZE]; + const char *ptr; + int i; + RTCContext *rtc = s->priv_data; - /* For ECDSA, we could set the curves list. */ - if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); + if (!rtc->sdp_answer || !strlen(rtc->sdp_answer)) { + av_log(s, AV_LOG_ERROR, "No answer to parse\n"); ret = AVERROR(EINVAL); goto end; } - /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ - if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - /* Setup the certificate. 
*/ - if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); - ret = AVERROR(EINVAL); - goto end; + pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + for (i = 0; !avio_feof(pb); i++) { + ff_get_chomp_line(pb, line, sizeof(line)); + if (av_strstart(line, "a=ice-ufrag:", &ptr) && !rtc->ice_ufrag_remote) { + rtc->ice_ufrag_remote = av_strdup(ptr); + if (!rtc->ice_ufrag_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !rtc->ice_pwd_remote) { + rtc->ice_pwd_remote = av_strdup(ptr); + if (!rtc->ice_pwd_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=candidate:", &ptr) && !rtc->ice_protocol) { + ptr = av_stristr(ptr, "udp"); + if (ptr && av_stristr(ptr, "host")) { + char protocol[17], host[129]; + int priority, port; + ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); + if (ret != 4) { + av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s\n", + ret, i, line, rtc->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + if (av_strcasecmp(protocol, "udp")) { + av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", + protocol, i, line, rtc->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + rtc->ice_protocol = av_strdup(protocol); + rtc->ice_host = av_strdup(host); + rtc->ice_port = port; + if (!rtc->ice_protocol || !rtc->ice_host) { + ret = AVERROR(ENOMEM); + goto end; + } + } + } } - if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); + + if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote) || !rtc->ice_ufrag_remote || !rtc->ice_ufrag_remote) { + av_log(s, AV_LOG_ERROR, "No ice pwd or ufrag parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } - /* 
Server will send Certificate Request. */ - SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_verify_callback); - /* The depth count is "level 0:peer certificate", "level 1: CA certificate", - * "level 2: higher level CA certificate", and so on. */ - SSL_CTX_set_verify_depth(dtls_ctx, 4); - /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ - SSL_CTX_set_read_ahead(dtls_ctx, 1); - /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ - if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); + + if (!rtc->ice_protocol || !rtc->ice_host || !rtc->ice_port) { + av_log(s, AV_LOG_ERROR, "No ice candidate parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } + av_log(s, AV_LOG_INFO, "WHIP: SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", + strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), + rtc->ice_protocol, rtc->ice_host, rtc->ice_port); + end: + avio_context_free(&pb); return ret; } /** - * After creating a DTLS context, initialize the DTLS SSL object. + * Creates and marshals an ICE binding request packet. + * + * This function creates and marshals an ICE binding request packet. The function only + * generates the username attribute and does not include goog-network-info, ice-controlling, + * use-candidate, and priority. However, some of these attributes may be added in the future. + * + * @param s Pointer to the AVFormatContext + * @param buf Pointer to memory buffer to store the request packet + * @param buf_size Size of the memory buffer + * @param request_size Pointer to an integer that receives the size of the request packet + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. 
*/ -static av_cold int openssl_init_dtls_ssl(DTLSContext *ctx, SSL *dtls) +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) { - int ret = 0; + int ret, size, crc32; + char username[128]; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + RTCContext *rtc = s->priv_data; - /* Setup the callback for logging. */ - SSL_set_ex_data(dtls, 0, ctx); - SSL_set_info_callback(dtls, openssl_on_info); + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); - /* Set dtls fragment size */ - SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. */ - SSL_set_mtu(dtls, ctx->pkt_size); + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; + } - /* Set the callback for ARQ timer. */ - DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); + /* Write 20 bytes header */ + avio_wb16(pb, 0x0001); /* STUN binding request */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - /* Setup DTLS as active, which is client role. 
*/ - SSL_set_connect_state(dtls); - SSL_set_max_send_fragment(dtls, ctx->pkt_size); + /* The username is the concatenation of the two ICE ufrag */ + ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); + if (ret <= 0 || ret >= sizeof(username)) { + av_log(s, AV_LOG_ERROR, "Failed to build username %s:%s, max=%lu, ret=%d\n", + rtc->ice_ufrag_remote, rtc->ice_ufrag_local, sizeof(username), ret); + ret = AVERROR(EIO); + goto end; + } + + /* Write the username attribute */ + avio_wb16(pb, STUN_ATTR_USERNAME); /* attribute type username */ + avio_wb16(pb, ret); /* size of username */ + avio_write(pb, username, ret); /* bytes of username */ + ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + + /* Write the use-candidate attribute */ + avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ + avio_wb16(pb, 0); /* size of use-candidate */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, rtc->ice_pwd_remote, strlen(rtc->ice_pwd_remote)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *request_size = 
size; +end: + avio_context_free(&pb); + av_hmac_free(hmac); return ret; } /** - * Drives the SSL context by attempting to read packets to send from SSL, sending them - * over UDP, and then reading packets from UDP to feed back to SSL. + * Create an ICE binding response. + * + * This function generates an ICE binding response and writes it to the provided + * buffer. The response is signed using the local password for message integrity. + * + * @param s Pointer to the AVFormatContext structure. + * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. + * @param tid_size The size of the transaction ID, should be 12. + * @param buf Pointer to the buffer where the response will be written. + * @param buf_size The size of the buffer provided for the response. + * @param response_size Pointer to an integer that will store the size of the generated response. + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. */ -static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) -{ - int ret, i, j, r0, r1, req_size, res_size = 0; - uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; - char buf[MAX_UDP_BUFFER_SIZE]; - void *s1 = ctx->log_avcl; +static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) { + int ret = 0, size, crc32; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + RTCContext *rtc = s->priv_data; - /* Drive the SSL context by state change, arq or response messages. */ - r0 = SSL_do_handshake(dtls); - r1 = SSL_get_error(dtls, r0); + if (tid_size != 12) { + av_log(s, AV_LOG_ERROR, "Invalid transaction ID size. 
Expected 12, got %d\n", tid_size); + return AVERROR(EINVAL); + } - /* Handshake successfully done */ - if (r0 == 1) { - ctx->dtls_done_for_us = 1; - return 0; + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; } - /* Handshake failed with fatal error */ - if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { - av_log(s1, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); - return AVERROR(EIO); - } + /* Write 20 bytes header */ + avio_wb16(pb, 0x0101); /* STUN binding response */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_write(pb, tid, tid_size); /* transaction ID */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *response_size = size; - /* Fast retransmit the request util got response. 
*/ - for (i = 0; i <= ctx->dtls_arq_max && !res_size; i++) { - req_size = BIO_get_mem_data(bio_out, (char**)&data); - openssl_state_trace(ctx, data, req_size, 0, r0, r1); - ret = ffurl_write(ctx->udp_uc, data, req_size); - BIO_reset(bio_out); - req_ct = req_size > 0 ? data[0] : 0; - req_ht = req_size > 13 ? data[13] : 0; - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", - loop, req_ct, req_ht, req_size); - return ret; - } +end: + avio_context_free(&pb); + av_hmac_free(hmac); + return ret; +} - /* Wait so that the server can process the request and no need ARQ then. */ -#if DTLS_PROCESSING_TIMEOUT > 0 - av_usleep(DTLS_PROCESSING_TIMEOUT * 10000); -#endif +static int ice_is_binding_request(char *buf, int buf_size) { + return buf_size > 1 && buf[0] == 0x00 && buf[1] == 0x01; +} - for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { - ret = ffurl_read(ctx->udp_uc, buf, sizeof(buf)); +static int ice_is_binding_response(char *buf, int buf_size) { + return buf_size > 1 && buf[0] == 0x01 && buf[1] == 0x01; +} - /* Ignore other packets, such as ICE indication, except DTLS. */ - if (ret < 13 || buf[0] <= 19 || buf[0] >= 64) - continue; +/** + * This function handles incoming binding request messages by responding to them. + * If the message is not a binding request, it will be ignored. + */ +static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) { + int ret = 0, size; + char tid[12]; + uint8_t res_buf[MAX_UDP_BUFFER_SIZE]; + RTCContext *rtc = s->priv_data; - /* Got DTLS response successfully. */ - if (ret > 0) { - res_size = ret; - ctx->dtls_should_reset_timer = 1; - break; - } + /* Ignore if not a binding request. */ + if (!ice_is_binding_request(buf, buf_size)) + return ret; - /* Fatal error or timeout. 
*/ - if (ret != AVERROR(EAGAIN)) { - av_log(s1, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", - loop, req_ct, req_ht); - return ret; - } + if (buf_size < 20) { + av_log(s, AV_LOG_ERROR, "Invalid STUN message size. Expected at least 20, got %d\n", buf_size); + return AVERROR(EINVAL); + } - /* DTLSv1_handle_timeout is called when a DTLS handshake timeout expires. If no timeout - * had expired, it returns 0. Otherwise, it retransmits the previous flight of handshake - * messages and returns 1. If too many timeouts had expired without progress or an error - * occurs, it returns -1. */ - r0 = DTLSv1_handle_timeout(dtls); - if (!r0) { - av_usleep(ctx->dtls_arq_timeout * 1000); - continue; /* no timeout had expired. */ - } - if (r0 != 1) { - r1 = SSL_get_error(dtls, r0); - av_log(s1, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", - loop, req_ct, req_ht, r0, r1); - return AVERROR(EIO); - } + /* Parse transaction id from binding request in buf. */ + memcpy(tid, buf + 8, 12); - ctx->dtls_arq_packets++; - break; - } + /* Build the STUN binding response. */ + ret = ice_create_response(s, tid, sizeof(tid), res_buf, sizeof(res_buf), &size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding response, size=%d\n", size); + return ret; } - /* Trace the response packet, feed to SSL. */ - BIO_reset(bio_in); - openssl_state_trace(ctx, buf, res_size, 1, r0, SSL_ERROR_NONE); - res_ct = res_size > 0 ? buf[0]: 0; - res_ht = res_size > 13 ? 
buf[13] : 0; - if ((r0 = BIO_write(bio_in, buf, res_size)) <= 0) { - av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", - loop, res_ct, res_ht, res_size, r0); - return AVERROR(EIO); + ret = ffurl_write(rtc->udp_uc, res_buf, size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding response, size=%d\n", size); + return ret; } - return ret; + return 0; } /** - * DTLS handshake with server, as a client in active mode, using openssl. + * Opens the UDP transport and completes the ICE handshake, using fast retransmit to + * handle packet loss for the binding request. * - * This function initializes the SSL context as the client role using OpenSSL and - * then performs the DTLS handshake until success. Upon successful completion, it - * exports the SRTP material key. + * To initiate a fast retransmission of the STUN binding request during ICE, we wait only + * for a successful local ICE process i.e., when a binding response is received from the + * server. Since the server's binding request may not arrive, we do not always wait for it. + * However, we will always respond to the server's binding request during ICE, DTLS or + * RTP streaming. 
* - * @return 0 if OK, AVERROR_xxx on error + * @param s Pointer to the AVFormatContext + * @return Returns 0 if the handshake was successful or AVERROR_xxx in case of an error */ -static int dtls_context_handshake(DTLSContext *ctx) +static int ice_handshake(AVFormatContext *s) { - int ret, loop; - SSL_CTX *dtls_ctx = NULL; - SSL *dtls = NULL; - const char* dst = "EXTRACTOR-dtls_srtp"; - BIO *bio_in = NULL, *bio_out = NULL; - void *s1 = ctx->log_avcl; - - dtls_ctx = SSL_CTX_new(DTLS_client_method()); + int ret, size; + char url[256], tmp[16]; + char req_buf[MAX_UDP_BUFFER_SIZE], res_buf[MAX_UDP_BUFFER_SIZE]; + RTCContext *rtc = s->priv_data; + int fast_retries = rtc->ice_arq_max, timeout = rtc->ice_arq_timeout; - ret = openssl_init_dtls_context(ctx, dtls_ctx); + /* Build UDP URL and create the UDP context as transport. */ + ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); + ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); + av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); goto end; } - /* The dtls should not be created unless the dtls_ctx has been initialized. */ - dtls = SSL_new(dtls_ctx); + av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); + av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); + /* Set the max packet size to the buffer size. 
*/ + snprintf(tmp, sizeof(tmp), "%d", rtc->pkt_size); + av_opt_set(rtc->udp_uc->priv_data, "pkt_size", tmp, 0); - bio_in = BIO_new(BIO_s_mem()); - bio_out = BIO_new(BIO_s_mem()); - SSL_set_bio(dtls, bio_in, bio_out); + ret = ffurl_connect(rtc->udp_uc, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + goto end; + } - ret = openssl_init_dtls_ssl(ctx, dtls); + /* Make the socket non-blocking, set to READ and WRITE mode after connected */ + ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); + rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + + /* Build the STUN binding request. */ + ret = ice_create_request(s, req_buf, sizeof(req_buf), &size); if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to initialize SSL context\n"); + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); goto end; } - for (loop = 0; loop < 64 && !ctx->dtls_done_for_us; loop++) { - ret = openssl_drive_context(ctx, dtls, bio_in, bio_out, loop); + /* Fast retransmit the STUN binding request. */ + while (1) { + ret = ffurl_write(rtc->udp_uc, req_buf, size); if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context\n"); + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); goto end; } - } - if (!ctx->dtls_done_for_us) { - av_log(s1, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); - ret = AVERROR(EIO); - goto end; + + /* Wait so that the server can process the request and no need ARQ then. */ +#if ICE_PROCESSING_TIMEOUT > 0 + av_usleep(ICE_PROCESSING_TIMEOUT * 10000); +#endif + + /* Read the STUN binding response. */ + ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); + if (ret < 0) { + /* If max retries is 6 and start timeout is 21ms, the total timeout + * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. 
*/ + av_usleep(timeout * 1000); + timeout *= 2; + + if (ret == AVERROR(EAGAIN) && fast_retries) { + fast_retries--; + continue; + } + + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", rtc->ice_arq_max); + goto end; + } + + /* If got any binding response, the fast retransmission is done. */ + if (ice_is_binding_response(res_buf, ret)) + break; + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(res_buf, ret)) { + if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { + goto end; + } + } } - /* Export SRTP master key after DTLS done */ - ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), - dst, strlen(dst), NULL, 0, 0); - if (!ret) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); - ret = AVERROR(EIO); - goto end; + /* Wait just for a small while to get the possible binding request from server. */ + fast_retries = rtc->ice_arq_max / 2; + timeout = rtc->ice_arq_timeout; + while (fast_retries) { + ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); + if (ret < 0) { + /* If max retries is 6 and start timeout is 21ms, the total timeout + * is about 21 + 42 + 84 = 147ms. */ + av_usleep(timeout * 1000); + timeout *= 2; + + if (ret == AVERROR(EAGAIN)) { + fast_retries--; + continue; + } + + av_log(s, AV_LOG_ERROR, "Failed to read STUN binding request, retries=%d\n", rtc->ice_arq_max); + goto end; + } + + /* When a binding request is received, it is necessary to respond immediately. 
*/ + if (ice_is_binding_request(res_buf, ret)) { + if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { + goto end; + } + break; + } } - av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", - ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material)); + av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", + rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, + rtc->ice_arq_max - fast_retries); + ret = 0; end: - SSL_free(dtls); - SSL_CTX_free(dtls_ctx); return ret; } -#pragma GCC diagnostic pop -#endif - /** * Establish the SRTP context using the keying material exported from DTLS. * From 2dedc8673700e218694703516f6e5278017bda62 Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 18 May 2023 10:27:11 +0800 Subject: [PATCH 35/60] WHIP: Remove the macro that suppresses warnings for the SSL deprecated API. --- .gitignore | 2 -- libavformat/rtcenc.c | 4 ---- 2 files changed, 6 deletions(-) diff --git a/.gitignore b/.gitignore index 1b7086289db7f..e810d11107f26 100644 --- a/.gitignore +++ b/.gitignore @@ -41,5 +41,3 @@ /src /mapfile /tools/python/__pycache__/ -.idea -patcheck.* diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 6e57e720276a4..321b16855bc40 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -181,9 +181,6 @@ typedef struct DTLSContext { int pkt_size; } DTLSContext; -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wdeprecated-declarations" - /** * Generate a self-signed certificate and private key for DTLS. */ @@ -707,7 +704,6 @@ static int dtls_context_handshake(DTLSContext *ctx) return ret; } -#pragma GCC diagnostic pop #endif typedef struct RTCContext { From 1054bed9784868ba30d4dc054762e845dca8a19d Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 19 May 2023 06:14:54 +0800 Subject: [PATCH 36/60] WHIP: Merge write header to init. 
--- libavformat/rtcenc.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 321b16855bc40..d450499fe1cd1 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1372,8 +1372,14 @@ static int parse_answer(AVFormatContext *s) } } - if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote) || !rtc->ice_ufrag_remote || !rtc->ice_ufrag_remote) { - av_log(s, AV_LOG_ERROR, "No ice pwd or ufrag parsed from %s\n", rtc->sdp_answer); + if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote)) { + av_log(s, AV_LOG_ERROR, "No remote ice pwd parsed from %s\n", rtc->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } + + if (!rtc->ice_ufrag_remote || !strlen(rtc->ice_ufrag_remote)) { + av_log(s, AV_LOG_ERROR, "No remote ice ufrag parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } @@ -2120,13 +2126,6 @@ static av_cold int rtc_init(AVFormatContext *s) if ((ret = setup_srtp(s)) < 0) return ret; - return ret; -} - -static int rtc_write_header(AVFormatContext *s) -{ - int ret; - if ((ret = create_rtp_muxer(s)) < 0) return ret; @@ -2234,7 +2233,6 @@ const FFOutputFormat ff_rtc_muxer = { .p.priv_class = &rtc_muxer_class, .priv_data_size = sizeof(RTCContext), .init = rtc_init, - .write_header = rtc_write_header, .write_packet = rtc_write_packet, .deinit = rtc_deinit, }; From b31e0c009c9317e46acf78ab532b10b9f098faa6 Mon Sep 17 00:00:00 2001 From: Haibo Chen <495810242@qq.com> Date: Sat, 20 May 2023 07:07:03 +0800 Subject: [PATCH 37/60] WHIP:Support baseline/main/high profile without B frames (#2) --- libavformat/rtcenc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d450499fe1cd1..d9969e54d5040 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -962,9 +962,9 @@ static int parse_codec(AVFormatContext *s) desc ? 
desc->name : "unknown"); return AVERROR_PATCHWELCOME; } - if (par->profile > 0 && (par->profile & ~FF_PROFILE_H264_CONSTRAINED) != FF_PROFILE_H264_BASELINE) { - av_log(s, AV_LOG_ERROR, "Profile %d of stream %d is not baseline, currently unsupported by RTC\n", - par->profile, i); + + if (par->video_delay > 0) { + av_log(s, AV_LOG_ERROR, "Unsupported B frames by RTC\n"); return AVERROR_PATCHWELCOME; } From b3f5c274e0d6b19048e89aed29fed8d8c849fdcb Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 23 May 2023 11:33:40 +0800 Subject: [PATCH 38/60] WHIP: Fix the SSL deprecated warning by replacing EC_KEY_new with EVP_EC_gen. --- libavformat/rtcenc.c | 39 ++++++++++++++------------------------- 1 file changed, 14 insertions(+), 25 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d9969e54d5040..3ba1f91d4495e 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -189,41 +189,32 @@ static av_cold int dtls_context_init(DTLSContext *ctx) int ret = 0, serial, expire_day, i, n = 0; AVBPrint fingerprint; unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "ffmpeg.org"; + const char *aor = "ffmpeg.org", *curve = NULL; X509_NAME* subject = NULL; - EC_GROUP *ecgroup = NULL; - EC_KEY* dtls_eckey = NULL; EVP_PKEY *dtls_pkey = NULL; X509 *dtls_cert = NULL; void *s1 = ctx->log_avcl; - ctx->dtls_cert = dtls_cert = X509_new(); - ctx->dtls_pkey = dtls_pkey = EVP_PKEY_new(); - dtls_eckey = EC_KEY_new(); - /* To prevent a crash during cleanup, always initialize it. 
*/ av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); + ctx->dtls_cert = dtls_cert = X509_new(); + if (!dtls_cert) { + ret = AVERROR(ENOMEM); + goto end; + } + /* Should use the curves in ClientHello.supported_groups, for example: * Supported Group: x25519 (0x001d) * Supported Group: secp256r1 (0x0017) * Supported Group: secp384r1 (0x0018) - * note that secp256r1 in openssl is called NID_X9_62_prime256v1, not NID_secp256k1 + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string */ - ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); - - if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (EC_KEY_generate_key(dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); - ret = AVERROR(EINVAL); - goto end; - } - if (EVP_PKEY_set1_EC_KEY(dtls_pkey, dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + curve = "prime256v1"; + ctx->dtls_pkey = dtls_pkey = EVP_EC_gen(curve); + if (!dtls_pkey) { + av_log(s1, AV_LOG_ERROR, "DTLS: EVP_EC_gen curve=%s failed\n", curve); ret = AVERROR(EINVAL); goto end; } @@ -313,11 +304,9 @@ static av_cold int dtls_context_init(DTLSContext *ctx) goto end; } - av_log(s1, AV_LOG_INFO, "DTLS: Fingerprint %s\n", ctx->dtls_fingerprint); + av_log(s1, AV_LOG_INFO, "DTLS: Curve=%s, fingerprint %s\n", curve, ctx->dtls_fingerprint); end: - EC_KEY_free(dtls_eckey); - EC_GROUP_free(ecgroup); X509_NAME_free(subject); av_bprint_finalize(&fingerprint, NULL); return ret; From 85c5680af7d1eb8f3a38622ab1df1cadf3c28e53 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 23 May 2023 12:30:29 +0800 Subject: [PATCH 39/60] WHIP: Refine the code to be shorter. 
--- libavformat/rtcenc.c | 141 ++++++++++++++++++------------------------- 1 file changed, 58 insertions(+), 83 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 3ba1f91d4495e..d8f9167052406 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -23,9 +23,7 @@ #ifndef CONFIG_OPENSSL #error "DTLS is not supported, please enable openssl" -#endif - -#if CONFIG_OPENSSL +#else #include #include #if OPENSSL_VERSION_NUMBER < 0x1010102fL @@ -33,29 +31,21 @@ #endif #endif -#include "libavutil/dict.h" -#include "libavutil/avassert.h" -#include "libavutil/mathematics.h" -#include "libavcodec/codec_desc.h" -#include "libavcodec/mpeg4audio.h" -#include "avformat.h" -#include "internal.h" -#include "mux.h" -#include "libavutil/opt.h" #include "libavcodec/avcodec.h" -#include "libavutil/avstring.h" -#include "url.h" -#include "libavutil/random_seed.h" -#include "avio_internal.h" -#include "libavutil/hmac.h" +#include "libavutil/base64.h" +#include "libavutil/bprint.h" #include "libavutil/crc.h" -#include "network.h" +#include "libavutil/hmac.h" +#include "libavutil/opt.h" +#include "libavutil/random_seed.h" #include "libavutil/time.h" -#include "libavutil/base64.h" -#include "srtp.h" #include "avc.h" +#include "avio_internal.h" #include "http.h" -#include "libavutil/bprint.h" +#include "internal.h" +#include "mux.h" +#include "network.h" +#include "srtp.h" /** * Maximum size limit of a Session Description Protocol (SDP), @@ -221,6 +211,10 @@ static av_cold int dtls_context_init(DTLSContext *ctx) /* Generate a self-signed certificate. 
*/ subject = X509_NAME_new(); + if (!subject) { + ret = AVERROR(ENOMEM); + goto end; + } serial = (int)av_get_random_seed(); if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { @@ -328,18 +322,15 @@ static av_cold void dtls_context_deinit(DTLSContext *ctx) static void openssl_on_info(const SSL *dtls, int where, int ret) { int w, r1; - const char *method, *alert_type, *alert_desc; + const char *method = "undefined", *alert_type, *alert_desc; DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); void *s1 = ctx->log_avcl; w = where & ~SSL_ST_MASK; - if (w & SSL_ST_CONNECT) { + if (w & SSL_ST_CONNECT) method = "SSL_connect"; - } else if (w & SSL_ST_ACCEPT) { + else if (w & SSL_ST_ACCEPT) method = "SSL_accept"; - } else { - method = "undefined"; - } r1 = SSL_get_error(dtls, ret); if (where & SSL_CB_LOOP) { @@ -351,26 +342,23 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) alert_type = SSL_alert_type_string_long(ret); alert_desc = SSL_alert_desc_string(ret); - if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) { + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - } else { + else av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - } } else if (where & SSL_CB_EXIT) { - if (!ret) { + if (!ret) av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else if (ret < 0) { - if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) { + else if (ret < 0) + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != 
SSL_ERROR_WANT_WRITE) av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else { + else av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } - } } } @@ -384,9 +372,8 @@ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. * when got ServerHello, we should reset the timer. */ - if (!previous_us || ctx->dtls_should_reset_timer) { + if (!previous_us || ctx->dtls_should_reset_timer) timeout_us = ctx->dtls_arq_timeout * 1000; /* in us */ - } /* never exceed the max timeout. */ timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ @@ -405,38 +392,18 @@ static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int void *s1 = ctx->log_avcl; /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ - if (length >= 1) { + if (length >= 1) content_type = (uint8_t)data[0]; - } - - if (length >= 13) { + if (length >= 13) size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; - } - - if (length >= 14) { + if (length >= 14) handshake_type = (uint8_t)data[13]; - } av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, r0, r1, length, content_type, size, handshake_type); } -/** - * The return value of verify_callback controls the strategy of the further verification process. If verify_callback - * returns 0, the verification process is immediately stopped with "verification failed" state. If SSL_VERIFY_PEER is - * set, a verification failure alert is sent to the peer and the TLS/SSL handshake is terminated. 
If verify_callback - * returns 1, the verification process is continued. If verify_callback always returns 1, the TLS/SSL handshake will - * not be terminated with respect to verification failures and the connection will be established. The calling process - * can however retrieve the error code of the last verification error using SSL_get_verify_result(3) or by maintaining - * its own error storage managed by verify_callback. - */ -static int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) -{ - /* Always OK, we don't check the certificate of client, because we allow client self-sign certificate. */ - return 1; -} - /** * Initializes DTLS context for client role using ECDHE. */ @@ -471,8 +438,6 @@ static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx ret = AVERROR(EINVAL); goto end; } - /* Server will send Certificate Request. */ - SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_verify_callback); /* The depth count is "level 0:peer certificate", "level 1: CA certificate", * "level 2: higher level CA certificate", and so on. */ SSL_CTX_set_verify_depth(dtls_ctx, 4); @@ -636,6 +601,10 @@ static int dtls_context_handshake(DTLSContext *ctx) void *s1 = ctx->log_avcl; dtls_ctx = SSL_CTX_new(DTLS_client_method()); + if (!dtls_ctx) { + ret = AVERROR(ENOMEM); + goto end; + } if (!ctx->udp_uc) { av_log(s1, AV_LOG_ERROR, "DTLS: No UDP context\n"); @@ -651,9 +620,23 @@ static int dtls_context_handshake(DTLSContext *ctx) /* The dtls should not be created unless the dtls_ctx has been initialized. 
*/ dtls = SSL_new(dtls_ctx); + if (!dtls) { + ret = AVERROR(ENOMEM); + goto end; + } bio_in = BIO_new(BIO_s_mem()); + if (!bio_in) { + ret = AVERROR(ENOMEM); + goto end; + } + bio_out = BIO_new(BIO_s_mem()); + if (!bio_out) { + ret = AVERROR(ENOMEM); + goto end; + } + SSL_set_bio(dtls, bio_in, bio_out); ret = openssl_init_dtls_ssl(ctx, dtls); @@ -794,10 +777,9 @@ static av_cold int whip_init(AVFormatContext *s) av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); - if (rtc->pkt_size < ideal_pkt_size) { + if (rtc->pkt_size < ideal_pkt_size) av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", rtc->pkt_size, ideal_pkt_size); - } return 0; } @@ -1682,11 +1664,8 @@ static int ice_handshake(AVFormatContext *s) break; /* When a binding request is received, it is necessary to respond immediately. */ - if (ice_is_binding_request(res_buf, ret)) { - if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { - goto end; - } - } + if (ice_is_binding_request(res_buf, ret) && (ret = ice_handle_binding_request(s, res_buf, ret)) < 0) + goto end; } /* Wait just for a small while to get the possible binding request from server. */ @@ -1711,15 +1690,15 @@ static int ice_handshake(AVFormatContext *s) /* When a binding request is received, it is necessary to respond immediately. 
*/ if (ice_is_binding_request(res_buf, ret)) { - if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) { + if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) goto end; - } break; } } - av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, username=%s:%s, req=%dB, res=%dB, arq=%d\n", - rtc->ice_host, rtc->ice_port, rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, + av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, location=%s, username=%s:%s, req=%dB, res=%dB, arq=%d\n", + rtc->ice_host, rtc->ice_port, rtc->whip_resource_url ? rtc->whip_resource_url : "", + rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, rtc->ice_arq_max - fast_retries); ret = 0; @@ -1922,9 +1901,8 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) is_rtcp = buf[1] >= 192 && buf[1] <= 223; payload_type = buf[1] & 0x7f; is_video = payload_type == rtc->video_payload_type; - if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) { + if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) return 0; - } /** * For video, the STAP-A with SPS/PPS should: @@ -1935,14 +1913,12 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) nalu_header = buf[12] & 0x1f; if (nalu_header == NALU_TYPE_STAP_A) { /* Reset the marker bit to 0. */ - if (buf[1] & 0x80) { + if (buf[1] & 0x80) buf[1] &= 0x7f; - } /* Reset the NRI to the first NALU's NRI. 
*/ - if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) { + if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) buf[12] = (buf[12]&0x80) | (buf[15]&0x60) | (buf[12]&0x1f); - } } } @@ -2076,7 +2052,7 @@ static int whip_dispose(AVFormatContext *s) } } - av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s\n", rtc->whip_resource_url); + av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", rtc->whip_resource_url); end: ffurl_closep(&whip_uc); @@ -2149,9 +2125,8 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) if (ret == AVERROR(EINVAL)) { av_log(s, AV_LOG_WARNING, "Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); ret = 0; - } else { + } else av_log(s, AV_LOG_ERROR, "Failed to write packet, size=%d\n", pkt->size); - } return ret; } From 71ee877f3e002791a513d451f5d708bf30fd8c8e Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 23 May 2023 12:54:44 +0800 Subject: [PATCH 40/60] WHIP: Increase the base timeout and thereby reduce the number of unnecessary ClientHello. --- libavformat/rtcenc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index d8f9167052406..553fb25291c93 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -122,6 +122,12 @@ */ #define DTLS_EAGAIN_RETRIES_MAX 5 +/** + * The DTLS timer's base timeout in microseconds. Its purpose is to minimize the unnecessary + * retransmission of ClientHello. + */ +#define DTLS_SSL_TIMER_BASE 400 * 1000 + /* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ #define STUN_MAGIC_COOKIE 0x2112A442 @@ -373,7 +379,7 @@ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. * when got ServerHello, we should reset the timer. 
*/ if (!previous_us || ctx->dtls_should_reset_timer) - timeout_us = ctx->dtls_arq_timeout * 1000; /* in us */ + timeout_us = DTLS_SSL_TIMER_BASE + ctx->dtls_arq_timeout * 1000; /* in us */ /* never exceed the max timeout. */ timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ @@ -2176,7 +2182,7 @@ static const AVOption options[] = { { "ice_arq_max", "Maximum number of retransmissions for the ICE ARQ mechanism", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, { "ice_arq_timeout", "Start timeout in milliseconds for the ICE ARQ mechanism", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, { "dtls_arq_max", "Maximum number of retransmissions for the DTLS ARQ mechanism", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, + { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1500 }, -1, INT_MAX, DEC }, { NULL }, }; From 726bb67e695245237ca308c568e6123da36fda56 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 23 May 2023 13:47:04 +0800 Subject: [PATCH 41/60] WHIP: Refine ARQ for DTLS with bug fixed. --- libavformat/rtcenc.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 553fb25291c93..9b113f7726350 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -536,7 +536,7 @@ static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO * ret = ffurl_read(ctx->udp_uc, buf, sizeof(buf)); /* Ignore other packets, such as ICE indication, except DTLS. 
*/ - if (ret < 13 || buf[0] <= 19 || buf[0] >= 64) + if (ret > 0 && (ret < 13 || buf[0] <= 19 || buf[0] >= 64)) continue; /* Got DTLS response successfully. */ @@ -559,13 +559,13 @@ static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO * * occurs, it returns -1. */ r0 = DTLSv1_handle_timeout(dtls); if (!r0) { - av_usleep(ctx->dtls_arq_timeout * 1000); + av_usleep(DTLS_SSL_TIMER_BASE + ctx->dtls_arq_timeout * 1000); continue; /* no timeout had expired. */ } if (r0 != 1) { r1 = SSL_get_error(dtls, r0); av_log(s1, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", - loop, req_ct, req_ht, r0, r1); + loop, req_ct, req_ht, r0, r1); return AVERROR(EIO); } @@ -604,6 +604,7 @@ static int dtls_context_handshake(DTLSContext *ctx) SSL *dtls = NULL; const char* dst = "EXTRACTOR-dtls_srtp"; BIO *bio_in = NULL, *bio_out = NULL; + int64_t starttime = av_gettime(); void *s1 = ctx->log_avcl; dtls_ctx = SSL_CTX_new(DTLS_client_method()); @@ -654,7 +655,8 @@ static int dtls_context_handshake(DTLSContext *ctx) for (loop = 0; loop < 64 && !ctx->dtls_done_for_us; loop++) { ret = openssl_drive_context(ctx, dtls, bio_in, bio_out, loop); if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context\n"); + av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context, cost=%dms\n", + (int)(av_gettime() - starttime) / 1000); goto end; } } @@ -673,8 +675,9 @@ static int dtls_context_handshake(DTLSContext *ctx) goto end; } - av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB\n", - ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material)); + av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB, cost=%dms\n", + ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material), + (int)(av_gettime() - starttime) / 1000); end: SSL_free(dtls); From 6e1fbb5420ee6ebce2e75a38f09ac6bcc2307f92 Mon Sep 17 00:00:00 2001 From: winlin Date: Mon, 29 May 
2023 19:34:15 +0800 Subject: [PATCH 42/60] WHIP: Update muxers.texi for RTC. --- doc/muxers.texi | 50 ++++++++++++++++++++++++++++++++++++++++++++ libavformat/rtcenc.c | 3 --- 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index f6071484ff661..3ec4b6d876558 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1333,6 +1333,56 @@ Set custom HTTP headers, can override built in default headers. Applicable only @end table +@anchor{rtc} +@section rtc + +WebRTC (Real-Time Communication) muxer that supports sub-second latency streaming according to +the WHIP (WebRTC-HTTP ingestion protocol) specification. + +It uses HTTP as a signaling protocol to exchange SDP capabilities and ICE lite candidates. Then, +it uses STUN binding requests and responses to establish a session over UDP. Subsequently, it +initiates a DTLS handshake to exchange the SRTP encryption keys. Lastly, it splits video and +audio frames into RTP packets and encrypts them using SRTP. + +Ensure that you use H.264 without B frames and Opus for the audio codec. For example, to convert +an input file with @command{ffmpeg} to WebRTC: +@example +ffmpeg -re -i input.mp4 -acodec libopus -ar 48000 -ac 2 \ + -vcodec libx264 -profile:v baseline -tune zerolatency -threads 1 -bf 0 \ + -f rtc "http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream" +@end example + +For this example, we have employed low latency options, resulting in an end-to-end latency of +approximately 150ms. + +@subsection Options + +This muxer supports the following options: + +@table @option + +@item ice_arq_max @var{size} +Set the maximum number of retransmissions for the ICE ARQ mechanism. +Default value is 5. + +@item ice_arq_timeout @var{size} +Set the start timeout in milliseconds for the ICE ARQ mechanism. +Default value is 30. + +@item dtls_arq_max @var{size} +Set the maximum number of retransmissions for the DTLS ARQ mechanism. +Default value is 5. 
+ +@item dtls_arq_timeout @var{size} +Set the start timeout in milliseconds for the DTLS ARQ mechanism. +Default value is 50. + +@item pkt_size @var{size} +Set the maximum size, in bytes, of RTP packets that send out. +Default value is 1500. + +@end table + @anchor{ico} @section ico diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 9b113f7726350..65173d00738d8 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -139,7 +139,6 @@ enum StunAttr { STUN_ATTR_FINGERPRINT = 0x8028, /// rfc5389 }; -#if CONFIG_OPENSSL typedef struct DTLSContext { /* For av_log to write log to this category. */ void *log_avcl; @@ -685,8 +684,6 @@ static int dtls_context_handshake(DTLSContext *ctx) return ret; } -#endif - typedef struct RTCContext { AVClass *av_class; From 86b361cd0fc5c5b2053efbfb88b40044a4a831b5 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 30 May 2023 21:11:42 +0800 Subject: [PATCH 43/60] WHIP: Fix bugs causing build OpenSSL error, to work with Pion 1. Fix OpenSSL build error. 2. Support OpenSSL 1.0.1k and newer versions. 3. Support WHIP authorization via Bearer HTTP header. 4. Change the option default value from 1500 to 1200, to make Pion work. 5. Detect the minimum required OpenSSL version, should be 1.0.1k and newer. 6. Quickly check the SDP answer by taking a glance at the first few bytes. 
--- configure | 8 ++ doc/muxers.texi | 13 ++- libavformat/rtcenc.c | 231 ++++++++++++++++++++++++------------------- 3 files changed, 146 insertions(+), 106 deletions(-) diff --git a/configure b/configure index 2ef0ff537e4cf..7d6f12cde63cf 100755 --- a/configure +++ b/configure @@ -6913,6 +6913,14 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r } enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init +enabled openssl && { + enabled rtc_muxer && { + $pkg_config --exists --print-errors "openssl >= 1.0.1k" || + require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h SSL_library_init || + require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h OPENSSL_init_ssl + } +} + if enabled gcrypt; then GCRYPT_CONFIG="${cross_prefix}libgcrypt-config" diff --git a/doc/muxers.texi b/doc/muxers.texi index 3ec4b6d876558..414b91a7b95df 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1361,26 +1361,29 @@ This muxer supports the following options: @table @option -@item ice_arq_max @var{size} +@item ice_arq_max @var{integer} Set the maximum number of retransmissions for the ICE ARQ mechanism. Default value is 5. -@item ice_arq_timeout @var{size} +@item ice_arq_timeout @var{integer} Set the start timeout in milliseconds for the ICE ARQ mechanism. Default value is 30. -@item dtls_arq_max @var{size} +@item dtls_arq_max @var{integer} Set the maximum number of retransmissions for the DTLS ARQ mechanism. Default value is 5. -@item dtls_arq_timeout @var{size} +@item dtls_arq_timeout @var{integer} Set the start timeout in milliseconds for the DTLS ARQ mechanism. Default value is 50. -@item pkt_size @var{size} +@item pkt_size @var{integer} Set the maximum size, in bytes, of RTP packets that send out. Default value is 1500. +@item authorization @var{string} +The optional Bearer token for WHIP Authorization. 
+ @end table @anchor{ico} diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 65173d00738d8..f9511d5ecf50f 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -26,8 +26,20 @@ #else #include #include -#if OPENSSL_VERSION_NUMBER < 0x1010102fL -#error "OpenSSL version 1.1.1b or newer is required" +/** + * Minimum required version of OpenSSL. + * MM NN FF PP S + * 0x1010102fL = 0x1 01 01 02 fL // 1.1.1b release + * MM(major) = 0x1 // 1.* + * NN(minor) = 0x01 // 1.1.* + * FF(fix) = 0x01 // 1.1.1* + * PP(patch) = 'a' + 0x02 - 1 = 'b' // 1.1.1b * + * S(status) = 0xf = release // 1.1.1b release + * Status 0 for development, 1 to e for betas 1 to 14, and f for release. + * Please use the stable version for DTLS, see https://github.com/openssl/openssl/issues/346 + */ +#if OPENSSL_VERSION_NUMBER < 0x100010b0L /* OpenSSL 1.0.1k */ +#error "OpenSSL version 1.0.1k or newer is required" #endif #endif @@ -172,10 +184,75 @@ typedef struct DTLSContext { int dtls_arq_max; /* The step start timeout in ms for DTLS transmission. */ int dtls_arq_timeout; - /* The size of RTP packet, should generally be set to MTU. */ + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. 
+ */ int pkt_size; } DTLSContext; +static int openssl_gen_private_key(DTLSContext *ctx) +{ + int ret = 0; +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP *ecgroup = NULL; + EC_KEY* dtls_eckey = NULL; +#else + const char *curve = "prime256v1"; +#endif + void *s1 = ctx->log_avcl; + + /* Should use the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + ctx->dtls_pkey = EVP_PKEY_new(); + dtls_eckey = EC_KEY_new(); + ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x + /* For openssl 1.0, we must set the group parameters, so that cert is ok. */ + EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); +#endif + + if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (EC_KEY_generate_key(dtls_eckey) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); + ret = AVERROR(EINVAL); + goto end; + } + + if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, dtls_eckey) != 1) { + av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); + ret = AVERROR(EINVAL); + goto end; + } +#else + ctx->dtls_pkey = EVP_EC_gen(curve); + if (!ctx->dtls_pkey) { + av_log(s1, AV_LOG_ERROR, "DTLS: EVP_EC_gen curve=%s failed\n", curve); + ret = AVERROR(EINVAL); + goto end; + } +#endif + +end: +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_KEY_free(dtls_eckey); + EC_GROUP_free(ecgroup); +#endif + return ret; +} + /** * Generate a self-signed certificate and private key for DTLS. 
*/ @@ -186,12 +263,11 @@ static av_cold int dtls_context_init(DTLSContext *ctx) unsigned char md[EVP_MAX_MD_SIZE]; const char *aor = "ffmpeg.org", *curve = NULL; X509_NAME* subject = NULL; - EVP_PKEY *dtls_pkey = NULL; X509 *dtls_cert = NULL; void *s1 = ctx->log_avcl; /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&fingerprint, 1, MAX_SDP_SIZE); + av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); ctx->dtls_cert = dtls_cert = X509_new(); if (!dtls_cert) { @@ -199,20 +275,9 @@ static av_cold int dtls_context_init(DTLSContext *ctx) goto end; } - /* Should use the curves in ClientHello.supported_groups, for example: - * Supported Group: x25519 (0x001d) - * Supported Group: secp256r1 (0x0017) - * Supported Group: secp384r1 (0x0018) - * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, - * not NID_secp256k1 or secp256k1 in string - */ - curve = "prime256v1"; - ctx->dtls_pkey = dtls_pkey = EVP_EC_gen(curve); - if (!dtls_pkey) { - av_log(s1, AV_LOG_ERROR, "DTLS: EVP_EC_gen curve=%s failed\n", curve); - ret = AVERROR(EINVAL); + /* Generate a private key to ctx->dtls_pkey. */ + if ((ret = openssl_gen_private_key(ctx)) < 0) goto end; - } /* Generate a self-signed certificate. 
*/ subject = X509_NAME_new(); @@ -263,13 +328,13 @@ static av_cold int dtls_context_init(DTLSContext *ctx) goto end; } - if (X509_set_pubkey(dtls_cert, dtls_pkey) != 1) { + if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); ret = AVERROR(EINVAL); goto end; } - if (!X509_sign(dtls_cert, dtls_pkey, EVP_sha1())) { + if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { av_log(s1, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); ret = AVERROR(EINVAL); goto end; @@ -287,7 +352,7 @@ static av_cold int dtls_context_init(DTLSContext *ctx) av_bprintf(&fingerprint, ":"); } if (!av_bprint_is_complete(&fingerprint)) { - av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, fingerprint.str); + av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_URL_SIZE, fingerprint.str); ret = AVERROR(EIO); goto end; } @@ -303,7 +368,7 @@ static av_cold int dtls_context_init(DTLSContext *ctx) goto end; } - av_log(s1, AV_LOG_INFO, "DTLS: Curve=%s, fingerprint %s\n", curve, ctx->dtls_fingerprint); + av_log(s1, AV_LOG_INFO, "DTLS: Curve=%s, fingerprint %s\n", curve ? 
curve : "", ctx->dtls_fingerprint); end: X509_NAME_free(subject); @@ -367,6 +432,7 @@ static void openssl_on_info(const SSL *dtls, int where, int ret) } } +#if OPENSSL_VERSION_NUMBER >= 0x10101000L /* OpenSSL 1.1.1 */ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) { DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); @@ -388,6 +454,7 @@ static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) return timeout_us; } +#endif static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming, int r0, int r1) { @@ -419,12 +486,14 @@ static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx EVP_PKEY *dtls_pkey = ctx->dtls_pkey; X509 *dtls_cert = ctx->dtls_cert; +#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ /* For ECDSA, we could set the curves list. */ if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); ret = AVERROR(EINVAL); goto end; } +#endif /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { @@ -475,8 +544,10 @@ static av_cold int openssl_init_dtls_ssl(DTLSContext *ctx, SSL *dtls) /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. */ SSL_set_mtu(dtls, ctx->pkt_size); +#if OPENSSL_VERSION_NUMBER >= 0x10101000L /* OpenSSL 1.1.1 */ /* Set the callback for ARQ timer. */ DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); +#endif /* Setup DTLS as active, which is client role. 
*/ SSL_set_connect_state(dtls); @@ -606,7 +677,11 @@ static int dtls_context_handshake(DTLSContext *ctx) int64_t starttime = av_gettime(); void *s1 = ctx->log_avcl; +#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ + dtls_ctx = SSL_CTX_new(DTLSv1_method()); +#else dtls_ctx = SSL_CTX_new(DTLS_client_method()); +#endif if (!dtls_ctx) { ret = AVERROR(ENOMEM); goto end; @@ -755,8 +830,16 @@ typedef struct RTCContext { int dtls_arq_max; /* The step start timeout in ms for DTLS transmission. */ int dtls_arq_timeout; - /* The size of RTP packet, should generally be set to MTU. */ + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. + */ int pkt_size; + /** + * The optional Bearer token for WHIP Authorization. + * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz + */ + char* authorization; } RTCContext; static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); @@ -987,45 +1070,6 @@ static int parse_codec(AVFormatContext *s) /** * Generate SDP offer according to the codec parameters, DTLS and ICE information. 
- * The below is an example of SDP offer: - * - * v=0 - * o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1 - * s=FFmpegPublishSession - * t=0 0 - * a=group:BUNDLE 0 1 - * a=extmap-allow-mixed - * a=msid-semantic: WMS - * - * m=audio 9 UDP/TLS/RTP/SAVPF 111 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:a174B - * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy - * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 - * a=setup:actpass - * a=mid:0 - * a=sendonly - * a=msid:FFmpeg audio - * a=rtcp-mux - * a=rtpmap:111 opus/48000/2 - * a=ssrc:4267647086 cname:FFmpeg - * a=ssrc:4267647086 msid:FFmpeg audio - * - * m=video 9 UDP/TLS/RTP/SAVPF 106 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:a174B - * a=ice-pwd:wY8rJ3gNLxL3eWZs6UPOxy - * a=fingerprint:sha-256 EE:FE:A2:E5:6A:21:78:60:71:2C:21:DC:1A:2C:98:12:0C:E8:AD:68:07:61:1B:0E:FC:46:97:1E:BC:97:4A:54 - * a=setup:actpass - * a=mid:1 - * a=sendonly - * a=msid:FFmpeg video - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:106 H264/90000 - * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01f - * a=ssrc:107169110 cname:FFmpeg - * a=ssrc:107169110 msid:FFmpeg video * * Note that we don't use av_sdp_create to generate SDP offer because it doesn't * support DTLS and ICE information. @@ -1156,42 +1200,6 @@ static int generate_sdp_offer(AVFormatContext *s) /** * Exchange SDP offer with WebRTC peer to get the answer. 
- * The below is an example of SDP answer: - * - * v=0 - * o=SRS/6.0.42(Bee) 107408542208384 2 IN IP4 0.0.0.0 - * s=SRSPublishSession - * t=0 0 - * a=ice-lite - * a=group:BUNDLE 0 1 - * a=msid-semantic: WMS live/show - * - * m=audio 9 UDP/TLS/RTP/SAVPF 111 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:ex9061f9 - * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 - * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 - * a=setup:passive - * a=mid:0 - * a=recvonly - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:111 opus/48000/2 - * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 - * - * m=video 9 UDP/TLS/RTP/SAVPF 106 - * c=IN IP4 0.0.0.0 - * a=ice-ufrag:ex9061f9 - * a=ice-pwd:bi8k19m9n836187b00d1gm3946234w85 - * a=fingerprint:sha-256 68:DD:7A:95:27:BD:0A:99:F4:7A:83:21:2F:50:15:2A:1D:1F:8A:D8:96:24:42:2D:A1:83:99:BF:F1:E2:11:A2 - * a=setup:passive - * a=mid:1 - * a=recvonly - * a=rtcp-mux - * a=rtcp-rsize - * a=rtpmap:106 H264/90000 - * a=fmtp:106 level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=42e01e - * a=candidate:0 1 udp 2130706431 172.20.10.7 8000 typ host generation 0 * * @return 0 if OK, AVERROR_xxx on error */ @@ -1219,9 +1227,17 @@ static int exchange_sdp(AVFormatContext *s) goto end; } - snprintf(buf, sizeof(buf), + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n" "Content-Type: application/sdp\r\n"); + if (rtc->authorization) + ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", rtc->authorization); + if (ret <= 0 || ret >= sizeof(buf)) { + av_log(s, AV_LOG_ERROR, "Failed to generate headers, size=%d, %s\n", ret, buf); + ret = AVERROR(EINVAL); + goto end; + } + av_opt_set(whip_uc->priv_data, "headers", buf, 0); av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); av_opt_set_bin(whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); @@ -1261,6 +1277,12 @@ static int exchange_sdp(AVFormatContext *s) } } 
+ if (!av_strstart(bp.str, "v=", NULL)) { + av_log(s, AV_LOG_ERROR, "Invalid answer: %s\n", bp.str); + ret = AVERROR(EINVAL); + goto end; + } + rtc->sdp_answer = av_strdup(bp.str); if (!rtc->sdp_answer) { ret = AVERROR(ENOMEM); @@ -2117,6 +2139,11 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) /* For audio OPUS stream, correct the timestamp. */ if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { pkt->dts = pkt->pts = rtc->audio_jitter_base; + // TODO: FIXME: For opus 48khz, each frame is 20ms which is 48000*20/1000 = 960. It appears that there is a + // bug introduced by libopus regarding the timestamp. Instead of being exactly 960, there is a slight + // deviation, such as 956 or 970. This deviation can cause Chrome to play the audio stream with noise. + // Although we are unsure of the root cause, we can simply correct the timestamp by using the timebase of + // Opus. We need to conduct further research and remove this line. rtc->audio_jitter_base += 960; } @@ -2168,6 +2195,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) av_freep(&rtc->ice_pwd_remote); av_freep(&rtc->ice_protocol); av_freep(&rtc->ice_host); + av_freep(&rtc->authorization); ffurl_closep(&rtc->udp_uc); ff_srtp_free(&rtc->srtp_audio_send); ff_srtp_free(&rtc->srtp_video_send); @@ -2182,8 +2210,9 @@ static const AVOption options[] = { { "ice_arq_max", "Maximum number of retransmissions for the ICE ARQ mechanism", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, { "ice_arq_timeout", "Start timeout in milliseconds for the ICE ARQ mechanism", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, { "dtls_arq_max", "Maximum number of retransmissions for the DTLS ARQ mechanism", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, - { "pkt_size", "The 
maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1500 }, -1, INT_MAX, DEC }, + { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, + { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, DEC }, + { "authorization", "The optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, { NULL }, }; From ea74e3cbe8833c9c3c9ea756eda20ad11a74c4a1 Mon Sep 17 00:00:00 2001 From: Winlin Date: Wed, 7 Jun 2023 20:07:36 +0800 Subject: [PATCH 44/60] Optimize DTLS Handshake and ICE Handling for Improved Performance (#3) 1. Merge ICE and DTLS ARQ max retry options into a single handshake timeout. 2. Utilize DTLS server role to prevent ARQ, as the peer DTLS client will handle ARQ. 3. Replace IO from DTLSContext with a callback function. 4. Measure and analyze the time cost for each step in the process. 5. Implement DTLS BIO callback for packet fragmentation using BIO_set_callback. 6. Generate private key and certificate prior to ICE for faster handshake. 7. Refine DTLS MTU settings using SSL_set_mtu and DTLS_set_link_mtu. 8. Provide callback for DTLS state, returning errors when DTLS encounters issues or closes. 9. Consolidate ICE request/response handling and DTLS handshake into a single function. --- doc/muxers.texi | 18 +- libavformat/rtcenc.c | 1176 ++++++++++++++++++++++++------------------ 2 files changed, 691 insertions(+), 503 deletions(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index 414b91a7b95df..c6cf61a864ebd 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1361,21 +1361,9 @@ This muxer supports the following options: @table @option -@item ice_arq_max @var{integer} -Set the maximum number of retransmissions for the ICE ARQ mechanism. -Default value is 5. 
- -@item ice_arq_timeout @var{integer} -Set the start timeout in milliseconds for the ICE ARQ mechanism. -Default value is 30. - -@item dtls_arq_max @var{integer} -Set the maximum number of retransmissions for the DTLS ARQ mechanism. -Default value is 5. - -@item dtls_arq_timeout @var{integer} -Set the start timeout in milliseconds for the DTLS ARQ mechanism. -Default value is 50. +@item handshake_timeout @var{integer} +Set the timeout in milliseconds for ICE and DTLS handshake. +Default value is 5000. @item pkt_size @var{integer} Set the maximum size, in bytes, of RTP packets that send out. diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index f9511d5ecf50f..32f5cce8f1116 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -72,19 +72,7 @@ * but please keep in mind that the `pkt_size` option limits the packet size to 1400. */ #define MAX_UDP_BUFFER_SIZE 4096 -/* - * Supported DTLS cipher suites for FFmpeg as a DTLS client. - * These cipher suites are used to negotiate with DTLS servers. - * - * It is advisable to use a limited number of cipher suites to reduce - * the size of DTLS UDP packets. - */ -#define DTLS_CIPHER_SUTES "ECDHE-ECDSA-AES128-GCM-SHA256"\ - ":ECDHE-RSA-AES128-GCM-SHA256"\ - ":ECDHE-ECDSA-AES128-SHA"\ - ":ECDHE-RSA-AES128-SHA"\ - ":ECDHE-ECDSA-AES256-SHA"\ - ":ECDHE-RSA-AES256-SHA" + /** * The size of the Secure Real-time Transport Protocol (SRTP) master key material * that is exported by Secure Sockets Layer (SSL) after a successful Datagram @@ -109,40 +97,22 @@ #define NALU_TYPE_STAP_A 24 /** - * Wait for a small timeout in milliseconds to allow for the server to process - * the Interactive Connectivity Establishment (ICE) request. If we immediately - * read the response after sending the request, we may receive nothing and need - * to immediately retry. To lessen the likelihood of retries, we can send the - * request and wait for a small amount of time for the server to process it - * before reading the response. 
- */ -#define ICE_PROCESSING_TIMEOUT 10 -/** - * Wait for a short timeout in milliseconds to allow the server to process - * the Datagram Transport Layer Security (DTLS) request. If we immediately - * read the response after sending the request, we may receive nothing and - * need to immediately retry. To reduce the likelihood of retries, we can - * send the request and wait a short amount of time for the server to - * process it before attempting to read the response. - */ -#define DTLS_PROCESSING_TIMEOUT 30 -/** - * The maximum number of retries for Datagram Transport Layer Security (DTLS) EAGAIN errors. - * When we send a DTLS request and receive no response, we may encounter an EAGAIN error. - * In this situation, we wait briefly and attempt to read the response again. - * We limit the maximum number of times we retry this loop. + * When sending ICE or DTLS messages, responses are received via UDP. However, the peer + * may not be ready and return EAGAIN, in which case we should wait for a short duration + * and retry reading. + * For instance, if we try to read from UDP and get EAGAIN, we sleep for 5ms and retry. + * This macro is used to limit the total duration in milliseconds (e.g., 50ms), so we + * will try at most 5 times. + * Keep in mind that this macro should have a minimum duration of 5 ms. */ -#define DTLS_EAGAIN_RETRIES_MAX 5 - -/** - * The DTLS timer's base timeout in microseconds. Its purpose is to minimize the unnecessary - * retransmission of ClientHello. - */ -#define DTLS_SSL_TIMER_BASE 400 * 1000 +#define ICE_DTLS_READ_INTERVAL 50 /* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ #define STUN_MAGIC_COOKIE 0x2112A442 +/* Calculate the elapsed time from starttime to endtime in milliseconds. 
*/ +#define RTC_ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) + /* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ enum StunAttr { STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request @@ -151,12 +121,40 @@ enum StunAttr { STUN_ATTR_FINGERPRINT = 0x8028, /// rfc5389 }; +enum DTLSState { + DTLS_STATE_NONE, + + /* Whether DTLS handshake is finished. */ + DTLS_STATE_FINISHED, + /* Whether DTLS session is closed. */ + DTLS_STATE_CLOSED, + /* Whether DTLS handshake is failed. */ + DTLS_STATE_FAILED, +}; + +typedef struct DTLSContext DTLSContext; +typedef int (*DTLSContext_on_state_fn)(DTLSContext *ctx, enum DTLSState state, const char* type, const char* desc); +typedef int (*DTLSContext_on_write_fn)(DTLSContext *ctx, char* data, int size); + typedef struct DTLSContext { + /* For callback. */ + DTLSContext_on_state_fn on_state; + DTLSContext_on_write_fn on_write; + void* opaque; + /* For av_log to write log to this category. */ void *log_avcl; + /* The DTLS context. */ + SSL_CTX *dtls_ctx; + SSL *dtls; + /* The DTLS BIOs. */ + BIO *bio_in; + /* The private key for DTLS handshake. */ EVP_PKEY *dtls_pkey; + /* The EC key for DTLS handshake. */ + EC_KEY* dtls_eckey; /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ X509 *dtls_cert; /* The fingerprint of certificate, used in SDP offer. */ @@ -170,33 +168,178 @@ typedef struct DTLSContext { */ uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; - /* Whether the timer should be reset. */ - int dtls_should_reset_timer; /* Whether the DTLS is done at least for us. */ int dtls_done_for_us; + /* Whether the SRTP key is exported. */ + int dtls_srtp_key_exported; /* The number of packets retransmitted for DTLS. */ int dtls_arq_packets; + /** + * This is the last DTLS content type and handshake type that is used to detect + * the ARQ packet. 
+ */ + uint8_t dtls_last_content_type; + uint8_t dtls_last_handshake_type; - /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ - URLContext *udp_uc; + /* These variables represent timestamps used for calculating and tracking the cost. */ + int64_t dtls_init_starttime; + int64_t dtls_init_endtime; + int64_t dtls_handshake_starttime; + int64_t dtls_handshake_endtime; - /* The maximum number of retries for DTLS transmission. */ - int dtls_arq_max; - /* The step start timeout in ms for DTLS transmission. */ - int dtls_arq_timeout; /** * The size of RTP packet, should generally be set to MTU. * Note that pion requires a smaller value, for example, 1200. */ - int pkt_size; + int mtu; } DTLSContext; -static int openssl_gen_private_key(DTLSContext *ctx) +static int is_dtls_packet(char *buf, int buf_size) { + return buf_size > 13 && buf[0] > 19 && buf[0] < 64; +} + +/** + * Callback function to print the OpenSSL SSL status. + */ +static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) +{ + int w, r1, is_fatal, is_warning, is_close_notify; + const char *method = "undefined", *alert_type, *alert_desc; + enum DTLSState state; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + void *s1 = ctx->log_avcl; + + w = where & ~SSL_ST_MASK; + if (w & SSL_ST_CONNECT) + method = "SSL_connect"; + else if (w & SSL_ST_ACCEPT) + method = "SSL_accept"; + + r1 = SSL_get_error(dtls, r0); + if (where & SSL_CB_LOOP) { + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? 
"read":"write"; + + alert_type = SSL_alert_type_string_long(r0); + alert_desc = SSL_alert_desc_string(r0); + + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) + av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); + else + av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); + + /** + * Notify the DTLS to handle the ALERT message, which maybe means media connection disconnect. + * CN(Close Notify) is sent when peer close the PeerConnection. fatal, IP(Illegal Parameter) + * is sent when DTLS failed. + */ + is_fatal = !av_strncasecmp(alert_type, "fatal", 5); + is_warning = !av_strncasecmp(alert_type, "warning", 7); + is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); + state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? 
DTLS_STATE_CLOSED : DTLS_STATE_NONE); + if (state != DTLS_STATE_NONE && ctx->on_state) { + av_log(s1, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", + ctx, state, is_fatal, is_warning, is_close_notify); + ctx->on_state(ctx, state, alert_type, alert_desc); + } + } else if (where & SSL_CB_EXIT) { + if (!r0) + av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + else if (r0 < 0) + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) + av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + else + av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } +} + +static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming) +{ + uint8_t content_type = 0; + uint16_t size = 0; + uint8_t handshake_type = 0; + void *s1 = ctx->log_avcl; + + /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + if (length >= 1) + content_type = (uint8_t)data[0]; + if (length >= 13) + size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; + if (length >= 14) + handshake_type = (uint8_t)data[13]; + + av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", + "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, + content_type, size, handshake_type); +} + +/** + * Always return 1 to accept any certificate. This is because we allow the peer to + * use a temporary self-signed certificate for DTLS. + */ +static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ + return 1; +} + +/** + * DTLS BIO write callback. 
+ */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x +static long openssl_dtls_bio_out_callback(BIO* b, int oper, const char* argp, int argi, long argl, long retvalue) +#else +static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, size_t len, int argi, long argl, int retvalue, size_t *processed) +#endif +{ + int ret, req_size = argi, is_arq = 0; + uint8_t content_type, handshake_type; + uint8_t *data = (uint8_t*)argp; + DTLSContext* ctx = b ? (DTLSContext*)BIO_get_callback_arg(b) : NULL; + void *s1 = ctx ? ctx->log_avcl : NULL; + +#if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x + req_size = len; + av_log(s1, AV_LOG_DEBUG, "DTLS: bio callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", + b, oper, argp, len, argi, argl, retvalue, processed, req_size); +#else + av_log(s1, AV_LOG_DEBUG, "DTLS: bio callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", + b, oper, argp, argi, argl, retvalue, req_size); +#endif + + if (oper != BIO_CB_WRITE || !argp || req_size <= 0) + return retvalue; + + openssl_dtls_state_trace(ctx, data, req_size, 0); + ret = ctx->on_write ? ctx->on_write(ctx, data, req_size) : 0; + content_type = req_size > 0 ? data[0] : 0; + handshake_type = req_size > 13 ? 
data[13] : 0; + + is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; + ctx->dtls_arq_packets += is_arq; + ctx->dtls_last_content_type = content_type; + ctx->dtls_last_handshake_type = handshake_type; + + if (ret < 0) { + av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", + oper, content_type, handshake_type, req_size, is_arq); + return ret; + } + + return retvalue; +} + +static int openssl_dtls_gen_private_key(DTLSContext *ctx) { int ret = 0; #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ EC_GROUP *ecgroup = NULL; - EC_KEY* dtls_eckey = NULL; #else const char *curve = "prime256v1"; #endif @@ -211,7 +354,7 @@ static int openssl_gen_private_key(DTLSContext *ctx) */ #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ ctx->dtls_pkey = EVP_PKEY_new(); - dtls_eckey = EC_KEY_new(); + ctx->dtls_eckey = EC_KEY_new(); ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); #if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x @@ -219,19 +362,19 @@ static int openssl_gen_private_key(DTLSContext *ctx) EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); #endif - if (EC_KEY_set_group(dtls_eckey, ecgroup) != 1) { + if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); ret = AVERROR(EINVAL); goto end; } - if (EC_KEY_generate_key(dtls_eckey) != 1) { + if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); ret = AVERROR(EINVAL); goto end; } - if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, dtls_eckey) != 1) { + if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); ret = AVERROR(EINVAL); goto end; @@ -247,21 +390,17 @@ static int openssl_gen_private_key(DTLSContext *ctx) end: #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_KEY_free(dtls_eckey); 
EC_GROUP_free(ecgroup); #endif return ret; } -/** - * Generate a self-signed certificate and private key for DTLS. - */ -static av_cold int dtls_context_init(DTLSContext *ctx) +static int openssl_dtls_gen_certificate(DTLSContext *ctx) { int ret = 0, serial, expire_day, i, n = 0; AVBPrint fingerprint; unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "ffmpeg.org", *curve = NULL; + const char *aor = "ffmpeg.org"; X509_NAME* subject = NULL; X509 *dtls_cert = NULL; void *s1 = ctx->log_avcl; @@ -269,17 +408,12 @@ static av_cold int dtls_context_init(DTLSContext *ctx) /* To prevent a crash during cleanup, always initialize it. */ av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); - ctx->dtls_cert = dtls_cert = X509_new(); + dtls_cert = ctx->dtls_cert = X509_new(); if (!dtls_cert) { ret = AVERROR(ENOMEM); goto end; } - /* Generate a private key to ctx->dtls_pkey. */ - if ((ret = openssl_gen_private_key(ctx)) < 0) - goto end; - - /* Generate a self-signed certificate. */ subject = X509_NAME_new(); if (!subject) { ret = AVERROR(ENOMEM); @@ -368,8 +502,6 @@ static av_cold int dtls_context_init(DTLSContext *ctx) goto end; } - av_log(s1, AV_LOG_INFO, "DTLS: Curve=%s, fingerprint %s\n", curve ? curve : "", ctx->dtls_fingerprint); - end: X509_NAME_free(subject); av_bprint_finalize(&fingerprint, NULL); @@ -377,141 +509,66 @@ static av_cold int dtls_context_init(DTLSContext *ctx) } /** - * Cleanup the DTLS context. + * Initializes DTLS context using ECDHE. */ -static av_cold void dtls_context_deinit(DTLSContext *ctx) -{ - X509_free(ctx->dtls_cert); - EVP_PKEY_free(ctx->dtls_pkey); - av_freep(&ctx->dtls_fingerprint); -} - -/** - * Callback function to print the OpenSSL SSL status. 
- */ -static void openssl_on_info(const SSL *dtls, int where, int ret) -{ - int w, r1; - const char *method = "undefined", *alert_type, *alert_desc; - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - void *s1 = ctx->log_avcl; - - w = where & ~SSL_ST_MASK; - if (w & SSL_ST_CONNECT) - method = "SSL_connect"; - else if (w & SSL_ST_ACCEPT) - method = "SSL_accept"; - - r1 = SSL_get_error(dtls, ret); - if (where & SSL_CB_LOOP) { - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } else if (where & SSL_CB_ALERT) { - method = (where & SSL_CB_READ) ? "read":"write"; - - alert_type = SSL_alert_type_string_long(ret); - alert_desc = SSL_alert_desc_string(ret); - - if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) - av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - else - av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(ret), where, ret, r1); - } else if (where & SSL_CB_EXIT) { - if (!ret) - av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - else if (ret < 0) - if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) - av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - else - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, ret, r1); - } -} - -#if OPENSSL_VERSION_NUMBER >= 0x10101000L /* OpenSSL 
1.1.1 */ -static unsigned int openssl_dtls_timer_cb(SSL *dtls, unsigned int previous_us) -{ - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - void *s1 = ctx->log_avcl; - - /* Double the timeout, note that it may be 0. */ - unsigned int timeout_us = previous_us * 2; - - /* If previous_us is 0, for example, the HelloVerifyRequest, we should respond it ASAP. - * when got ServerHello, we should reset the timer. */ - if (!previous_us || ctx->dtls_should_reset_timer) - timeout_us = DTLS_SSL_TIMER_BASE + ctx->dtls_arq_timeout * 1000; /* in us */ - - /* never exceed the max timeout. */ - timeout_us = FFMIN(timeout_us, 30 * 1000 * 1000); /* in us */ - - av_log(s1, AV_LOG_VERBOSE, "DTLS: ARQ timer cb timeout=%ums, previous=%ums\n", - timeout_us / 1000, previous_us / 1000); - - return timeout_us; -} -#endif - -static void openssl_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming, int r0, int r1) -{ - uint8_t content_type = 0; - uint16_t size = 0; - uint8_t handshake_type = 0; - void *s1 = ctx->log_avcl; - - /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ - if (length >= 1) - content_type = (uint8_t)data[0]; - if (length >= 13) - size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; - if (length >= 14) - handshake_type = (uint8_t)data[13]; - - av_log(s1, AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, r0=%d, r1=%d, len=%u, cnt=%u, size=%u, hs=%u\n", - "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, r0, r1, length, - content_type, size, handshake_type); -} - -/** - * Initializes DTLS context for client role using ECDHE. 
- */ -static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx) +static av_cold int openssl_dtls_init_context(DTLSContext *ctx) { int ret = 0; void *s1 = ctx->log_avcl; EVP_PKEY *dtls_pkey = ctx->dtls_pkey; X509 *dtls_cert = ctx->dtls_cert; + SSL_CTX *dtls_ctx = NULL; + SSL *dtls = NULL; + BIO *bio_in = NULL, *bio_out = NULL; + +#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); +#else + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); +#endif + if (!dtls_ctx) { + return AVERROR(ENOMEM); + } #if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ /* For ECDSA, we could set the curves list. */ if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); - ret = AVERROR(EINVAL); - goto end; + return AVERROR(EINVAL); } #endif - /* We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" */ - if (SSL_CTX_set_cipher_list(dtls_ctx, DTLS_CIPHER_SUTES) != 1) { +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x +#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 + SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); +#else + SSL_CTX_set_ecdh_auto(dtls_ctx, 1); +#endif +#endif + + /** + * We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" + * Cipher Suite: ECDHE-ECDSA-AES128-CBC-SHA (0xc009) + * Cipher Suite: ECDHE-RSA-AES128-CBC-SHA (0xc013) + * Cipher Suite: ECDHE-ECDSA-AES256-CBC-SHA (0xc00a) + * Cipher Suite: ECDHE-RSA-AES256-CBC-SHA (0xc014) + */ + if (SSL_CTX_set_cipher_list(dtls_ctx, "ALL") != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); - ret = AVERROR(EINVAL); - goto end; + return AVERROR(EINVAL); } /* Setup the certificate. 
*/ if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); - ret = AVERROR(EINVAL); - goto end; + return AVERROR(EINVAL); } if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); - ret = AVERROR(EINVAL); - goto end; + return AVERROR(EINVAL); } + + /* Server will send Certificate Request. */ + SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); /* The depth count is "level 0:peer certificate", "level 1: CA certificate", * "level 2: higher level CA certificate", and so on. */ SSL_CTX_set_verify_depth(dtls_ctx, 4); @@ -520,138 +577,134 @@ static av_cold int openssl_init_dtls_context(DTLSContext *ctx, SSL_CTX *dtls_ctx /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); - ret = AVERROR(EINVAL); - goto end; + return AVERROR(EINVAL); } -end: - return ret; -} - -/** - * After creating a DTLS context, initialize the DTLS SSL object. - */ -static av_cold int openssl_init_dtls_ssl(DTLSContext *ctx, SSL *dtls) -{ - int ret = 0; + /* The dtls should not be created unless the dtls_ctx has been initialized. */ + dtls = ctx->dtls = SSL_new(dtls_ctx); + if (!dtls) { + return AVERROR(ENOMEM); + } /* Setup the callback for logging. */ SSL_set_ex_data(dtls, 0, ctx); - SSL_set_info_callback(dtls, openssl_on_info); + SSL_set_info_callback(dtls, openssl_dtls_on_info); - /* Set dtls fragment size */ + /** + * We have set the MTU to fragment the DTLS packet. It is important to note that the + * packet is split to ensure that each handshake packet is smaller than the MTU. + */ SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - /* Avoid dtls negotiate failed, limit the max size of DTLS fragment. 
*/ - SSL_set_mtu(dtls, ctx->pkt_size); + SSL_set_mtu(dtls, ctx->mtu); +#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ + DTLS_set_link_mtu(dtls, ctx->mtu); +#endif + + bio_in = ctx->bio_in = BIO_new(BIO_s_mem()); + if (!bio_in) { + return AVERROR(ENOMEM); + } -#if OPENSSL_VERSION_NUMBER >= 0x10101000L /* OpenSSL 1.1.1 */ - /* Set the callback for ARQ timer. */ - DTLS_set_timer_cb(dtls, openssl_dtls_timer_cb); + bio_out = BIO_new(BIO_s_mem()); + if (!bio_out) { + return AVERROR(ENOMEM); + } + + /** + * Please be aware that it is necessary to use a callback to obtain the packet to be written out. It is + * imperative that BIO_get_mem_data is not used to retrieve the packet, as it returns all the bytes that + * need to be sent out. + * For example, if MTU is set to 1200, and we got two DTLS packets to send out: + * ServerHello, 95bytes. + * Certificate, 1105+143=1248bytes. + * If we use BIO_get_mem_data, it will return 95+1248=1343bytes, which is larger than MTU 1200. + * If we use the callback, it will return two UDP packets: + * ServerHello+Certificate(Fragment) = 95+1105=1200bytes. + * Certificate(Fragment) = 143bytes. + * Note that there should be more packets in real world, like ServerKeyExchange, CertificateRequest, + * and ServerHelloDone. Here we just use two packets for example. + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x + BIO_set_callback(bio_out, openssl_dtls_bio_out_callback); +#else + BIO_set_callback_ex(bio_out, openssl_dtls_bio_out_callback_ex); #endif + BIO_set_callback_arg(bio_out, (char*)ctx); - /* Setup DTLS as active, which is client role. */ - SSL_set_connect_state(dtls); - SSL_set_max_send_fragment(dtls, ctx->pkt_size); + SSL_set_bio(dtls, bio_in, bio_out); return ret; } /** - * Drives the SSL context by attempting to read packets to send from SSL, sending them - * over UDP, and then reading packets from UDP to feed back to SSL. + * Generate a self-signed certificate and private key for DTLS. 
Please note that the + * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore, + * there is no need to call it again. */ -static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO *bio_out, int loop) +static av_cold int dtls_context_init(DTLSContext *ctx) { - int ret, i, j, r0, r1, req_size, res_size = 0; - uint8_t *data = NULL, req_ct = 0, req_ht = 0, res_ct = 0, res_ht = 0; - char buf[MAX_UDP_BUFFER_SIZE]; + int ret = 0; void *s1 = ctx->log_avcl; - /* Drive the SSL context by state change, arq or response messages. */ - r0 = SSL_do_handshake(dtls); - r1 = SSL_get_error(dtls, r0); + ctx->dtls_init_starttime = av_gettime(); - /* Handshake successfully done */ - if (r0 == 1) { - ctx->dtls_done_for_us = 1; - return 0; + /* Generate a private key to ctx->dtls_pkey. */ + if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to generate DTLS private key\n"); + return ret; } - /* Handshake failed with fatal error */ - if (r0 < 0 && r1 != SSL_ERROR_WANT_READ) { - av_log(s1, AV_LOG_ERROR, "DTLS: Start handshake failed, loop=%d, r0=%d, r1=%d\n", loop, r0, r1); - return AVERROR(EIO); + /* Generate a self-signed certificate. */ + if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to generate DTLS certificate\n"); + return ret; } - /* Fast retransmit the request util got response. */ - for (i = 0; i <= ctx->dtls_arq_max && !res_size; i++) { - req_size = BIO_get_mem_data(bio_out, (char**)&data); - openssl_state_trace(ctx, data, req_size, 0, r0, r1); - ret = ffurl_write(ctx->udp_uc, data, req_size); - BIO_reset(bio_out); - req_ct = req_size > 0 ? data[0] : 0; - req_ht = req_size > 13 ? data[13] : 0; - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, loop=%d, content=%d, handshake=%d, size=%d\n", - loop, req_ct, req_ht, req_size); - return ret; - } - - /* Wait so that the server can process the request and no need ARQ then. 
*/ -#if DTLS_PROCESSING_TIMEOUT > 0 - av_usleep(DTLS_PROCESSING_TIMEOUT * 10000); -#endif - - for (j = 0; j <= DTLS_EAGAIN_RETRIES_MAX && !res_size; j++) { - ret = ffurl_read(ctx->udp_uc, buf, sizeof(buf)); + if ((ret = openssl_dtls_init_context(ctx)) < 0) { + av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); + return ret; + } - /* Ignore other packets, such as ICE indication, except DTLS. */ - if (ret > 0 && (ret < 13 || buf[0] <= 19 || buf[0] >= 64)) - continue; + ctx->dtls_init_endtime = av_gettime(); + av_log(s1, AV_LOG_INFO, "DTLS: Setup ok, MTU=%d, cost=%dms, fingerprint %s\n", + ctx->mtu, RTC_ELAPSED(ctx->dtls_init_starttime, av_gettime()), ctx->dtls_fingerprint); - /* Got DTLS response successfully. */ - if (ret > 0) { - res_size = ret; - ctx->dtls_should_reset_timer = 1; - break; - } + return ret; +} - /* Fatal error or timeout. */ - if (ret != AVERROR(EAGAIN)) { - av_log(s1, AV_LOG_ERROR, "DTLS: Read response failed, loop=%d, content=%d, handshake=%d\n", - loop, req_ct, req_ht); - return ret; - } +/** + * Once the DTLS role has been negotiated - active for the DTLS client or passive for the + * DTLS server - we proceed to set up the DTLS state and initiate the handshake. + */ +static int dtls_context_start(DTLSContext *ctx) +{ + int ret = 0, r0, r1; + SSL *dtls = ctx->dtls; + void *s1 = ctx->log_avcl; + char detail_error[256]; - /* DTLSv1_handle_timeout is called when a DTLS handshake timeout expires. If no timeout - * had expired, it returns 0. Otherwise, it retransmits the previous flight of handshake - * messages and returns 1. If too many timeouts had expired without progress or an error - * occurs, it returns -1. */ - r0 = DTLSv1_handle_timeout(dtls); - if (!r0) { - av_usleep(DTLS_SSL_TIMER_BASE + ctx->dtls_arq_timeout * 1000); - continue; /* no timeout had expired. 
*/ - } - if (r0 != 1) { - r1 = SSL_get_error(dtls, r0); - av_log(s1, AV_LOG_ERROR, "DTLS: Handle timeout, loop=%d, content=%d, handshake=%d, r0=%d, r1=%d\n", - loop, req_ct, req_ht, r0, r1); - return AVERROR(EIO); - } + ctx->dtls_handshake_starttime = av_gettime(); - ctx->dtls_arq_packets++; - break; - } - } + /* Setup DTLS as passive, which is server role. */ + SSL_set_accept_state(dtls); - /* Trace the response packet, feed to SSL. */ - BIO_reset(bio_in); - openssl_state_trace(ctx, buf, res_size, 1, r0, SSL_ERROR_NONE); - res_ct = res_size > 0 ? buf[0]: 0; - res_ht = res_size > 13 ? buf[13] : 0; - if ((r0 = BIO_write(bio_in, buf, res_size)) <= 0) { - av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, loop=%d, content=%d, handshake=%d, size=%d, r0=%d\n", - loop, res_ct, res_ht, res_size, r0); + /** + * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes + * the handshake message if the handshake is incomplete. + * To simplify maintenance, we initiate the handshake for both the DTLS server and client after + * sending out the ICE response in the start_active_handshake function. It's worth noting that + * although the DTLS server may receive the ClientHello immediately after sending out the ICE + * response, this shouldn't be an issue as the handshake function is called before any DTLS + * packets are received. + */ + r0 = SSL_do_handshake(dtls); + r1 = SSL_get_error(dtls, r0); + if (r0 < 0 && r1 == SSL_ERROR_SSL) + ERR_error_string_n(ERR_get_error(), detail_error, sizeof(detail_error)); + ERR_clear_error(); + // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. 
+ if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) { + av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, detail_error); return AVERROR(EIO); } @@ -659,7 +712,7 @@ static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO * } /** - * DTLS handshake with server, as a client in active mode, using openssl. + * DTLS handshake with server, as a server in passive mode, using openssl. * * This function initializes the SSL context as the client role using OpenSSL and * then performs the DTLS handshake until success. Upon successful completion, it @@ -667,101 +720,127 @@ static int openssl_drive_context(DTLSContext *ctx, SSL *dtls, BIO *bio_in, BIO * * * @return 0 if OK, AVERROR_xxx on error */ -static int dtls_context_handshake(DTLSContext *ctx) +static int dtls_context_write(DTLSContext *ctx, char* buf, int size) { - int ret, loop; - SSL_CTX *dtls_ctx = NULL; - SSL *dtls = NULL; + int ret = 0, res_ct, res_ht, r0, r1, do_callback; + SSL *dtls = ctx->dtls; const char* dst = "EXTRACTOR-dtls_srtp"; - BIO *bio_in = NULL, *bio_out = NULL; - int64_t starttime = av_gettime(); + BIO *bio_in = ctx->bio_in; void *s1 = ctx->log_avcl; - -#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ - dtls_ctx = SSL_CTX_new(DTLSv1_method()); -#else - dtls_ctx = SSL_CTX_new(DTLS_client_method()); -#endif - if (!dtls_ctx) { - ret = AVERROR(ENOMEM); - goto end; - } - - if (!ctx->udp_uc) { - av_log(s1, AV_LOG_ERROR, "DTLS: No UDP context\n"); + char detail_error[256]; + + /* Got DTLS response successfully. */ + openssl_dtls_state_trace(ctx, buf, size, 1); + if ((r0 = BIO_write(bio_in, buf, size)) <= 0) { + res_ct = size > 0 ? buf[0]: 0; + res_ht = size > 13 ? 
buf[13] : 0; + av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", + res_ct, res_ht, size, r0); ret = AVERROR(EIO); goto end; } - ret = openssl_init_dtls_context(ctx, dtls_ctx); - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); - goto end; - } - - /* The dtls should not be created unless the dtls_ctx has been initialized. */ - dtls = SSL_new(dtls_ctx); - if (!dtls) { - ret = AVERROR(ENOMEM); - goto end; - } - - bio_in = BIO_new(BIO_s_mem()); - if (!bio_in) { - ret = AVERROR(ENOMEM); - goto end; + /** + * If there is data available in bio_in, use SSL_read to allow SSL to process it. + * We limit the MTU to 1200 for DTLS handshake, which ensures that the buffer is large enough for reading. + */ + r0 = SSL_read(dtls, buf, sizeof(buf)); + r1 = SSL_get_error(dtls, r0); + if (r0 < 0 && r1 == SSL_ERROR_SSL) + ERR_error_string_n(ERR_get_error(), detail_error, sizeof(detail_error)); + ERR_clear_error(); + if (r0 <= 0) { + if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { + av_log(s1, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, detail_error); + ret = AVERROR(EIO); + goto end; + } + } else { + av_log(s1, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); } - bio_out = BIO_new(BIO_s_mem()); - if (!bio_out) { - ret = AVERROR(ENOMEM); + /* Check whether the DTLS is completed. 
*/ + if (SSL_is_init_finished(dtls) != 1) goto end; - } - SSL_set_bio(dtls, bio_in, bio_out); + do_callback = ctx->on_state && !ctx->dtls_done_for_us; + ctx->dtls_done_for_us = 1; + ctx->dtls_handshake_endtime = av_gettime(); - ret = openssl_init_dtls_ssl(ctx, dtls); - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to initialize SSL context\n"); - goto end; - } - - for (loop = 0; loop < 64 && !ctx->dtls_done_for_us; loop++) { - ret = openssl_drive_context(ctx, dtls, bio_in, bio_out, loop); - if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context, cost=%dms\n", - (int)(av_gettime() - starttime) / 1000); + /* Export SRTP master key after DTLS done */ + if (!ctx->dtls_srtp_key_exported) { + ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), + dst, strlen(dst), NULL, 0, 0); + if (!ret) { + av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); + ret = AVERROR(EIO); goto end; } - } - if (!ctx->dtls_done_for_us) { - av_log(s1, AV_LOG_ERROR, "DTLS: Handshake failed, loop=%d\n", loop); - ret = AVERROR(EIO); - goto end; - } - /* Export SRTP master key after DTLS done */ - ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), - dst, strlen(dst), NULL, 0, 0); - if (!ret) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); - ret = AVERROR(EIO); - goto end; + ctx->dtls_srtp_key_exported = 1; } - av_log(s1, AV_LOG_INFO, "WHIP: DTLS handshake done=%d, arq=%d, srtp_material=%luB, cost=%dms\n", - ctx->dtls_done_for_us, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material), - (int)(av_gettime() - starttime) / 1000); + if (do_callback && (ret = ctx->on_state(ctx, DTLS_STATE_FINISHED, NULL, NULL)) < 0) + goto end; end: - SSL_free(dtls); - SSL_CTX_free(dtls_ctx); return ret; } +/** + * Cleanup the DTLS context. 
+ */ +static av_cold void dtls_context_deinit(DTLSContext *ctx) +{ + SSL_free(ctx->dtls); + SSL_CTX_free(ctx->dtls_ctx); + X509_free(ctx->dtls_cert); + EVP_PKEY_free(ctx->dtls_pkey); + av_freep(&ctx->dtls_fingerprint); +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_KEY_free(ctx->dtls_eckey); +#endif +} + +enum RTCState { + RTC_STATE_NONE, + + /* The initial state. */ + RTC_STATE_INIT, + /* The muxer has sent the offer to the peer. */ + RTC_STATE_OFFER, + /* The muxer has received the answer from the peer. */ + RTC_STATE_ANSWER, + /** + * After parsing the answer received from the peer, the muxer negotiates the abilities + * in the offer that it generated. + */ + RTC_STATE_NEGOTIATED, + /* The muxer has connected to the peer via UDP. */ + RTC_STATE_UDP_CONNECTED, + /* The muxer has sent the ICE request to the peer. */ + RTC_STATE_ICE_CONNECTING, + /* The muxer has received the ICE response from the peer. */ + RTC_STATE_ICE_CONNECTED, + /* The muxer has finished the DTLS handshake with the peer. */ + RTC_STATE_DTLS_FINISHED, + /* The muxer has finished the SRTP setup. */ + RTC_STATE_SRTP_FINISHED, + /* The muxer is ready to send/receive media frames. */ + RTC_STATE_READY, + /* The muxer has failed. */ + RTC_STATE_FAILED, +}; + typedef struct RTCContext { AVClass *av_class; + /* The state of the RTC connection. */ + enum RTCState state; + /* The callback return value for DTLS. */ + int dtls_ret; + int dtls_closed; + /* Parameters for the input audio and video codecs. */ AVCodecParameters *audio_par; AVCodecParameters *video_par; @@ -806,6 +885,17 @@ typedef struct RTCContext { /* The resource URL returned in the Location header of WHIP HTTP response. */ char *whip_resource_url; + /* These variables represent timestamps used for calculating and tracking the cost.
*/ + int64_t rtc_starttime; + int64_t rtc_init_time; + int64_t rtc_offer_time; + int64_t rtc_answer_time; + int64_t rtc_udp_time; + int64_t rtc_ice_time; + int64_t rtc_dtls_time; + int64_t rtc_srtp_time; + int64_t rtc_ready_time; + /* The DTLS context. */ DTLSContext dtls_ctx; @@ -821,15 +911,11 @@ typedef struct RTCContext { /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; + /* The buffer for UDP transmission. */ + char buf[MAX_UDP_BUFFER_SIZE]; - /* The maximum number of retries for ICE transmission. */ - int ice_arq_max; - /* The step start timeout in ms for ICE transmission. */ - int ice_arq_timeout; - /* The maximum number of retries for DTLS transmission. */ - int dtls_arq_max; - /* The step start timeout in ms for DTLS transmission. */ - int dtls_arq_timeout; + /* The timeout in milliseconds for ICE and DTLS handshake. */ + int handshake_timeout; /** * The size of RTP packet, should generally be set to MTU. * Note that pion requires a smaller value, for example, 1200. @@ -844,6 +930,59 @@ typedef struct RTCContext { static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); +/** + * When DTLS state change. + */ +static int dtls_context_on_state(DTLSContext *ctx, enum DTLSState state, const char* type, const char* desc) +{ + int ret = 0; + AVFormatContext *s = ctx->opaque; + RTCContext *rtc = s->priv_data; + + if (state == DTLS_STATE_CLOSED) { + rtc->dtls_closed = 1; + av_log(s, AV_LOG_INFO, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", + type ? type : "", desc ? desc : "", RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + return ret; + } + + if (state == DTLS_STATE_FAILED) { + rtc->state = RTC_STATE_FAILED; + av_log(s, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", + type ? type : "", desc ? 
desc : ""); + rtc->dtls_ret = AVERROR(EIO); + return ret; + } + + if (state == DTLS_STATE_FINISHED && rtc->state < RTC_STATE_DTLS_FINISHED) { + rtc->state = RTC_STATE_DTLS_FINISHED; + rtc->rtc_dtls_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%luB, cost=%dms, elapsed=%dms\n", + ctx->dtls_done_for_us, ctx->dtls_srtp_key_exported, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material), + RTC_ELAPSED(ctx->dtls_handshake_starttime, ctx->dtls_handshake_endtime), + RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + return ret; + } + + return ret; +} + +/** + * When DTLS write data. + */ +static int dtls_context_on_write(DTLSContext *ctx, char* data, int size) +{ + AVFormatContext *s = ctx->opaque; + RTCContext *rtc = s->priv_data; + + if (!rtc->udp_uc) { + av_log(s, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); + return AVERROR(EIO); + } + + return ffurl_write(rtc->udp_uc, data, size); +} + /** * Initialize and check the options for the WebRTC muxer. */ @@ -852,24 +991,30 @@ static av_cold int whip_init(AVFormatContext *s) int ret, ideal_pkt_size = 532; RTCContext *rtc = s->priv_data; + rtc->rtc_starttime = av_gettime(); + /* Use the same logging context as AV format. 
*/ rtc->dtls_ctx.log_avcl = s; - rtc->dtls_ctx.dtls_arq_max = rtc->dtls_arq_max; - rtc->dtls_ctx.dtls_arq_timeout = rtc->dtls_arq_timeout; - rtc->dtls_ctx.pkt_size = rtc->pkt_size; + rtc->dtls_ctx.mtu = rtc->pkt_size; + rtc->dtls_ctx.opaque = s; + rtc->dtls_ctx.on_state = dtls_context_on_state; + rtc->dtls_ctx.on_write = dtls_context_on_write; if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { av_log(s, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); return ret; } - av_log(s, AV_LOG_INFO, "WHIP: Init ice_arq_max=%d, ice_arq_timeout=%d, dtls_arq_max=%d, dtls_arq_timeout=%d pkt_size=%d\n", - rtc->ice_arq_max, rtc->ice_arq_timeout, rtc->dtls_arq_max, rtc->dtls_arq_timeout, rtc->pkt_size); - if (rtc->pkt_size < ideal_pkt_size) av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", rtc->pkt_size, ideal_pkt_size); + if (rtc->state < RTC_STATE_INIT) + rtc->state = RTC_STATE_INIT; + rtc->rtc_init_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, elapsed=%dms\n", + rtc->state, rtc->handshake_timeout, rtc->pkt_size, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + return 0; } @@ -1124,7 +1269,7 @@ static int generate_sdp_offer(AVFormatContext *s) "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" "a=fingerprint:sha-256 %s\r\n" - "a=setup:active\r\n" + "a=setup:passive\r\n" "a=mid:0\r\n" "a=sendonly\r\n" "a=msid:FFmpeg audio\r\n" @@ -1158,7 +1303,7 @@ static int generate_sdp_offer(AVFormatContext *s) "a=ice-ufrag:%s\r\n" "a=ice-pwd:%s\r\n" "a=fingerprint:sha-256 %s\r\n" - "a=setup:active\r\n" + "a=setup:passive\r\n" "a=mid:1\r\n" "a=sendonly\r\n" "a=msid:FFmpeg video\r\n" @@ -1191,7 +1336,11 @@ static int generate_sdp_offer(AVFormatContext *s) ret = AVERROR(ENOMEM); goto end; } - av_log(s, AV_LOG_VERBOSE, "WHIP: Generated offer: %s\n", rtc->sdp_offer); + + if (rtc->state < RTC_STATE_OFFER) + rtc->state = RTC_STATE_OFFER; + rtc->rtc_offer_time = av_gettime(); + av_log(s, AV_LOG_VERBOSE, 
"WHIP: Generated state=%d, offer: %s\n", rtc->state, rtc->sdp_offer); end: av_bprint_finalize(&bp, NULL); @@ -1288,7 +1437,10 @@ static int exchange_sdp(AVFormatContext *s) ret = AVERROR(ENOMEM); goto end; } - av_log(s, AV_LOG_VERBOSE, "WHIP: Got answer: %s\n", rtc->sdp_answer); + + if (rtc->state < RTC_STATE_ANSWER) + rtc->state = RTC_STATE_ANSWER; + av_log(s, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", rtc->state, rtc->sdp_answer); end: ffurl_closep(&whip_uc); @@ -1389,9 +1541,12 @@ static int parse_answer(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "WHIP: SDP offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d\n", - strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), - rtc->ice_protocol, rtc->ice_host, rtc->ice_port); + if (rtc->state < RTC_STATE_NEGOTIATED) + rtc->state = RTC_STATE_NEGOTIATED; + rtc->rtc_answer_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + rtc->state, strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), + rtc->ice_protocol, rtc->ice_host, rtc->ice_port, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); end: avio_context_free(&pb); @@ -1574,7 +1729,6 @@ static int ice_is_binding_response(char *buf, int buf_size) { static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) { int ret = 0, size; char tid[12]; - uint8_t res_buf[MAX_UDP_BUFFER_SIZE]; RTCContext *rtc = s->priv_data; /* Ignore if not a binding request. */ @@ -1590,13 +1744,13 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz memcpy(tid, buf + 8, 12); /* Build the STUN binding response. 
*/ - ret = ice_create_response(s, tid, sizeof(tid), res_buf, sizeof(res_buf), &size); + ret = ice_create_response(s, tid, sizeof(tid), rtc->buf, sizeof(rtc->buf), &size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to create STUN binding response, size=%d\n", size); return ret; } - ret = ffurl_write(rtc->udp_uc, res_buf, size); + ret = ffurl_write(rtc->udp_uc, rtc->buf, size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to send STUN binding response, size=%d\n", size); return ret; @@ -1606,32 +1760,22 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz } /** - * Opens the UDP transport and completes the ICE handshake, using fast retransmit to - * handle packet loss for the binding request. - * - * To initiate a fast retransmission of the STUN binding request during ICE, we wait only - * for a successful local ICE process i.e., when a binding response is received from the - * server. Since the server's binding request may not arrive, we do not always wait for it. - * However, we will always respond to the server's binding request during ICE, DTLS or - * RTP streaming. - * - * @param s Pointer to the AVFormatContext - * @return Returns 0 if the handshake was successful or AVERROR_xxx in case of an error + * To establish a connection with the UDP server, we utilize ICE-LITE in a Client-Server + * mode. In this setup, FFmpeg acts as the UDP client, while the peer functions as the + * UDP server. */ -static int ice_handshake(AVFormatContext *s) +static int udp_connect(AVFormatContext *s) { - int ret, size; + int ret = 0; char url[256], tmp[16]; - char req_buf[MAX_UDP_BUFFER_SIZE], res_buf[MAX_UDP_BUFFER_SIZE]; RTCContext *rtc = s->priv_data; - int fast_retries = rtc->ice_arq_max, timeout = rtc->ice_arq_timeout; /* Build UDP URL and create the UDP context as transport. 
*/ ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); - goto end; + return ret; } av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); @@ -1643,92 +1787,112 @@ static int ice_handshake(AVFormatContext *s) ret = ffurl_connect(rtc->udp_uc, NULL); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); - goto end; + return ret; } /* Make the socket non-blocking, set to READ and WRITE mode after connected */ ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; - /* Build the STUN binding request. */ - ret = ice_create_request(s, req_buf, sizeof(req_buf), &size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); - goto end; - } + if (rtc->state < RTC_STATE_UDP_CONNECTED) + rtc->state = RTC_STATE_UDP_CONNECTED; + rtc->rtc_udp_time = av_gettime(); + av_log(s, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", + rtc->state, RTC_ELAPSED(rtc->rtc_starttime, av_gettime()), rtc->ice_host, rtc->ice_port); - /* Fast retransmit the STUN binding request. */ - while (1) { - ret = ffurl_write(rtc->udp_uc, req_buf, size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); - goto end; - } + return ret; +} - /* Wait so that the server can process the request and no need ARQ then. */ -#if ICE_PROCESSING_TIMEOUT > 0 - av_usleep(ICE_PROCESSING_TIMEOUT * 10000); -#endif +static int ice_dtls_handshake(AVFormatContext *s) +{ + int ret = 0, size, i; + int64_t starttime = av_gettime(), now; + RTCContext *rtc = s->priv_data; - /* Read the STUN binding response. 
*/ - ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); - if (ret < 0) { - /* If max retries is 6 and start timeout is 21ms, the total timeout - * is about 21 + 42 + 84 + 168 + 336 + 672 = 1263ms. */ - av_usleep(timeout * 1000); - timeout *= 2; + if (rtc->state < RTC_STATE_UDP_CONNECTED || !rtc->udp_uc) { + av_log(s, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", rtc->state, rtc->udp_uc); + return AVERROR(EINVAL); + } - if (ret == AVERROR(EAGAIN) && fast_retries) { - fast_retries--; - continue; + while (1) { + if (rtc->state <= RTC_STATE_ICE_CONNECTING) { + /* Build the STUN binding request. */ + ret = ice_create_request(s, rtc->buf, sizeof(rtc->buf), &size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); + goto end; } - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding response, retries=%d\n", rtc->ice_arq_max); - goto end; + ret = ffurl_write(rtc->udp_uc, rtc->buf, size); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); + goto end; + } + + if (rtc->state < RTC_STATE_ICE_CONNECTING) + rtc->state = RTC_STATE_ICE_CONNECTING; } - /* If got any binding response, the fast retransmission is done. */ - if (ice_is_binding_response(res_buf, ret)) +next_packet: + if (rtc->state >= RTC_STATE_DTLS_FINISHED) + /* DTLS handshake is done, exit the loop. */ break; - /* When a binding request is received, it is necessary to respond immediately. 
*/ - if (ice_is_binding_request(res_buf, ret) && (ret = ice_handle_binding_request(s, res_buf, ret)) < 0) + now = av_gettime(); + if (now - starttime >= rtc->handshake_timeout * 1000) { + av_log(s, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + rtc->handshake_timeout, RTC_ELAPSED(starttime, now), RTC_ELAPSED(rtc->rtc_starttime, now), rtc->state); + ret = AVERROR(ETIMEDOUT); goto end; - } - - /* Wait just for a small while to get the possible binding request from server. */ - fast_retries = rtc->ice_arq_max / 2; - timeout = rtc->ice_arq_timeout; - while (fast_retries) { - ret = ffurl_read(rtc->udp_uc, res_buf, sizeof(res_buf)); - if (ret < 0) { - /* If max retries is 6 and start timeout is 21ms, the total timeout - * is about 21 + 42 + 84 = 147ms. */ - av_usleep(timeout * 1000); - timeout *= 2; + } + /* Read the STUN or DTLS messages from peer. */ + for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5; i++) { + ret = ffurl_read(rtc->udp_uc, rtc->buf, sizeof(rtc->buf)); + if (ret > 0) + break; if (ret == AVERROR(EAGAIN)) { - fast_retries--; + av_usleep(5 * 1000); continue; } - - av_log(s, AV_LOG_ERROR, "Failed to read STUN binding request, retries=%d\n", rtc->ice_arq_max); + av_log(s, AV_LOG_ERROR, "Failed to read message\n"); goto end; } + /* Got nothing, continue to process handshake. */ + if (ret <= 0) + continue; + + /* Handle the ICE binding response. */ + if (ice_is_binding_response(rtc->buf, ret)) { + if (rtc->state < RTC_STATE_ICE_CONNECTED) { + rtc->state = RTC_STATE_ICE_CONNECTED; + rtc->rtc_ice_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + rtc->state, rtc->ice_host, rtc->ice_port, rtc->whip_resource_url ? rtc->whip_resource_url : "", + rtc->ice_ufrag_remote, rtc->ice_ufrag_local, ret, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + + /* If got the first binding response, start DTLS handshake. 
*/ + if ((ret = dtls_context_start(&rtc->dtls_ctx)) < 0) + goto end; + } + goto next_packet; + } + /* When a binding request is received, it is necessary to respond immediately. */ - if (ice_is_binding_request(res_buf, ret)) { - if ((ret = ice_handle_binding_request(s, res_buf, ret)) < 0) + if (ice_is_binding_request(rtc->buf, ret)) { + if ((ret = ice_handle_binding_request(s, rtc->buf, ret)) < 0) goto end; - break; + goto next_packet; } - } - av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, url=udp://%s:%d, location=%s, username=%s:%s, req=%dB, res=%dB, arq=%d\n", - rtc->ice_host, rtc->ice_port, rtc->whip_resource_url ? rtc->whip_resource_url : "", - rtc->ice_ufrag_remote, rtc->ice_ufrag_local, size, ret, - rtc->ice_arq_max - fast_retries); - ret = 0; + /* If got any DTLS messages, handle it. */ + if (is_dtls_packet(rtc->buf, ret) && rtc->state >= RTC_STATE_ICE_CONNECTED) { + if ((ret = dtls_context_write(&rtc->dtls_ctx, rtc->buf, ret)) < 0) + goto end; + goto next_packet; + } + } end: return ret; @@ -1751,13 +1915,13 @@ static int setup_srtp(AVFormatContext *s) const char* suite = "AES_CM_128_HMAC_SHA1_80"; RTCContext *rtc = s->priv_data; - /* As DTLS client, the send key is client master key plus salt. */ - memcpy(send_key, rtc->dtls_ctx.dtls_srtp_material, 16); - memcpy(send_key + 16, rtc->dtls_ctx.dtls_srtp_material + 32, 14); + /* As DTLS server, the recv key is client master key plus salt. */ + memcpy(recv_key, rtc->dtls_ctx.dtls_srtp_material, 16); + memcpy(recv_key + 16, rtc->dtls_ctx.dtls_srtp_material + 32, 14); - /* As DTLS client, the recv key is server master key plus salt. */ - memcpy(recv_key, rtc->dtls_ctx.dtls_srtp_material + 16, 16); - memcpy(recv_key + 16, rtc->dtls_ctx.dtls_srtp_material + 46, 14); + /* As DTLS server, the send key is server master key plus salt. 
*/ + memcpy(send_key, rtc->dtls_ctx.dtls_srtp_material + 16, 16); + memcpy(send_key + 16, rtc->dtls_ctx.dtls_srtp_material + 46, 14); /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { @@ -1797,7 +1961,11 @@ static int setup_srtp(AVFormatContext *s) goto end; } - av_log(s, AV_LOG_INFO, "WHIP: SRTP setup done, suite=%s, key=%luB\n", suite, sizeof(send_key)); + if (rtc->state < RTC_STATE_SRTP_FINISHED) + rtc->state = RTC_STATE_SRTP_FINISHED; + rtc->rtc_srtp_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", + rtc->state, suite, sizeof(send_key), RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); end: return ret; @@ -1894,8 +2062,20 @@ static int create_rtp_muxer(AVFormatContext *s) rtp_ctx = NULL; } - av_log(s, AV_LOG_INFO, "WHIP: Create RTP muxer OK, buffer_size=%d, max_packet_size=%d\n", - buffer_size, max_packet_size); + if (rtc->state < RTC_STATE_READY) + rtc->state = RTC_STATE_READY; + rtc->rtc_ready_time = av_gettime(); + av_log(s, AV_LOG_INFO, "WHIP: Muxer is ready, state=%d, buffer_size=%d, max_packet_size=%d, " + "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d,ready:%d)\n", + rtc->state, buffer_size, max_packet_size, RTC_ELAPSED(rtc->rtc_starttime, av_gettime()), + RTC_ELAPSED(rtc->rtc_starttime, rtc->rtc_init_time), + RTC_ELAPSED(rtc->rtc_init_time, rtc->rtc_offer_time), + RTC_ELAPSED(rtc->rtc_offer_time, rtc->rtc_answer_time), + RTC_ELAPSED(rtc->rtc_answer_time, rtc->rtc_udp_time), + RTC_ELAPSED(rtc->rtc_udp_time, rtc->rtc_ice_time), + RTC_ELAPSED(rtc->rtc_ice_time, rtc->rtc_dtls_time), + RTC_ELAPSED(rtc->rtc_dtls_time, rtc->rtc_srtp_time), + RTC_ELAPSED(rtc->rtc_srtp_time, rtc->rtc_ready_time)); end: if (rtp_ctx) @@ -1916,7 +2096,6 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) { int ret, cipher_size, is_rtcp, is_video; uint8_t payload_type, nalu_header; - char 
cipher[MAX_UDP_BUFFER_SIZE]; AVFormatContext *s = opaque; RTCContext *rtc = s->priv_data; struct SRTPContext *srtp; @@ -1954,13 +2133,13 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) srtp = is_rtcp ? &rtc->srtp_rtcp_send : (is_video? &rtc->srtp_video_send : &rtc->srtp_audio_send); /* Encrypt by SRTP and send out. */ - cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, cipher, sizeof(cipher)); + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, rtc->buf, sizeof(rtc->buf)); if (cipher_size <= 0 || cipher_size < buf_size) { av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); return 0; } - ret = ffurl_write(rtc->udp_uc, cipher, cipher_size); + ret = ffurl_write(rtc->udp_uc, rtc->buf, cipher_size); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); return ret; @@ -2093,35 +2272,37 @@ static av_cold int rtc_init(AVFormatContext *s) RTCContext *rtc = s->priv_data; if ((ret = whip_init(s)) < 0) - return ret; + goto end; if ((ret = parse_codec(s)) < 0) - return ret; + goto end; if ((ret = generate_sdp_offer(s)) < 0) - return ret; + goto end; if ((ret = exchange_sdp(s)) < 0) - return ret; + goto end; if ((ret = parse_answer(s)) < 0) - return ret; - - if ((ret = ice_handshake(s)) < 0) - return ret; + goto end; - /* Now UDP URL context is ready, setup the DTLS transport. 
*/ - rtc->dtls_ctx.udp_uc = rtc->udp_uc; + if ((ret = udp_connect(s)) < 0) + goto end; - if ((ret = dtls_context_handshake(&rtc->dtls_ctx)) < 0) - return ret; + if ((ret = ice_dtls_handshake(s)) < 0) + goto end; if ((ret = setup_srtp(s)) < 0) - return ret; + goto end; if ((ret = create_rtp_muxer(s)) < 0) - return ret; + goto end; +end: + if (ret < 0 && rtc->state < RTC_STATE_FAILED) + rtc->state = RTC_STATE_FAILED; + if (ret >= 0 && rtc->state >= RTC_STATE_FAILED && rtc->dtls_ret < 0) + ret = rtc->dtls_ret; return ret; } @@ -2133,8 +2314,23 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) AVFormatContext *rtp_ctx = st->priv_data; /* TODO: Send binding request every 1s as WebRTC heartbeat. */ - /* TODO: Receive packets from the server such as ICE binding requests, DTLS messages, - * and RTCP like PLI requests, then respond to them.*/ + + /** + * Receive packets from the server such as ICE binding requests, DTLS messages, + * and RTCP like PLI requests, then respond to them. + */ + ret = ffurl_read(rtc->udp_uc, rtc->buf, sizeof(rtc->buf)); + if (ret > 0) { + if (is_dtls_packet(rtc->buf, ret)) { + if ((ret = dtls_context_write(&rtc->dtls_ctx, rtc->buf, ret)) < 0) { + av_log(s, AV_LOG_ERROR, "Failed to handle DTLS message\n"); + goto end; + } + } + } else if (ret != AVERROR(EAGAIN)) { + av_log(s, AV_LOG_ERROR, "Failed to read from UDP socket\n"); + goto end; + } /* For audio OPUS stream, correct the timestamp. 
*/ if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { @@ -2150,7 +2346,7 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) ret = insert_sps_pps_packet(s, pkt); if (ret < 0) { av_log(s, AV_LOG_ERROR, "Failed to insert SPS/PPS packet\n"); - return ret; + goto end; } ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); @@ -2160,9 +2356,16 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) ret = 0; } else av_log(s, AV_LOG_ERROR, "Failed to write packet, size=%d\n", pkt->size); - return ret; + goto end; } +end: + if (ret < 0 && rtc->state < RTC_STATE_FAILED) + rtc->state = RTC_STATE_FAILED; + if (ret >= 0 && rtc->state >= RTC_STATE_FAILED && rtc->dtls_ret < 0) + ret = rtc->dtls_ret; + if (ret >= 0 && rtc->dtls_closed) + ret = AVERROR(EIO); return ret; } @@ -2207,10 +2410,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) #define OFFSET(x) offsetof(RTCContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { - { "ice_arq_max", "Maximum number of retransmissions for the ICE ARQ mechanism", OFFSET(ice_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "ice_arq_timeout", "Start timeout in milliseconds for the ICE ARQ mechanism", OFFSET(ice_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 30 }, -1, INT_MAX, DEC }, - { "dtls_arq_max", "Maximum number of retransmissions for the DTLS ARQ mechanism", OFFSET(dtls_arq_max), AV_OPT_TYPE_INT, { .i64 = 5 }, -1, INT_MAX, DEC }, - { "dtls_arq_timeout", "Start timeout in milliseconds for the DTLS ARQ mechanism", OFFSET(dtls_arq_timeout), AV_OPT_TYPE_INT, { .i64 = 50 }, -1, INT_MAX, DEC }, + { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, DEC }, { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, DEC }, { "authorization", "The optional Bearer token for WHIP Authorization", 
OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, { NULL }, From 680ed4964e4071e53e731a464fd960e50929f2fd Mon Sep 17 00:00:00 2001 From: Winlin Date: Fri, 9 Jun 2023 12:40:33 +0800 Subject: [PATCH 45/60] Roundtrip3: Refinements Based on Derek Buitenhuis' Comments (#4) 1. Refine WHIP muxer name. 1. Refine SRTP key macros. 1. Refine logging context. 1. Refine SSL error messages. 1. Refine DTLS error messages. 1. Refine RTC error messages. 1. Use AV_RB8 to read integer from memory. 1. Update DTLS curve list to X25519:P-256:P-384:P-521. 1. Refine SRTP profile name for FFmpeg and OpenSSL. 1. Replace magic numbers with macros and extract to functions. 1. Alter log levels from INFO to VERBOSE, except for final results. 1. Use typedef SRTPContext. 1. Refine the ICE STUN magic number. 1. Reposition the on_rtp_write_packet function. 1. Refer to Chrome definition of RTP payload types. 1. Replace magic numbers with macros for RTP and RTCP payload types. 1. Rename to WHIP muxer. 1. Add TODO for OPUS timestamp issue. 1. Refine comments, do not hardcode H.264. 1. Define SDP session id and creator IP as macros. 1. Refine fixed frame size 960 to rtc->audio_par->frame_size. 1. Use h264_mp4toannexb to convert MP4/ISOM to annexb. 1. Address occasional inaccuracies in OPUS audio timestamps. 1. Correct marker setting after utilizing BSF. 1. Remove dependency on avc.h after using BSF. 
--- libavformat/Makefile | 2 +- libavformat/rtcenc.c | 1085 ++++++++++++++++++++---------------------- libavformat/srtp.h | 4 +- 3 files changed, 506 insertions(+), 585 deletions(-) diff --git a/libavformat/Makefile b/libavformat/Makefile index b980b1232126b..b5000b08a452f 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -499,7 +499,7 @@ OBJS-$(CONFIG_RSD_DEMUXER) += rsd.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RSO_DEMUXER) += rsodec.o rso.o pcm.o OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o rawenc.o -OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o http.o srtp.o +OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o http.o srtp.o OBJS-$(CONFIG_RTP_MPEGTS_MUXER) += rtpenc_mpegts.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 32f5cce8f1116..f6e124ae7c815 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -19,39 +19,18 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "config.h" - -#ifndef CONFIG_OPENSSL -#error "DTLS is not supported, please enable openssl" -#else #include #include -/** - * Minimum required version of OpenSSL. - * MM NN FF PP S - * 0x1010102fL = 0x1 01 01 02 fL // 1.1.1b release - * MM(major) = 0x1 // 1.* - * NN(minor) = 0x01 // 1.1.* - * FF(fix) = 0x01 // 1.1.1* - * PP(patch) = 'a' + 0x02 - 1 = 'b' // 1.1.1b * - * S(status) = 0xf = release // 1.1.1b release - * Status 0 for development, 1 to e for betas 1 to 14, and f for release. 
- * Please use the stable version for DTLS, see https://github.com/openssl/openssl/issues/346 - */ -#if OPENSSL_VERSION_NUMBER < 0x100010b0L /* OpenSSL 1.0.1k */ -#error "OpenSSL version 1.0.1k or newer is required" -#endif -#endif #include "libavcodec/avcodec.h" #include "libavutil/base64.h" #include "libavutil/bprint.h" #include "libavutil/crc.h" #include "libavutil/hmac.h" +#include "libavutil/intreadwrite.h" #include "libavutil/opt.h" #include "libavutil/random_seed.h" #include "libavutil/time.h" -#include "avc.h" #include "avio_internal.h" #include "http.h" #include "internal.h" @@ -64,6 +43,7 @@ * be it an offer or answer. */ #define MAX_SDP_SIZE 8192 + /** * Maximum size of the buffer for sending and receiving UDP packets. * Please note that this size does not limit the size of the UDP packet that can be sent. @@ -78,11 +58,10 @@ * that is exported by Secure Sockets Layer (SSL) after a successful Datagram * Transport Layer Security (DTLS) handshake. This material consists of a key * of 16 bytes and a salt of 14 bytes. - * - * The material is exported by SSL in the following format: client_key (16 bytes) | - * server_key (16 bytes) | client_salt (14 bytes) | server_salt (14 bytes). */ -#define DTLS_SRTP_MASTER_KEY_LEN 30 +#define DTLS_SRTP_KEY_LEN 16 +#define DTLS_SRTP_SALT_LEN 14 + /** * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum * and padding that is appended to the end of the packet. To calculate the maximum @@ -90,11 +69,6 @@ * this size from the `pkt_size`. */ #define DTLS_SRTP_CHECKSUM_LEN 16 -/** - * STAP-A stands for Single-Time Aggregation Packet. - * The NALU type for STAP-A is 24 (0x18). - */ -#define NALU_TYPE_STAP_A 24 /** * When sending ICE or DTLS messages, responses are received via UDP. However, the peer @@ -110,11 +84,72 @@ /* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ #define STUN_MAGIC_COOKIE 0x2112A442 +/** + * The DTLS content type. 
+ * See https://tools.ietf.org/html/rfc2246#section-6.2.1 + * change_cipher_spec(20), alert(21), handshake(22), application_data(23) + */ +#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 + +/** + * The DTLS record layer header has a total size of 13 bytes, consisting of + * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), + * SequenceNumber (6 bytes), and Length (2 bytes). + * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 + */ +#define DTLS_RECORD_LAYER_HEADER_LEN 13 + +/** + * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. + * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer + */ +#define DTLS_VERSION_10 0xfeff +#define DTLS_VERSION_12 0xfefd + +/* Referring to Chrome's definition of RTP payload types. */ +#define RTC_RTP_PAYLOAD_TYPE_H264 106 +#define RTC_RTP_PAYLOAD_TYPE_OPUS 111 + +/** + * The STUN message header, which is 20 bytes long, comprises the + * STUNMessageType (2B), MessageLength (2B), MagicCookie (4B), + * and TransactionID (12B). + * See https://datatracker.ietf.org/doc/html/rfc5389#section-6 + */ +#define ICE_STUN_HEADER_SIZE 20 + +/** + * The RTP header is 12 bytes long, comprising the Version(1B), PT(1B), + * SequenceNumber(2B), Timestamp(4B), and SSRC(4B). + * See https://www.rfc-editor.org/rfc/rfc3550#section-5.1 + */ +#define RTC_RTP_HEADER_SIZE 12 + +/** + * For RTCP, PT is [128, 223] (or without marker [0, 95]). Literally, RTCP starts + * from 64 not 0, so PT is [192, 223] (or without marker [64, 95]), see "RTCP Control + * Packet Types (PT)" at + * https://www.iana.org/assignments/rtp-parameters/rtp-parameters.xhtml#rtp-parameters-4 + * + * For RTP, the PT is [96, 127], or [224, 255] with marker.
See "RTP Payload Types (PT) + * for standard audio and video encodings" at + * https://www.iana.org/assignments/rtp-parameters/rtp-parameters.xhtml#rtp-parameters-1 + */ +#define RTC_RTCP_PT_START 192 +#define RTC_RTCP_PT_END 223 + +/** + * In the case of ICE-LITE, these fields are not used; instead, they are defined + * as constant values. + */ +#define RTC_SDP_SESSION_ID "4489045141692799359" +#define RTC_SDP_CREATOR_IP "127.0.0.1" + /* Calculate the elapsed time from starttime to endtime in milliseconds. */ -#define RTC_ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) +#define ELAPSED(starttime, endtime) ((int)(((endtime) - (starttime)) / 1000)) /* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ -enum StunAttr { +enum STUNAttr { STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request STUN_ATTR_USE_CANDIDATE = 0x0025, /// bind request STUN_ATTR_MESSAGE_INTEGRITY = 0x0008, /// bind request/response @@ -137,13 +172,15 @@ typedef int (*DTLSContext_on_state_fn)(DTLSContext *ctx, enum DTLSState state, c typedef int (*DTLSContext_on_write_fn)(DTLSContext *ctx, char* data, int size); typedef struct DTLSContext { + AVClass *av_class; + /* For callback. */ DTLSContext_on_state_fn on_state; DTLSContext_on_write_fn on_write; void* opaque; - /* For av_log to write log to this category. */ - void *log_avcl; + /* For logging. */ + AVClass *log_avcl; /* The DTLS context. */ SSL_CTX *dtls_ctx; @@ -166,7 +203,7 @@ typedef struct DTLSContext { * 16B 16B 14B 14B * client_key | server_key | client_salt | server_salt */ - uint8_t dtls_srtp_material[DTLS_SRTP_MASTER_KEY_LEN * 2]; + uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; /* Whether the DTLS is done at least for us. */ int dtls_done_for_us; @@ -187,6 +224,10 @@ typedef struct DTLSContext { int64_t dtls_handshake_starttime; int64_t dtls_handshake_endtime; + /* Helper for get error code and message.
*/ + int error_code; + char error_message[256]; + /** * The size of RTP packet, should generally be set to MTU. * Note that pion requires a smaller value, for example, 1200. @@ -194,8 +235,52 @@ typedef struct DTLSContext { int mtu; } DTLSContext; -static int is_dtls_packet(char *buf, int buf_size) { - return buf_size > 13 && buf[0] > 19 && buf[0] < 64; +/** + * Whether the packet is a DTLS packet; the header is only parsed when a full record header is present. + */ +static int is_dtls_packet(uint8_t *b, int size) { + uint16_t version = size > DTLS_RECORD_LAYER_HEADER_LEN ? AV_RB16(&b[1]) : 0; + return size > DTLS_RECORD_LAYER_HEADER_LEN && + b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && + (version == DTLS_VERSION_10 || version == DTLS_VERSION_12); +} + +/** + * Retrieves the error message for the latest OpenSSL error. + * + * This function retrieves the error code from the thread's error queue, converts it + * to a human-readable string, and stores it in the DTLSContext's error_message field. + * The error queue is then cleared using ERR_clear_error(). + */ +static const char* openssl_get_error(DTLSContext *ctx) +{ + unsigned long r2 = ERR_get_error(); + if (r2) + ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); + else + ctx->error_message[0] = '\0'; + + ERR_clear_error(); + return ctx->error_message; +} + +/** + * Get the error code for the given SSL operation result. + * + * This function retrieves the error code for the given SSL operation result + * and stores the error message in the DTLS context if an error occurred. + * It also clears the error queue.
+ */ +static int openssl_ssl_get_error(DTLSContext *ctx, int ret) +{ + SSL *dtls = ctx->dtls; + int r1 = SSL_ERROR_NONE; + + if (ret <= 0) + r1 = SSL_get_error(dtls, ret); + + openssl_get_error(ctx); + return r1; } /** @@ -207,7 +292,6 @@ static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) const char *method = "undefined", *alert_type, *alert_desc; enum DTLSState state; DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - void *s1 = ctx->log_avcl; w = where & ~SSL_ST_MASK; if (w & SSL_ST_CONNECT) @@ -215,9 +299,9 @@ static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) else if (w & SSL_ST_ACCEPT) method = "SSL_accept"; - r1 = SSL_get_error(dtls, r0); + r1 = openssl_ssl_get_error(ctx, r0); if (where & SSL_CB_LOOP) { - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); } else if (where & SSL_CB_ALERT) { method = (where & SSL_CB_READ) ? 
"read":"write"; @@ -226,11 +310,11 @@ static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) alert_desc = SSL_alert_desc_string(r0); if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) - av_log(s1, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(ctx, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); else - av_log(s1, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); + av_log(ctx, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1, ctx->error_message); /** * Notify the DTLS to handle the ALERT message, which maybe means media connection disconnect. @@ -242,20 +326,20 @@ static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? 
DTLS_STATE_CLOSED : DTLS_STATE_NONE); if (state != DTLS_STATE_NONE && ctx->on_state) { - av_log(s1, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", + av_log(ctx, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", ctx, state, is_fatal, is_warning, is_close_notify); ctx->on_state(ctx, state, alert_type, alert_desc); } } else if (where & SSL_CB_EXIT) { if (!r0) - av_log(s1, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(ctx, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); else if (r0 < 0) if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) - av_log(s1, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + av_log(ctx, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1, ctx->error_message); else - av_log(s1, AV_LOG_VERBOSE, "DTLS: method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); } } @@ -265,18 +349,17 @@ static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length uint8_t content_type = 0; uint16_t size = 0; uint8_t handshake_type = 0; - void *s1 = ctx->log_avcl; /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ if (length >= 1) - content_type = (uint8_t)data[0]; + content_type = AV_RB8(&data[0]); if (length >= 13) - size = (uint16_t)(data[11])<<8 | (uint16_t)data[12]; + size = AV_RB16(&data[11]); if (length >= 14) - handshake_type = (uint8_t)data[13]; + handshake_type = AV_RB8(&data[13]); - av_log(s1, 
AV_LOG_VERBOSE, "WHIP: DTLS state %s %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", - "Active", (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Trace %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", + (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, content_type, size, handshake_type); } @@ -302,14 +385,13 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, uint8_t content_type, handshake_type; uint8_t *data = (uint8_t*)argp; DTLSContext* ctx = b ? (DTLSContext*)BIO_get_callback_arg(b) : NULL; - void *s1 = ctx ? ctx->log_avcl : NULL; #if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x req_size = len; - av_log(s1, AV_LOG_DEBUG, "DTLS: bio callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", b, oper, argp, len, argi, argl, retvalue, processed, req_size); #else - av_log(s1, AV_LOG_DEBUG, "DTLS: bio callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", b, oper, argp, argi, argl, retvalue, req_size); #endif @@ -318,8 +400,8 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, openssl_dtls_state_trace(ctx, data, req_size, 0); ret = ctx->on_write ? ctx->on_write(ctx, data, req_size) : 0; - content_type = req_size > 0 ? data[0] : 0; - handshake_type = req_size > 13 ? data[13] : 0; + content_type = req_size > 0 ? AV_RB8(&data[0]) : 0; + handshake_type = req_size > 13 ? 
AV_RB8(&data[13]) : 0; is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; ctx->dtls_arq_packets += is_arq; @@ -327,7 +409,7 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, ctx->dtls_last_handshake_type = handshake_type; if (ret < 0) { - av_log(s1, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", + av_log(ctx, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", oper, content_type, handshake_type, req_size, is_arq); return ret; } @@ -338,24 +420,31 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, static int openssl_dtls_gen_private_key(DTLSContext *ctx) { int ret = 0; -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_GROUP *ecgroup = NULL; -#else - const char *curve = "prime256v1"; -#endif - void *s1 = ctx->log_avcl; - /* Should use the curves in ClientHello.supported_groups, for example: + /** + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string. 
+ * + * TODO: Should choose the curves in ClientHello.supported_groups, for example: * Supported Group: x25519 (0x001d) * Supported Group: secp256r1 (0x0017) * Supported Group: secp384r1 (0x0018) - * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, - * not NID_secp256k1 or secp256k1 in string */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP *ecgroup = NULL; + int curve = NID_X9_62_prime256v1; +#else + const char *curve = SN_X9_62_prime256v1; +#endif + #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ ctx->dtls_pkey = EVP_PKEY_new(); ctx->dtls_eckey = EC_KEY_new(); - ecgroup = EC_GROUP_new_by_curve_name(NID_X9_62_prime256v1); + ecgroup = EC_GROUP_new_by_curve_name(curve); + if (!ecgroup) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Create EC group by curve=%d failed, %s\n", curve, openssl_get_error(ctx)); + goto einval_end; + } #if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x /* For openssl 1.0, we must set the group parameters, so that cert is ok.
*/ @@ -363,31 +452,30 @@ static int openssl_dtls_gen_private_key(DTLSContext *ctx) #endif if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_set_group failed\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_set_group failed, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EC_KEY_generate_key failed\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_generate_key failed, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: EVP_PKEY_set1_EC_KEY failed\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", openssl_get_error(ctx)); + goto einval_end; } #else ctx->dtls_pkey = EVP_EC_gen(curve); if (!ctx->dtls_pkey) { - av_log(s1, AV_LOG_ERROR, "DTLS: EVP_EC_gen curve=%s failed\n", curve); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, openssl_get_error(ctx)); + goto einval_end; } #endif + goto end; +einval_end: + ret = AVERROR(EINVAL); end: #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ EC_GROUP_free(ecgroup); @@ -403,105 +491,95 @@ static int openssl_dtls_gen_certificate(DTLSContext *ctx) const char *aor = "ffmpeg.org"; X509_NAME* subject = NULL; X509 *dtls_cert = NULL; - void *s1 = ctx->log_avcl; /* To prevent a crash during cleanup, always initialize it. */ av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); dtls_cert = ctx->dtls_cert = X509_new(); if (!dtls_cert) { - ret = AVERROR(ENOMEM); - goto end; + goto enomem_end; } + // TODO: Support non-self-signed certificate, for example, load from a file. 
subject = X509_NAME_new(); if (!subject) { - ret = AVERROR(ENOMEM); - goto end; + goto enomem_end; } serial = (int)av_get_random_seed(); if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set serial\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set serial, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set CN\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set CN, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (X509_set_issuer_name(dtls_cert, subject) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set issuer\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set issuer, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (X509_set_subject_name(dtls_cert, subject) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set subject name\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set subject name, %s\n", openssl_get_error(ctx)); + goto einval_end; } expire_day = 365; if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notBefore\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notBefore, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set notAfter\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notAfter, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (X509_set_version(dtls_cert, 2) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set version\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed 
to set version, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to set public key\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set public key, %s\n", openssl_get_error(ctx)); + goto einval_end; } if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { - av_log(s1, AV_LOG_ERROR, "WHIP: Failed to sign certificate\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to sign certificate, %s\n", openssl_get_error(ctx)); + goto einval_end; } /* Generate the fingerpint of certficate. */ if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { - av_log(s1, AV_LOG_ERROR, "Failed to generate fingerprint\n"); - ret = AVERROR(EIO); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate fingerprint, %s\n", openssl_get_error(ctx)); + goto eio_end; } for (i = 0; i < n; i++) { av_bprintf(&fingerprint, "%02X", md[i]); if (i < n - 1) av_bprintf(&fingerprint, ":"); } - if (!av_bprint_is_complete(&fingerprint)) { - av_log(s1, AV_LOG_ERROR, "Fingerprint %d exceed max %d, %s\n", ret, MAX_URL_SIZE, fingerprint.str); - ret = AVERROR(EIO); - goto end; - } if (!fingerprint.str || !strlen(fingerprint.str)) { - av_log(s1, AV_LOG_ERROR, "Fingerprint is empty\n"); - ret = AVERROR(EINVAL); - goto end; + av_log(ctx, AV_LOG_ERROR, "DTLS: Fingerprint is empty\n"); + goto einval_end; } ctx->dtls_fingerprint = av_strdup(fingerprint.str); if (!ctx->dtls_fingerprint) { - ret = AVERROR(ENOMEM); - goto end; - } - + goto enomem_end; + } + + goto end; +enomem_end: + ret = AVERROR(ENOMEM); + goto end; +eio_end: + ret = AVERROR(EIO); + goto end; +einval_end: + ret = AVERROR(EINVAL); end: X509_NAME_free(subject); av_bprint_finalize(&fingerprint, NULL); @@ -514,12 +592,24 @@ static int openssl_dtls_gen_certificate(DTLSContext *ctx) static av_cold int openssl_dtls_init_context(DTLSContext *ctx) { int ret = 0; - void *s1 = 
ctx->log_avcl; EVP_PKEY *dtls_pkey = ctx->dtls_pkey; X509 *dtls_cert = ctx->dtls_cert; SSL_CTX *dtls_ctx = NULL; SSL *dtls = NULL; BIO *bio_in = NULL, *bio_out = NULL; + const char* ciphers = "ALL"; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* profiles = "SRTP_AES128_CM_SHA1_80"; + + /* Refer to the test cases regarding these curves in the WebRTC code. */ +#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ + const char* curves = "X25519:P-256:P-384:P-521"; +#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + const char* curves = "P-256:P-384:P-521"; +#endif #if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); @@ -532,8 +622,9 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) #if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ /* For ECDSA, we could set the curves list. */ - if (SSL_CTX_set1_curves_list(dtls_ctx, "P-521:P-384:P-256") != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set1_curves_list failed\n"); + if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", + curves, openssl_get_error(ctx)); return AVERROR(EINVAL); } #endif @@ -547,23 +638,21 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) #endif /** - * We use "ALL", while you can use "DEFAULT" means "ALL:!EXPORT:!LOW:!aNULL:!eNULL:!SSLv2" - * Cipher Suite: ECDHE-ECDSA-AES128-CBC-SHA (0xc009) - * Cipher Suite: ECDHE-RSA-AES128-CBC-SHA (0xc013) - * Cipher Suite: ECDHE-ECDSA-AES256-CBC-SHA (0xc00a) - * Cipher Suite: ECDHE-RSA-AES256-CBC-SHA (0xc014) + * We activate "ALL" cipher suites to align with the peer's capabilities, + * ensuring maximum compatibility. 
*/ - if (SSL_CTX_set_cipher_list(dtls_ctx, "ALL") != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_cipher_list failed\n"); + if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", + ciphers, openssl_get_error(ctx)); return AVERROR(EINVAL); } /* Setup the certificate. */ if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_certificate failed\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); return AVERROR(EINVAL); } if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_use_PrivateKey failed\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); return AVERROR(EINVAL); } @@ -574,9 +663,10 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) SSL_CTX_set_verify_depth(dtls_ctx, 4); /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ SSL_CTX_set_read_ahead(dtls_ctx, 1); - /* Only support SRTP_AES128_CM_SHA1_80, please read ssl/d1_srtp.c */ - if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, "SRTP_AES128_CM_SHA1_80")) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL_CTX_set_tlsext_use_srtp failed\n"); + /* Setup the SRTP context */ + if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", + profiles, openssl_get_error(ctx)); return AVERROR(EINVAL); } @@ -644,30 +734,29 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) static av_cold int dtls_context_init(DTLSContext *ctx) { int ret = 0; - void *s1 = ctx->log_avcl; ctx->dtls_init_starttime = av_gettime(); /* Generate a private key to ctx->dtls_pkey. 
*/ if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to generate DTLS private key\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); return ret; } /* Generate a self-signed certificate. */ if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to generate DTLS certificate\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); return ret; } if ((ret = openssl_dtls_init_context(ctx)) < 0) { - av_log(s1, AV_LOG_ERROR, "Failed to initialize DTLS context\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to initialize DTLS context\n"); return ret; } ctx->dtls_init_endtime = av_gettime(); - av_log(s1, AV_LOG_INFO, "DTLS: Setup ok, MTU=%d, cost=%dms, fingerprint %s\n", - ctx->mtu, RTC_ELAPSED(ctx->dtls_init_starttime, av_gettime()), ctx->dtls_fingerprint); + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Setup ok, MTU=%d, cost=%dms, fingerprint %s\n", + ctx->mtu, ELAPSED(ctx->dtls_init_starttime, av_gettime()), ctx->dtls_fingerprint); return ret; } @@ -680,8 +769,6 @@ static int dtls_context_start(DTLSContext *ctx) { int ret = 0, r0, r1; SSL *dtls = ctx->dtls; - void *s1 = ctx->log_avcl; - char detail_error[256]; ctx->dtls_handshake_starttime = av_gettime(); @@ -698,13 +785,10 @@ static int dtls_context_start(DTLSContext *ctx) * packets are received. */ r0 = SSL_do_handshake(dtls); - r1 = SSL_get_error(dtls, r0); - if (r0 < 0 && r1 == SSL_ERROR_SSL) - ERR_error_string_n(ERR_get_error(), detail_error, sizeof(detail_error)); - ERR_clear_error(); + r1 = openssl_ssl_get_error(ctx, r0); // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. 
if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) { - av_log(s1, AV_LOG_ERROR, "Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, detail_error); + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); return AVERROR(EIO); } @@ -726,15 +810,13 @@ static int dtls_context_write(DTLSContext *ctx, char* buf, int size) SSL *dtls = ctx->dtls; const char* dst = "EXTRACTOR-dtls_srtp"; BIO *bio_in = ctx->bio_in; - void *s1 = ctx->log_avcl; - char detail_error[256]; /* Got DTLS response successfully. */ openssl_dtls_state_trace(ctx, buf, size, 1); if ((r0 = BIO_write(bio_in, buf, size)) <= 0) { res_ct = size > 0 ? buf[0]: 0; res_ht = size > 13 ? buf[13] : 0; - av_log(s1, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", + av_log(ctx, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", res_ct, res_ht, size, r0); ret = AVERROR(EIO); goto end; @@ -745,18 +827,15 @@ static int dtls_context_write(DTLSContext *ctx, char* buf, int size) * We limit the MTU to 1200 for DTLS handshake, which ensures that the buffer is large enough for reading. 
*/ r0 = SSL_read(dtls, buf, sizeof(buf)); - r1 = SSL_get_error(dtls, r0); - if (r0 < 0 && r1 == SSL_ERROR_SSL) - ERR_error_string_n(ERR_get_error(), detail_error, sizeof(detail_error)); - ERR_clear_error(); + r1 = openssl_ssl_get_error(ctx, r0); if (r0 <= 0) { if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { - av_log(s1, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, detail_error); + av_log(ctx, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); ret = AVERROR(EIO); goto end; } } else { - av_log(s1, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); + av_log(ctx, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); } /* Check whether the DTLS is completed. */ @@ -769,10 +848,11 @@ static int dtls_context_write(DTLSContext *ctx, char* buf, int size) /* Export SRTP master key after DTLS done */ if (!ctx->dtls_srtp_key_exported) { - ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_material, sizeof(ctx->dtls_srtp_material), + ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_materials, sizeof(ctx->dtls_srtp_materials), dst, strlen(dst), NULL, 0, 0); + r1 = openssl_ssl_get_error(ctx, ret); if (!ret) { - av_log(s1, AV_LOG_ERROR, "DTLS: SSL export key r0=%lu, ret=%d\n", ERR_get_error(), ret); + av_log(ctx, AV_LOG_ERROR, "DTLS: SSL export key ret=%d, r1=%d %s\n", ret, r1, ctx->error_message); ret = AVERROR(EIO); goto end; } @@ -845,14 +925,6 @@ typedef struct RTCContext { AVCodecParameters *audio_par; AVCodecParameters *video_par; - /* The SPS/PPS of AVC video */ - uint8_t *avc_sps; - int avc_sps_size; - uint8_t *avc_pps; - int avc_pps_size; - /* The size of NALU in ISOM format. */ - int avc_nal_length_size; - /* The ICE username and pwd fragment generated by the muxer.
*/ char ice_ufrag_local[9]; char ice_pwd_local[33]; @@ -887,6 +959,7 @@ typedef struct RTCContext { /* These variables represent timestamps used for calculating and tracking the cost. */ int64_t rtc_starttime; + /* */ int64_t rtc_init_time; int64_t rtc_offer_time; int64_t rtc_answer_time; @@ -900,14 +973,11 @@ typedef struct RTCContext { DTLSContext dtls_ctx; /* The SRTP send context, to encrypt outgoing packets. */ - struct SRTPContext srtp_audio_send; - struct SRTPContext srtp_video_send; - struct SRTPContext srtp_rtcp_send; + SRTPContext srtp_audio_send; + SRTPContext srtp_video_send; + SRTPContext srtp_rtcp_send; /* The SRTP receive context, to decrypt incoming packets. */ - struct SRTPContext srtp_recv; - - /* The time jitter base for audio OPUS stream. */ - int64_t audio_jitter_base; + SRTPContext srtp_recv; /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ URLContext *udp_uc; @@ -928,8 +998,6 @@ typedef struct RTCContext { char* authorization; } RTCContext; -static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size); - /** * When DTLS state change. */ @@ -941,14 +1009,14 @@ static int dtls_context_on_state(DTLSContext *ctx, enum DTLSState state, const c if (state == DTLS_STATE_CLOSED) { rtc->dtls_closed = 1; - av_log(s, AV_LOG_INFO, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", - type ? type : "", desc ? desc : "", RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", + type ? type : "", desc ? desc : "", ELAPSED(rtc->rtc_starttime, av_gettime())); return ret; } if (state == DTLS_STATE_FAILED) { rtc->state = RTC_STATE_FAILED; - av_log(s, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", type ? type : "", desc ? 
desc : ""); rtc->dtls_ret = AVERROR(EIO); return ret; @@ -957,10 +1025,10 @@ static int dtls_context_on_state(DTLSContext *ctx, enum DTLSState state, const c if (state == DTLS_STATE_FINISHED && rtc->state < RTC_STATE_DTLS_FINISHED) { rtc->state = RTC_STATE_DTLS_FINISHED; rtc->rtc_dtls_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%luB, cost=%dms, elapsed=%dms\n", - ctx->dtls_done_for_us, ctx->dtls_srtp_key_exported, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_material), - RTC_ELAPSED(ctx->dtls_handshake_starttime, ctx->dtls_handshake_endtime), - RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%zuB, cost=%dms, elapsed=%dms\n", + ctx->dtls_done_for_us, ctx->dtls_srtp_key_exported, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_materials), + ELAPSED(ctx->dtls_handshake_starttime, ctx->dtls_handshake_endtime), + ELAPSED(rtc->rtc_starttime, av_gettime())); return ret; } @@ -976,7 +1044,7 @@ static int dtls_context_on_write(DTLSContext *ctx, char* data, int size) RTCContext *rtc = s->priv_data; if (!rtc->udp_uc) { - av_log(s, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); return AVERROR(EIO); } @@ -994,146 +1062,30 @@ static av_cold int whip_init(AVFormatContext *s) rtc->rtc_starttime = av_gettime(); /* Use the same logging context as AV format.
*/ - rtc->dtls_ctx.log_avcl = s; + rtc->dtls_ctx.av_class = rtc->av_class; rtc->dtls_ctx.mtu = rtc->pkt_size; rtc->dtls_ctx.opaque = s; rtc->dtls_ctx.on_state = dtls_context_on_state; rtc->dtls_ctx.on_write = dtls_context_on_write; if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); return ret; } if (rtc->pkt_size < ideal_pkt_size) - av_log(s, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + av_log(rtc, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", rtc->pkt_size, ideal_pkt_size); if (rtc->state < RTC_STATE_INIT) rtc->state = RTC_STATE_INIT; rtc->rtc_init_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, elapsed=%dms\n", - rtc->state, rtc->handshake_timeout, rtc->pkt_size, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, elapsed=%dms\n", + rtc->state, rtc->handshake_timeout, rtc->pkt_size, ELAPSED(rtc->rtc_starttime, av_gettime())); return 0; } -/** - * Parses the ISOM AVCC format of extradata and extracts SPS/PPS. - * - * This function is used to parse SPS/PPS from the extradata in ISOM AVCC format. - * It can handle both ISOM and annexb formats but only parses data in ISOM format. - * If the extradata is in annexb format, this function ignores it, and uses the entire - * extradata as a sequence header with SPS/PPS. Refer to ff_isom_write_avcc. - * - * @param s Pointer to the AVFormatContext - * @param extradata Pointer to the extradata - * @param extradata_size Size of the extradata - * @returns Returns 0 if successful or AVERROR_xxx in case of an error. 
- */ -static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata_size) -{ - int ret = 0; - uint8_t version, nal_length_size, nb_sps, nb_pps; - AVIOContext *pb; - RTCContext *rtc = s->priv_data; - - if (!extradata || !extradata_size) - return 0; - - /* Not H.264 ISOM format, may be annexb etc. */ - if (extradata_size < 4 || extradata[0] != 1) { - if (!ff_avc_find_startcode(extradata, extradata + extradata_size)) { - av_log(s, AV_LOG_ERROR, "Format must be ISOM or annexb\n"); - return AVERROR_INVALIDDATA; - } - return 0; - } - - /* Parse the SPS/PPS in ISOM format in extradata. */ - pb = avio_alloc_context(extradata, extradata_size, 0, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); - - version = avio_r8(pb); /* version */ - avio_r8(pb); /* avc profile */ - avio_r8(pb); /* avc profile compat */ - avio_r8(pb); /* avc level */ - nal_length_size = avio_r8(pb); /* 6 bits reserved (111111) + 2 bits nal size length - 1 (11) */ - nb_sps = avio_r8(pb); /* 3 bits reserved (111) + 5 bits number of sps */ - - if (version != 1) { - av_log(s, AV_LOG_ERROR, "Invalid version=%d\n", version); - ret = AVERROR_INVALIDDATA; - goto end; - } - - rtc->avc_nal_length_size = (nal_length_size & 0x03) + 1; - if (rtc->avc_nal_length_size == 3) { - av_log(s, AV_LOG_ERROR, "Invalid nal length size=%d\n", rtc->avc_nal_length_size); - ret = AVERROR_INVALIDDATA; - goto end; - } - - /* Read SPS */ - nb_sps &= 0x1f; - if (nb_sps != 1 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid number of sps=%d, eof=%d\n", nb_sps, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; - goto end; - } - - rtc->avc_sps_size = avio_rb16(pb); /* sps size */ - if (rtc->avc_sps_size <= 0 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid sps size=%d, eof=%d\n", rtc->avc_sps_size, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; - goto end; - } - - rtc->avc_sps = av_malloc(rtc->avc_sps_size); - if (!rtc->avc_sps) { - ret = AVERROR(ENOMEM); - goto end; - } - - ret = avio_read(pb, 
rtc->avc_sps, rtc->avc_sps_size); /* sps */ - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read sps, size=%d\n", rtc->avc_sps_size); - goto end; - } - - /* Read PPS */ - nb_pps = avio_r8(pb); /* number of pps */ - if (nb_pps != 1 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid number of pps=%d, eof=%d\n", nb_pps, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; - goto end; - } - - rtc->avc_pps_size = avio_rb16(pb); /* pps size */ - if (rtc->avc_pps_size <= 0 || avio_feof(pb)) { - av_log(s, AV_LOG_ERROR, "Invalid pps size=%d, eof=%d\n", rtc->avc_pps_size, avio_feof(pb)); - ret = AVERROR_INVALIDDATA; - goto end; - } - - rtc->avc_pps = av_malloc(rtc->avc_pps_size); - if (!rtc->avc_pps) { - ret = AVERROR(ENOMEM); - goto end; - } - - ret = avio_read(pb, rtc->avc_pps, rtc->avc_pps_size); /* pps */ - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read pps, size=%d\n", rtc->avc_pps_size); - goto end; - } - -end: - avio_context_free(&pb); - return ret; -} - /** * Parses video SPS/PPS from the extradata of codecpar and checks the codec. * Currently only supports video(h264) and audio(opus). Note that only baseline @@ -1145,10 +1097,19 @@ static int isom_read_avcc(AVFormatContext *s, uint8_t *extradata, int extradata * * @param s Pointer to the AVFormatContext * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + * + * TODO: FIXME: There is an issue with the timestamp of OPUS audio, especially when + * the input is an MP4 file. The timestamp deviates from the expected value of 960, + * causing Chrome to play the audio stream with noise. This problem can be replicated + * by transcoding a specific file into MP4 format and publishing it using the WHIP + * muxer. However, when directly transcoding and publishing through the WHIP muxer, + * the issue is not present, and the audio timestamp remains consistent. The root + * cause is still unknown, and this comment has been added to address this issue + * in the future. 
Further research is needed to resolve the problem. */ static int parse_codec(AVFormatContext *s) { - int i, ret; + int i; RTCContext *rtc = s->priv_data; for (i = 0; i < s->nb_streams; i++) { @@ -1157,54 +1118,48 @@ static int parse_codec(AVFormatContext *s) switch (par->codec_type) { case AVMEDIA_TYPE_VIDEO: if (rtc->video_par) { - av_log(s, AV_LOG_ERROR, "Only one video stream is supported by RTC\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); return AVERROR(EINVAL); } rtc->video_par = par; if (par->codec_id != AV_CODEC_ID_H264) { - av_log(s, AV_LOG_ERROR, "Unsupported video codec %s by RTC, choose h264\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported video codec %s by RTC, choose h264\n", desc ? desc->name : "unknown"); return AVERROR_PATCHWELCOME; } if (par->video_delay > 0) { - av_log(s, AV_LOG_ERROR, "Unsupported B frames by RTC\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); return AVERROR_PATCHWELCOME; } - - ret = isom_read_avcc(s, par->extradata, par->extradata_size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to parse SPS/PPS from extradata\n"); - return ret; - } break; case AVMEDIA_TYPE_AUDIO: if (rtc->audio_par) { - av_log(s, AV_LOG_ERROR, "Only one audio stream is supported by RTC\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); return AVERROR(EINVAL); } rtc->audio_par = par; if (par->codec_id != AV_CODEC_ID_OPUS) { - av_log(s, AV_LOG_ERROR, "Unsupported audio codec %s by RTC, choose opus\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", desc ? 
desc->name : "unknown"); return AVERROR_PATCHWELCOME; } if (par->ch_layout.nb_channels != 2) { - av_log(s, AV_LOG_ERROR, "Unsupported audio channels %d by RTC, choose stereo\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", par->ch_layout.nb_channels); return AVERROR_PATCHWELCOME; } if (par->sample_rate != 48000) { - av_log(s, AV_LOG_ERROR, "Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); return AVERROR_PATCHWELCOME; } break; default: - av_log(s, AV_LOG_ERROR, "Codec type '%s' for stream %d is not supported by RTC\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", av_get_media_type_string(par->codec_type), i); return AVERROR_PATCHWELCOME; } @@ -1231,7 +1186,7 @@ static int generate_sdp_offer(AVFormatContext *s) av_bprint_init(&bp, 1, MAX_SDP_SIZE); if (rtc->sdp_offer) { - av_log(s, AV_LOG_ERROR, "SDP offer is already set\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); ret = AVERROR(EINVAL); goto end; } @@ -1245,22 +1200,19 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_ssrc = av_get_random_seed(); rtc->video_ssrc = av_get_random_seed(); - rtc->audio_payload_type = 111; - rtc->video_payload_type = 106; + rtc->audio_payload_type = RTC_RTP_PAYLOAD_TYPE_OPUS; + rtc->video_payload_type = RTC_RTP_PAYLOAD_TYPE_H264; av_bprintf(&bp, "" "v=0\r\n" - "o=FFmpeg 4489045141692799359 2 IN IP4 127.0.0.1\r\n" + "o=FFmpeg %s 2 IN IP4 %s\r\n" "s=FFmpegPublishSession\r\n" "t=0 0\r\n" "a=group:BUNDLE 0 1\r\n" "a=extmap-allow-mixed\r\n" - "a=msid-semantic: WMS\r\n"); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } + "a=msid-semantic: WMS\r\n", + RTC_SDP_SESSION_ID, + RTC_SDP_CREATOR_IP); if (rtc->audio_par) { 
av_bprintf(&bp, "" @@ -1286,11 +1238,6 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->audio_par->ch_layout.nb_channels, rtc->audio_ssrc, rtc->audio_ssrc); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } } if (rtc->video_par) { @@ -1324,11 +1271,12 @@ static int generate_sdp_offer(AVFormatContext *s) level, rtc->video_ssrc, rtc->video_ssrc); - if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Offer %d exceed max %d, %s\n", ret, MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } + } + + if (!av_bprint_is_complete(&bp)) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; } rtc->sdp_offer = av_strdup(bp.str); @@ -1340,7 +1288,7 @@ static int generate_sdp_offer(AVFormatContext *s) if (rtc->state < RTC_STATE_OFFER) rtc->state = RTC_STATE_OFFER; rtc->rtc_offer_time = av_gettime(); - av_log(s, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", rtc->state, rtc->sdp_offer); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", rtc->state, rtc->sdp_offer); end: av_bprint_finalize(&bp, NULL); @@ -1366,12 +1314,12 @@ static int exchange_sdp(AVFormatContext *s) ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to alloc HTTP context: %s\n", s->url); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to alloc HTTP context: %s\n", s->url); goto end; } if (!rtc->sdp_offer || !strlen(rtc->sdp_offer)) { - av_log(s, AV_LOG_ERROR, "No offer to exchange\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); ret = AVERROR(EINVAL); goto end; } @@ -1382,7 +1330,7 @@ static int exchange_sdp(AVFormatContext *s) if (rtc->authorization) ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", rtc->authorization); if (ret <= 0 || ret >= sizeof(buf)) { - 
av_log(s, AV_LOG_ERROR, "Failed to generate headers, size=%d, %s\n", ret, buf); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); ret = AVERROR(EINVAL); goto end; } @@ -1393,7 +1341,7 @@ static int exchange_sdp(AVFormatContext *s) ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); goto end; } @@ -1413,21 +1361,21 @@ static int exchange_sdp(AVFormatContext *s) break; } if (ret <= 0) { - av_log(s, AV_LOG_ERROR, "Failed to read response from url=%s, offer is %s, answer is %s\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", s->url, rtc->sdp_offer, rtc->sdp_answer); goto end; } av_bprintf(&bp, "%.*s", ret, buf); if (!av_bprint_is_complete(&bp)) { - av_log(s, AV_LOG_ERROR, "Answer %d exceed max size %d, %s\n", ret, MAX_SDP_SIZE, bp.str); + av_log(rtc, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); ret = AVERROR(EIO); goto end; } } if (!av_strstart(bp.str, "v=", NULL)) { - av_log(s, AV_LOG_ERROR, "Invalid answer: %s\n", bp.str); + av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); ret = AVERROR(EINVAL); goto end; } @@ -1440,7 +1388,7 @@ static int exchange_sdp(AVFormatContext *s) if (rtc->state < RTC_STATE_ANSWER) rtc->state = RTC_STATE_ANSWER; - av_log(s, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", rtc->state, rtc->sdp_answer); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", rtc->state, rtc->sdp_answer); end: ffurl_closep(&whip_uc); @@ -1469,7 +1417,7 @@ static int parse_answer(AVFormatContext *s) RTCContext *rtc = s->priv_data; if (!rtc->sdp_answer || !strlen(rtc->sdp_answer)) { - av_log(s, AV_LOG_ERROR, "No answer to parse\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: No answer to parse\n"); ret = 
AVERROR(EINVAL); goto end; } @@ -1499,14 +1447,14 @@ static int parse_answer(AVFormatContext *s) int priority, port; ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); if (ret != 4) { - av_log(s, AV_LOG_ERROR, "Failed %d to parse line %d %s from %s\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", ret, i, line, rtc->sdp_answer); ret = AVERROR(EIO); goto end; } if (av_strcasecmp(protocol, "udp")) { - av_log(s, AV_LOG_ERROR, "Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", protocol, i, line, rtc->sdp_answer); ret = AVERROR(EIO); goto end; @@ -1524,19 +1472,19 @@ static int parse_answer(AVFormatContext *s) } if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote)) { - av_log(s, AV_LOG_ERROR, "No remote ice pwd parsed from %s\n", rtc->sdp_answer); + av_log(rtc, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } if (!rtc->ice_ufrag_remote || !strlen(rtc->ice_ufrag_remote)) { - av_log(s, AV_LOG_ERROR, "No remote ice ufrag parsed from %s\n", rtc->sdp_answer); + av_log(rtc, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } if (!rtc->ice_protocol || !rtc->ice_host || !rtc->ice_port) { - av_log(s, AV_LOG_ERROR, "No ice candidate parsed from %s\n", rtc->sdp_answer); + av_log(rtc, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", rtc->sdp_answer); ret = AVERROR(EINVAL); goto end; } @@ -1544,9 +1492,9 @@ static int parse_answer(AVFormatContext *s) if (rtc->state < RTC_STATE_NEGOTIATED) rtc->state = RTC_STATE_NEGOTIATED; rtc->rtc_answer_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + av_log(rtc, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, 
ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", rtc->state, strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), - rtc->ice_protocol, rtc->ice_host, rtc->ice_port, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + rtc->ice_protocol, rtc->ice_host, rtc->ice_port, ELAPSED(rtc->rtc_starttime, av_gettime())); end: avio_context_free(&pb); @@ -1595,7 +1543,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in /* The username is the concatenation of the two ICE ufrag */ ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); if (ret <= 0 || ret >= sizeof(username)) { - av_log(s, AV_LOG_ERROR, "Failed to build username %s:%s, max=%lu, ret=%d\n", + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", rtc->ice_ufrag_remote, rtc->ice_ufrag_local, sizeof(username), ret); ret = AVERROR(EIO); goto end; @@ -1656,14 +1604,15 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in * @param response_size Pointer to an integer that will store the size of the generated response. * @return Returns 0 if successful or AVERROR_xxx if an error occurs. */ -static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) { +static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) +{ int ret = 0, size, crc32; AVIOContext *pb = NULL; AVHMAC *hmac = NULL; RTCContext *rtc = s->priv_data; if (tid_size != 12) { - av_log(s, AV_LOG_ERROR, "Invalid transaction ID size. Expected 12, got %d\n", tid_size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. 
Expected 12, got %d\n", tid_size); return AVERROR(EINVAL); } @@ -1714,19 +1663,49 @@ static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint return ret; } -static int ice_is_binding_request(char *buf, int buf_size) { - return buf_size > 1 && buf[0] == 0x00 && buf[1] == 0x01; +/** + * A Binding request has class=0b00 (request) and method=0b000000000001 (Binding) + * and is encoded into the first 16 bits as 0x0001. + * See https://datatracker.ietf.org/doc/html/rfc5389#section-6 + */ +static int ice_is_binding_request(uint8_t *b, int size) +{ + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0001; +} + +/** + * A Binding response has class=0b10 (success response) and method=0b000000000001, + * and is encoded into the first 16 bits as 0x0101. + */ +static int ice_is_binding_response(uint8_t *b, int size) +{ + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0101; +} + +/** + * In RTP packets, the first byte is represented as 0b10xxxxxx, where the initial + * two bits (0b10) indicate the RTP version, + * see https://www.rfc-editor.org/rfc/rfc3550#section-5.1 + * The RTCP packet header is similar to RTP, + * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 + */ +static int rtc_is_rtp_rtcp(uint8_t *b, int size) +{ + return size >= RTC_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; } -static int ice_is_binding_response(char *buf, int buf_size) { - return buf_size > 1 && buf[0] == 0x01 && buf[1] == 0x01; +/* Whether the packet is RTCP. */ +static int rtc_is_rtcp(uint8_t *b, int size) +{ + return size >= RTC_RTP_HEADER_SIZE && b[1] >= RTC_RTCP_PT_START && b[1] <= RTC_RTCP_PT_END; } /** * This function handles incoming binding request messages by responding to them. * If the message is not a binding request, it will be ignored. 
*/ -static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) { +static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) +{ int ret = 0, size; char tid[12]; RTCContext *rtc = s->priv_data; @@ -1735,8 +1714,9 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz if (!ice_is_binding_request(buf, buf_size)) return ret; - if (buf_size < 20) { - av_log(s, AV_LOG_ERROR, "Invalid STUN message size. Expected at least 20, got %d\n", buf_size); + if (buf_size < ICE_STUN_HEADER_SIZE) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", + ICE_STUN_HEADER_SIZE, buf_size); return AVERROR(EINVAL); } @@ -1746,13 +1726,13 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz /* Build the STUN binding response. */ ret = ice_create_response(s, tid, sizeof(tid), rtc->buf, sizeof(rtc->buf), &size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding response, size=%d\n", size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); return ret; } ret = ffurl_write(rtc->udp_uc, rtc->buf, size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding response, size=%d\n", size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); return ret; } @@ -1774,7 +1754,7 @@ static int udp_connect(AVFormatContext *s) ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); return ret; } @@ -1786,7 +1766,7 @@ static int udp_connect(AVFormatContext *s) ret = ffurl_connect(rtc->udp_uc, NULL); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to 
connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); return ret; } @@ -1797,8 +1777,8 @@ static int udp_connect(AVFormatContext *s) if (rtc->state < RTC_STATE_UDP_CONNECTED) rtc->state = RTC_STATE_UDP_CONNECTED; rtc->rtc_udp_time = av_gettime(); - av_log(s, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", - rtc->state, RTC_ELAPSED(rtc->rtc_starttime, av_gettime()), rtc->ice_host, rtc->ice_port); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", + rtc->state, ELAPSED(rtc->rtc_starttime, av_gettime()), rtc->ice_host, rtc->ice_port); return ret; } @@ -1810,7 +1790,7 @@ static int ice_dtls_handshake(AVFormatContext *s) RTCContext *rtc = s->priv_data; if (rtc->state < RTC_STATE_UDP_CONNECTED || !rtc->udp_uc) { - av_log(s, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", rtc->state, rtc->udp_uc); + av_log(rtc, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", rtc->state, rtc->udp_uc); return AVERROR(EINVAL); } @@ -1819,13 +1799,13 @@ static int ice_dtls_handshake(AVFormatContext *s) /* Build the STUN binding request. 
*/ ret = ice_create_request(s, rtc->buf, sizeof(rtc->buf), &size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to create STUN binding request, size=%d\n", size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); goto end; } ret = ffurl_write(rtc->udp_uc, rtc->buf, size); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to send STUN binding request, size=%d\n", size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); goto end; } @@ -1840,8 +1820,8 @@ static int ice_dtls_handshake(AVFormatContext *s) now = av_gettime(); if (now - starttime >= rtc->handshake_timeout * 1000) { - av_log(s, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", - rtc->handshake_timeout, RTC_ELAPSED(starttime, now), RTC_ELAPSED(rtc->rtc_starttime, now), rtc->state); + av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + rtc->handshake_timeout, ELAPSED(starttime, now), ELAPSED(rtc->rtc_starttime, now), rtc->state); ret = AVERROR(ETIMEDOUT); goto end; } @@ -1855,7 +1835,7 @@ static int ice_dtls_handshake(AVFormatContext *s) av_usleep(5 * 1000); continue; } - av_log(s, AV_LOG_ERROR, "Failed to read message\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read message\n"); goto end; } @@ -1868,9 +1848,9 @@ static int ice_dtls_handshake(AVFormatContext *s) if (rtc->state < RTC_STATE_ICE_CONNECTED) { rtc->state = RTC_STATE_ICE_CONNECTED; rtc->rtc_ice_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + av_log(rtc, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", rtc->state, rtc->ice_host, rtc->ice_port, rtc->whip_resource_url ? 
rtc->whip_resource_url : "", - rtc->ice_ufrag_remote, rtc->ice_ufrag_local, ret, RTC_ELAPSED(rtc->rtc_starttime, av_gettime())); + rtc->ice_ufrag_remote, rtc->ice_ufrag_local, ret, ELAPSED(rtc->rtc_starttime, av_gettime())); /* If got the first binding response, start DTLS handshake. */ if ((ret = dtls_context_start(&rtc->dtls_ctx)) < 0) @@ -1910,67 +1890,128 @@ static int ice_dtls_handshake(AVFormatContext *s) static int setup_srtp(AVFormatContext *s) { int ret; - char recv_key[DTLS_SRTP_MASTER_KEY_LEN], send_key[DTLS_SRTP_MASTER_KEY_LEN]; - char buf[AV_BASE64_SIZE(DTLS_SRTP_MASTER_KEY_LEN)]; - const char* suite = "AES_CM_128_HMAC_SHA1_80"; + char recv_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char send_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char buf[AV_BASE64_SIZE(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN)]; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; RTCContext *rtc = s->priv_data; + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + char *client_key = rtc->dtls_ctx.dtls_srtp_materials; + char *server_key = rtc->dtls_ctx.dtls_srtp_materials + DTLS_SRTP_KEY_LEN; + char *client_salt = server_key + DTLS_SRTP_KEY_LEN; + char *server_salt = client_salt + DTLS_SRTP_SALT_LEN; + /* As DTLS server, the recv key is client master key plus salt. */ - memcpy(recv_key, rtc->dtls_ctx.dtls_srtp_material, 16); - memcpy(recv_key + 16, rtc->dtls_ctx.dtls_srtp_material + 32, 14); + memcpy(recv_key, client_key, DTLS_SRTP_KEY_LEN); + memcpy(recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); /* As DTLS server, the send key is server master key plus salt. 
*/ - memcpy(send_key, rtc->dtls_ctx.dtls_srtp_material + 16, 16); - memcpy(send_key + 16, rtc->dtls_ctx.dtls_srtp_material + 46, 14); + memcpy(send_key, server_key, DTLS_SRTP_KEY_LEN); + memcpy(send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { - av_log(s, AV_LOG_ERROR, "Failed to encode send key\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); ret = AVERROR(EIO); goto end; } ret = ff_srtp_set_crypto(&rtc->srtp_audio_send, suite, buf); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to set crypto for audio send\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); goto end; } ret = ff_srtp_set_crypto(&rtc->srtp_video_send, suite, buf); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to set crypto for video send\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); goto end; } ret = ff_srtp_set_crypto(&rtc->srtp_rtcp_send, suite, buf); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for rtcp send\n"); goto end; } /* Setup SRTP context for incoming packets */ if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { - av_log(s, AV_LOG_ERROR, "Failed to encode recv key\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to encode recv key\n"); ret = AVERROR(EIO); goto end; } ret = ff_srtp_set_crypto(&rtc->srtp_recv, suite, buf); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to set crypto for recv\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); goto end; } if (rtc->state < RTC_STATE_SRTP_FINISHED) rtc->state = RTC_STATE_SRTP_FINISHED; rtc->rtc_srtp_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", - rtc->state, suite, sizeof(send_key), RTC_ELAPSED(rtc->rtc_starttime,
av_gettime())); + av_log(rtc, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%zuB, elapsed=%dms\n", + rtc->state, suite, sizeof(send_key), ELAPSED(rtc->rtc_starttime, av_gettime())); end: return ret; } +/** + * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. + * + * This function drops anything that is neither RTP nor RTCP, as well as RTP packets + * whose payload type is neither the audio nor the video one. Every other packet is + * encrypted with the matching SRTP context (RTCP, video or audio) and sent over UDP. + */ +static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) +{ + int ret, cipher_size, is_rtcp, is_video; + uint8_t payload_type; + AVFormatContext *s = opaque; + RTCContext *rtc = s->priv_data; + SRTPContext *srtp; + + /* Ignore if not RTP or RTCP packet. */ + if (!rtc_is_rtp_rtcp(buf, buf_size)) + return 0; + + /* Only support audio, video and rtcp. */ + is_rtcp = rtc_is_rtcp(buf, buf_size); + payload_type = buf[1] & 0x7f; + is_video = payload_type == rtc->video_payload_type; + if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) + return 0; + + /* Get the corresponding SRTP context. */ + srtp = is_rtcp ? &rtc->srtp_rtcp_send : (is_video? &rtc->srtp_video_send : &rtc->srtp_audio_send); + + /* Encrypt by SRTP and send out. */ + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, rtc->buf, sizeof(rtc->buf)); + if (cipher_size <= 0 || cipher_size < buf_size) { + av_log(rtc, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + return 0; + } + + ret = ffurl_write(rtc->udp_uc, rtc->buf, cipher_size); + if (ret < 0) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + return ret; + } + + return ret; +} + /** * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP * packets from the encoded frames.
@@ -1982,7 +2023,7 @@ static int setup_srtp(AVFormatContext *s) * * @return 0 if OK, AVERROR_xxx on error */ -static int create_rtp_muxer(AVFormatContext *s) +static int rtc_create_rtp_muxer(AVFormatContext *s) { int ret, i, is_video, buffer_size, max_packet_size; AVFormatContext *rtp_ctx = NULL; @@ -1993,7 +2034,7 @@ static int create_rtp_muxer(AVFormatContext *s) const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); if (!rtp_format) { - av_log(s, AV_LOG_ERROR, "Failed to guess rtp muxer\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); ret = AVERROR(ENOSYS); goto end; } @@ -2030,6 +2071,15 @@ static int create_rtp_muxer(AVFormatContext *s) avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; + /** + * For H.264, consistently utilize the annexb format through the Bitstream Filter (BSF); + * therefore, clear the extradata just copied into the RTP muxer's stream (index 0, not i). + */ + if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) { + av_freep(&rtp_ctx->streams[0]->codecpar->extradata); + rtp_ctx->streams[0]->codecpar->extradata_size = 0; + } + buffer = av_malloc(buffer_size); if (!buffer) { ret = AVERROR(ENOMEM); @@ -2052,7 +2102,7 @@ static int create_rtp_muxer(AVFormatContext *s) ret = avformat_write_header(rtp_ctx, &opts); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to write rtp header\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n"); goto end; } @@ -2065,17 +2115,16 @@ static int create_rtp_muxer(AVFormatContext *s) if (rtc->state < RTC_STATE_READY) rtc->state = RTC_STATE_READY; rtc->rtc_ready_time = av_gettime(); - av_log(s, AV_LOG_INFO, "WHIP: Muxer is ready, state=%d, buffer_size=%d, max_packet_size=%d, " - "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d,ready:%d)\n", - rtc->state, buffer_size, max_packet_size, RTC_ELAPSED(rtc->rtc_starttime, av_gettime()), - RTC_ELAPSED(rtc->rtc_starttime, 
rtc->rtc_init_time), - RTC_ELAPSED(rtc->rtc_init_time, rtc->rtc_offer_time), - RTC_ELAPSED(rtc->rtc_offer_time, rtc->rtc_answer_time), - RTC_ELAPSED(rtc->rtc_answer_time, rtc->rtc_udp_time), - RTC_ELAPSED(rtc->rtc_udp_time, rtc->rtc_ice_time), - RTC_ELAPSED(rtc->rtc_ice_time, rtc->rtc_dtls_time), - RTC_ELAPSED(rtc->rtc_dtls_time, rtc->rtc_srtp_time), - RTC_ELAPSED(rtc->rtc_srtp_time, rtc->rtc_ready_time)); + av_log(rtc, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " + "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", + rtc->state, buffer_size, max_packet_size, ELAPSED(rtc->rtc_starttime, av_gettime()), + ELAPSED(rtc->rtc_starttime, rtc->rtc_init_time), + ELAPSED(rtc->rtc_init_time, rtc->rtc_offer_time), + ELAPSED(rtc->rtc_offer_time, rtc->rtc_answer_time), + ELAPSED(rtc->rtc_answer_time, rtc->rtc_udp_time), + ELAPSED(rtc->rtc_udp_time, rtc->rtc_ice_time), + ELAPSED(rtc->rtc_ice_time, rtc->rtc_dtls_time), + ELAPSED(rtc->rtc_dtls_time, rtc->rtc_srtp_time)); end: if (rtp_ctx) @@ -2085,136 +2134,6 @@ static int create_rtp_muxer(AVFormatContext *s) return ret; } -/** - * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. - * - * This function modifies the video STAP packet, removing the markers, and updating the - * NRI of the first NALU. Additionally, it uses the corresponding SRTP context to encrypt - * the RTP packet, where the video packet is handled by the video SRTP context. - */ -static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) -{ - int ret, cipher_size, is_rtcp, is_video; - uint8_t payload_type, nalu_header; - AVFormatContext *s = opaque; - RTCContext *rtc = s->priv_data; - struct SRTPContext *srtp; - - /* Ignore if not RTP or RTCP packet. */ - if (buf_size < 12 || (buf[0] & 0xC0) != 0x80) - return 0; - - /* Only support audio, video and rtcp. 
*/ - is_rtcp = buf[1] >= 192 && buf[1] <= 223; - payload_type = buf[1] & 0x7f; - is_video = payload_type == rtc->video_payload_type; - if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) - return 0; - - /** - * For video, the STAP-A with SPS/PPS should: - * 1. The marker bit should be 0, never be 1. - * 2. The NRI should equal to the first NALU's. - */ - if (is_video && buf_size > 12) { - nalu_header = buf[12] & 0x1f; - if (nalu_header == NALU_TYPE_STAP_A) { - /* Reset the marker bit to 0. */ - if (buf[1] & 0x80) - buf[1] &= 0x7f; - - /* Reset the NRI to the first NALU's NRI. */ - if (buf_size > 15 && (buf[15]&0x60) != (buf[12]&0x60)) - buf[12] = (buf[12]&0x80) | (buf[15]&0x60) | (buf[12]&0x1f); - } - } - - /* Get the corresponding SRTP context. */ - srtp = is_rtcp ? &rtc->srtp_rtcp_send : (is_video? &rtc->srtp_video_send : &rtc->srtp_audio_send); - - /* Encrypt by SRTP and send out. */ - cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, rtc->buf, sizeof(rtc->buf)); - if (cipher_size <= 0 || cipher_size < buf_size) { - av_log(s, AV_LOG_WARNING, "Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); - return 0; - } - - ret = ffurl_write(rtc->udp_uc, rtc->buf, cipher_size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to write packet=%dB, ret=%d\n", cipher_size, ret); - return ret; - } - - return ret; -} - -/** - * Inserts the SPS/PPS data before each IDR (Instantaneous Decoder Refresh) frame. - * - * The SPS/PPS is parsed from the extradata. If it's in ISOM format, the SPS/PPS is - * multiplexed to the data field of the packet. If it's in annexb format, then the entire - * extradata is set to the data field of the packet. 
- */ -static int insert_sps_pps_packet(AVFormatContext *s, AVPacket *pkt) -{ - int ret, is_idr, size, i; - uint8_t *p; - AVPacket* extra = NULL; - AVStream *st = s->streams[pkt->stream_index]; - AVFormatContext *rtp_ctx = st->priv_data; - RTCContext *rtc = s->priv_data; - - is_idr = (pkt->flags & AV_PKT_FLAG_KEY) && st->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - if (!is_idr || !st->codecpar->extradata) - return 0; - - extra = av_packet_alloc(); - if (!extra) - return AVERROR(ENOMEM); - - size = !rtc->avc_nal_length_size ? st->codecpar->extradata_size : - rtc->avc_nal_length_size * 2 + rtc->avc_sps_size + rtc->avc_pps_size; - ret = av_new_packet(extra, size); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to allocate extra packet\n"); - goto end; - } - - /* Encode SPS/PPS in annexb format. */ - if (!rtc->avc_nal_length_size) { - memcpy(extra->data, st->codecpar->extradata, size); - } else { - /* Encode SPS/PPS in ISOM format. */ - p = extra->data; - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_sps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); - } - memcpy(p, rtc->avc_sps, rtc->avc_sps_size); - p += rtc->avc_sps_size; - - /* Encode PPS in ISOM format. */ - for (i = 0; i < rtc->avc_nal_length_size; i++) { - *p++ = rtc->avc_pps_size >> (8 * (rtc->avc_nal_length_size - i - 1)); - } - memcpy(p, rtc->avc_pps, rtc->avc_pps_size); - p += rtc->avc_pps_size; - } - - /* Setup packet and feed it to chain. */ - extra->pts = pkt->pts; - extra->dts = pkt->dts; - extra->stream_index = pkt->stream_index; - extra->time_base = pkt->time_base; - - ret = ff_write_chained(rtp_ctx, 0, extra, s, 0); - if (ret < 0) - goto end; - -end: - av_packet_free(&extra); - return ret; -} - /** * RTC is connectionless, for it's based on UDP, so it check whether sesison is * timeout. 
In such case, publishers can't republish the stream util the session @@ -2235,7 +2154,7 @@ static int whip_dispose(AVFormatContext *s) ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to alloc WHIP delete context: %s\n", s->url); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to alloc WHIP delete context: %s\n", s->url); goto end; } @@ -2243,7 +2162,7 @@ static int whip_dispose(AVFormatContext *s) av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to DELETE url=%s\n", rtc->whip_resource_url); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", rtc->whip_resource_url); goto end; } @@ -2254,12 +2173,12 @@ static int whip_dispose(AVFormatContext *s) break; } if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); goto end; } } - av_log(s, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", rtc->whip_resource_url); + av_log(rtc, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", rtc->whip_resource_url); end: ffurl_closep(&whip_uc); @@ -2295,7 +2214,7 @@ static av_cold int rtc_init(AVFormatContext *s) if ((ret = setup_srtp(s)) < 0) goto end; - if ((ret = create_rtp_muxer(s)) < 0) + if ((ret = rtc_create_rtp_muxer(s)) < 0) goto end; end: @@ -2323,39 +2242,22 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) if (ret > 0) { if (is_dtls_packet(rtc->buf, ret)) { if ((ret = dtls_context_write(&rtc->dtls_ctx, rtc->buf, ret)) < 0) { - av_log(s, AV_LOG_ERROR, "Failed to handle DTLS message\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); goto end; } } } else if (ret != AVERROR(EAGAIN)) { - av_log(s, AV_LOG_ERROR, "Failed to read from UDP socket\n"); - goto end; - } - - /* For audio OPUS stream, 
correct the timestamp. */ - if (st->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) { - pkt->dts = pkt->pts = rtc->audio_jitter_base; - // TODO: FIXME: For opus 48khz, each frame is 20ms which is 48000*20/1000 = 960. It appears that there is a - // bug introduced by libopus regarding the timestamp. Instead of being exactly 960, there is a slight - // deviation, such as 956 or 970. This deviation can cause Chrome to play the audio stream with noise. - // Although we are unsure of the root cause, we can simply correct the timestamp by using the timebase of - // Opus. We need to conduct further research and remove this line. - rtc->audio_jitter_base += 960; - } - - ret = insert_sps_pps_packet(s, pkt); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "Failed to insert SPS/PPS packet\n"); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); goto end; } ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); if (ret < 0) { if (ret == AVERROR(EINVAL)) { - av_log(s, AV_LOG_WARNING, "Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); + av_log(rtc, AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); ret = 0; } else - av_log(s, AV_LOG_ERROR, "Failed to write packet, size=%d\n", pkt->size); + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); goto end; } @@ -2376,7 +2278,7 @@ static av_cold void rtc_deinit(AVFormatContext *s) ret = whip_dispose(s); if (ret < 0) - av_log(s, AV_LOG_WARNING, "Failed to dispose resource, ret=%d\n", ret); + av_log(rtc, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); for (i = 0; i < s->nb_streams; i++) { AVFormatContext* rtp_ctx = s->streams[i]->priv_data; @@ -2389,8 +2291,6 @@ static av_cold void rtc_deinit(AVFormatContext *s) s->streams[i]->priv_data = NULL; } - av_freep(&rtc->avc_sps); - av_freep(&rtc->avc_pps); av_freep(&rtc->sdp_offer); av_freep(&rtc->sdp_answer); av_freep(&rtc->whip_resource_url); @@ -2407,6 +2307,21 @@ static av_cold void 
rtc_deinit(AVFormatContext *s) dtls_context_deinit(&rtc->dtls_ctx); } +static int rtc_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) +{ + int ret = 1; + + if (st->codecpar->codec_id == AV_CODEC_ID_H264) { + if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 && + (AV_RB24(pkt->data) != 0x000001 || + (st->codecpar->extradata_size > 0 && + st->codecpar->extradata[0] == 1))) + ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); + } + + return ret; +} + #define OFFSET(x) offsetof(RTCContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { @@ -2417,7 +2332,7 @@ static const AVOption options[] = { }; static const AVClass rtc_muxer_class = { - .class_name = "WebRTC muxer", + .class_name = "WHIP muxer", .item_name = av_default_item_name, .option = options, .version = LIBAVUTIL_VERSION_INT, @@ -2425,13 +2340,19 @@ static const AVClass rtc_muxer_class = { const FFOutputFormat ff_rtc_muxer = { .p.name = "rtc", - .p.long_name = NULL_IF_CONFIG_SMALL("WHIP WebRTC muxer"), + .p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"), .p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, - .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, + /** + * Avoid using AVFMT_GLOBALHEADER, for annexb format, it's necessary for the + * encoder to insert metadata headers (e.g., SPS/PPS for H.264) before each + * IDR frame. 
+ */ + .p.flags = AVFMT_NOFILE, .p.priv_class = &rtc_muxer_class, .priv_data_size = sizeof(RTCContext), .init = rtc_init, .write_packet = rtc_write_packet, .deinit = rtc_deinit, + .check_bitstream = rtc_check_bitstream, }; diff --git a/libavformat/srtp.h b/libavformat/srtp.h index 3189f8f54bd0e..35224cc9ba901 100644 --- a/libavformat/srtp.h +++ b/libavformat/srtp.h @@ -27,7 +27,7 @@ struct AVAES; struct AVHMAC; -struct SRTPContext { +typedef struct SRTPContext { struct AVAES *aes; struct AVHMAC *hmac; int rtp_hmac_size, rtcp_hmac_size; @@ -40,7 +40,7 @@ struct SRTPContext { uint32_t roc; uint32_t rtcp_index; -}; +} SRTPContext; int ff_srtp_set_crypto(struct SRTPContext *s, const char *suite, const char *params); From 2ac410051c80e23f5cc451f29e439a98d5851a5d Mon Sep 17 00:00:00 2001 From: Winlin Date: Fri, 9 Jun 2023 16:40:46 +0800 Subject: [PATCH 46/60] WHIP: Do not hardcode codec for SDP. (#5) See https://github.com/ossrs/ffmpeg-webrtc/discussions/5#discussioncomment-6096202 --- libavformat/rtcenc.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index f6e124ae7c815..32b5e5e9eebe9 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1179,6 +1179,7 @@ static int parse_codec(AVFormatContext *s) static int generate_sdp_offer(AVFormatContext *s) { int ret = 0, profile, level, profile_iop; + const char *acodec_name = NULL, *vcodec_name = NULL; AVBPrint bp; RTCContext *rtc = s->priv_data; @@ -1215,6 +1216,9 @@ static int generate_sdp_offer(AVFormatContext *s) RTC_SDP_CREATOR_IP); if (rtc->audio_par) { + if (rtc->audio_par->codec_id == AV_CODEC_ID_OPUS) + acodec_name = "opus"; + av_bprintf(&bp, "" "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" "c=IN IP4 0.0.0.0\r\n" @@ -1226,7 +1230,7 @@ static int generate_sdp_offer(AVFormatContext *s) "a=sendonly\r\n" "a=msid:FFmpeg audio\r\n" "a=rtcp-mux\r\n" - "a=rtpmap:%u opus/%d/%d\r\n" + "a=rtpmap:%u %s/%d/%d\r\n" "a=ssrc:%u cname:FFmpeg\r\n" 
"a=ssrc:%u msid:FFmpeg audio\r\n", rtc->audio_payload_type, @@ -1234,6 +1238,7 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->ice_pwd_local, rtc->dtls_ctx.dtls_fingerprint, rtc->audio_payload_type, + acodec_name, rtc->audio_par->sample_rate, rtc->audio_par->ch_layout.nb_channels, rtc->audio_ssrc, @@ -1241,9 +1246,14 @@ static int generate_sdp_offer(AVFormatContext *s) } if (rtc->video_par) { - profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; - level = rtc->video_par->level < 0 ? 30 : rtc->video_par->level; - profile_iop = profile & FF_PROFILE_H264_CONSTRAINED; + profile_iop = profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; + level = rtc->video_par->level < 0 ? 0x1e : rtc->video_par->level; + if (rtc->video_par->codec_id == AV_CODEC_ID_H264) { + vcodec_name = "H264"; + profile_iop &= FF_PROFILE_H264_CONSTRAINED; + profile &= (~FF_PROFILE_H264_CONSTRAINED); + } + av_bprintf(&bp, "" "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" "c=IN IP4 0.0.0.0\r\n" @@ -1256,7 +1266,7 @@ static int generate_sdp_offer(AVFormatContext *s) "a=msid:FFmpeg video\r\n" "a=rtcp-mux\r\n" "a=rtcp-rsize\r\n" - "a=rtpmap:%u H264/90000\r\n" + "a=rtpmap:%u %s/90000\r\n" "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" "a=ssrc:%u cname:FFmpeg\r\n" "a=ssrc:%u msid:FFmpeg video\r\n", @@ -1265,8 +1275,9 @@ static int generate_sdp_offer(AVFormatContext *s) rtc->ice_pwd_local, rtc->dtls_ctx.dtls_fingerprint, rtc->video_payload_type, + vcodec_name, rtc->video_payload_type, - profile & (~FF_PROFILE_H264_CONSTRAINED), + profile, profile_iop, level, rtc->video_ssrc, From 5819297450a970b34dade72ed44396d7382003ca Mon Sep 17 00:00:00 2001 From: winlin Date: Fri, 9 Jun 2023 19:49:49 +0800 Subject: [PATCH 47/60] WHIP: Parse profile and level from extradata. 
--- libavformat/Makefile | 2 +- libavformat/rtcenc.c | 88 +++++++++++++++++++++++++++++++++++++++----- 2 files changed, 80 insertions(+), 10 deletions(-) diff --git a/libavformat/Makefile b/libavformat/Makefile index b5000b08a452f..b980b1232126b 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -499,7 +499,7 @@ OBJS-$(CONFIG_RSD_DEMUXER) += rsd.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RSO_DEMUXER) += rsodec.o rso.o pcm.o OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o rawenc.o -OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o http.o srtp.o +OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o http.o srtp.o OBJS-$(CONFIG_RTP_MPEGTS_MUXER) += rtpenc_mpegts.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 32b5e5e9eebe9..2833b5f69ee6f 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -23,6 +23,8 @@ #include #include "libavcodec/avcodec.h" +#include "libavcodec/h264.h" +#include "libavcodec/startcode.h" #include "libavutil/base64.h" #include "libavutil/bprint.h" #include "libavutil/crc.h" @@ -31,6 +33,7 @@ #include "libavutil/opt.h" #include "libavutil/random_seed.h" #include "libavutil/time.h" +#include "avc.h" #include "avio_internal.h" #include "http.h" #include "internal.h" @@ -1086,6 +1089,56 @@ static av_cold int whip_init(AVFormatContext *s) return 0; } +/** + * When utilizing an encoder, such as libx264, to encode a stream, the extradata in + * par->extradata contains the SPS, which includes profile and level information. + * However, the profile and level of par remain unspecified. Therefore, it is necessary + * to extract the profile and level data from the extradata and assign it to the par's + * profile and level. + * + * When copying a stream, the extradata, as well as the profile and level of the par, + * are already set by demuxer. 
+ */ +static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) +{ + int ret = 0; + const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; + H264SPS seq, *const sps = &seq; + uint32_t state; + RTCContext *rtc = s->priv_data; + + if (par->codec_id != AV_CODEC_ID_H264) + return ret; + + if (par->profile != FF_PROFILE_UNKNOWN && par->level != FF_LEVEL_UNKNOWN) + return ret; + + while (1) { + r = avpriv_find_start_code(r, end, &state); + if (r >= end) + break; + + r1 = ff_avc_find_startcode(r, end); + if ((state & 0x1f) == H264_NAL_SPS) { + ret = ff_avc_decode_sps(sps, r, r1 - r); + if (ret < 0) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", + state, (int)(r1 - r)); + return ret; + } + + av_log(rtc, AV_LOG_INFO, "WHIP: Parse profile=%d, level=%d from SPS\n", + sps->profile_idc, sps->level_idc); + par->profile = sps->profile_idc; + par->level = sps->level_idc; + } + + r = r1; + } + + return ret; +} + /** * Parses video SPS/PPS from the extradata of codecpar and checks the codec. * Currently only supports video(h264) and audio(opus). 
Note that only baseline @@ -1109,7 +1162,7 @@ static av_cold int whip_init(AVFormatContext *s) */ static int parse_codec(AVFormatContext *s) { - int i; + int i, ret = 0; RTCContext *rtc = s->priv_data; for (i = 0; i < s->nb_streams; i++) { @@ -1133,6 +1186,20 @@ static int parse_codec(AVFormatContext *s) av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); return AVERROR_PATCHWELCOME; } + + if ((ret = parse_profile_level(s, par)) < 0) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); + return AVERROR(EINVAL); + } + + if (par->profile == FF_PROFILE_UNKNOWN) { + av_log(rtc, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); + return AVERROR(EINVAL); + } + if (par->level == FF_LEVEL_UNKNOWN) { + av_log(rtc, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); + return AVERROR(EINVAL); + } break; case AVMEDIA_TYPE_AUDIO: if (rtc->audio_par) { @@ -1165,7 +1232,7 @@ static int parse_codec(AVFormatContext *s) } } - return 0; + return ret; } /** @@ -1246,8 +1313,8 @@ static int generate_sdp_offer(AVFormatContext *s) } if (rtc->video_par) { - profile_iop = profile = rtc->video_par->profile < 0 ? 0x42 : rtc->video_par->profile; - level = rtc->video_par->level < 0 ? 
0x1e : rtc->video_par->level; + profile_iop = profile = rtc->video_par->profile; + level = rtc->video_par->level; if (rtc->video_par->codec_id == AV_CODEC_ID_H264) { vcodec_name = "H264"; profile_iop &= FF_PROFILE_H264_CONSTRAINED; @@ -2320,14 +2387,17 @@ static av_cold void rtc_deinit(AVFormatContext *s) static int rtc_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) { - int ret = 1; + int ret = 1, extradata_isom = 0; + uint8_t *b = pkt->data; + RTCContext *rtc = s->priv_data; if (st->codecpar->codec_id == AV_CODEC_ID_H264) { - if (pkt->size >= 5 && AV_RB32(pkt->data) != 0x0000001 && - (AV_RB24(pkt->data) != 0x000001 || - (st->codecpar->extradata_size > 0 && - st->codecpar->extradata[0] == 1))) + extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; + if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); + av_log(rtc, AV_LOG_INFO, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", + b[0], b[1], b[2], b[3], b[4], extradata_isom); + } } return ret; From 0c4a1588419b52884ab410784a33fd602a0c0aab Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 10 Jun 2023 08:34:55 +0800 Subject: [PATCH 48/60] WHIP: Insert SPS and PPS before IDR frames in annexb format due to h264_mp4toannexb filter only processing MP4 ISOM format. --- libavformat/rtcenc.c | 119 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 108 insertions(+), 11 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 2833b5f69ee6f..9aca271f684ad 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -928,6 +928,13 @@ typedef struct RTCContext { AVCodecParameters *audio_par; AVCodecParameters *video_par; + /** + * The h264_mp4toannexb Bitstream Filter (BSF) bypasses the AnnexB packet; + * therefore, it is essential to insert the SPS and PPS before each IDR frame + * in such cases. 
+ */ + int h264_annexb_insert_sps_pps; + /* The ICE username and pwd fragment generated by the muxer. */ char ice_ufrag_local[9]; char ice_pwd_local[33]; @@ -970,7 +977,6 @@ typedef struct RTCContext { int64_t rtc_ice_time; int64_t rtc_dtls_time; int64_t rtc_srtp_time; - int64_t rtc_ready_time; /* The DTLS context. */ DTLSContext dtls_ctx; @@ -1113,6 +1119,12 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) if (par->profile != FF_PROFILE_UNKNOWN && par->level != FF_LEVEL_UNKNOWN) return ret; + if (!par->extradata || par->extradata_size <= 0) { + par->profile = FF_PROFILE_H264_BASELINE; + par->level = 0x1e; + return ret; + } + while (1) { r = avpriv_find_start_code(r, end, &state); if (r >= end) @@ -1127,7 +1139,7 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) return ret; } - av_log(rtc, AV_LOG_INFO, "WHIP: Parse profile=%d, level=%d from SPS\n", + av_log(rtc, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", sps->profile_idc, sps->level_idc); par->profile = sps->profile_idc; par->level = sps->level_idc; @@ -2192,7 +2204,6 @@ static int rtc_create_rtp_muxer(AVFormatContext *s) if (rtc->state < RTC_STATE_READY) rtc->state = RTC_STATE_READY; - rtc->rtc_ready_time = av_gettime(); av_log(rtc, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", rtc->state, buffer_size, max_packet_size, ELAPSED(rtc->rtc_starttime, av_gettime()), @@ -2303,6 +2314,89 @@ static av_cold int rtc_init(AVFormatContext *s) return ret; } +/** + * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses + * the annexb format, it is necessary to manually insert encoder metadata before each + * IDR when dealing with annexb format packets. For instance, in the case of H.264, + * we must insert SPS and PPS before the IDR frame. 
+ */ +static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt) +{ + int ret = 0; + AVPacket *in = NULL; + AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar; + uint32_t nal_size = 0, out_size = par ? par->extradata_size : 0; + uint8_t unit_type, sps_seen = 0, pps_seen = 0, idr_seen = 0, *out; + const uint8_t *buf, *buf_end, *r1; + + if (!pkt || !pkt->data || pkt->size <= 0) + return ret; + if (!par || !par->extradata || par->extradata_size <= 0) + return ret; + + /* Discover NALU type from packet. */ + buf_end = pkt->data + pkt->size; + for (buf = ff_avc_find_startcode(pkt->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_avc_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + unit_type = *buf & 0x1f; + if (unit_type == H264_NAL_SPS) { + sps_seen = 1; + } else if (unit_type == H264_NAL_PPS) { + pps_seen = 1; + } else if (unit_type == H264_NAL_IDR_SLICE) { + idr_seen = 1; + } + + out_size += 3 + nal_size; + } + } + + if (!idr_seen || (sps_seen && pps_seen)) + return ret; + + /* See av_bsf_send_packet */ + in = av_packet_alloc(); + if (!in) + return AVERROR(ENOMEM); + + ret = av_packet_make_refcounted(pkt); + if (ret < 0) + goto fail; + + av_packet_move_ref(in, pkt); + + /* Create a new packet with sps/pps inserted. 
*/ + ret = av_new_packet(pkt, out_size); + if (ret < 0) + goto fail; + + ret = av_packet_copy_props(pkt, in); + if (ret < 0) + goto fail; + + memcpy(pkt->data, par->extradata, par->extradata_size); + out = pkt->data + par->extradata_size; + buf_end = in->data + in->size; + for (buf = ff_avc_find_startcode(in->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_avc_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + AV_WB24(out, 0x00001); + memcpy(out + 3, buf, nal_size); + out += 3 + nal_size; + } + } + +fail: + if (ret < 0) + av_packet_unref(pkt); + av_packet_free(&in); + + return ret; +} + static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) { int ret; @@ -2329,6 +2423,13 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) goto end; } + if (rtc->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { + if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { + av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); + goto end; + } + } + ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); if (ret < 0) { if (ret == AVERROR(EINVAL)) { @@ -2395,9 +2496,10 @@ static int rtc_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); - av_log(rtc, AV_LOG_INFO, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", + av_log(rtc, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", b[0], b[1], b[2], b[3], b[4], extradata_isom); - } + } else + rtc->h264_annexb_insert_sps_pps = 1; } return ret; @@ -2424,12 +2526,7 @@ const FFOutputFormat ff_rtc_muxer = { .p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"), 
.p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, - /** - * Avoid using AVFMT_GLOBALHEADER, for annexb format, it's necessary for the - * encoder to insert metadata headers (e.g., SPS/PPS for H.264) before each - * IDR frame. - */ - .p.flags = AVFMT_NOFILE, + .p.flags = AVFMT_NOFILE | AVFMT_GLOBALHEADER, .p.priv_class = &rtc_muxer_class, .priv_data_size = sizeof(RTCContext), .init = rtc_init, From 812932ea834142a644194765d1626f1af5cb051a Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 10 Jun 2023 08:49:02 +0800 Subject: [PATCH 49/60] WHIP: Failed immediately if there is no profile and extradata present. --- libavformat/rtcenc.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/libavformat/rtcenc.c b/libavformat/rtcenc.c index 9aca271f684ad..fe2a491fa3ab0 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/rtcenc.c @@ -1096,14 +1096,16 @@ static av_cold int whip_init(AVFormatContext *s) } /** + * When duplicating a stream, the demuxer has already set the extradata, profile, and + * level of the par. Keep in mind that this function will not be invoked since the + * profile and level are set. + * * When utilizing an encoder, such as libx264, to encode a stream, the extradata in * par->extradata contains the SPS, which includes profile and level information. * However, the profile and level of par remain unspecified. Therefore, it is necessary * to extract the profile and level data from the extradata and assign it to the par's - * profile and level. - * - * When copying a stream, the extradata, as well as the profile and level of the par, - * are already set by demuxer. + * profile and level. Keep in mind that AVFMT_GLOBALHEADER must be enabled; otherwise, + * the extradata will remain empty. 
*/ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) { @@ -1120,9 +1122,9 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) return ret; if (!par->extradata || par->extradata_size <= 0) { - par->profile = FF_PROFILE_H264_BASELINE; - par->level = 0x1e; - return ret; + av_log(rtc, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", + par->extradata, par->extradata_size); + return AVERROR(EINVAL); } while (1) { @@ -2526,7 +2528,7 @@ const FFOutputFormat ff_rtc_muxer = { .p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"), .p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, - .p.flags = AVFMT_NOFILE | AVFMT_GLOBALHEADER, + .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, .p.priv_class = &rtc_muxer_class, .priv_data_size = sizeof(RTCContext), .init = rtc_init, From aaf26ef2004d9919a7f38c96eb2787fd7cc2fe65 Mon Sep 17 00:00:00 2001 From: winlin Date: Mon, 19 Jun 2023 20:28:34 +0800 Subject: [PATCH 50/60] WHIP: Refine names and comments. 1. Change the CommonName from ffmpeg.org to lavf. 2. Rename rtcenc.c to whip.c, rtc to whip. 3. Replace av_get_random_seed by AVLFG. 4. Add TODO to support libtls, mbedtls, and gnutls. 
--- configure | 5 +- doc/muxers.texi | 82 ++-- libavformat/Makefile | 2 +- libavformat/allformats.c | 2 +- libavformat/{rtcenc.c => whip.c} | 789 ++++++++++++++++--------------- 5 files changed, 445 insertions(+), 435 deletions(-) rename libavformat/{rtcenc.c => whip.c} (76%) diff --git a/configure b/configure index 7d6f12cde63cf..698cfcd813dc9 100755 --- a/configure +++ b/configure @@ -3532,7 +3532,6 @@ ogg_demuxer_select="dirac_parse" ogv_muxer_select="ogg_muxer" opus_muxer_select="ogg_muxer" psp_muxer_select="mov_muxer" -rtc_muxer_deps_any="openssl" rtp_demuxer_select="sdp_demuxer" rtp_mpegts_muxer_select="mpegts_muxer rtp_muxer" rtpdec_select="asf_demuxer mov_demuxer mpegts_demuxer rm_demuxer rtp_protocol srtp" @@ -3557,6 +3556,8 @@ wav_demuxer_select="riffdec" wav_muxer_select="riffenc" webm_chunk_muxer_select="webm_muxer" webm_dash_manifest_demuxer_select="matroska_demuxer" +# TODO: Support libtls, mbedtls, and gnutls. +whip_muxer_deps_any="openssl" wtv_demuxer_select="mpegts_demuxer riffdec" wtv_muxer_select="mpegts_muxer riffenc" xmv_demuxer_select="riffdec" @@ -6914,7 +6915,7 @@ enabled rkmpp && { require_pkg_config rkmpp rockchip_mpp rockchip/r enabled vapoursynth && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init enabled openssl && { - enabled rtc_muxer && { + enabled whip_muxer && { $pkg_config --exists --print-errors "openssl >= 1.0.1k" || require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h SSL_library_init || require_pkg_config openssl "openssl >= 1.0.1k" openssl/ssl.h OPENSSL_init_ssl diff --git a/doc/muxers.texi b/doc/muxers.texi index c6cf61a864ebd..b4c6704fed90b 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -1333,47 +1333,6 @@ Set custom HTTP headers, can override built in default headers. Applicable only @end table -@anchor{rtc} -@section rtc - -WebRTC (Real-Time Communication) muxer that supports sub-second latency streaming according to -the WHIP (WebRTC-HTTP ingestion protocol) specification. 
- -It uses HTTP as a signaling protocol to exchange SDP capabilities and ICE lite candidates. Then, -it uses STUN binding requests and responses to establish a session over UDP. Subsequently, it -initiates a DTLS handshake to exchange the SRTP encryption keys. Lastly, it splits video and -audio frames into RTP packets and encrypts them using SRTP. - -Ensure that you use H.264 without B frames and Opus for the audio codec. For example, to convert -an input file with @command{ffmpeg} to WebRTC: -@example -ffmpeg -re -i input.mp4 -acodec libopus -ar 48000 -ac 2 \ - -vcodec libx264 -profile:v baseline -tune zerolatency -threads 1 -bf 0 \ - -f rtc "http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream" -@end example - -For this example, we have employed low latency options, resulting in an end-to-end latency of -approximately 150ms. - -@subsection Options - -This muxer supports the following options: - -@table @option - -@item handshake_timeout @var{integer} -Set the timeout in milliseconds for ICE and DTLS handshake. -Default value is 5000. - -@item pkt_size @var{integer} -Set the maximum size, in bytes, of RTP packets that send out. -Default value is 1500. - -@item authorization @var{string} -The optional Bearer token for WHIP Authorization. - -@end table - @anchor{ico} @section ico @@ -2887,4 +2846,45 @@ ffmpeg -f webm_dash_manifest -i video1.webm \ manifest.xml @end example +@anchor{whip} +@section whip + +WebRTC (Real-Time Communication) muxer that supports sub-second latency streaming according to +the WHIP (WebRTC-HTTP ingestion protocol) specification. + +It uses HTTP as a signaling protocol to exchange SDP capabilities and ICE lite candidates. Then, +it uses STUN binding requests and responses to establish a session over UDP. Subsequently, it +initiates a DTLS handshake to exchange the SRTP encryption keys. Lastly, it splits video and +audio frames into RTP packets and encrypts them using SRTP. 
+ +Ensure that you use H.264 without B frames and Opus for the audio codec. For example, to convert +an input file with @command{ffmpeg} to WebRTC: +@example +ffmpeg -re -i input.mp4 -acodec libopus -ar 48000 -ac 2 \ + -vcodec libx264 -profile:v baseline -tune zerolatency -threads 1 -bf 0 \ + -f whip "http://localhost:1985/rtc/v1/whip/?app=live&stream=livestream" +@end example + +For this example, we have employed low latency options, resulting in an end-to-end latency of +approximately 150ms. + +@subsection Options + +This muxer supports the following options: + +@table @option + +@item handshake_timeout @var{integer} +Set the timeout in milliseconds for ICE and DTLS handshake. +Default value is 5000. + +@item pkt_size @var{integer} +Set the maximum size, in bytes, of RTP packets that are sent out. +Default value is 1500. + +@item authorization @var{string} +The optional Bearer token for WHIP Authorization. + +@end table + @c man end MUXERS diff --git a/libavformat/Makefile b/libavformat/Makefile index b980b1232126b..dd429944234eb 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -499,7 +499,6 @@ OBJS-$(CONFIG_RSD_DEMUXER) += rsd.o OBJS-$(CONFIG_RPL_DEMUXER) += rpl.o OBJS-$(CONFIG_RSO_DEMUXER) += rsodec.o rso.o pcm.o OBJS-$(CONFIG_RSO_MUXER) += rsoenc.o rso.o rawenc.o -OBJS-$(CONFIG_RTC_MUXER) += rtcenc.o avc.o http.o srtp.o OBJS-$(CONFIG_RTP_MPEGTS_MUXER) += rtpenc_mpegts.o OBJS-$(CONFIG_RTP_MUXER) += rtp.o \ rtpenc_aac.o \ @@ -622,6 +621,7 @@ OBJS-$(CONFIG_WEBM_CHUNK_MUXER) += webm_chunk.o OBJS-$(CONFIG_WEBP_MUXER) += webpenc.o OBJS-$(CONFIG_WEBVTT_DEMUXER) += webvttdec.o subtitles.o OBJS-$(CONFIG_WEBVTT_MUXER) += webvttenc.o +OBJS-$(CONFIG_WHIP_MUXER) += whip.o avc.o http.o srtp.o OBJS-$(CONFIG_WSAUD_DEMUXER) += westwood_aud.o OBJS-$(CONFIG_WSAUD_MUXER) += westwood_audenc.o OBJS-$(CONFIG_WSD_DEMUXER) += wsddec.o rawdec.o diff --git a/libavformat/allformats.c b/libavformat/allformats.c index ddaa4da0bfd63..e8825a92b54a3 100644 --- 
a/libavformat/allformats.c +++ b/libavformat/allformats.c @@ -398,7 +398,6 @@ extern const AVInputFormat ff_rpl_demuxer; extern const AVInputFormat ff_rsd_demuxer; extern const AVInputFormat ff_rso_demuxer; extern const FFOutputFormat ff_rso_muxer; -extern const FFOutputFormat ff_rtc_muxer; extern const AVInputFormat ff_rtp_demuxer; extern const FFOutputFormat ff_rtp_muxer; extern const FFOutputFormat ff_rtp_mpegts_muxer; @@ -505,6 +504,7 @@ extern const FFOutputFormat ff_webm_chunk_muxer; extern const FFOutputFormat ff_webp_muxer; extern const AVInputFormat ff_webvtt_demuxer; extern const FFOutputFormat ff_webvtt_muxer; +extern const FFOutputFormat ff_whip_muxer; extern const AVInputFormat ff_wsaud_demuxer; extern const FFOutputFormat ff_wsaud_muxer; extern const AVInputFormat ff_wsd_demuxer; diff --git a/libavformat/rtcenc.c b/libavformat/whip.c similarity index 76% rename from libavformat/rtcenc.c rename to libavformat/whip.c index fe2a491fa3ab0..d65cf4931bfc4 100644 --- a/libavformat/rtcenc.c +++ b/libavformat/whip.c @@ -30,6 +30,7 @@ #include "libavutil/crc.h" #include "libavutil/hmac.h" #include "libavutil/intreadwrite.h" +#include "libavutil/lfg.h" #include "libavutil/opt.h" #include "libavutil/random_seed.h" #include "libavutil/time.h" @@ -110,8 +111,8 @@ #define DTLS_VERSION_12 0xfefd /* Referring to Chrome's definition of RTP payload types. */ -#define RTC_RTP_PAYLOAD_TYPE_H264 106 -#define RTC_RTP_PAYLOAD_TYPE_OPUS 111 +#define WHIP_RTP_PAYLOAD_TYPE_H264 106 +#define WHIP_RTP_PAYLOAD_TYPE_OPUS 111 /** * The STUN message header, which is 20 bytes long, comprises the @@ -126,7 +127,7 @@ * SequenceNumber(2B), Timestamp(4B), and SSRC(4B). * See https://www.rfc-editor.org/rfc/rfc3550#section-5.1 */ -#define RTC_RTP_HEADER_SIZE 12 +#define WHIP_RTP_HEADER_SIZE 12 /** * For RTCP, PT is [128, 223] (or without marker [0, 95]). 
Literally, RTCP starts @@ -138,15 +139,15 @@ * for standard audio and video encodings" at * https://www.iana.org/assignments/rtp-parameters/rtp-parameters.xhtml#rtp-parameters-1 */ -#define RTC_RTCP_PT_START 192 -#define RTC_RTCP_PT_END 223 +#define WHIP_RTCP_PT_START 192 +#define WHIP_RTCP_PT_END 223 /** * In the case of ICE-LITE, these fields are not used; instead, they are defined * as constant values. */ -#define RTC_SDP_SESSION_ID "4489045141692799359" -#define RTC_SDP_CREATOR_IP "127.0.0.1" +#define WHIP_SDP_SESSION_ID "4489045141692799359" +#define WHIP_SDP_CREATOR_IP "127.0.0.1" /* Calculate the elapsed time from starttime to endtime in milliseconds. */ #define ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) @@ -491,7 +492,7 @@ static int openssl_dtls_gen_certificate(DTLSContext *ctx) int ret = 0, serial, expire_day, i, n = 0; AVBPrint fingerprint; unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "ffmpeg.org"; + const char *aor = "lavf"; X509_NAME* subject = NULL; X509 *dtls_cert = NULL; @@ -885,41 +886,41 @@ static av_cold void dtls_context_deinit(DTLSContext *ctx) #endif } -enum RTCState { - RTC_STATE_NONE, +enum WHIPState { + WHIP_STATE_NONE, /* The initial state. */ - RTC_STATE_INIT, + WHIP_STATE_INIT, /* The muxer has sent the offer to the peer. */ - RTC_STATE_OFFER, + WHIP_STATE_OFFER, /* The muxer has received the answer from the peer. */ - RTC_STATE_ANSWER, + WHIP_STATE_ANSWER, /** * After parsing the answer received from the peer, the muxer negotiates the abilities * in the offer that it generated. */ - RTC_STATE_NEGOTIATED, + WHIP_STATE_NEGOTIATED, /* The muxer has connected to the peer via UDP. */ - RTC_STATE_UDP_CONNECTED, + WHIP_STATE_UDP_CONNECTED, /* The muxer has sent the ICE request to the peer. */ - RTC_STATE_ICE_CONNECTING, + WHIP_STATE_ICE_CONNECTING, /* The muxer has received the ICE response from the peer. 
*/ - RTC_STATE_ICE_CONNECTED, + WHIP_STATE_ICE_CONNECTED, /* The muxer has finished the DTLS handshake with the peer. */ - RTC_STATE_DTLS_FINISHED, + WHIP_STATE_DTLS_FINISHED, /* The muxer has finished the SRTP setup. */ - RTC_STATE_SRTP_FINISHED, + WHIP_STATE_SRTP_FINISHED, /* The muxer is ready to send/receive media frames. */ - RTC_STATE_READY, + WHIP_STATE_READY, /* The muxer is failed. */ - RTC_STATE_FAILED, + WHIP_STATE_FAILED, }; -typedef struct RTCContext { +typedef struct WHIPContext { AVClass *av_class; /* The state of the RTC connection. */ - enum RTCState state; + enum WHIPState state; /* The callback return value for DTLS. */ int dtls_ret; int dtls_closed; @@ -935,6 +936,9 @@ typedef struct RTCContext { */ int h264_annexb_insert_sps_pps; + /* The random number generator. */ + AVLFG rnd; + /* The ICE username and pwd fragment generated by the muxer. */ char ice_ufrag_local[9]; char ice_pwd_local[33]; @@ -968,15 +972,15 @@ typedef struct RTCContext { char *whip_resource_url; /* These variables represent timestamps used for calculating and tracking the cost. */ - int64_t rtc_starttime; + int64_t whip_starttime; /* */ - int64_t rtc_init_time; - int64_t rtc_offer_time; - int64_t rtc_answer_time; - int64_t rtc_udp_time; - int64_t rtc_ice_time; - int64_t rtc_dtls_time; - int64_t rtc_srtp_time; + int64_t whip_init_time; + int64_t whip_offer_time; + int64_t whip_answer_time; + int64_t whip_udp_time; + int64_t whip_ice_time; + int64_t whip_dtls_time; + int64_t whip_srtp_time; /* The DTLS context. */ DTLSContext dtls_ctx; @@ -1005,7 +1009,7 @@ typedef struct RTCContext { * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz */ char* authorization; -} RTCContext; +} WHIPContext; /** * When DTLS state change. 
@@ -1014,30 +1018,30 @@ static int dtls_context_on_state(DTLSContext *ctx, enum DTLSState state, const c { int ret = 0; AVFormatContext *s = ctx->opaque; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; if (state == DTLS_STATE_CLOSED) { - rtc->dtls_closed = 1; - av_log(rtc, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", - type ? type : "", desc ? desc : "", ELAPSED(rtc->rtc_starttime, av_gettime())); + whip->dtls_closed = 1; + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", + type ? type : "", desc ? desc : "", ELAPSED(whip->whip_starttime, av_gettime())); return ret; } if (state == DTLS_STATE_FAILED) { - rtc->state = RTC_STATE_FAILED; - av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", + whip->state = WHIP_STATE_FAILED; + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", type ? type : "", desc ? desc : ""); - rtc->dtls_ret = AVERROR(EIO); + whip->dtls_ret = AVERROR(EIO); return ret; } - if (state == DTLS_STATE_FINISHED && rtc->state < RTC_STATE_DTLS_FINISHED) { - rtc->state = RTC_STATE_DTLS_FINISHED; - rtc->rtc_dtls_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%luB, cost=%dms, elapsed=%dms\n", + if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { + whip->state = WHIP_STATE_DTLS_FINISHED; + whip->whip_dtls_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%luB, cost=%dms, elapsed=%dms\n", ctx->dtls_done_for_us, ctx->dtls_srtp_key_exported, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_materials), ELAPSED(ctx->dtls_handshake_starttime, ctx->dtls_handshake_endtime), - ELAPSED(rtc->rtc_starttime, av_gettime())); + ELAPSED(whip->whip_starttime, av_gettime())); return ret; } @@ -1050,47 +1054,52 @@ static int dtls_context_on_state(DTLSContext *ctx, enum 
DTLSState state, const c static int dtls_context_on_write(DTLSContext *ctx, char* data, int size) { AVFormatContext *s = ctx->opaque; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; - if (!rtc->udp_uc) { - av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); + if (!whip->udp_uc) { + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); return AVERROR(EIO); } - return ffurl_write(rtc->udp_uc, data, size); + return ffurl_write(whip->udp_uc, data, size); } /** * Initialize and check the options for the WebRTC muxer. */ -static av_cold int whip_init(AVFormatContext *s) +static av_cold int initialize(AVFormatContext *s) { int ret, ideal_pkt_size = 532; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; + uint32_t seed; + + whip->whip_starttime = av_gettime(); - rtc->rtc_starttime = av_gettime(); + /* Initialize the random number generator. */ + seed = av_get_random_seed(); + av_lfg_init(&whip->rnd, seed); /* Use the same logging context as AV format. 
*/ - rtc->dtls_ctx.av_class = rtc->av_class; - rtc->dtls_ctx.mtu = rtc->pkt_size; - rtc->dtls_ctx.opaque = s; - rtc->dtls_ctx.on_state = dtls_context_on_state; - rtc->dtls_ctx.on_write = dtls_context_on_write; - - if ((ret = dtls_context_init(&rtc->dtls_ctx)) < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); + whip->dtls_ctx.av_class = whip->av_class; + whip->dtls_ctx.mtu = whip->pkt_size; + whip->dtls_ctx.opaque = s; + whip->dtls_ctx.on_state = dtls_context_on_state; + whip->dtls_ctx.on_write = dtls_context_on_write; + + if ((ret = dtls_context_init(&whip->dtls_ctx)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); return ret; } - if (rtc->pkt_size < ideal_pkt_size) - av_log(rtc, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", - rtc->pkt_size, ideal_pkt_size); + if (whip->pkt_size < ideal_pkt_size) + av_log(whip, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + whip->pkt_size, ideal_pkt_size); - if (rtc->state < RTC_STATE_INIT) - rtc->state = RTC_STATE_INIT; - rtc->rtc_init_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, elapsed=%dms\n", - rtc->state, rtc->handshake_timeout, rtc->pkt_size, ELAPSED(rtc->rtc_starttime, av_gettime())); + if (whip->state < WHIP_STATE_INIT) + whip->state = WHIP_STATE_INIT; + whip->whip_init_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, seed=%d, elapsed=%dms\n", + whip->state, whip->handshake_timeout, whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime())); return 0; } @@ -1113,7 +1122,7 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; H264SPS seq, *const sps = &seq; uint32_t state; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; if (par->codec_id != 
AV_CODEC_ID_H264) return ret; @@ -1122,7 +1131,7 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) return ret; if (!par->extradata || par->extradata_size <= 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", par->extradata, par->extradata_size); return AVERROR(EINVAL); } @@ -1136,12 +1145,12 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) if ((state & 0x1f) == H264_NAL_SPS) { ret = ff_avc_decode_sps(sps, r, r1 - r); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", state, (int)(r1 - r)); return ret; } - av_log(rtc, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", + av_log(whip, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", sps->profile_idc, sps->level_idc); par->profile = sps->profile_idc; par->level = sps->level_idc; @@ -1177,70 +1186,70 @@ static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) static int parse_codec(AVFormatContext *s) { int i, ret = 0; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; for (i = 0; i < s->nb_streams; i++) { AVCodecParameters *par = s->streams[i]->codecpar; const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); switch (par->codec_type) { case AVMEDIA_TYPE_VIDEO: - if (rtc->video_par) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); + if (whip->video_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); return AVERROR(EINVAL); } - rtc->video_par = par; + whip->video_par = par; if (par->codec_id != AV_CODEC_ID_H264) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported video codec %s by RTC, choose h264\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported video 
codec %s by RTC, choose h264\n", desc ? desc->name : "unknown"); return AVERROR_PATCHWELCOME; } if (par->video_delay > 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); return AVERROR_PATCHWELCOME; } if ((ret = parse_profile_level(s, par)) < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); return AVERROR(EINVAL); } if (par->profile == FF_PROFILE_UNKNOWN) { - av_log(rtc, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); + av_log(whip, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); return AVERROR(EINVAL); } if (par->level == FF_LEVEL_UNKNOWN) { - av_log(rtc, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); + av_log(whip, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); return AVERROR(EINVAL); } break; case AVMEDIA_TYPE_AUDIO: - if (rtc->audio_par) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); + if (whip->audio_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); return AVERROR(EINVAL); } - rtc->audio_par = par; + whip->audio_par = par; if (par->codec_id != AV_CODEC_ID_OPUS) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", desc ? 
desc->name : "unknown"); return AVERROR_PATCHWELCOME; } if (par->ch_layout.nb_channels != 2) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", par->ch_layout.nb_channels); return AVERROR_PATCHWELCOME; } if (par->sample_rate != 48000) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); return AVERROR_PATCHWELCOME; } break; default: - av_log(rtc, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", av_get_media_type_string(par->codec_type), i); return AVERROR_PATCHWELCOME; } @@ -1262,28 +1271,28 @@ static int generate_sdp_offer(AVFormatContext *s) int ret = 0, profile, level, profile_iop; const char *acodec_name = NULL, *vcodec_name = NULL; AVBPrint bp; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; /* To prevent a crash during cleanup, always initialize it. 
*/ av_bprint_init(&bp, 1, MAX_SDP_SIZE); - if (rtc->sdp_offer) { - av_log(rtc, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); + if (whip->sdp_offer) { + av_log(whip, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); ret = AVERROR(EINVAL); goto end; } - snprintf(rtc->ice_ufrag_local, sizeof(rtc->ice_ufrag_local), "%08x", - av_get_random_seed()); - snprintf(rtc->ice_pwd_local, sizeof(rtc->ice_pwd_local), "%08x%08x%08x%08x", - av_get_random_seed(), av_get_random_seed(), av_get_random_seed(), - av_get_random_seed()); + snprintf(whip->ice_ufrag_local, sizeof(whip->ice_ufrag_local), "%08x", + av_lfg_get(&whip->rnd)); + snprintf(whip->ice_pwd_local, sizeof(whip->ice_pwd_local), "%08x%08x%08x%08x", + av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), + av_lfg_get(&whip->rnd)); - rtc->audio_ssrc = av_get_random_seed(); - rtc->video_ssrc = av_get_random_seed(); + whip->audio_ssrc = av_lfg_get(&whip->rnd); + whip->video_ssrc = av_lfg_get(&whip->rnd); - rtc->audio_payload_type = RTC_RTP_PAYLOAD_TYPE_OPUS; - rtc->video_payload_type = RTC_RTP_PAYLOAD_TYPE_H264; + whip->audio_payload_type = WHIP_RTP_PAYLOAD_TYPE_OPUS; + whip->video_payload_type = WHIP_RTP_PAYLOAD_TYPE_H264; av_bprintf(&bp, "" "v=0\r\n" @@ -1293,11 +1302,11 @@ static int generate_sdp_offer(AVFormatContext *s) "a=group:BUNDLE 0 1\r\n" "a=extmap-allow-mixed\r\n" "a=msid-semantic: WMS\r\n", - RTC_SDP_SESSION_ID, - RTC_SDP_CREATOR_IP); + WHIP_SDP_SESSION_ID, + WHIP_SDP_CREATOR_IP); - if (rtc->audio_par) { - if (rtc->audio_par->codec_id == AV_CODEC_ID_OPUS) + if (whip->audio_par) { + if (whip->audio_par->codec_id == AV_CODEC_ID_OPUS) acodec_name = "opus"; av_bprintf(&bp, "" @@ -1314,22 +1323,22 @@ static int generate_sdp_offer(AVFormatContext *s) "a=rtpmap:%u %s/%d/%d\r\n" "a=ssrc:%u cname:FFmpeg\r\n" "a=ssrc:%u msid:FFmpeg audio\r\n", - rtc->audio_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->dtls_ctx.dtls_fingerprint, - rtc->audio_payload_type, + 
whip->audio_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + whip->dtls_ctx.dtls_fingerprint, + whip->audio_payload_type, acodec_name, - rtc->audio_par->sample_rate, - rtc->audio_par->ch_layout.nb_channels, - rtc->audio_ssrc, - rtc->audio_ssrc); + whip->audio_par->sample_rate, + whip->audio_par->ch_layout.nb_channels, + whip->audio_ssrc, + whip->audio_ssrc); } - if (rtc->video_par) { - profile_iop = profile = rtc->video_par->profile; - level = rtc->video_par->level; - if (rtc->video_par->codec_id == AV_CODEC_ID_H264) { + if (whip->video_par) { + profile_iop = profile = whip->video_par->profile; + level = whip->video_par->level; + if (whip->video_par->codec_id == AV_CODEC_ID_H264) { vcodec_name = "H264"; profile_iop &= FF_PROFILE_H264_CONSTRAINED; profile &= (~FF_PROFILE_H264_CONSTRAINED); @@ -1351,36 +1360,36 @@ static int generate_sdp_offer(AVFormatContext *s) "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" "a=ssrc:%u cname:FFmpeg\r\n" "a=ssrc:%u msid:FFmpeg video\r\n", - rtc->video_payload_type, - rtc->ice_ufrag_local, - rtc->ice_pwd_local, - rtc->dtls_ctx.dtls_fingerprint, - rtc->video_payload_type, + whip->video_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + whip->dtls_ctx.dtls_fingerprint, + whip->video_payload_type, vcodec_name, - rtc->video_payload_type, + whip->video_payload_type, profile, profile_iop, level, - rtc->video_ssrc, - rtc->video_ssrc); + whip->video_ssrc, + whip->video_ssrc); } if (!av_bprint_is_complete(&bp)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); + av_log(whip, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); ret = AVERROR(EIO); goto end; } - rtc->sdp_offer = av_strdup(bp.str); - if (!rtc->sdp_offer) { + whip->sdp_offer = av_strdup(bp.str); + if (!whip->sdp_offer) { ret = AVERROR(ENOMEM); goto end; } - if (rtc->state < RTC_STATE_OFFER) - rtc->state = RTC_STATE_OFFER; - rtc->rtc_offer_time = 
av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", rtc->state, rtc->sdp_offer); + if (whip->state < WHIP_STATE_OFFER) + whip->state = WHIP_STATE_OFFER; + whip->whip_offer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); end: av_bprint_finalize(&bp, NULL); @@ -1397,7 +1406,7 @@ static int exchange_sdp(AVFormatContext *s) int ret; char buf[MAX_URL_SIZE]; AVBPrint bp; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; /* The URL context is an HTTP transport layer for the WHIP protocol. */ URLContext *whip_uc = NULL; @@ -1406,12 +1415,12 @@ static int exchange_sdp(AVFormatContext *s) ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to alloc HTTP context: %s\n", s->url); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to alloc HTTP context: %s\n", s->url); goto end; } - if (!rtc->sdp_offer || !strlen(rtc->sdp_offer)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); + if (!whip->sdp_offer || !strlen(whip->sdp_offer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); ret = AVERROR(EINVAL); goto end; } @@ -1419,27 +1428,27 @@ static int exchange_sdp(AVFormatContext *s) ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n" "Content-Type: application/sdp\r\n"); - if (rtc->authorization) - ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", rtc->authorization); + if (whip->authorization) + ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); if (ret <= 0 || ret >= sizeof(buf)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); ret = AVERROR(EINVAL); goto end; } av_opt_set(whip_uc->priv_data, "headers", buf, 0); 
av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set_bin(whip_uc->priv_data, "post_data", rtc->sdp_offer, (int)strlen(rtc->sdp_offer), 0); + av_opt_set_bin(whip_uc->priv_data, "post_data", whip->sdp_offer, (int)strlen(whip->sdp_offer), 0); ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, rtc->sdp_offer); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, whip->sdp_offer); goto end; } if (ff_http_get_new_location(whip_uc)) { - rtc->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); - if (!rtc->whip_resource_url) { + whip->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); + if (!whip->whip_resource_url) { ret = AVERROR(ENOMEM); goto end; } @@ -1453,34 +1462,34 @@ static int exchange_sdp(AVFormatContext *s) break; } if (ret <= 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", - s->url, rtc->sdp_offer, rtc->sdp_answer); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", + s->url, whip->sdp_offer, whip->sdp_answer); goto end; } av_bprintf(&bp, "%.*s", ret, buf); if (!av_bprint_is_complete(&bp)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); + av_log(whip, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); ret = AVERROR(EIO); goto end; } } if (!av_strstart(bp.str, "v=", NULL)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); ret = AVERROR(EINVAL); goto end; } - rtc->sdp_answer = av_strdup(bp.str); - if (!rtc->sdp_answer) { + whip->sdp_answer = av_strdup(bp.str); + if (!whip->sdp_answer) { ret = AVERROR(ENOMEM); goto end; } - if (rtc->state < RTC_STATE_ANSWER) - rtc->state = RTC_STATE_ANSWER; - av_log(rtc, 
AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", rtc->state, rtc->sdp_answer); + if (whip->state < WHIP_STATE_ANSWER) + whip->state = WHIP_STATE_ANSWER; + av_log(whip, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", whip->state, whip->sdp_answer); end: ffurl_closep(&whip_uc); @@ -1506,56 +1515,56 @@ static int parse_answer(AVFormatContext *s) char line[MAX_URL_SIZE]; const char *ptr; int i; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; - if (!rtc->sdp_answer || !strlen(rtc->sdp_answer)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: No answer to parse\n"); + if (!whip->sdp_answer || !strlen(whip->sdp_answer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No answer to parse\n"); ret = AVERROR(EINVAL); goto end; } - pb = avio_alloc_context(rtc->sdp_answer, strlen(rtc->sdp_answer), 0, NULL, NULL, NULL, NULL); + pb = avio_alloc_context(whip->sdp_answer, strlen(whip->sdp_answer), 0, NULL, NULL, NULL, NULL); if (!pb) return AVERROR(ENOMEM); for (i = 0; !avio_feof(pb); i++) { ff_get_chomp_line(pb, line, sizeof(line)); - if (av_strstart(line, "a=ice-ufrag:", &ptr) && !rtc->ice_ufrag_remote) { - rtc->ice_ufrag_remote = av_strdup(ptr); - if (!rtc->ice_ufrag_remote) { + if (av_strstart(line, "a=ice-ufrag:", &ptr) && !whip->ice_ufrag_remote) { + whip->ice_ufrag_remote = av_strdup(ptr); + if (!whip->ice_ufrag_remote) { ret = AVERROR(ENOMEM); goto end; } - } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !rtc->ice_pwd_remote) { - rtc->ice_pwd_remote = av_strdup(ptr); - if (!rtc->ice_pwd_remote) { + } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !whip->ice_pwd_remote) { + whip->ice_pwd_remote = av_strdup(ptr); + if (!whip->ice_pwd_remote) { ret = AVERROR(ENOMEM); goto end; } - } else if (av_strstart(line, "a=candidate:", &ptr) && !rtc->ice_protocol) { + } else if (av_strstart(line, "a=candidate:", &ptr) && !whip->ice_protocol) { ptr = av_stristr(ptr, "udp"); if (ptr && av_stristr(ptr, "host")) { char protocol[17], host[129]; int priority, port; ret = 
sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); if (ret != 4) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", - ret, i, line, rtc->sdp_answer); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", + ret, i, line, whip->sdp_answer); ret = AVERROR(EIO); goto end; } if (av_strcasecmp(protocol, "udp")) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", - protocol, i, line, rtc->sdp_answer); + av_log(whip, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", + protocol, i, line, whip->sdp_answer); ret = AVERROR(EIO); goto end; } - rtc->ice_protocol = av_strdup(protocol); - rtc->ice_host = av_strdup(host); - rtc->ice_port = port; - if (!rtc->ice_protocol || !rtc->ice_host) { + whip->ice_protocol = av_strdup(protocol); + whip->ice_host = av_strdup(host); + whip->ice_port = port; + if (!whip->ice_protocol || !whip->ice_host) { ret = AVERROR(ENOMEM); goto end; } @@ -1563,30 +1572,30 @@ static int parse_answer(AVFormatContext *s) } } - if (!rtc->ice_pwd_remote || !strlen(rtc->ice_pwd_remote)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", rtc->sdp_answer); + if (!whip->ice_pwd_remote || !strlen(whip->ice_pwd_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", whip->sdp_answer); ret = AVERROR(EINVAL); goto end; } - if (!rtc->ice_ufrag_remote || !strlen(rtc->ice_ufrag_remote)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", rtc->sdp_answer); + if (!whip->ice_ufrag_remote || !strlen(whip->ice_ufrag_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", whip->sdp_answer); ret = AVERROR(EINVAL); goto end; } - if (!rtc->ice_protocol || !rtc->ice_host || !rtc->ice_port) { - av_log(rtc, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", rtc->sdp_answer); + if (!whip->ice_protocol || 
!whip->ice_host || !whip->ice_port) { + av_log(whip, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", whip->sdp_answer); ret = AVERROR(EINVAL); goto end; } - if (rtc->state < RTC_STATE_NEGOTIATED) - rtc->state = RTC_STATE_NEGOTIATED; - rtc->rtc_answer_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", - rtc->state, strlen(rtc->sdp_offer), strlen(rtc->sdp_answer), rtc->ice_ufrag_remote, strlen(rtc->ice_pwd_remote), - rtc->ice_protocol, rtc->ice_host, rtc->ice_port, ELAPSED(rtc->rtc_starttime, av_gettime())); + if (whip->state < WHIP_STATE_NEGOTIATED) + whip->state = WHIP_STATE_NEGOTIATED; + whip->whip_answer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), + whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); end: avio_context_free(&pb); @@ -1612,7 +1621,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in char username[128]; AVIOContext *pb = NULL; AVHMAC *hmac = NULL; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); if (!pb) @@ -1628,15 +1637,15 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in avio_wb16(pb, 0x0001); /* STUN binding request */ avio_wb16(pb, 0); /* length */ avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ - avio_wb32(pb, av_get_random_seed()); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + 
avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ /* The username is the concatenation of the two ICE ufrag */ - ret = snprintf(username, sizeof(username), "%s:%s", rtc->ice_ufrag_remote, rtc->ice_ufrag_local); + ret = snprintf(username, sizeof(username), "%s:%s", whip->ice_ufrag_remote, whip->ice_ufrag_local); if (ret <= 0 || ret >= sizeof(username)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", - rtc->ice_ufrag_remote, rtc->ice_ufrag_local, sizeof(username), ret); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", + whip->ice_ufrag_remote, whip->ice_ufrag_local, sizeof(username), ret); ret = AVERROR(EIO); goto end; } @@ -1658,7 +1667,7 @@ static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, in size = avio_tell(pb); buf[2] = (size - 20) >> 8; buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, rtc->ice_pwd_remote, strlen(rtc->ice_pwd_remote)); + av_hmac_init(hmac, whip->ice_pwd_remote, strlen(whip->ice_pwd_remote)); av_hmac_update(hmac, buf, size - 24); av_hmac_final(hmac, buf + size - 20, 20); @@ -1701,10 +1710,10 @@ static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint int ret = 0, size, crc32; AVIOContext *pb = NULL; AVHMAC *hmac = NULL; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; if (tid_size != 12) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. Expected 12, got %d\n", tid_size); + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. 
Expected 12, got %d\n", tid_size); return AVERROR(EINVAL); } @@ -1731,7 +1740,7 @@ static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint size = avio_tell(pb); buf[2] = (size - 20) >> 8; buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, rtc->ice_pwd_local, strlen(rtc->ice_pwd_local)); + av_hmac_init(hmac, whip->ice_pwd_local, strlen(whip->ice_pwd_local)); av_hmac_update(hmac, buf, size - 24); av_hmac_final(hmac, buf + size - 20, 20); @@ -1781,15 +1790,15 @@ static int ice_is_binding_response(uint8_t *b, int size) * The RTCP packet header is similar to RTP, * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 */ -static int rtc_is_rtp_rtcp(uint8_t *b, int size) +static int media_is_rtp_rtcp(uint8_t *b, int size) { - return size >= RTC_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; + return size >= WHIP_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; } /* Whether the packet is RTCP. */ -static int rtc_is_rtcp(uint8_t *b, int size) +static int media_is_rtcp(uint8_t *b, int size) { - return size >= RTC_RTP_HEADER_SIZE && b[1] >= RTC_RTCP_PT_START && b[1] <= RTC_RTCP_PT_END; + return size >= WHIP_RTP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; } /** @@ -1800,14 +1809,14 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz { int ret = 0, size; char tid[12]; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; /* Ignore if not a binding request. */ if (!ice_is_binding_request(buf, buf_size)) return ret; if (buf_size < ICE_STUN_HEADER_SIZE) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", ICE_STUN_HEADER_SIZE, buf_size); return AVERROR(EINVAL); } @@ -1816,15 +1825,15 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz memcpy(tid, buf + 8, 12); /* Build the STUN binding response. 
*/ - ret = ice_create_response(s, tid, sizeof(tid), rtc->buf, sizeof(rtc->buf), &size); + ret = ice_create_response(s, tid, sizeof(tid), whip->buf, sizeof(whip->buf), &size); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); return ret; } - ret = ffurl_write(rtc->udp_uc, rtc->buf, size); + ret = ffurl_write(whip->udp_uc, whip->buf, size); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); return ret; } @@ -1840,37 +1849,37 @@ static int udp_connect(AVFormatContext *s) { int ret = 0; char url[256], tmp[16]; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; /* Build UDP URL and create the UDP context as transport. */ - ff_url_join(url, sizeof(url), "udp", NULL, rtc->ice_host, rtc->ice_port, NULL); - ret = ffurl_alloc(&rtc->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); + ff_url_join(url, sizeof(url), "udp", NULL, whip->ice_host, whip->ice_port, NULL); + ret = ffurl_alloc(&whip->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to open udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to open udp://%s:%d\n", whip->ice_host, whip->ice_port); return ret; } - av_opt_set(rtc->udp_uc->priv_data, "connect", "1", 0); - av_opt_set(rtc->udp_uc->priv_data, "fifo_size", "0", 0); + av_opt_set(whip->udp_uc->priv_data, "connect", "1", 0); + av_opt_set(whip->udp_uc->priv_data, "fifo_size", "0", 0); /* Set the max packet size to the buffer size. 
*/ - snprintf(tmp, sizeof(tmp), "%d", rtc->pkt_size); - av_opt_set(rtc->udp_uc->priv_data, "pkt_size", tmp, 0); + snprintf(tmp, sizeof(tmp), "%d", whip->pkt_size); + av_opt_set(whip->udp_uc->priv_data, "pkt_size", tmp, 0); - ret = ffurl_connect(rtc->udp_uc, NULL); + ret = ffurl_connect(whip->udp_uc, NULL); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", rtc->ice_host, rtc->ice_port); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", whip->ice_host, whip->ice_port); return ret; } /* Make the socket non-blocking, set to READ and WRITE mode after connected */ - ff_socket_nonblock(ffurl_get_file_handle(rtc->udp_uc), 1); - rtc->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + ff_socket_nonblock(ffurl_get_file_handle(whip->udp_uc), 1); + whip->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; - if (rtc->state < RTC_STATE_UDP_CONNECTED) - rtc->state = RTC_STATE_UDP_CONNECTED; - rtc->rtc_udp_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", - rtc->state, ELAPSED(rtc->rtc_starttime, av_gettime()), rtc->ice_host, rtc->ice_port); + if (whip->state < WHIP_STATE_UDP_CONNECTED) + whip->state = WHIP_STATE_UDP_CONNECTED; + whip->whip_udp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", + whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); return ret; } @@ -1879,55 +1888,55 @@ static int ice_dtls_handshake(AVFormatContext *s) { int ret = 0, size, i; int64_t starttime = av_gettime(), now; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; - if (rtc->state < RTC_STATE_UDP_CONNECTED || !rtc->udp_uc) { - av_log(rtc, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", rtc->state, rtc->udp_uc); + if (whip->state < WHIP_STATE_UDP_CONNECTED || !whip->udp_uc) { + av_log(whip, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, 
udp_uc=%p\n", whip->state, whip->udp_uc); return AVERROR(EINVAL); } while (1) { - if (rtc->state <= RTC_STATE_ICE_CONNECTING) { + if (whip->state <= WHIP_STATE_ICE_CONNECTING) { /* Build the STUN binding request. */ - ret = ice_create_request(s, rtc->buf, sizeof(rtc->buf), &size); + ret = ice_create_request(s, whip->buf, sizeof(whip->buf), &size); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); goto end; } - ret = ffurl_write(rtc->udp_uc, rtc->buf, size); + ret = ffurl_write(whip->udp_uc, whip->buf, size); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); goto end; } - if (rtc->state < RTC_STATE_ICE_CONNECTING) - rtc->state = RTC_STATE_ICE_CONNECTING; + if (whip->state < WHIP_STATE_ICE_CONNECTING) + whip->state = WHIP_STATE_ICE_CONNECTING; } next_packet: - if (rtc->state >= RTC_STATE_DTLS_FINISHED) + if (whip->state >= WHIP_STATE_DTLS_FINISHED) /* DTLS handshake is done, exit the loop. */ break; now = av_gettime(); - if (now - starttime >= rtc->handshake_timeout * 1000) { - av_log(rtc, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", - rtc->handshake_timeout, ELAPSED(starttime, now), ELAPSED(rtc->rtc_starttime, now), rtc->state); + if (now - starttime >= whip->handshake_timeout * 1000) { + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + whip->handshake_timeout, ELAPSED(starttime, now), ELAPSED(whip->whip_starttime, now), whip->state); ret = AVERROR(ETIMEDOUT); goto end; } /* Read the STUN or DTLS messages from peer. 
*/ for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5; i++) { - ret = ffurl_read(rtc->udp_uc, rtc->buf, sizeof(rtc->buf)); + ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); if (ret > 0) break; if (ret == AVERROR(EAGAIN)) { av_usleep(5 * 1000); continue; } - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read message\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read message\n"); goto end; } @@ -1936,31 +1945,31 @@ static int ice_dtls_handshake(AVFormatContext *s) continue; /* Handle the ICE binding response. */ - if (ice_is_binding_response(rtc->buf, ret)) { - if (rtc->state < RTC_STATE_ICE_CONNECTED) { - rtc->state = RTC_STATE_ICE_CONNECTED; - rtc->rtc_ice_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", - rtc->state, rtc->ice_host, rtc->ice_port, rtc->whip_resource_url ? rtc->whip_resource_url : "", - rtc->ice_ufrag_remote, rtc->ice_ufrag_local, ret, ELAPSED(rtc->rtc_starttime, av_gettime())); + if (ice_is_binding_response(whip->buf, ret)) { + if (whip->state < WHIP_STATE_ICE_CONNECTED) { + whip->state = WHIP_STATE_ICE_CONNECTED; + whip->whip_ice_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", + whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); /* If got the first binding response, start DTLS handshake. */ - if ((ret = dtls_context_start(&rtc->dtls_ctx)) < 0) + if ((ret = dtls_context_start(&whip->dtls_ctx)) < 0) goto end; } goto next_packet; } /* When a binding request is received, it is necessary to respond immediately. 
*/ - if (ice_is_binding_request(rtc->buf, ret)) { - if ((ret = ice_handle_binding_request(s, rtc->buf, ret)) < 0) + if (ice_is_binding_request(whip->buf, ret)) { + if ((ret = ice_handle_binding_request(s, whip->buf, ret)) < 0) goto end; goto next_packet; } /* If got any DTLS messages, handle it. */ - if (is_dtls_packet(rtc->buf, ret) && rtc->state >= RTC_STATE_ICE_CONNECTED) { - if ((ret = dtls_context_write(&rtc->dtls_ctx, rtc->buf, ret)) < 0) + if (is_dtls_packet(whip->buf, ret) && whip->state >= WHIP_STATE_ICE_CONNECTED) { + if ((ret = dtls_context_write(&whip->dtls_ctx, whip->buf, ret)) < 0) goto end; goto next_packet; } @@ -1990,7 +1999,7 @@ static int setup_srtp(AVFormatContext *s) * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. */ const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; /** * This represents the material used to build the SRTP master key. It is @@ -1998,8 +2007,8 @@ static int setup_srtp(AVFormatContext *s) * 16B 16B 14B 14B * client_key | server_key | client_salt | server_salt */ - char *client_key = rtc->dtls_ctx.dtls_srtp_materials; - char *server_key = rtc->dtls_ctx.dtls_srtp_materials + DTLS_SRTP_KEY_LEN; + char *client_key = whip->dtls_ctx.dtls_srtp_materials; + char *server_key = whip->dtls_ctx.dtls_srtp_materials + DTLS_SRTP_KEY_LEN; char *client_salt = server_key + DTLS_SRTP_KEY_LEN; char *server_salt = client_salt + DTLS_SRTP_SALT_LEN; @@ -2013,47 +2022,47 @@ static int setup_srtp(AVFormatContext *s) /* Setup SRTP context for outgoing packets */ if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); ret = AVERROR(EIO); goto end; } - ret = ff_srtp_set_crypto(&rtc->srtp_audio_send, suite, buf); + ret = ff_srtp_set_crypto(&whip->srtp_audio_send, suite, buf); if (ret < 0) { - av_log(rtc, 
AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); goto end; } - ret = ff_srtp_set_crypto(&rtc->srtp_video_send, suite, buf); + ret = ff_srtp_set_crypto(&whip->srtp_video_send, suite, buf); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); goto end; } - ret = ff_srtp_set_crypto(&rtc->srtp_rtcp_send, suite, buf); + ret = ff_srtp_set_crypto(&whip->srtp_rtcp_send, suite, buf); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + av_log(whip, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); goto end; } /* Setup SRTP context for incoming packets */ if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to encode recv key\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode recv key\n"); ret = AVERROR(EIO); goto end; } - ret = ff_srtp_set_crypto(&rtc->srtp_recv, suite, buf); + ret = ff_srtp_set_crypto(&whip->srtp_recv, suite, buf); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); goto end; } - if (rtc->state < RTC_STATE_SRTP_FINISHED) - rtc->state = RTC_STATE_SRTP_FINISHED; - rtc->rtc_srtp_time = av_gettime(); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", - rtc->state, suite, sizeof(send_key), ELAPSED(rtc->rtc_starttime, av_gettime())); + if (whip->state < WHIP_STATE_SRTP_FINISHED) + whip->state = WHIP_STATE_SRTP_FINISHED; + whip->whip_srtp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", + whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime())); end: return ret; @@ -2071,33 +2080,33 @@ static int 
on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) int ret, cipher_size, is_rtcp, is_video; uint8_t payload_type; AVFormatContext *s = opaque; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; SRTPContext *srtp; /* Ignore if not RTP or RTCP packet. */ - if (!rtc_is_rtp_rtcp(buf, buf_size)) + if (!media_is_rtp_rtcp(buf, buf_size)) return 0; /* Only support audio, video and rtcp. */ - is_rtcp = rtc_is_rtcp(buf, buf_size); + is_rtcp = media_is_rtcp(buf, buf_size); payload_type = buf[1] & 0x7f; - is_video = payload_type == rtc->video_payload_type; - if (!is_rtcp && payload_type != rtc->video_payload_type && payload_type != rtc->audio_payload_type) + is_video = payload_type == whip->video_payload_type; + if (!is_rtcp && payload_type != whip->video_payload_type && payload_type != whip->audio_payload_type) return 0; /* Get the corresponding SRTP context. */ - srtp = is_rtcp ? &rtc->srtp_rtcp_send : (is_video? &rtc->srtp_video_send : &rtc->srtp_audio_send); + srtp = is_rtcp ? &whip->srtp_rtcp_send : (is_video? &whip->srtp_video_send : &whip->srtp_audio_send); /* Encrypt by SRTP and send out. 
*/ - cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, rtc->buf, sizeof(rtc->buf)); + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, whip->buf, sizeof(whip->buf)); if (cipher_size <= 0 || cipher_size < buf_size) { - av_log(rtc, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); return 0; } - ret = ffurl_write(rtc->udp_uc, rtc->buf, cipher_size); + ret = ffurl_write(whip->udp_uc, whip->buf, cipher_size); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); return ret; } @@ -2115,18 +2124,18 @@ static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) * * @return 0 if OK, AVERROR_xxx on error */ -static int rtc_create_rtp_muxer(AVFormatContext *s) +static int create_rtp_muxer(AVFormatContext *s) { int ret, i, is_video, buffer_size, max_packet_size; AVFormatContext *rtp_ctx = NULL; AVDictionary *opts = NULL; uint8_t *buffer = NULL; char buf[64]; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); if (!rtp_format) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); ret = AVERROR(ENOSYS); goto end; } @@ -2134,7 +2143,7 @@ static int rtc_create_rtp_muxer(AVFormatContext *s) /* The UDP buffer size, may greater than MTU. */ buffer_size = MAX_UDP_BUFFER_SIZE; /* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. 
*/ - max_packet_size = rtc->pkt_size - DTLS_SRTP_CHECKSUM_LEN; + max_packet_size = whip->pkt_size - DTLS_SRTP_CHECKSUM_LEN; for (i = 0; i < s->nb_streams; i++) { rtp_ctx = avformat_alloc_context(); @@ -2187,14 +2196,14 @@ static int rtc_create_rtp_muxer(AVFormatContext *s) rtp_ctx->pb->av_class = &ff_avio_class; is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - snprintf(buf, sizeof(buf), "%d", is_video? rtc->video_payload_type : rtc->audio_payload_type); + snprintf(buf, sizeof(buf), "%d", is_video? whip->video_payload_type : whip->audio_payload_type); av_dict_set(&opts, "payload_type", buf, 0); - snprintf(buf, sizeof(buf), "%d", is_video? rtc->video_ssrc : rtc->audio_ssrc); + snprintf(buf, sizeof(buf), "%d", is_video? whip->video_ssrc : whip->audio_ssrc); av_dict_set(&opts, "ssrc", buf, 0); ret = avformat_write_header(rtp_ctx, &opts); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n"); goto end; } @@ -2204,18 +2213,18 @@ static int rtc_create_rtp_muxer(AVFormatContext *s) rtp_ctx = NULL; } - if (rtc->state < RTC_STATE_READY) - rtc->state = RTC_STATE_READY; - av_log(rtc, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " + if (whip->state < WHIP_STATE_READY) + whip->state = WHIP_STATE_READY; + av_log(whip, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", - rtc->state, buffer_size, max_packet_size, ELAPSED(rtc->rtc_starttime, av_gettime()), - ELAPSED(rtc->rtc_starttime, rtc->rtc_init_time), - ELAPSED(rtc->rtc_init_time, rtc->rtc_offer_time), - ELAPSED(rtc->rtc_offer_time, rtc->rtc_answer_time), - ELAPSED(rtc->rtc_answer_time, rtc->rtc_udp_time), - ELAPSED(rtc->rtc_udp_time, rtc->rtc_ice_time), - ELAPSED(rtc->rtc_ice_time, rtc->rtc_dtls_time), - ELAPSED(rtc->rtc_dtls_time, rtc->rtc_srtp_time)); + whip->state, buffer_size, 
max_packet_size, ELAPSED(whip->whip_starttime, av_gettime()), + ELAPSED(whip->whip_starttime, whip->whip_init_time), + ELAPSED(whip->whip_init_time, whip->whip_offer_time), + ELAPSED(whip->whip_offer_time, whip->whip_answer_time), + ELAPSED(whip->whip_answer_time, whip->whip_udp_time), + ELAPSED(whip->whip_udp_time, whip->whip_ice_time), + ELAPSED(whip->whip_ice_time, whip->whip_dtls_time), + ELAPSED(whip->whip_dtls_time, whip->whip_srtp_time)); end: if (rtp_ctx) @@ -2233,19 +2242,19 @@ static int rtc_create_rtp_muxer(AVFormatContext *s) * should expire and close the session immediately, so that publishers can republish * the stream quickly. */ -static int whip_dispose(AVFormatContext *s) +static int dispose_session(AVFormatContext *s) { int ret; char buf[MAX_URL_SIZE]; URLContext *whip_uc = NULL; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; - if (!rtc->whip_resource_url) + if (!whip->whip_resource_url) return 0; - ret = ffurl_alloc(&whip_uc, rtc->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); + ret = ffurl_alloc(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to alloc WHIP delete context: %s\n", s->url); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to alloc WHIP delete context: %s\n", s->url); goto end; } @@ -2253,7 +2262,7 @@ static int whip_dispose(AVFormatContext *s) av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); ret = ffurl_connect(whip_uc, NULL); if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", rtc->whip_resource_url); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", whip->whip_resource_url); goto end; } @@ -2264,58 +2273,18 @@ static int whip_dispose(AVFormatContext *s) break; } if (ret < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE url=%s\n", rtc->whip_resource_url); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE 
url=%s\n", whip->whip_resource_url); goto end; } } - av_log(rtc, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", rtc->whip_resource_url); + av_log(whip, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", whip->whip_resource_url); end: ffurl_closep(&whip_uc); return ret; } -static av_cold int rtc_init(AVFormatContext *s) -{ - int ret; - RTCContext *rtc = s->priv_data; - - if ((ret = whip_init(s)) < 0) - goto end; - - if ((ret = parse_codec(s)) < 0) - goto end; - - if ((ret = generate_sdp_offer(s)) < 0) - goto end; - - if ((ret = exchange_sdp(s)) < 0) - goto end; - - if ((ret = parse_answer(s)) < 0) - goto end; - - if ((ret = udp_connect(s)) < 0) - goto end; - - if ((ret = ice_dtls_handshake(s)) < 0) - goto end; - - if ((ret = setup_srtp(s)) < 0) - goto end; - - if ((ret = rtc_create_rtp_muxer(s)) < 0) - goto end; - -end: - if (ret < 0 && rtc->state < RTC_STATE_FAILED) - rtc->state = RTC_STATE_FAILED; - if (ret >= 0 && rtc->state >= RTC_STATE_FAILED && rtc->dtls_ret < 0) - ret = rtc->dtls_ret; - return ret; -} - /** * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses * the annexb format, it is necessary to manually insert encoder metadata before each @@ -2399,10 +2368,50 @@ static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt) return ret; } -static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) +static av_cold int whip_init(AVFormatContext *s) +{ + int ret; + WHIPContext *whip = s->priv_data; + + if ((ret = initialize(s)) < 0) + goto end; + + if ((ret = parse_codec(s)) < 0) + goto end; + + if ((ret = generate_sdp_offer(s)) < 0) + goto end; + + if ((ret = exchange_sdp(s)) < 0) + goto end; + + if ((ret = parse_answer(s)) < 0) + goto end; + + if ((ret = udp_connect(s)) < 0) + goto end; + + if ((ret = ice_dtls_handshake(s)) < 0) + goto end; + + if ((ret = setup_srtp(s)) < 0) + goto end; + + if ((ret = create_rtp_muxer(s)) < 0) + goto end; + +end: + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = 
WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + return ret; +} + +static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) { int ret; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; AVStream *st = s->streams[pkt->stream_index]; AVFormatContext *rtp_ctx = st->priv_data; @@ -2412,22 +2421,22 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) * Receive packets from the server such as ICE binding requests, DTLS messages, * and RTCP like PLI requests, then respond to them. */ - ret = ffurl_read(rtc->udp_uc, rtc->buf, sizeof(rtc->buf)); + ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); if (ret > 0) { - if (is_dtls_packet(rtc->buf, ret)) { - if ((ret = dtls_context_write(&rtc->dtls_ctx, rtc->buf, ret)) < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); + if (is_dtls_packet(whip->buf, ret)) { + if ((ret = dtls_context_write(&whip->dtls_ctx, whip->buf, ret)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); goto end; } } } else if (ret != AVERROR(EAGAIN)) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); goto end; } - if (rtc->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { + if (whip->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); goto end; } } @@ -2435,31 +2444,31 @@ static int rtc_write_packet(AVFormatContext *s, AVPacket *pkt) ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); if (ret < 0) { if (ret == AVERROR(EINVAL)) { - av_log(rtc, AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); + av_log(whip, 
AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); ret = 0; } else - av_log(rtc, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); goto end; } end: - if (ret < 0 && rtc->state < RTC_STATE_FAILED) - rtc->state = RTC_STATE_FAILED; - if (ret >= 0 && rtc->state >= RTC_STATE_FAILED && rtc->dtls_ret < 0) - ret = rtc->dtls_ret; - if (ret >= 0 && rtc->dtls_closed) + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + if (ret >= 0 && whip->dtls_closed) ret = AVERROR(EIO); return ret; } -static av_cold void rtc_deinit(AVFormatContext *s) +static av_cold void whip_deinit(AVFormatContext *s) { int i, ret; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; - ret = whip_dispose(s); + ret = dispose_session(s); if (ret < 0) - av_log(rtc, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); for (i = 0; i < s->nb_streams; i++) { AVFormatContext* rtp_ctx = s->streams[i]->priv_data; @@ -2472,42 +2481,42 @@ static av_cold void rtc_deinit(AVFormatContext *s) s->streams[i]->priv_data = NULL; } - av_freep(&rtc->sdp_offer); - av_freep(&rtc->sdp_answer); - av_freep(&rtc->whip_resource_url); - av_freep(&rtc->ice_ufrag_remote); - av_freep(&rtc->ice_pwd_remote); - av_freep(&rtc->ice_protocol); - av_freep(&rtc->ice_host); - av_freep(&rtc->authorization); - ffurl_closep(&rtc->udp_uc); - ff_srtp_free(&rtc->srtp_audio_send); - ff_srtp_free(&rtc->srtp_video_send); - ff_srtp_free(&rtc->srtp_rtcp_send); - ff_srtp_free(&rtc->srtp_recv); - dtls_context_deinit(&rtc->dtls_ctx); + av_freep(&whip->sdp_offer); + av_freep(&whip->sdp_answer); + av_freep(&whip->whip_resource_url); + av_freep(&whip->ice_ufrag_remote); + 
av_freep(&whip->ice_pwd_remote); + av_freep(&whip->ice_protocol); + av_freep(&whip->ice_host); + av_freep(&whip->authorization); + ffurl_closep(&whip->udp_uc); + ff_srtp_free(&whip->srtp_audio_send); + ff_srtp_free(&whip->srtp_video_send); + ff_srtp_free(&whip->srtp_rtcp_send); + ff_srtp_free(&whip->srtp_recv); + dtls_context_deinit(&whip->dtls_ctx); } -static int rtc_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) +static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) { int ret = 1, extradata_isom = 0; uint8_t *b = pkt->data; - RTCContext *rtc = s->priv_data; + WHIPContext *whip = s->priv_data; if (st->codecpar->codec_id == AV_CODEC_ID_H264) { extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); - av_log(rtc, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", + av_log(whip, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", b[0], b[1], b[2], b[3], b[4], extradata_isom); } else - rtc->h264_annexb_insert_sps_pps = 1; + whip->h264_annexb_insert_sps_pps = 1; } return ret; } -#define OFFSET(x) offsetof(RTCContext, x) +#define OFFSET(x) offsetof(WHIPContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, DEC }, @@ -2516,23 +2525,23 @@ static const AVOption options[] = { { NULL }, }; -static const AVClass rtc_muxer_class = { +static const AVClass whip_muxer_class = { .class_name = "WHIP muxer", .item_name = av_default_item_name, .option = options, .version = LIBAVUTIL_VERSION_INT, }; -const FFOutputFormat ff_rtc_muxer = { - .p.name = "rtc", 
+const FFOutputFormat ff_whip_muxer = { + .p.name = "whip", .p.long_name = NULL_IF_CONFIG_SMALL("WHIP(WebRTC-HTTP ingestion protocol) muxer"), .p.audio_codec = AV_CODEC_ID_OPUS, .p.video_codec = AV_CODEC_ID_H264, .p.flags = AVFMT_GLOBALHEADER | AVFMT_NOFILE, - .p.priv_class = &rtc_muxer_class, - .priv_data_size = sizeof(RTCContext), - .init = rtc_init, - .write_packet = rtc_write_packet, - .deinit = rtc_deinit, - .check_bitstream = rtc_check_bitstream, + .p.priv_class = &whip_muxer_class, + .priv_data_size = sizeof(WHIPContext), + .init = whip_init, + .write_packet = whip_write_packet, + .deinit = whip_deinit, + .check_bitstream = whip_check_bitstream, }; From 66a064fb2abb197e132dc52cc367a51d8517402a Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 21 Jun 2023 07:16:14 +0800 Subject: [PATCH 51/60] WHIP: Free buffer leak of pb for RTP muxer. --- libavformat/whip.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libavformat/whip.c b/libavformat/whip.c index d65cf4931bfc4..b137222f9d124 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -2476,6 +2476,12 @@ static av_cold void whip_deinit(AVFormatContext *s) continue; av_write_trailer(rtp_ctx); + /** + * Keep in mind that it is necessary to free the buffer of pb since we allocate + * it and pass it to pb using avio_alloc_context, while avio_context_free does + * not perform this action. + */ + av_freep(&rtp_ctx->pb->buffer); avio_context_free(&rtp_ctx->pb); avformat_free_context(rtp_ctx); s->streams[i]->priv_data = NULL; From 9c7a09165b38ee56f354a7cc63d4881adaadb857 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 21 Jun 2023 10:33:54 +0800 Subject: [PATCH 52/60] WHIP: Use options for passing parameters to the HTTP or UDP context. 
--- libavformat/whip.c | 63 +++++++++++++++++++++------------------------- 1 file changed, 29 insertions(+), 34 deletions(-) diff --git a/libavformat/whip.c b/libavformat/whip.c index b137222f9d124..ea789f0264340 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -1409,25 +1409,19 @@ static int exchange_sdp(AVFormatContext *s) WHIPContext *whip = s->priv_data; /* The URL context is an HTTP transport layer for the WHIP protocol. */ URLContext *whip_uc = NULL; + AVDictionary *opts = NULL; + char *hex_data = NULL; /* To prevent a crash during cleanup, always initialize it. */ av_bprint_init(&bp, 1, MAX_SDP_SIZE); - ret = ffurl_alloc(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to alloc HTTP context: %s\n", s->url); - goto end; - } - if (!whip->sdp_offer || !strlen(whip->sdp_offer)) { av_log(whip, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); ret = AVERROR(EINVAL); goto end; } - ret = snprintf(buf, sizeof(buf), - "Cache-Control: no-cache\r\n" - "Content-Type: application/sdp\r\n"); + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\nContent-Type: application/sdp\r\n"); if (whip->authorization) ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); if (ret <= 0 || ret >= sizeof(buf)) { @@ -1436,11 +1430,15 @@ static int exchange_sdp(AVFormatContext *s) goto end; } - av_opt_set(whip_uc->priv_data, "headers", buf, 0); - av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set_bin(whip_uc->priv_data, "post_data", whip->sdp_offer, (int)strlen(whip->sdp_offer), 0); + av_dict_set(&opts, "headers", buf, 0); + av_dict_set_int(&opts, "chunked_post", 0, 0); + + hex_data = av_mallocz(2 * strlen(whip->sdp_offer) + 1); + ff_data_to_hex(hex_data, whip->sdp_offer, strlen(whip->sdp_offer), 0); + av_dict_set(&opts, "post_data", hex_data, 0); - ret = ffurl_connect(whip_uc, NULL); + ret = ffurl_open_whitelist(&whip_uc, s->url, 
AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); if (ret < 0) { av_log(whip, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, whip->sdp_offer); goto end; @@ -1494,6 +1492,8 @@ static int exchange_sdp(AVFormatContext *s) end: ffurl_closep(&whip_uc); av_bprint_finalize(&bp, NULL); + av_dict_free(&opts); + av_freep(&hex_data); return ret; } @@ -1848,27 +1848,23 @@ static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_siz static int udp_connect(AVFormatContext *s) { int ret = 0; - char url[256], tmp[16]; + char url[256]; + AVDictionary *opts = NULL; WHIPContext *whip = s->priv_data; /* Build UDP URL and create the UDP context as transport. */ ff_url_join(url, sizeof(url), "udp", NULL, whip->ice_host, whip->ice_port, NULL); - ret = ffurl_alloc(&whip->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to open udp://%s:%d\n", whip->ice_host, whip->ice_port); - return ret; - } - av_opt_set(whip->udp_uc->priv_data, "connect", "1", 0); - av_opt_set(whip->udp_uc->priv_data, "fifo_size", "0", 0); + av_dict_set_int(&opts, "connect", 1, 0); + av_dict_set_int(&opts, "fifo_size", 0, 0); /* Set the max packet size to the buffer size. 
*/ - snprintf(tmp, sizeof(tmp), "%d", whip->pkt_size); - av_opt_set(whip->udp_uc->priv_data, "pkt_size", tmp, 0); + av_dict_set_int(&opts, "pkt_size", whip->pkt_size, 0); - ret = ffurl_connect(whip->udp_uc, NULL); + ret = ffurl_open_whitelist(&whip->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); if (ret < 0) { av_log(whip, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", whip->ice_host, whip->ice_port); - return ret; + goto end; } /* Make the socket non-blocking, set to READ and WRITE mode after connected */ @@ -1881,6 +1877,8 @@ static int udp_connect(AVFormatContext *s) av_log(whip, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); +end: + av_dict_free(&opts); return ret; } @@ -2247,20 +2245,16 @@ static int dispose_session(AVFormatContext *s) int ret; char buf[MAX_URL_SIZE]; URLContext *whip_uc = NULL; + AVDictionary *opts = NULL; WHIPContext *whip = s->priv_data; if (!whip->whip_resource_url) return 0; - ret = ffurl_alloc(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to alloc WHIP delete context: %s\n", s->url); - goto end; - } - - av_opt_set(whip_uc->priv_data, "chunked_post", "0", 0); - av_opt_set(whip_uc->priv_data, "method", "DELETE", 0); - ret = ffurl_connect(whip_uc, NULL); + av_dict_set_int(&opts, "chunked_post", 0, 0); + av_dict_set(&opts, "method", "DELETE", 0); + ret = ffurl_open_whitelist(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); if (ret < 0) { av_log(whip, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", whip->whip_resource_url); goto end; @@ -2282,6 +2276,7 @@ static int dispose_session(AVFormatContext *s) end: ffurl_closep(&whip_uc); + av_dict_free(&opts); 
return ret; } From de33fcc2e18347efc202089c5fa070993bc0f1cd Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 21 Jun 2023 10:36:59 +0800 Subject: [PATCH 53/60] WHIP: Enhance security by using BearToken for delete API. --- libavformat/whip.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/libavformat/whip.c b/libavformat/whip.c index ea789f0264340..1d23914b3a7fe 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -2251,6 +2251,16 @@ static int dispose_session(AVFormatContext *s) if (!whip->whip_resource_url) return 0; + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n"); + if (whip->authorization) + ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); + if (ret <= 0 || ret >= sizeof(buf)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); + ret = AVERROR(EINVAL); + goto end; + } + + av_dict_set(&opts, "headers", buf, 0); av_dict_set_int(&opts, "chunked_post", 0, 0); av_dict_set(&opts, "method", "DELETE", 0); ret = ffurl_open_whitelist(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, From 8971a1f82ec579b43d310dad0f9ac5183983b5e9 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 21 Jun 2023 11:03:03 +0800 Subject: [PATCH 54/60] WHIP: Support user specified certificate and key file. --- doc/muxers.texi | 6 +++ libavformat/whip.c | 95 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 88 insertions(+), 13 deletions(-) diff --git a/doc/muxers.texi b/doc/muxers.texi index b4c6704fed90b..489a584a99984 100644 --- a/doc/muxers.texi +++ b/doc/muxers.texi @@ -2885,6 +2885,12 @@ Default value is 1500. @item authorization @var{string} The optional Bearer token for WHIP Authorization. +@item cert_file @var{string} +The optional certificate file path for DTLS. + +@item key_file @var{string} +The optional private key file path for DTLS. 
+ @end table @c man end MUXERS diff --git a/libavformat/whip.c b/libavformat/whip.c index 1d23914b3a7fe..7299cf7187bd1 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -232,6 +232,9 @@ typedef struct DTLSContext { int error_code; char error_message[256]; + /* The certificate and private key used for DTLS handshake. */ + char* cert_file; + char* key_file; /** * The size of RTP packet, should generally be set to MTU. * Note that pion requires a smaller value, for example, 1200. @@ -421,6 +424,45 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, return retvalue; } +static int openssl_read_certificate(DTLSContext *ctx) +{ + int ret = 0; + FILE *fp_key = NULL, *fp_cert = NULL; + + fp_key = fopen(ctx->key_file, "r"); + if (!fp_key) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); + ret = AVERROR(EIO); + goto end; + } + + ctx->dtls_pkey = PEM_read_PrivateKey(fp_key, NULL, NULL, NULL); + if (!ctx->dtls_pkey) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); + ret = AVERROR(EIO); + goto end; + } + + fp_cert = fopen(ctx->cert_file, "r"); + if (!fp_cert) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open certificate file %s\n", ctx->cert_file); + ret = AVERROR(EIO); + goto end; + } + + ctx->dtls_cert = PEM_read_X509(fp_cert, NULL, NULL, NULL); + if (!ctx->dtls_cert) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); + ret = AVERROR(EIO); + goto end; + } + +end: + if (fp_key) fclose(fp_key); + if (fp_cert) fclose(fp_cert); + return ret; +} + static int openssl_dtls_gen_private_key(DTLSContext *ctx) { int ret = 0; @@ -635,7 +677,8 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) #if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x #if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 - SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); + if (ctx->dtls_eckey) + SSL_CTX_set_tmp_ecdh(dtls_ctx, 
ctx->dtls_eckey); #else SSL_CTX_set_ecdh_auto(dtls_ctx, 1); #endif @@ -741,16 +784,25 @@ static av_cold int dtls_context_init(DTLSContext *ctx) ctx->dtls_init_starttime = av_gettime(); - /* Generate a private key to ctx->dtls_pkey. */ - if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); - return ret; - } + if (ctx->cert_file && ctx->key_file) { + /* Read the private key and file from the file. */ + if ((ret = openssl_read_certificate(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", + ctx->cert_file, ctx->key_file); + return ret; + } + } else { + /* Generate a private key to ctx->dtls_pkey. */ + if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); + return ret; + } - /* Generate a self-signed certificate. */ - if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); - return ret; + /* Generate a self-signed certificate. */ + if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); + return ret; + } } if ((ret = openssl_dtls_init_context(ctx)) < 0) { @@ -881,6 +933,8 @@ static av_cold void dtls_context_deinit(DTLSContext *ctx) X509_free(ctx->dtls_cert); EVP_PKEY_free(ctx->dtls_pkey); av_freep(&ctx->dtls_fingerprint); + av_freep(&ctx->cert_file); + av_freep(&ctx->key_file); #if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ EC_KEY_free(ctx->dtls_eckey); #endif @@ -1009,6 +1063,9 @@ typedef struct WHIPContext { * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz */ char* authorization; + /* The certificate and private key used for DTLS handshake. 
*/ + char* cert_file; + char* key_file; } WHIPContext; /** @@ -1085,6 +1142,10 @@ static av_cold int initialize(AVFormatContext *s) whip->dtls_ctx.opaque = s; whip->dtls_ctx.on_state = dtls_context_on_state; whip->dtls_ctx.on_write = dtls_context_on_write; + if (whip->cert_file) + whip->dtls_ctx.cert_file = av_strdup(whip->cert_file); + if (whip->key_file) + whip->dtls_ctx.key_file = av_strdup(whip->key_file); if ((ret = dtls_context_init(&whip->dtls_ctx)) < 0) { av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); @@ -1434,6 +1495,10 @@ static int exchange_sdp(AVFormatContext *s) av_dict_set_int(&opts, "chunked_post", 0, 0); hex_data = av_mallocz(2 * strlen(whip->sdp_offer) + 1); + if (!hex_data) { + ret = AVERROR(ENOMEM); + goto end; + } ff_data_to_hex(hex_data, whip->sdp_offer, strlen(whip->sdp_offer), 0); av_dict_set(&opts, "post_data", hex_data, 0); @@ -2500,6 +2565,8 @@ static av_cold void whip_deinit(AVFormatContext *s) av_freep(&whip->ice_protocol); av_freep(&whip->ice_host); av_freep(&whip->authorization); + av_freep(&whip->cert_file); + av_freep(&whip->key_file); ffurl_closep(&whip->udp_uc); ff_srtp_free(&whip->srtp_audio_send); ff_srtp_free(&whip->srtp_video_send); @@ -2530,9 +2597,11 @@ static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket #define OFFSET(x) offsetof(WHIPContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { - { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, DEC }, - { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, DEC }, - { "authorization", "The optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { "handshake_timeout", "Timeout in milliseconds for ICE and DTLS handshake.", OFFSET(handshake_timeout), 
AV_OPT_TYPE_INT, { .i64 = 5000 }, -1, INT_MAX, DEC }, + { "pkt_size", "The maximum size, in bytes, of RTP packets that send out", OFFSET(pkt_size), AV_OPT_TYPE_INT, { .i64 = 1200 }, -1, INT_MAX, DEC }, + { "authorization", "The optional Bearer token for WHIP Authorization", OFFSET(authorization), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { "cert_file", "The optional certificate file path for DTLS", OFFSET(cert_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, + { "key_file", "The optional private key file path for DTLS", OFFSET(key_file), AV_OPT_TYPE_STRING, { .str = NULL }, 0, 0, DEC }, { NULL }, }; From 0bd6867b14fc711c78c7ead36ae3ab29187a7202 Mon Sep 17 00:00:00 2001 From: winlin Date: Wed, 5 Jul 2023 07:49:50 +0800 Subject: [PATCH 55/60] WHIP: Read DTLS certificate file by ffurl. --- libavformat/whip.c | 99 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/libavformat/whip.c b/libavformat/whip.c index 7299cf7187bd1..425e39925c3d8 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -48,6 +48,11 @@ */ #define MAX_SDP_SIZE 8192 +/** + * Maximum size limit of a certificate and private key size. + */ +#define MAX_CERTIFICATE_SIZE 8192 + /** * Maximum size of the buffer for sending and receiving UDP packets. * Please note that this size does not limit the size of the UDP packet that can be sent. @@ -152,6 +157,49 @@ /* Calculate the elapsed time from starttime to endtime in milliseconds. */ #define ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) +/** + * Read all data from the given URL url and store it in the given buffer bp. 
+ */ +static int url_read_all(AVFormatContext *s, const char *url, AVBPrint *bp) +{ + int ret = 0; + AVDictionary *opts = NULL; + URLContext *uc = NULL; + char buf[MAX_URL_SIZE]; + + ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to open url %s\n", url); + goto end; + } + + while (1) { + ret = ffurl_read(uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. */ + ret = 0; + break; + } + if (ret <= 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to read from url=%s, key is %s\n", url, bp->str); + goto end; + } + + av_bprintf(bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(bp)) { + av_log(s, AV_LOG_ERROR, "WHIP: Exceed max size %.*s, %s\n", ret, buf, bp->str); + ret = AVERROR(EIO); + goto end; + } + } + +end: + ffurl_closep(&uc); + av_dict_free(&opts); + return ret; +} + /* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ enum STUNAttr { STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request @@ -424,33 +472,50 @@ static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, return retvalue; } -static int openssl_read_certificate(DTLSContext *ctx) +static int openssl_read_certificate(AVFormatContext *s, DTLSContext *ctx) { int ret = 0; - FILE *fp_key = NULL, *fp_cert = NULL; + BIO *key_b = NULL, *cert_b = NULL; + AVBPrint key_bp, cert_bp; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&key_bp, 1, MAX_CERTIFICATE_SIZE); + av_bprint_init(&cert_bp, 1, MAX_CERTIFICATE_SIZE); - fp_key = fopen(ctx->key_file, "r"); - if (!fp_key) { + /* Read key file. 
*/ + ret = url_read_all(s, ctx->key_file, &key_bp); + if (ret < 0) { av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); - ret = AVERROR(EIO); goto end; } - ctx->dtls_pkey = PEM_read_PrivateKey(fp_key, NULL, NULL, NULL); + if ((key_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); + goto end; + } + + BIO_write(key_b, key_bp.str, key_bp.len); + ctx->dtls_pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); if (!ctx->dtls_pkey) { av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); ret = AVERROR(EIO); goto end; } - fp_cert = fopen(ctx->cert_file, "r"); - if (!fp_cert) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open certificate file %s\n", ctx->cert_file); - ret = AVERROR(EIO); + /* Read certificate. */ + ret = url_read_all(s, ctx->cert_file, &cert_bp); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open cert file %s\n", ctx->cert_file); + goto end; + } + + if ((cert_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); goto end; } - ctx->dtls_cert = PEM_read_X509(fp_cert, NULL, NULL, NULL); + BIO_write(cert_b, cert_bp.str, cert_bp.len); + ctx->dtls_cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); if (!ctx->dtls_cert) { av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); ret = AVERROR(EIO); @@ -458,8 +523,10 @@ static int openssl_read_certificate(DTLSContext *ctx) } end: - if (fp_key) fclose(fp_key); - if (fp_cert) fclose(fp_cert); + BIO_free(key_b); + av_bprint_finalize(&key_bp, NULL); + BIO_free(cert_b); + av_bprint_finalize(&cert_bp, NULL); return ret; } @@ -778,7 +845,7 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore, * there is no need to call it again. 
*/ -static av_cold int dtls_context_init(DTLSContext *ctx) +static av_cold int dtls_context_init(AVFormatContext *s, DTLSContext *ctx) { int ret = 0; @@ -786,7 +853,7 @@ static av_cold int dtls_context_init(DTLSContext *ctx) if (ctx->cert_file && ctx->key_file) { /* Read the private key and file from the file. */ - if ((ret = openssl_read_certificate(ctx)) < 0) { + if ((ret = openssl_read_certificate(s, ctx)) < 0) { av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", ctx->cert_file, ctx->key_file); return ret; @@ -1147,7 +1214,7 @@ static av_cold int initialize(AVFormatContext *s) if (whip->key_file) whip->dtls_ctx.key_file = av_strdup(whip->key_file); - if ((ret = dtls_context_init(&whip->dtls_ctx)) < 0) { + if ((ret = dtls_context_init(s, &whip->dtls_ctx)) < 0) { av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); return ret; } From 0b95312eb5a7756cc6692f83de1aef7dbd50efb6 Mon Sep 17 00:00:00 2001 From: winlin Date: Sat, 8 Jul 2023 12:43:46 +0800 Subject: [PATCH 56/60] WHIP: Free bio_in if ENOMEM. 
--- libavformat/whip.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/libavformat/whip.c b/libavformat/whip.c index 425e39925c3d8..929387e66ccad 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -730,7 +730,8 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); #endif if (!dtls_ctx) { - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto end; } #if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ @@ -738,7 +739,8 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", curves, openssl_get_error(ctx)); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + return ret; } #endif @@ -758,16 +760,19 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", ciphers, openssl_get_error(ctx)); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + return ret; } /* Setup the certificate. */ if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + return ret; } if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + return ret; } /* Server will send Certificate Request. 
*/ @@ -781,13 +786,15 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", profiles, openssl_get_error(ctx)); - return AVERROR(EINVAL); + ret = AVERROR(EINVAL); + return ret; } /* The dtls should not be created unless the dtls_ctx has been initialized. */ dtls = ctx->dtls = SSL_new(dtls_ctx); if (!dtls) { - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto end; } /* Setup the callback for logging. */ @@ -804,14 +811,16 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) DTLS_set_link_mtu(dtls, ctx->mtu); #endif - bio_in = ctx->bio_in = BIO_new(BIO_s_mem()); + bio_in = BIO_new(BIO_s_mem()); if (!bio_in) { - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto end; } bio_out = BIO_new(BIO_s_mem()); if (!bio_out) { - return AVERROR(ENOMEM); + ret = AVERROR(ENOMEM); + goto end; } /** @@ -835,8 +844,14 @@ static av_cold int openssl_dtls_init_context(DTLSContext *ctx) #endif BIO_set_callback_arg(bio_out, (char*)ctx); + ctx->bio_in = bio_in; SSL_set_bio(dtls, bio_in, bio_out); + /* Now the bio_in and bio_out are owned by dtls, so we should set them to NULL. */ + bio_in = bio_out = NULL; +end: + BIO_free(bio_in); + BIO_free(bio_out); return ret; } From 63e3a55921c633478e8433e7b4fa12d9f22b259f Mon Sep 17 00:00:00 2001 From: winlin Date: Thu, 24 Aug 2023 11:15:49 +0800 Subject: [PATCH 57/60] WHIP: Refine DTLS, extract DTLS APIs. --- libavformat/whip.c | 4095 ++++++++++++++++++++++---------------------- 1 file changed, 2073 insertions(+), 2022 deletions(-) diff --git a/libavformat/whip.c b/libavformat/whip.c index 929387e66ccad..d76bda48ec296 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -42,26 +42,6 @@ #include "network.h" #include "srtp.h" -/** - * Maximum size limit of a Session Description Protocol (SDP), - * be it an offer or answer. 
- */ -#define MAX_SDP_SIZE 8192 - -/** - * Maximum size limit of a certificate and private key size. - */ -#define MAX_CERTIFICATE_SIZE 8192 - -/** - * Maximum size of the buffer for sending and receiving UDP packets. - * Please note that this size does not limit the size of the UDP packet that can be sent. - * To set the limit for packet size, modify the `pkt_size` parameter. - * For instance, it is possible to set the UDP buffer to 4096 to send or receive packets, - * but please keep in mind that the `pkt_size` option limits the packet size to 1400. - */ -#define MAX_UDP_BUFFER_SIZE 4096 - /** * The size of the Secure Real-time Transport Protocol (SRTP) master key material * that is exported by Secure Sockets Layer (SSL) after a successful Datagram @@ -70,7 +50,6 @@ */ #define DTLS_SRTP_KEY_LEN 16 #define DTLS_SRTP_SALT_LEN 14 - /** * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum * and padding that is appended to the end of the packet. To calculate the maximum @@ -78,6 +57,44 @@ * this size from the `pkt_size`. */ #define DTLS_SRTP_CHECKSUM_LEN 16 +/* DTLS init state. */ +#define DTLS_STATE_NONE 0 +/* Whether DTLS handshake is finished. */ +#define DTLS_STATE_FINISHED 1 +/* Whether DTLS session is closed. */ +#define DTLS_STATE_CLOSED 2 +/* Whether DTLS handshake is failed. 
*/ +#define DTLS_STATE_FAILED 3 +typedef int (*dtls_fn_on_state)(void *ctx, void *opaque, int state, const char* type, const char* desc); +typedef int (*dtls_fn_on_write)(void *ctx, void *opaque, char* data, int size); + +static void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file); +static av_cold int dtls_context_init(AVFormatContext *s, void *ctx); +static int dtls_context_start(void *ctx); +static int dtls_context_write(void *ctx, char* buf, int size); +static av_cold void dtls_context_deinit(void *ctx); + +static int dtls_can_handle_packet(uint8_t *b, int size); +static char* dtls_get_fingerprint(void *ctx); +static uint8_t* dtls_get_srtp_client_key(void *ctx); +static uint8_t* dtls_get_srtp_server_key(void *ctx); +static uint8_t* dtls_get_srtp_client_salt(void *ctx); +static uint8_t* dtls_get_srtp_server_salt(void *ctx); + +/** + * Maximum size limit of a Session Description Protocol (SDP), + * be it an offer or answer. + */ +#define MAX_SDP_SIZE 8192 + +/** + * Maximum size of the buffer for sending and receiving UDP packets. + * Please note that this size does not limit the size of the UDP packet that can be sent. + * To set the limit for packet size, modify the `pkt_size` parameter. + * For instance, it is possible to set the UDP buffer to 4096 to send or receive packets, + * but please keep in mind that the `pkt_size` option limits the packet size to 1400. + */ +#define MAX_UDP_BUFFER_SIZE 4096 /** * When sending ICE or DTLS messages, responses are received via UDP. However, the peer @@ -93,28 +110,6 @@ /* The magic cookie for Session Traversal Utilities for NAT (STUN) messages. */ #define STUN_MAGIC_COOKIE 0x2112A442 -/** - * The DTLS content type. 
- * See https://tools.ietf.org/html/rfc2246#section-6.2.1 - * change_cipher_spec(20), alert(21), handshake(22), application_data(23) - */ -#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 - -/** - * The DTLS record layer header has a total size of 13 bytes, consisting of - * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), - * SequenceNumber (6 bytes), and Length (2 bytes). - * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 - */ -#define DTLS_RECORD_LAYER_HEADER_LEN 13 - -/** - * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. - * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer - */ -#define DTLS_VERSION_10 0xfeff -#define DTLS_VERSION_12 0xfefd - /* Referring to Chrome's definition of RTP payload types. */ #define WHIP_RTP_PAYLOAD_TYPE_H264 106 #define WHIP_RTP_PAYLOAD_TYPE_OPUS 111 @@ -157,49 +152,6 @@ /* Calculate the elapsed time from starttime to endtime in milliseconds. */ #define ELAPSED(starttime, endtime) ((int)(endtime - starttime) / 1000) -/** - * Read all data from the given URL url and store it in the given buffer bp. - */ -static int url_read_all(AVFormatContext *s, const char *url, AVBPrint *bp) -{ - int ret = 0; - AVDictionary *opts = NULL; - URLContext *uc = NULL; - char buf[MAX_URL_SIZE]; - - ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, &s->interrupt_callback, - &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to open url %s\n", url); - goto end; - } - - while (1) { - ret = ffurl_read(uc, buf, sizeof(buf)); - if (ret == AVERROR_EOF) { - /* Reset the error because we read all response as answer util EOF. 
*/ - ret = 0; - break; - } - if (ret <= 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to read from url=%s, key is %s\n", url, bp->str); - goto end; - } - - av_bprintf(bp, "%.*s", ret, buf); - if (!av_bprint_is_complete(bp)) { - av_log(s, AV_LOG_ERROR, "WHIP: Exceed max size %.*s, %s\n", ret, buf, bp->str); - ret = AVERROR(EIO); - goto end; - } - } - -end: - ffurl_closep(&uc); - av_dict_free(&opts); - return ret; -} - /* STUN Attribute, comprehension-required range (0x0000-0x7FFF) */ enum STUNAttr { STUN_ATTR_USERNAME = 0x0006, /// shared secret response/bind request @@ -208,1363 +160,1372 @@ enum STUNAttr { STUN_ATTR_FINGERPRINT = 0x8028, /// rfc5389 }; -enum DTLSState { - DTLS_STATE_NONE, +enum WHIPState { + WHIP_STATE_NONE, - /* Whether DTLS handshake is finished. */ - DTLS_STATE_FINISHED, - /* Whether DTLS session is closed. */ - DTLS_STATE_CLOSED, - /* Whether DTLS handshake is failed. */ - DTLS_STATE_FAILED, + /* The initial state. */ + WHIP_STATE_INIT, + /* The muxer has sent the offer to the peer. */ + WHIP_STATE_OFFER, + /* The muxer has received the answer from the peer. */ + WHIP_STATE_ANSWER, + /** + * After parsing the answer received from the peer, the muxer negotiates the abilities + * in the offer that it generated. + */ + WHIP_STATE_NEGOTIATED, + /* The muxer has connected to the peer via UDP. */ + WHIP_STATE_UDP_CONNECTED, + /* The muxer has sent the ICE request to the peer. */ + WHIP_STATE_ICE_CONNECTING, + /* The muxer has received the ICE response from the peer. */ + WHIP_STATE_ICE_CONNECTED, + /* The muxer has finished the DTLS handshake with the peer. */ + WHIP_STATE_DTLS_FINISHED, + /* The muxer has finished the SRTP setup. */ + WHIP_STATE_SRTP_FINISHED, + /* The muxer is ready to send/receive media frames. */ + WHIP_STATE_READY, + /* The muxer is failed. 
*/ + WHIP_STATE_FAILED, }; -typedef struct DTLSContext DTLSContext; -typedef int (*DTLSContext_on_state_fn)(DTLSContext *ctx, enum DTLSState state, const char* type, const char* desc); -typedef int (*DTLSContext_on_write_fn)(DTLSContext *ctx, char* data, int size); - -typedef struct DTLSContext { +typedef struct WHIPContext { AVClass *av_class; - /* For callback. */ - DTLSContext_on_state_fn on_state; - DTLSContext_on_write_fn on_write; - void* opaque; + /* The state of the RTC connection. */ + enum WHIPState state; + /* The callback return value for DTLS. */ + int dtls_ret; + int dtls_closed; - /* For logging. */ - AVClass *log_avcl; + /* Parameters for the input audio and video codecs. */ + AVCodecParameters *audio_par; + AVCodecParameters *video_par; - /* The DTLS context. */ - SSL_CTX *dtls_ctx; - SSL *dtls; - /* The DTLS BIOs. */ - BIO *bio_in; + /** + * The h264_mp4toannexb Bitstream Filter (BSF) bypasses the AnnexB packet; + * therefore, it is essential to insert the SPS and PPS before each IDR frame + * in such cases. + */ + int h264_annexb_insert_sps_pps; - /* The private key for DTLS handshake. */ - EVP_PKEY *dtls_pkey; - /* The EC key for DTLS handshake. */ - EC_KEY* dtls_eckey; - /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ - X509 *dtls_cert; - /* The fingerprint of certificate, used in SDP offer. */ - char *dtls_fingerprint; + /* The random number generator. */ + AVLFG rnd; + /* The ICE username and pwd fragment generated by the muxer. */ + char ice_ufrag_local[9]; + char ice_pwd_local[33]; + /* The SSRC of the audio and video stream, generated by the muxer. */ + uint32_t audio_ssrc; + uint32_t video_ssrc; + /* The PT(Payload Type) of stream, generated by the muxer. */ + uint8_t audio_payload_type; + uint8_t video_payload_type; /** - * This represents the material used to build the SRTP master key. 
It is - * generated by DTLS and has the following layout: - * 16B 16B 14B 14B - * client_key | server_key | client_salt | server_salt + * This is the SDP offer generated by the muxer based on the codec parameters, + * DTLS, and ICE information. */ - uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; + char *sdp_offer; - /* Whether the DTLS is done at least for us. */ - int dtls_done_for_us; - /* Whether the SRTP key is exported. */ - int dtls_srtp_key_exported; - /* The number of packets retransmitted for DTLS. */ - int dtls_arq_packets; + /* The ICE username and pwd from remote server. */ + char *ice_ufrag_remote; + char *ice_pwd_remote; /** - * This is the last DTLS content type and handshake type that is used to detect - * the ARQ packet. + * This represents the ICE candidate protocol, priority, host and port. + * Currently, we only support one candidate and choose the first UDP candidate. + * However, we plan to support multiple candidates in the future. */ - uint8_t dtls_last_content_type; - uint8_t dtls_last_handshake_type; + char *ice_protocol; + char *ice_host; + int ice_port; + + /* The SDP answer received from the WebRTC server. */ + char *sdp_answer; + /* The resource URL returned in the Location header of WHIP HTTP response. */ + char *whip_resource_url; /* These variables represent timestamps used for calculating and tracking the cost. */ - int64_t dtls_init_starttime; - int64_t dtls_init_endtime; - int64_t dtls_handshake_starttime; - int64_t dtls_handshake_endtime; + int64_t whip_starttime; + /* */ + int64_t whip_init_time; + int64_t whip_offer_time; + int64_t whip_answer_time; + int64_t whip_udp_time; + int64_t whip_ice_time; + int64_t whip_dtls_time; + int64_t whip_srtp_time; - /* Helper for get error code and message. */ - int error_code; - char error_message[256]; + /* The DTLS context. */ + void *dtls_ctx; - /* The certificate and private key used for DTLS handshake. 
*/ - char* cert_file; - char* key_file; + /* The SRTP send context, to encrypt outgoing packets. */ + SRTPContext srtp_audio_send; + SRTPContext srtp_video_send; + SRTPContext srtp_rtcp_send; + /* The SRTP receive context, to decrypt incoming packets. */ + SRTPContext srtp_recv; + + /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ + URLContext *udp_uc; + /* The buffer for UDP transmission. */ + char buf[MAX_UDP_BUFFER_SIZE]; + + /* The timeout in milliseconds for ICE and DTLS handshake. */ + int handshake_timeout; /** * The size of RTP packet, should generally be set to MTU. * Note that pion requires a smaller value, for example, 1200. */ - int mtu; -} DTLSContext; + int pkt_size; + /** + * The optional Bearer token for WHIP Authorization. + * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz + */ + char* authorization; + /* The certificate and private key used for DTLS handshake. */ + char* cert_file; + char* key_file; +} WHIPContext; /** - * Whether the packet is a DTLS packet. + * When DTLS state change. */ -static int is_dtls_packet(uint8_t *b, int size) { - uint16_t version = AV_RB16(&b[1]); - return size > DTLS_RECORD_LAYER_HEADER_LEN && - b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && - (version == DTLS_VERSION_10 || version == DTLS_VERSION_12); -} - -/** - * Retrieves the error message for the latest OpenSSL error. - * - * This function retrieves the error code from the thread's error queue, converts it - * to a human-readable string, and stores it in the DTLSContext's error_message field. - * The error queue is then cleared using ERR_clear_error(). 
- */ -static const char* openssl_get_error(DTLSContext *ctx) +static int dtls_context_on_state(void *pctx, void *opaque, int state, const char* type, const char* desc) { - int r2 = ERR_get_error(); - if (r2) - ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); - else - ctx->error_message[0] = '\0'; + int ret = 0; + AVFormatContext *s = opaque; + WHIPContext *whip = s->priv_data; - ERR_clear_error(); - return ctx->error_message; + if (state == DTLS_STATE_CLOSED) { + whip->dtls_closed = 1; + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", + type ? type : "", desc ? desc : "", ELAPSED(whip->whip_starttime, av_gettime())); + return ret; + } + + if (state == DTLS_STATE_FAILED) { + whip->state = WHIP_STATE_FAILED; + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", + type ? type : "", desc ? desc : ""); + whip->dtls_ret = AVERROR(EIO); + return ret; + } + + if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { + whip->state = WHIP_STATE_DTLS_FINISHED; + whip->whip_dtls_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS handshake, elapsed=%dms\n", ELAPSED(whip->whip_starttime, av_gettime())); + return ret; + } + + return ret; } /** - * Get the error code for the given SSL operation result. - * - * This function retrieves the error code for the given SSL operation result - * and stores the error message in the DTLS context if an error occurred. - * It also clears the error queue. + * When DTLS write data. 
*/ -static int openssl_ssl_get_error(DTLSContext *ctx, int ret) +static int dtls_context_on_write(void *ctx, void *opaque, char* data, int size) { - SSL *dtls = ctx->dtls; - int r1 = SSL_ERROR_NONE; + AVFormatContext *s = opaque; + WHIPContext *whip = s->priv_data; - if (ret <= 0) - r1 = SSL_get_error(dtls, ret); + if (!whip->udp_uc) { + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); + return AVERROR(EIO); + } - openssl_get_error(ctx); - return r1; + return ffurl_write(whip->udp_uc, data, size); } /** - * Callback function to print the OpenSSL SSL status. + * Initialize and check the options for the WebRTC muxer. */ -static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) +static av_cold int initialize(AVFormatContext *s) { - int w, r1, is_fatal, is_warning, is_close_notify; - const char *method = "undefined", *alert_type, *alert_desc; - enum DTLSState state; - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - - w = where & ~SSL_ST_MASK; - if (w & SSL_ST_CONNECT) - method = "SSL_connect"; - else if (w & SSL_ST_ACCEPT) - method = "SSL_accept"; + int ret, ideal_pkt_size = 532; + WHIPContext *whip = s->priv_data; + uint32_t seed; - r1 = openssl_ssl_get_error(ctx, r0); - if (where & SSL_CB_LOOP) { - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); - } else if (where & SSL_CB_ALERT) { - method = (where & SSL_CB_READ) ? "read":"write"; + whip->whip_starttime = av_gettime(); - alert_type = SSL_alert_type_string_long(r0); - alert_desc = SSL_alert_desc_string(r0); + /* Initialize the random number generator. 
*/ + seed = av_get_random_seed(); + av_lfg_init(&whip->rnd, seed); - if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) - av_log(ctx, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); - else - av_log(ctx, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d %s\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1, ctx->error_message); + /* Use the same logging context as AV format. */ + whip->dtls_ctx = dtls_context_new(whip->av_class, s, whip->pkt_size, dtls_context_on_state, dtls_context_on_write, whip->cert_file, whip->key_file); - /** - * Notify the DTLS to handle the ALERT message, which maybe means media connection disconnect. - * CN(Close Notify) is sent when peer close the PeerConnection. fatal, IP(Illegal Parameter) - * is sent when DTLS failed. - */ - is_fatal = !av_strncasecmp(alert_type, "fatal", 5); - is_warning = !av_strncasecmp(alert_type, "warning", 7); - is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); - state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? 
DTLS_STATE_CLOSED : DTLS_STATE_NONE); - if (state != DTLS_STATE_NONE && ctx->on_state) { - av_log(ctx, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", - ctx, state, is_fatal, is_warning, is_close_notify); - ctx->on_state(ctx, state, alert_type, alert_desc); - } - } else if (where & SSL_CB_EXIT) { - if (!r0) - av_log(ctx, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); - else if (r0 < 0) - if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) - av_log(ctx, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d %s\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1, ctx->error_message); - else - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + if ((ret = dtls_context_init(s, whip->dtls_ctx)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); + return ret; } -} -static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming) -{ - uint8_t content_type = 0; - uint16_t size = 0; - uint8_t handshake_type = 0; + if (whip->pkt_size < ideal_pkt_size) + av_log(whip, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", + whip->pkt_size, ideal_pkt_size); - /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ - if (length >= 1) - content_type = AV_RB8(&data[0]); - if (length >= 13) - size = AV_RB16(&data[11]); - if (length >= 14) - handshake_type = AV_RB8(&data[13]); + if (whip->state < WHIP_STATE_INIT) + whip->state = WHIP_STATE_INIT; + whip->whip_init_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, seed=%d, elapsed=%dms\n", + whip->state, whip->handshake_timeout, 
whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime())); - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Trace %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", - (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, - content_type, size, handshake_type); + return 0; } /** - * Always return 1 to accept any certificate. This is because we allow the peer to - * use a temporary self-signed certificate for DTLS. + * When duplicating a stream, the demuxer has already set the extradata, profile, and + * level of the par. Keep in mind that this function will not be invoked since the + * profile and level are set. + * + * When utilizing an encoder, such as libx264, to encode a stream, the extradata in + * par->extradata contains the SPS, which includes profile and level information. + * However, the profile and level of par remain unspecified. Therefore, it is necessary + * to extract the profile and level data from the extradata and assign it to the par's + * profile and level. Keep in mind that AVFMT_GLOBALHEADER must be enabled; otherwise, + * the extradata will remain empty. */ -static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) { - return 1; -} + int ret = 0; + const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; + H264SPS seq, *const sps = &seq; + uint32_t state; + WHIPContext *whip = s->priv_data; -/** - * DTLS BIO read callback. - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x -static long openssl_dtls_bio_out_callback(BIO* b, int oper, const char* argp, int argi, long argl, long retvalue) -#else -static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, size_t len, int argi, long argl, int retvalue, size_t *processed) -#endif -{ - int ret, req_size = argi, is_arq = 0; - uint8_t content_type, handshake_type; - uint8_t *data = (uint8_t*)argp; - DTLSContext* ctx = b ? 
(DTLSContext*)BIO_get_callback_arg(b) : NULL; + if (par->codec_id != AV_CODEC_ID_H264) + return ret; -#if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x - req_size = len; - av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", - b, oper, argp, len, argi, argl, retvalue, processed, req_size); -#else - av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", - b, oper, argp, argi, argl, retvalue, req_size); -#endif + if (par->profile != FF_PROFILE_UNKNOWN && par->level != FF_LEVEL_UNKNOWN) + return ret; - if (oper != BIO_CB_WRITE || !argp || req_size <= 0) - return retvalue; + if (!par->extradata || par->extradata_size <= 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", + par->extradata, par->extradata_size); + return AVERROR(EINVAL); + } - openssl_dtls_state_trace(ctx, data, req_size, 0); - ret = ctx->on_write ? ctx->on_write(ctx, data, req_size) : 0; - content_type = req_size > 0 ? AV_RB8(&data[0]) : 0; - handshake_type = req_size > 13 ? 
AV_RB8(&data[13]) : 0; + while (1) { + r = avpriv_find_start_code(r, end, &state); + if (r >= end) + break; - is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; - ctx->dtls_arq_packets += is_arq; - ctx->dtls_last_content_type = content_type; - ctx->dtls_last_handshake_type = handshake_type; + r1 = ff_avc_find_startcode(r, end); + if ((state & 0x1f) == H264_NAL_SPS) { + ret = ff_avc_decode_sps(sps, r, r1 - r); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", + state, (int)(r1 - r)); + return ret; + } - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", - oper, content_type, handshake_type, req_size, is_arq); - return ret; + av_log(whip, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", + sps->profile_idc, sps->level_idc); + par->profile = sps->profile_idc; + par->level = sps->level_idc; + } + + r = r1; } - return retvalue; + return ret; } -static int openssl_read_certificate(AVFormatContext *s, DTLSContext *ctx) +/** + * Parses video SPS/PPS from the extradata of codecpar and checks the codec. + * Currently only supports video(h264) and audio(opus). Note that only baseline + * and constrained baseline profiles of h264 are supported. + * + * If the profile is less than 0, the function considers the profile as baseline. + * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting + * desktop and transcoding. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + * + * TODO: FIXME: There is an issue with the timestamp of OPUS audio, especially when + * the input is an MP4 file. The timestamp deviates from the expected value of 960, + * causing Chrome to play the audio stream with noise. 
This problem can be replicated + * by transcoding a specific file into MP4 format and publishing it using the WHIP + * muxer. However, when directly transcoding and publishing through the WHIP muxer, + * the issue is not present, and the audio timestamp remains consistent. The root + * cause is still unknown, and this comment has been added to address this issue + * in the future. Further research is needed to resolve the problem. + */ +static int parse_codec(AVFormatContext *s) { - int ret = 0; - BIO *key_b = NULL, *cert_b = NULL; - AVBPrint key_bp, cert_bp; - - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&key_bp, 1, MAX_CERTIFICATE_SIZE); - av_bprint_init(&cert_bp, 1, MAX_CERTIFICATE_SIZE); - - /* Read key file. */ - ret = url_read_all(s, ctx->key_file, &key_bp); - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); - goto end; - } + int i, ret = 0; + WHIPContext *whip = s->priv_data; - if ((key_b = BIO_new(BIO_s_mem())) == NULL) { - ret = AVERROR(ENOMEM); - goto end; - } - - BIO_write(key_b, key_bp.str, key_bp.len); - ctx->dtls_pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); - if (!ctx->dtls_pkey) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); - ret = AVERROR(EIO); - goto end; - } - - /* Read certificate. 
*/ - ret = url_read_all(s, ctx->cert_file, &cert_bp); - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open cert file %s\n", ctx->cert_file); - goto end; - } - - if ((cert_b = BIO_new(BIO_s_mem())) == NULL) { - ret = AVERROR(ENOMEM); - goto end; - } - - BIO_write(cert_b, cert_bp.str, cert_bp.len); - ctx->dtls_cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); - if (!ctx->dtls_cert) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); - ret = AVERROR(EIO); - goto end; - } - -end: - BIO_free(key_b); - av_bprint_finalize(&key_bp, NULL); - BIO_free(cert_b); - av_bprint_finalize(&cert_bp, NULL); - return ret; -} + for (i = 0; i < s->nb_streams; i++) { + AVCodecParameters *par = s->streams[i]->codecpar; + const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); + switch (par->codec_type) { + case AVMEDIA_TYPE_VIDEO: + if (whip->video_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + whip->video_par = par; -static int openssl_dtls_gen_private_key(DTLSContext *ctx) -{ - int ret = 0; + if (par->codec_id != AV_CODEC_ID_H264) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported video codec %s by RTC, choose h264\n", + desc ? desc->name : "unknown"); + return AVERROR_PATCHWELCOME; + } - /** - * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, - * not NID_secp256k1 or secp256k1 in string. 
- * - * TODO: Should choose the curves in ClientHello.supported_groups, for example: - * Supported Group: x25519 (0x001d) - * Supported Group: secp256r1 (0x0017) - * Supported Group: secp384r1 (0x0018) - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_GROUP *ecgroup = NULL; - int curve = NID_X9_62_prime256v1; -#else - const char *curve = SN_X9_62_prime256v1; -#endif + if (par->video_delay > 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); + return AVERROR_PATCHWELCOME; + } -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - ctx->dtls_pkey = EVP_PKEY_new(); - ctx->dtls_eckey = EC_KEY_new(); - ecgroup = EC_GROUP_new_by_curve_name(curve); - if (!ecgroup) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Create EC group by curve=%d failed, %s", curve, openssl_get_error(ctx)); - goto einval_end; - } + if ((ret = parse_profile_level(s, par)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); + return AVERROR(EINVAL); + } -#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x - /* For openssl 1.0, we must set the group parameters, so that cert is ok. 
*/ - EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); -#endif + if (par->profile == FF_PROFILE_UNKNOWN) { + av_log(whip, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); + return AVERROR(EINVAL); + } + if (par->level == FF_LEVEL_UNKNOWN) { + av_log(whip, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); + return AVERROR(EINVAL); + } + break; + case AVMEDIA_TYPE_AUDIO: + if (whip->audio_par) { + av_log(whip, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); + return AVERROR(EINVAL); + } + whip->audio_par = par; - if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_set_group failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + if (par->codec_id != AV_CODEC_ID_OPUS) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", + desc ? desc->name : "unknown"); + return AVERROR_PATCHWELCOME; + } - if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_generate_key failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + if (par->ch_layout.nb_channels != 2) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", + par->ch_layout.nb_channels); + return AVERROR_PATCHWELCOME; + } - if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } -#else - ctx->dtls_pkey = EVP_EC_gen(curve); - if (!ctx->dtls_pkey) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, openssl_get_error(ctx)); - goto einval_end; + if (par->sample_rate != 48000) { + av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); + return AVERROR_PATCHWELCOME; + } + break; + default: + 
av_log(whip, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", + av_get_media_type_string(par->codec_type), i); + return AVERROR_PATCHWELCOME; + } } -#endif - goto end; -einval_end: - ret = AVERROR(EINVAL); -end: -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_GROUP_free(ecgroup); -#endif return ret; } -static int openssl_dtls_gen_certificate(DTLSContext *ctx) +/** + * Generate SDP offer according to the codec parameters, DTLS and ICE information. + * + * Note that we don't use av_sdp_create to generate SDP offer because it doesn't + * support DTLS and ICE information. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int generate_sdp_offer(AVFormatContext *s) { - int ret = 0, serial, expire_day, i, n = 0; - AVBPrint fingerprint; - unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "lavf"; - X509_NAME* subject = NULL; - X509 *dtls_cert = NULL; + int ret = 0, profile, level, profile_iop; + const char *acodec_name = NULL, *vcodec_name = NULL; + AVBPrint bp; + WHIPContext *whip = s->priv_data; /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); + av_bprint_init(&bp, 1, MAX_SDP_SIZE); - dtls_cert = ctx->dtls_cert = X509_new(); - if (!dtls_cert) { - goto enomem_end; + if (whip->sdp_offer) { + av_log(whip, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); + ret = AVERROR(EINVAL); + goto end; } - // TODO: Support non-self-signed certificate, for example, load from a file. 
- subject = X509_NAME_new(); - if (!subject) { - goto enomem_end; - } + snprintf(whip->ice_ufrag_local, sizeof(whip->ice_ufrag_local), "%08x", + av_lfg_get(&whip->rnd)); + snprintf(whip->ice_pwd_local, sizeof(whip->ice_pwd_local), "%08x%08x%08x%08x", + av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), + av_lfg_get(&whip->rnd)); - serial = (int)av_get_random_seed(); - if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set serial, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + whip->audio_ssrc = av_lfg_get(&whip->rnd); + whip->video_ssrc = av_lfg_get(&whip->rnd); - if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set CN, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + whip->audio_payload_type = WHIP_RTP_PAYLOAD_TYPE_OPUS; + whip->video_payload_type = WHIP_RTP_PAYLOAD_TYPE_H264; - if (X509_set_issuer_name(dtls_cert, subject) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set issuer, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - if (X509_set_subject_name(dtls_cert, subject) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set subject name, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + av_bprintf(&bp, "" + "v=0\r\n" + "o=FFmpeg %s 2 IN IP4 %s\r\n" + "s=FFmpegPublishSession\r\n" + "t=0 0\r\n" + "a=group:BUNDLE 0 1\r\n" + "a=extmap-allow-mixed\r\n" + "a=msid-semantic: WMS\r\n", + WHIP_SDP_SESSION_ID, + WHIP_SDP_CREATOR_IP); - expire_day = 365; - if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notBefore, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notAfter, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + if (whip->audio_par) { + if 
(whip->audio_par->codec_id == AV_CODEC_ID_OPUS) + acodec_name = "opus"; - if (X509_set_version(dtls_cert, 2) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set version, %s\n", openssl_get_error(ctx)); - goto einval_end; + av_bprintf(&bp, "" + "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:passive\r\n" + "a=mid:0\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg audio\r\n" + "a=rtcp-mux\r\n" + "a=rtpmap:%u %s/%d/%d\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg audio\r\n", + whip->audio_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + dtls_get_fingerprint(whip->dtls_ctx), + whip->audio_payload_type, + acodec_name, + whip->audio_par->sample_rate, + whip->audio_par->ch_layout.nb_channels, + whip->audio_ssrc, + whip->audio_ssrc); } - if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set public key, %s\n", openssl_get_error(ctx)); - goto einval_end; - } + if (whip->video_par) { + profile_iop = profile = whip->video_par->profile; + level = whip->video_par->level; + if (whip->video_par->codec_id == AV_CODEC_ID_H264) { + vcodec_name = "H264"; + profile_iop &= FF_PROFILE_H264_CONSTRAINED; + profile &= (~FF_PROFILE_H264_CONSTRAINED); + } - if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to sign certificate, %s\n", openssl_get_error(ctx)); - goto einval_end; + av_bprintf(&bp, "" + "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" + "c=IN IP4 0.0.0.0\r\n" + "a=ice-ufrag:%s\r\n" + "a=ice-pwd:%s\r\n" + "a=fingerprint:sha-256 %s\r\n" + "a=setup:passive\r\n" + "a=mid:1\r\n" + "a=sendonly\r\n" + "a=msid:FFmpeg video\r\n" + "a=rtcp-mux\r\n" + "a=rtcp-rsize\r\n" + "a=rtpmap:%u %s/90000\r\n" + "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" + "a=ssrc:%u cname:FFmpeg\r\n" + "a=ssrc:%u msid:FFmpeg video\r\n", + 
whip->video_payload_type, + whip->ice_ufrag_local, + whip->ice_pwd_local, + dtls_get_fingerprint(whip->dtls_ctx), + whip->video_payload_type, + vcodec_name, + whip->video_payload_type, + profile, + profile_iop, + level, + whip->video_ssrc, + whip->video_ssrc); } - /* Generate the fingerpint of certficate. */ - if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate fingerprint, %s\n", openssl_get_error(ctx)); - goto eio_end; - } - for (i = 0; i < n; i++) { - av_bprintf(&fingerprint, "%02X", md[i]); - if (i < n - 1) - av_bprintf(&fingerprint, ":"); - } - if (!fingerprint.str || !strlen(fingerprint.str)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Fingerprint is empty\n"); - goto einval_end; + if (!av_bprint_is_complete(&bp)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); + ret = AVERROR(EIO); + goto end; } - ctx->dtls_fingerprint = av_strdup(fingerprint.str); - if (!ctx->dtls_fingerprint) { - goto enomem_end; + whip->sdp_offer = av_strdup(bp.str); + if (!whip->sdp_offer) { + ret = AVERROR(ENOMEM); + goto end; } - goto end; -enomem_end: - ret = AVERROR(ENOMEM); - goto end; -eio_end: - ret = AVERROR(EIO); - goto end; -einval_end: - ret = AVERROR(EINVAL); + if (whip->state < WHIP_STATE_OFFER) + whip->state = WHIP_STATE_OFFER; + whip->whip_offer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); + end: - X509_NAME_free(subject); - av_bprint_finalize(&fingerprint, NULL); + av_bprint_finalize(&bp, NULL); return ret; } /** - * Initializes DTLS context using ECDHE. + * Exchange SDP offer with WebRTC peer to get the answer. 
+ * + * @return 0 if OK, AVERROR_xxx on error */ -static av_cold int openssl_dtls_init_context(DTLSContext *ctx) +static int exchange_sdp(AVFormatContext *s) { - int ret = 0; - EVP_PKEY *dtls_pkey = ctx->dtls_pkey; - X509 *dtls_cert = ctx->dtls_cert; - SSL_CTX *dtls_ctx = NULL; - SSL *dtls = NULL; - BIO *bio_in = NULL, *bio_out = NULL; - const char* ciphers = "ALL"; - /** - * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. - * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. - */ - const char* profiles = "SRTP_AES128_CM_SHA1_80"; + int ret; + char buf[MAX_URL_SIZE]; + AVBPrint bp; + WHIPContext *whip = s->priv_data; + /* The URL context is an HTTP transport layer for the WHIP protocol. */ + URLContext *whip_uc = NULL; + AVDictionary *opts = NULL; + char *hex_data = NULL; - /* Refer to the test cases regarding these curves in the WebRTC code. */ -#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ - const char* curves = "X25519:P-256:P-384:P-521"; -#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ - const char* curves = "P-256:P-384:P-521"; -#endif + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&bp, 1, MAX_SDP_SIZE); -#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ - dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); -#else - dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); -#endif - if (!dtls_ctx) { - ret = AVERROR(ENOMEM); + if (!whip->sdp_offer || !strlen(whip->sdp_offer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); + ret = AVERROR(EINVAL); goto end; } -#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ - /* For ECDSA, we could set the curves list. 
*/ - if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", - curves, openssl_get_error(ctx)); + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\nContent-Type: application/sdp\r\n"); + if (whip->authorization) + ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); + if (ret <= 0 || ret >= sizeof(buf)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); ret = AVERROR(EINVAL); - return ret; + goto end; } -#endif -#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x -#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 - if (ctx->dtls_eckey) - SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); -#else - SSL_CTX_set_ecdh_auto(dtls_ctx, 1); -#endif -#endif + av_dict_set(&opts, "headers", buf, 0); + av_dict_set_int(&opts, "chunked_post", 0, 0); - /** - * We activate "ALL" cipher suites to align with the peer's capabilities, - * ensuring maximum compatibility. - */ - if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", - ciphers, openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; + hex_data = av_mallocz(2 * strlen(whip->sdp_offer) + 1); + if (!hex_data) { + ret = AVERROR(ENOMEM); + goto end; } - /* Setup the certificate. 
*/ - if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; + ff_data_to_hex(hex_data, whip->sdp_offer, strlen(whip->sdp_offer), 0); + av_dict_set(&opts, "post_data", hex_data, 0); + + ret = ffurl_open_whitelist(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: %s\n", s->url, whip->sdp_offer); + goto end; } - if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; + + if (ff_http_get_new_location(whip_uc)) { + whip->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); + if (!whip->whip_resource_url) { + ret = AVERROR(ENOMEM); + goto end; + } } - /* Server will send Certificate Request. */ - SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); - /* The depth count is "level 0:peer certificate", "level 1: CA certificate", - * "level 2: higher level CA certificate", and so on. */ - SSL_CTX_set_verify_depth(dtls_ctx, 4); - /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ - SSL_CTX_set_read_ahead(dtls_ctx, 1); - /* Setup the SRTP context */ - if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", - profiles, openssl_get_error(ctx)); + while (1) { + ret = ffurl_read(whip_uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all response as answer util EOF. 
*/ + ret = 0; + break; + } + if (ret <= 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", + s->url, whip->sdp_offer, whip->sdp_answer); + goto end; + } + + av_bprintf(&bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(&bp)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); + ret = AVERROR(EIO); + goto end; + } + } + + if (!av_strstart(bp.str, "v=", NULL)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); ret = AVERROR(EINVAL); - return ret; + goto end; } - /* The dtls should not be created unless the dtls_ctx has been initialized. */ - dtls = ctx->dtls = SSL_new(dtls_ctx); - if (!dtls) { + whip->sdp_answer = av_strdup(bp.str); + if (!whip->sdp_answer) { ret = AVERROR(ENOMEM); goto end; } - /* Setup the callback for logging. */ - SSL_set_ex_data(dtls, 0, ctx); - SSL_set_info_callback(dtls, openssl_dtls_on_info); + if (whip->state < WHIP_STATE_ANSWER) + whip->state = WHIP_STATE_ANSWER; + av_log(whip, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", whip->state, whip->sdp_answer); - /** - * We have set the MTU to fragment the DTLS packet. It is important to note that the - * packet is split to ensure that each handshake packet is smaller than the MTU. - */ - SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - SSL_set_mtu(dtls, ctx->mtu); -#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ - DTLS_set_link_mtu(dtls, ctx->mtu); -#endif +end: + ffurl_closep(&whip_uc); + av_bprint_finalize(&bp, NULL); + av_dict_free(&opts); + av_freep(&hex_data); + return ret; +} + +/** + * Parses the ICE ufrag, pwd, and candidates from the SDP answer. + * + * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. + * It returns an error if any of these fields is NULL. The function only uses the first + * candidate if there are multiple candidates. 
However, support for multiple candidates + * will be added in the future. + * + * @param s Pointer to the AVFormatContext + * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. + */ +static int parse_answer(AVFormatContext *s) +{ + int ret = 0; + AVIOContext *pb; + char line[MAX_URL_SIZE]; + const char *ptr; + int i; + WHIPContext *whip = s->priv_data; + + if (!whip->sdp_answer || !strlen(whip->sdp_answer)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No answer to parse\n"); + ret = AVERROR(EINVAL); + goto end; + } + + pb = avio_alloc_context(whip->sdp_answer, strlen(whip->sdp_answer), 0, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); + + for (i = 0; !avio_feof(pb); i++) { + ff_get_chomp_line(pb, line, sizeof(line)); + if (av_strstart(line, "a=ice-ufrag:", &ptr) && !whip->ice_ufrag_remote) { + whip->ice_ufrag_remote = av_strdup(ptr); + if (!whip->ice_ufrag_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !whip->ice_pwd_remote) { + whip->ice_pwd_remote = av_strdup(ptr); + if (!whip->ice_pwd_remote) { + ret = AVERROR(ENOMEM); + goto end; + } + } else if (av_strstart(line, "a=candidate:", &ptr) && !whip->ice_protocol) { + ptr = av_stristr(ptr, "udp"); + if (ptr && av_stristr(ptr, "host")) { + char protocol[17], host[129]; + int priority, port; + ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); + if (ret != 4) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", + ret, i, line, whip->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + if (av_strcasecmp(protocol, "udp")) { + av_log(whip, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", + protocol, i, line, whip->sdp_answer); + ret = AVERROR(EIO); + goto end; + } + + whip->ice_protocol = av_strdup(protocol); + whip->ice_host = av_strdup(host); + whip->ice_port = port; + if (!whip->ice_protocol || !whip->ice_host) { + ret = 
AVERROR(ENOMEM); + goto end; + } + } + } + } - bio_in = BIO_new(BIO_s_mem()); - if (!bio_in) { - ret = AVERROR(ENOMEM); + if (!whip->ice_pwd_remote || !strlen(whip->ice_pwd_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); goto end; } - bio_out = BIO_new(BIO_s_mem()); - if (!bio_out) { - ret = AVERROR(ENOMEM); + if (!whip->ice_ufrag_remote || !strlen(whip->ice_ufrag_remote)) { + av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); goto end; } - /** - * Please be aware that it is necessary to use a callback to obtain the packet to be written out. It is - * imperative that BIO_get_mem_data is not used to retrieve the packet, as it returns all the bytes that - * need to be sent out. - * For example, if MTU is set to 1200, and we got two DTLS packets to sendout: - * ServerHello, 95bytes. - * Certificate, 1105+143=1248bytes. - * If use BIO_get_mem_data, it will return 95+1248=1343bytes, which is larger than MTU 1200. - * If use callback, it will return two UDP packets: - * ServerHello+Certificate(Frament) = 95+1105=1200bytes. - * Certificate(Fragment) = 143bytes. - * Note that there should be more packets in real world, like ServerKeyExchange, CertificateRequest, - * and ServerHelloDone. Here we just use two packets for example. - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x - BIO_set_callback(bio_out, openssl_dtls_bio_out_callback); -#else - BIO_set_callback_ex(bio_out, openssl_dtls_bio_out_callback_ex); -#endif - BIO_set_callback_arg(bio_out, (char*)ctx); + if (!whip->ice_protocol || !whip->ice_host || !whip->ice_port) { + av_log(whip, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", whip->sdp_answer); + ret = AVERROR(EINVAL); + goto end; + } - ctx->bio_in = bio_in; - SSL_set_bio(dtls, bio_in, bio_out); - /* Now the bio_in and bio_out are owned by dtls, so we should set them to NULL. 
*/ - bio_in = bio_out = NULL; + if (whip->state < WHIP_STATE_NEGOTIATED) + whip->state = WHIP_STATE_NEGOTIATED; + whip->whip_answer_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), + whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); end: - BIO_free(bio_in); - BIO_free(bio_out); + avio_context_free(&pb); return ret; } /** - * Generate a self-signed certificate and private key for DTLS. Please note that the - * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore, - * there is no need to call it again. + * Creates and marshals an ICE binding request packet. + * + * This function creates and marshals an ICE binding request packet. The function only + * generates the username attribute and does not include goog-network-info, ice-controlling, + * use-candidate, and priority. However, some of these attributes may be added in the future. + * + * @param s Pointer to the AVFormatContext + * @param buf Pointer to memory buffer to store the request packet + * @param buf_size Size of the memory buffer + * @param request_size Pointer to an integer that receives the size of the request packet + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. */ -static av_cold int dtls_context_init(AVFormatContext *s, DTLSContext *ctx) +static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) { - int ret = 0; - - ctx->dtls_init_starttime = av_gettime(); + int ret, size, crc32; + char username[128]; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + WHIPContext *whip = s->priv_data; - if (ctx->cert_file && ctx->key_file) { - /* Read the private key and file from the file. 
*/ - if ((ret = openssl_read_certificate(s, ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", - ctx->cert_file, ctx->key_file); - return ret; - } - } else { - /* Generate a private key to ctx->dtls_pkey. */ - if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); - return ret; - } + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); - /* Generate a self-signed certificate. */ - if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); - return ret; - } + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; } - if ((ret = openssl_dtls_init_context(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to initialize DTLS context\n"); - return ret; - } + /* Write 20 bytes header */ + avio_wb16(pb, 0x0001); /* STUN binding request */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ - ctx->dtls_init_endtime = av_gettime(); - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Setup ok, MTU=%d, cost=%dms, fingerprint %s\n", - ctx->mtu, ELAPSED(ctx->dtls_init_starttime, av_gettime()), ctx->dtls_fingerprint); + /* The username is the concatenation of the two ICE ufrag */ + ret = snprintf(username, sizeof(username), "%s:%s", whip->ice_ufrag_remote, whip->ice_ufrag_local); + if (ret <= 0 || ret >= sizeof(username)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", + whip->ice_ufrag_remote, whip->ice_ufrag_local, sizeof(username), ret); + ret = AVERROR(EIO); + goto end; + } - return ret; -} + /* Write the username attribute */ + avio_wb16(pb, 
STUN_ATTR_USERNAME); /* attribute type username */ + avio_wb16(pb, ret); /* size of username */ + avio_write(pb, username, ret); /* bytes of username */ + ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ -/** - * Once the DTLS role has been negotiated - active for the DTLS client or passive for the - * DTLS server - we proceed to set up the DTLS state and initiate the handshake. - */ -static int dtls_context_start(DTLSContext *ctx) -{ - int ret = 0, r0, r1; - SSL *dtls = ctx->dtls; + /* Write the use-candidate attribute */ + avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ + avio_wb16(pb, 0); /* size of use-candidate */ - ctx->dtls_handshake_starttime = av_gettime(); + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, whip->ice_pwd_remote, strlen(whip->ice_pwd_remote)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); - /* Setup DTLS as passive, which is server role. 
*/ - SSL_set_accept_state(dtls); + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ - /** - * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes - * the handshake message if the handshake is incomplete. - * To simplify maintenance, we initiate the handshake for both the DTLS server and client after - * sending out the ICE response in the start_active_handshake function. It's worth noting that - * although the DTLS server may receive the ClientHello immediately after sending out the ICE - * response, this shouldn't be an issue as the handshake function is called before any DTLS - * packets are received. - */ - r0 = SSL_do_handshake(dtls); - r1 = openssl_ssl_get_error(ctx, r0); - // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. - if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); - return AVERROR(EIO); - } + *request_size = size; +end: + avio_context_free(&pb); + av_hmac_free(hmac); return ret; } /** - * DTLS handshake with server, as a server in passive mode, using openssl. + * Create an ICE binding response. * - * This function initializes the SSL context as the client role using OpenSSL and - * then performs the DTLS handshake until success. Upon successful completion, it - * exports the SRTP material key. 
+ * This function generates an ICE binding response and writes it to the provided + * buffer. The response is signed using the local password for message integrity. * - * @return 0 if OK, AVERROR_xxx on error + * @param s Pointer to the AVFormatContext structure. + * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. + * @param tid_size The size of the transaction ID, should be 12. + * @param buf Pointer to the buffer where the response will be written. + * @param buf_size The size of the buffer provided for the response. + * @param response_size Pointer to an integer that will store the size of the generated response. + * @return Returns 0 if successful or AVERROR_xxx if an error occurs. */ -static int dtls_context_write(DTLSContext *ctx, char* buf, int size) +static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) { - int ret = 0, res_ct, res_ht, r0, r1, do_callback; - SSL *dtls = ctx->dtls; - const char* dst = "EXTRACTOR-dtls_srtp"; - BIO *bio_in = ctx->bio_in; - - /* Got DTLS response successfully. */ - openssl_dtls_state_trace(ctx, buf, size, 1); - if ((r0 = BIO_write(bio_in, buf, size)) <= 0) { - res_ct = size > 0 ? buf[0]: 0; - res_ht = size > 13 ? buf[13] : 0; - av_log(ctx, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", - res_ct, res_ht, size, r0); - ret = AVERROR(EIO); - goto end; - } + int ret = 0, size, crc32; + AVIOContext *pb = NULL; + AVHMAC *hmac = NULL; + WHIPContext *whip = s->priv_data; - /** - * If there is data available in bio_in, use SSL_read to allow SSL to process it. - * We limit the MTU to 1200 for DTLS handshake, which ensures that the buffer is large enough for reading. 
- */ - r0 = SSL_read(dtls, buf, sizeof(buf)); - r1 = openssl_ssl_get_error(ctx, r0); - if (r0 <= 0) { - if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); - ret = AVERROR(EIO); - goto end; - } - } else { - av_log(ctx, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); + if (tid_size != 12) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. Expected 12, got %d\n", tid_size); + return AVERROR(EINVAL); } - /* Check whether the DTLS is completed. */ - if (SSL_is_init_finished(dtls) != 1) - goto end; - - do_callback = ctx->on_state && !ctx->dtls_done_for_us; - ctx->dtls_done_for_us = 1; - ctx->dtls_handshake_endtime = av_gettime(); - - /* Export SRTP master key after DTLS done */ - if (!ctx->dtls_srtp_key_exported) { - ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_materials, sizeof(ctx->dtls_srtp_materials), - dst, strlen(dst), NULL, 0, 0); - r1 = openssl_ssl_get_error(ctx, r0); - if (!ret) { - av_log(ctx, AV_LOG_ERROR, "DTLS: SSL export key ret=%d, r1=%d %s\n", ret, r1, ctx->error_message); - ret = AVERROR(EIO); - goto end; - } + pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); + if (!pb) + return AVERROR(ENOMEM); - ctx->dtls_srtp_key_exported = 1; + hmac = av_hmac_alloc(AV_HMAC_SHA1); + if (!hmac) { + ret = AVERROR(ENOMEM); + goto end; } - if (do_callback && (ret = ctx->on_state(ctx, DTLS_STATE_FINISHED, NULL, NULL)) < 0) - goto end; + /* Write 20 bytes header */ + avio_wb16(pb, 0x0101); /* STUN binding response */ + avio_wb16(pb, 0); /* length */ + avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ + avio_write(pb, tid, tid_size); /* transaction ID */ + + /* Build and update message integrity */ + avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ + avio_wb16(pb, 20); /* size of message integrity */ + ffio_fill(pb, 0, 20); /* fill with zero 
to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + av_hmac_init(hmac, whip->ice_pwd_local, strlen(whip->ice_pwd_local)); + av_hmac_update(hmac, buf, size - 24); + av_hmac_final(hmac, buf + size - 20, 20); + + /* Write the fingerprint attribute */ + avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ + avio_wb16(pb, 4); /* size of fingerprint */ + ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ + size = avio_tell(pb); + buf[2] = (size - 20) >> 8; + buf[3] = (size - 20) & 0xFF; + /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ + crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; + avio_skip(pb, -4); + avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + + *response_size = size; end: + avio_context_free(&pb); + av_hmac_free(hmac); return ret; } /** - * Cleanup the DTLS context. + * A Binding request has class=0b00 (request) and method=0b000000000001 (Binding) + * and is encoded into the first 16 bits as 0x0001. + * See https://datatracker.ietf.org/doc/html/rfc5389#section-6 */ -static av_cold void dtls_context_deinit(DTLSContext *ctx) +static int ice_is_binding_request(uint8_t *b, int size) { - SSL_free(ctx->dtls); - SSL_CTX_free(ctx->dtls_ctx); - X509_free(ctx->dtls_cert); - EVP_PKEY_free(ctx->dtls_pkey); - av_freep(&ctx->dtls_fingerprint); - av_freep(&ctx->cert_file); - av_freep(&ctx->key_file); -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_KEY_free(ctx->dtls_eckey); -#endif + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0001; } -enum WHIPState { - WHIP_STATE_NONE, - - /* The initial state. */ - WHIP_STATE_INIT, - /* The muxer has sent the offer to the peer. */ - WHIP_STATE_OFFER, - /* The muxer has received the answer from the peer. 
*/ - WHIP_STATE_ANSWER, - /** - * After parsing the answer received from the peer, the muxer negotiates the abilities - * in the offer that it generated. - */ - WHIP_STATE_NEGOTIATED, - /* The muxer has connected to the peer via UDP. */ - WHIP_STATE_UDP_CONNECTED, - /* The muxer has sent the ICE request to the peer. */ - WHIP_STATE_ICE_CONNECTING, - /* The muxer has received the ICE response from the peer. */ - WHIP_STATE_ICE_CONNECTED, - /* The muxer has finished the DTLS handshake with the peer. */ - WHIP_STATE_DTLS_FINISHED, - /* The muxer has finished the SRTP setup. */ - WHIP_STATE_SRTP_FINISHED, - /* The muxer is ready to send/receive media frames. */ - WHIP_STATE_READY, - /* The muxer is failed. */ - WHIP_STATE_FAILED, -}; - -typedef struct WHIPContext { - AVClass *av_class; - - /* The state of the RTC connection. */ - enum WHIPState state; - /* The callback return value for DTLS. */ - int dtls_ret; - int dtls_closed; - - /* Parameters for the input audio and video codecs. */ - AVCodecParameters *audio_par; - AVCodecParameters *video_par; - - /** - * The h264_mp4toannexb Bitstream Filter (BSF) bypasses the AnnexB packet; - * therefore, it is essential to insert the SPS and PPS before each IDR frame - * in such cases. - */ - int h264_annexb_insert_sps_pps; +/** + * A Binding response has class=0b10 (success response) and method=0b000000000001, + * and is encoded into the first 16 bits as 0x0101. + */ +static int ice_is_binding_response(uint8_t *b, int size) +{ + return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0101; +} - /* The random number generator. 
*/ - AVLFG rnd; +/** + * In RTP packets, the first byte is represented as 0b10xxxxxx, where the initial + * two bits (0b10) indicate the RTP version, + * see https://www.rfc-editor.org/rfc/rfc3550#section-5.1 + * The RTCP packet header is similar to RTP, + * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 + */ +static int media_is_rtp_rtcp(uint8_t *b, int size) +{ + return size >= WHIP_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; +} - /* The ICE username and pwd fragment generated by the muxer. */ - char ice_ufrag_local[9]; - char ice_pwd_local[33]; - /* The SSRC of the audio and video stream, generated by the muxer. */ - uint32_t audio_ssrc; - uint32_t video_ssrc; - /* The PT(Payload Type) of stream, generated by the muxer. */ - uint8_t audio_payload_type; - uint8_t video_payload_type; - /** - * This is the SDP offer generated by the muxer based on the codec parameters, - * DTLS, and ICE information. - */ - char *sdp_offer; +/* Whether the packet is RTCP. */ +static int media_is_rtcp(uint8_t *b, int size) +{ + return size >= WHIP_RTP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; +} - /* The ICE username and pwd from remote server. */ - char *ice_ufrag_remote; - char *ice_pwd_remote; - /** - * This represents the ICE candidate protocol, priority, host and port. - * Currently, we only support one candidate and choose the first UDP candidate. - * However, we plan to support multiple candidates in the future. - */ - char *ice_protocol; - char *ice_host; - int ice_port; +/** + * This function handles incoming binding request messages by responding to them. + * If the message is not a binding request, it will be ignored. + */ +static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) +{ + int ret = 0, size; + char tid[12]; + WHIPContext *whip = s->priv_data; - /* The SDP answer received from the WebRTC server. */ - char *sdp_answer; - /* The resource URL returned in the Location header of WHIP HTTP response. 
*/ - char *whip_resource_url; + /* Ignore if not a binding request. */ + if (!ice_is_binding_request(buf, buf_size)) + return ret; - /* These variables represent timestamps used for calculating and tracking the cost. */ - int64_t whip_starttime; - /* */ - int64_t whip_init_time; - int64_t whip_offer_time; - int64_t whip_answer_time; - int64_t whip_udp_time; - int64_t whip_ice_time; - int64_t whip_dtls_time; - int64_t whip_srtp_time; + if (buf_size < ICE_STUN_HEADER_SIZE) { + av_log(whip, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", + ICE_STUN_HEADER_SIZE, buf_size); + return AVERROR(EINVAL); + } - /* The DTLS context. */ - DTLSContext dtls_ctx; + /* Parse transaction id from binding request in buf. */ + memcpy(tid, buf + 8, 12); - /* The SRTP send context, to encrypt outgoing packets. */ - SRTPContext srtp_audio_send; - SRTPContext srtp_video_send; - SRTPContext srtp_rtcp_send; - /* The SRTP receive context, to decrypt incoming packets. */ - SRTPContext srtp_recv; + /* Build the STUN binding response. */ + ret = ice_create_response(s, tid, sizeof(tid), whip->buf, sizeof(whip->buf), &size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); + return ret; + } - /* The UDP transport is used for delivering ICE, DTLS and SRTP packets. */ - URLContext *udp_uc; - /* The buffer for UDP transmission. */ - char buf[MAX_UDP_BUFFER_SIZE]; + ret = ffurl_write(whip->udp_uc, whip->buf, size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); + return ret; + } - /* The timeout in milliseconds for ICE and DTLS handshake. */ - int handshake_timeout; - /** - * The size of RTP packet, should generally be set to MTU. - * Note that pion requires a smaller value, for example, 1200. - */ - int pkt_size; - /** - * The optional Bearer token for WHIP Authorization. 
- * See https://www.ietf.org/archive/id/draft-ietf-wish-whip-08.html#name-authentication-and-authoriz - */ - char* authorization; - /* The certificate and private key used for DTLS handshake. */ - char* cert_file; - char* key_file; -} WHIPContext; + return 0; +} /** - * When DTLS state change. + * To establish a connection with the UDP server, we utilize ICE-LITE in a Client-Server + * mode. In this setup, FFmpeg acts as the UDP client, while the peer functions as the + * UDP server. */ -static int dtls_context_on_state(DTLSContext *ctx, enum DTLSState state, const char* type, const char* desc) +static int udp_connect(AVFormatContext *s) { int ret = 0; - AVFormatContext *s = ctx->opaque; + char url[256]; + AVDictionary *opts = NULL; WHIPContext *whip = s->priv_data; - if (state == DTLS_STATE_CLOSED) { - whip->dtls_closed = 1; - av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS session closed, type=%s, desc=%s, elapsed=%dms\n", - type ? type : "", desc ? desc : "", ELAPSED(whip->whip_starttime, av_gettime())); - return ret; - } + /* Build UDP URL and create the UDP context as transport. */ + ff_url_join(url, sizeof(url), "udp", NULL, whip->ice_host, whip->ice_port, NULL); - if (state == DTLS_STATE_FAILED) { - whip->state = WHIP_STATE_FAILED; - av_log(whip, AV_LOG_ERROR, "WHIP: DTLS session failed, type=%s, desc=%s\n", - type ? type : "", desc ? desc : ""); - whip->dtls_ret = AVERROR(EIO); - return ret; + av_dict_set_int(&opts, "connect", 1, 0); + av_dict_set_int(&opts, "fifo_size", 0, 0); + /* Set the max packet size to the buffer size. 
*/ + av_dict_set_int(&opts, "pkt_size", whip->pkt_size, 0); + + ret = ffurl_open_whitelist(&whip->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", whip->ice_host, whip->ice_port); + goto end; } - if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { - whip->state = WHIP_STATE_DTLS_FINISHED; - whip->whip_dtls_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS handshake, done=%d, exported=%d, arq=%d, srtp_material=%luB, cost=%dms, elapsed=%dms\n", - ctx->dtls_done_for_us, ctx->dtls_srtp_key_exported, ctx->dtls_arq_packets, sizeof(ctx->dtls_srtp_materials), - ELAPSED(ctx->dtls_handshake_starttime, ctx->dtls_handshake_endtime), - ELAPSED(whip->whip_starttime, av_gettime())); - return ret; - } + /* Make the socket non-blocking, set to READ and WRITE mode after connected */ + ff_socket_nonblock(ffurl_get_file_handle(whip->udp_uc), 1); + whip->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; + + if (whip->state < WHIP_STATE_UDP_CONNECTED) + whip->state = WHIP_STATE_UDP_CONNECTED; + whip->whip_udp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", + whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); +end: + av_dict_free(&opts); return ret; } -/** - * When DTLS write data. 
- */ -static int dtls_context_on_write(DTLSContext *ctx, char* data, int size) +static int ice_dtls_handshake(AVFormatContext *s) { - AVFormatContext *s = ctx->opaque; + int ret = 0, size, i; + int64_t starttime = av_gettime(), now; WHIPContext *whip = s->priv_data; - if (!whip->udp_uc) { - av_log(whip, AV_LOG_ERROR, "WHIP: DTLS write data, but udp_uc is NULL\n"); - return AVERROR(EIO); + if (whip->state < WHIP_STATE_UDP_CONNECTED || !whip->udp_uc) { + av_log(whip, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", whip->state, whip->udp_uc); + return AVERROR(EINVAL); } - return ffurl_write(whip->udp_uc, data, size); -} + while (1) { + if (whip->state <= WHIP_STATE_ICE_CONNECTING) { + /* Build the STUN binding request. */ + ret = ice_create_request(s, whip->buf, sizeof(whip->buf), &size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); + goto end; + } -/** - * Initialize and check the options for the WebRTC muxer. - */ -static av_cold int initialize(AVFormatContext *s) -{ - int ret, ideal_pkt_size = 532; - WHIPContext *whip = s->priv_data; - uint32_t seed; + ret = ffurl_write(whip->udp_uc, whip->buf, size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); + goto end; + } - whip->whip_starttime = av_gettime(); + if (whip->state < WHIP_STATE_ICE_CONNECTING) + whip->state = WHIP_STATE_ICE_CONNECTING; + } - /* Initialize the random number generator. */ - seed = av_get_random_seed(); - av_lfg_init(&whip->rnd, seed); +next_packet: + if (whip->state >= WHIP_STATE_DTLS_FINISHED) + /* DTLS handshake is done, exit the loop. */ + break; - /* Use the same logging context as AV format. 
*/ - whip->dtls_ctx.av_class = whip->av_class; - whip->dtls_ctx.mtu = whip->pkt_size; - whip->dtls_ctx.opaque = s; - whip->dtls_ctx.on_state = dtls_context_on_state; - whip->dtls_ctx.on_write = dtls_context_on_write; - if (whip->cert_file) - whip->dtls_ctx.cert_file = av_strdup(whip->cert_file); - if (whip->key_file) - whip->dtls_ctx.key_file = av_strdup(whip->key_file); - - if ((ret = dtls_context_init(s, &whip->dtls_ctx)) < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to init DTLS context\n"); - return ret; - } + now = av_gettime(); + if (now - starttime >= whip->handshake_timeout * 1000) { + av_log(whip, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", + whip->handshake_timeout, ELAPSED(starttime, now), ELAPSED(whip->whip_starttime, now), whip->state); + ret = AVERROR(ETIMEDOUT); + goto end; + } - if (whip->pkt_size < ideal_pkt_size) - av_log(whip, AV_LOG_WARNING, "WHIP: pkt_size=%d(<%d) is too small, may cause packet loss\n", - whip->pkt_size, ideal_pkt_size); + /* Read the STUN or DTLS messages from peer. */ + for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5; i++) { + ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); + if (ret > 0) + break; + if (ret == AVERROR(EAGAIN)) { + av_usleep(5 * 1000); + continue; + } + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read message\n"); + goto end; + } - if (whip->state < WHIP_STATE_INIT) - whip->state = WHIP_STATE_INIT; - whip->whip_init_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: Init state=%d, handshake_timeout=%dms, pkt_size=%d, seed=%d, elapsed=%dms\n", - whip->state, whip->handshake_timeout, whip->pkt_size, seed, ELAPSED(whip->whip_starttime, av_gettime())); + /* Got nothing, continue to process handshake. */ + if (ret <= 0) + continue; - return 0; + /* Handle the ICE binding response. 
*/ + if (ice_is_binding_response(whip->buf, ret)) { + if (whip->state < WHIP_STATE_ICE_CONNECTED) { + whip->state = WHIP_STATE_ICE_CONNECTED; + whip->whip_ice_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", + whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", + whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); + + /* If got the first binding response, start DTLS handshake. */ + if ((ret = dtls_context_start(whip->dtls_ctx)) < 0) + goto end; + } + goto next_packet; + } + + /* When a binding request is received, it is necessary to respond immediately. */ + if (ice_is_binding_request(whip->buf, ret)) { + if ((ret = ice_handle_binding_request(s, whip->buf, ret)) < 0) + goto end; + goto next_packet; + } + + /* If got any DTLS messages, handle it. */ + if (dtls_can_handle_packet(whip->buf, ret) && whip->state >= WHIP_STATE_ICE_CONNECTED) { + if ((ret = dtls_context_write(whip->dtls_ctx, whip->buf, ret)) < 0) + goto end; + goto next_packet; + } + } + +end: + return ret; } /** - * When duplicating a stream, the demuxer has already set the extradata, profile, and - * level of the par. Keep in mind that this function will not be invoked since the - * profile and level are set. + * Establish the SRTP context using the keying material exported from DTLS. * - * When utilizing an encoder, such as libx264, to encode a stream, the extradata in - * par->extradata contains the SPS, which includes profile and level information. - * However, the profile and level of par remain unspecified. Therefore, it is necessary - * to extract the profile and level data from the extradata and assign it to the par's - * profile and level. Keep in mind that AVFMT_GLOBALHEADER must be enabled; otherwise, - * the extradata will remain empty. 
+ * Create separate SRTP contexts for sending video and audio, as their sequences differ + * and should not share a single context. Generate a single SRTP context for receiving + * RTCP only. + * + * @return 0 if OK, AVERROR_xxx on error */ -static int parse_profile_level(AVFormatContext *s, AVCodecParameters *par) +static int setup_srtp(AVFormatContext *s) { - int ret = 0; - const uint8_t *r = par->extradata, *r1, *end = par->extradata + par->extradata_size; - H264SPS seq, *const sps = &seq; - uint32_t state; + int ret; + char recv_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char send_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; + char buf[AV_BASE64_SIZE(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN)]; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; WHIPContext *whip = s->priv_data; - if (par->codec_id != AV_CODEC_ID_H264) - return ret; + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + char *client_key = dtls_get_srtp_client_key(whip->dtls_ctx); + char *server_key = dtls_get_srtp_server_key(whip->dtls_ctx); + char *client_salt = dtls_get_srtp_client_salt(whip->dtls_ctx); + char *server_salt = dtls_get_srtp_server_salt(whip->dtls_ctx); - if (par->profile != FF_PROFILE_UNKNOWN && par->level != FF_LEVEL_UNKNOWN) - return ret; + /* As DTLS server, the recv key is client master key plus salt. 
*/ + memcpy(recv_key, client_key, DTLS_SRTP_KEY_LEN); + memcpy(recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); - if (!par->extradata || par->extradata_size <= 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unable to parse profile from empty extradata=%p, size=%d\n", - par->extradata, par->extradata_size); - return AVERROR(EINVAL); + /* As DTLS server, the send key is server master key plus salt. */ + memcpy(send_key, server_key, DTLS_SRTP_KEY_LEN); + memcpy(send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); + + /* Setup SRTP context for outgoing packets */ + if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); + ret = AVERROR(EIO); + goto end; } - while (1) { - r = avpriv_find_start_code(r, end, &state); - if (r >= end) - break; + ret = ff_srtp_set_crypto(&whip->srtp_audio_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); + goto end; + } - r1 = ff_avc_find_startcode(r, end); - if ((state & 0x1f) == H264_NAL_SPS) { - ret = ff_avc_decode_sps(sps, r, r1 - r); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to decode SPS, state=%x, size=%d\n", - state, (int)(r1 - r)); - return ret; - } + ret = ff_srtp_set_crypto(&whip->srtp_video_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); + goto end; + } - av_log(whip, AV_LOG_VERBOSE, "WHIP: Parse profile=%d, level=%d from SPS\n", - sps->profile_idc, sps->level_idc); - par->profile = sps->profile_idc; - par->level = sps->level_idc; - } + ret = ff_srtp_set_crypto(&whip->srtp_rtcp_send, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + goto end; + } - r = r1; + /* Setup SRTP context for incoming packets */ + if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode recv 
key\n"); + ret = AVERROR(EIO); + goto end; + } + + ret = ff_srtp_set_crypto(&whip->srtp_recv, suite, buf); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); + goto end; } + if (whip->state < WHIP_STATE_SRTP_FINISHED) + whip->state = WHIP_STATE_SRTP_FINISHED; + whip->whip_srtp_time = av_gettime(); + av_log(whip, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", + whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime())); + +end: return ret; } /** - * Parses video SPS/PPS from the extradata of codecpar and checks the codec. - * Currently only supports video(h264) and audio(opus). Note that only baseline - * and constrained baseline profiles of h264 are supported. - * - * If the profile is less than 0, the function considers the profile as baseline. - * It may need to parse the profile from SPS/PPS. This situation occurs when ingesting - * desktop and transcoding. - * - * @param s Pointer to the AVFormatContext - * @returns Returns 0 if successful or AVERROR_xxx in case of an error. + * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. * - * TODO: FIXME: There is an issue with the timestamp of OPUS audio, especially when - * the input is an MP4 file. The timestamp deviates from the expected value of 960, - * causing Chrome to play the audio stream with noise. This problem can be replicated - * by transcoding a specific file into MP4 format and publishing it using the WHIP - * muxer. However, when directly transcoding and publishing through the WHIP muxer, - * the issue is not present, and the audio timestamp remains consistent. The root - * cause is still unknown, and this comment has been added to address this issue - * in the future. Further research is needed to resolve the problem. + * This function modifies the video STAP packet, removing the markers, and updating the + * NRI of the first NALU. 
Additionally, it uses the corresponding SRTP context to encrypt + * the RTP packet, where the video packet is handled by the video SRTP context. */ -static int parse_codec(AVFormatContext *s) +static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) { - int i, ret = 0; + int ret, cipher_size, is_rtcp, is_video; + uint8_t payload_type; + AVFormatContext *s = opaque; WHIPContext *whip = s->priv_data; + SRTPContext *srtp; - for (i = 0; i < s->nb_streams; i++) { - AVCodecParameters *par = s->streams[i]->codecpar; - const AVCodecDescriptor *desc = avcodec_descriptor_get(par->codec_id); - switch (par->codec_type) { - case AVMEDIA_TYPE_VIDEO: - if (whip->video_par) { - av_log(whip, AV_LOG_ERROR, "WHIP: Only one video stream is supported by RTC\n"); - return AVERROR(EINVAL); - } - whip->video_par = par; - - if (par->codec_id != AV_CODEC_ID_H264) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported video codec %s by RTC, choose h264\n", - desc ? desc->name : "unknown"); - return AVERROR_PATCHWELCOME; - } - - if (par->video_delay > 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported B frames by RTC\n"); - return AVERROR_PATCHWELCOME; - } - - if ((ret = parse_profile_level(s, par)) < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to parse SPS/PPS from extradata\n"); - return AVERROR(EINVAL); - } + /* Ignore if not RTP or RTCP packet. */ + if (!media_is_rtp_rtcp(buf, buf_size)) + return 0; - if (par->profile == FF_PROFILE_UNKNOWN) { - av_log(whip, AV_LOG_WARNING, "WHIP: No profile found in extradata, consider baseline\n"); - return AVERROR(EINVAL); - } - if (par->level == FF_LEVEL_UNKNOWN) { - av_log(whip, AV_LOG_WARNING, "WHIP: No level found in extradata, consider 3.1\n"); - return AVERROR(EINVAL); - } - break; - case AVMEDIA_TYPE_AUDIO: - if (whip->audio_par) { - av_log(whip, AV_LOG_ERROR, "WHIP: Only one audio stream is supported by RTC\n"); - return AVERROR(EINVAL); - } - whip->audio_par = par; + /* Only support audio, video and rtcp. 
*/ + is_rtcp = media_is_rtcp(buf, buf_size); + payload_type = buf[1] & 0x7f; + is_video = payload_type == whip->video_payload_type; + if (!is_rtcp && payload_type != whip->video_payload_type && payload_type != whip->audio_payload_type) + return 0; - if (par->codec_id != AV_CODEC_ID_OPUS) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio codec %s by RTC, choose opus\n", - desc ? desc->name : "unknown"); - return AVERROR_PATCHWELCOME; - } + /* Get the corresponding SRTP context. */ + srtp = is_rtcp ? &whip->srtp_rtcp_send : (is_video? &whip->srtp_video_send : &whip->srtp_audio_send); - if (par->ch_layout.nb_channels != 2) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio channels %d by RTC, choose stereo\n", - par->ch_layout.nb_channels); - return AVERROR_PATCHWELCOME; - } + /* Encrypt by SRTP and send out. */ + cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, whip->buf, sizeof(whip->buf)); + if (cipher_size <= 0 || cipher_size < buf_size) { + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); + return 0; + } - if (par->sample_rate != 48000) { - av_log(whip, AV_LOG_ERROR, "WHIP: Unsupported audio sample rate %d by RTC, choose 48000\n", par->sample_rate); - return AVERROR_PATCHWELCOME; - } - break; - default: - av_log(whip, AV_LOG_ERROR, "WHIP: Codec type '%s' for stream %d is not supported by RTC\n", - av_get_media_type_string(par->codec_type), i); - return AVERROR_PATCHWELCOME; - } + ret = ffurl_write(whip->udp_uc, whip->buf, cipher_size); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); + return ret; } return ret; } /** - * Generate SDP offer according to the codec parameters, DTLS and ICE information. + * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP + * packets from the encoded frames. 
* - * Note that we don't use av_sdp_create to generate SDP offer because it doesn't - * support DTLS and ICE information. + * The corresponding SRTP context is utilized to encrypt each stream's RTP packets. For + * example, a video SRTP context is used for the video stream. Additionally, the + * "on_rtp_write_packet" callback function is set as the write function for each RTP + * muxer to send out encrypted RTP packets. * * @return 0 if OK, AVERROR_xxx on error */ -static int generate_sdp_offer(AVFormatContext *s) +static int create_rtp_muxer(AVFormatContext *s) { - int ret = 0, profile, level, profile_iop; - const char *acodec_name = NULL, *vcodec_name = NULL; - AVBPrint bp; + int ret, i, is_video, buffer_size, max_packet_size; + AVFormatContext *rtp_ctx = NULL; + AVDictionary *opts = NULL; + uint8_t *buffer = NULL; + char buf[64]; WHIPContext *whip = s->priv_data; - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&bp, 1, MAX_SDP_SIZE); - - if (whip->sdp_offer) { - av_log(whip, AV_LOG_ERROR, "WHIP: SDP offer is already set\n"); - ret = AVERROR(EINVAL); + const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); + if (!rtp_format) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); + ret = AVERROR(ENOSYS); goto end; } - snprintf(whip->ice_ufrag_local, sizeof(whip->ice_ufrag_local), "%08x", - av_lfg_get(&whip->rnd)); - snprintf(whip->ice_pwd_local, sizeof(whip->ice_pwd_local), "%08x%08x%08x%08x", - av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), av_lfg_get(&whip->rnd), - av_lfg_get(&whip->rnd)); + /* The UDP buffer size, may greater than MTU. */ + buffer_size = MAX_UDP_BUFFER_SIZE; + /* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. 
*/ + max_packet_size = whip->pkt_size - DTLS_SRTP_CHECKSUM_LEN; - whip->audio_ssrc = av_lfg_get(&whip->rnd); - whip->video_ssrc = av_lfg_get(&whip->rnd); + for (i = 0; i < s->nb_streams; i++) { + rtp_ctx = avformat_alloc_context(); + if (!rtp_ctx) { + ret = AVERROR(ENOMEM); + goto end; + } - whip->audio_payload_type = WHIP_RTP_PAYLOAD_TYPE_OPUS; - whip->video_payload_type = WHIP_RTP_PAYLOAD_TYPE_H264; + rtp_ctx->oformat = rtp_format; + if (!avformat_new_stream(rtp_ctx, NULL)) { + ret = AVERROR(ENOMEM); + goto end; + } + /* Pass the interrupt callback on */ + rtp_ctx->interrupt_callback = s->interrupt_callback; + /* Copy the max delay setting; the rtp muxer reads this. */ + rtp_ctx->max_delay = s->max_delay; + /* Copy other stream parameters. */ + rtp_ctx->streams[0]->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio; + rtp_ctx->flags |= s->flags & AVFMT_FLAG_BITEXACT; + rtp_ctx->strict_std_compliance = s->strict_std_compliance; - av_bprintf(&bp, "" - "v=0\r\n" - "o=FFmpeg %s 2 IN IP4 %s\r\n" - "s=FFmpegPublishSession\r\n" - "t=0 0\r\n" - "a=group:BUNDLE 0 1\r\n" - "a=extmap-allow-mixed\r\n" - "a=msid-semantic: WMS\r\n", - WHIP_SDP_SESSION_ID, - WHIP_SDP_CREATOR_IP); + /* Set the synchronized start time. 
*/ + rtp_ctx->start_time_realtime = s->start_time_realtime; - if (whip->audio_par) { - if (whip->audio_par->codec_id == AV_CODEC_ID_OPUS) - acodec_name = "opus"; + avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); + rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; - av_bprintf(&bp, "" - "m=audio 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 %s\r\n" - "a=setup:passive\r\n" - "a=mid:0\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg audio\r\n" - "a=rtcp-mux\r\n" - "a=rtpmap:%u %s/%d/%d\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg audio\r\n", - whip->audio_payload_type, - whip->ice_ufrag_local, - whip->ice_pwd_local, - whip->dtls_ctx.dtls_fingerprint, - whip->audio_payload_type, - acodec_name, - whip->audio_par->sample_rate, - whip->audio_par->ch_layout.nb_channels, - whip->audio_ssrc, - whip->audio_ssrc); - } + /** + * For H.264, consistently utilize the annexb format through the Bitstream Filter (BSF); + * therefore, we deactivate the extradata detection for the RTP muxer. 
+ */ + if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) { + av_freep(&rtp_ctx->streams[i]->codecpar->extradata); + rtp_ctx->streams[i]->codecpar->extradata_size = 0; + } - if (whip->video_par) { - profile_iop = profile = whip->video_par->profile; - level = whip->video_par->level; - if (whip->video_par->codec_id == AV_CODEC_ID_H264) { - vcodec_name = "H264"; - profile_iop &= FF_PROFILE_H264_CONSTRAINED; - profile &= (~FF_PROFILE_H264_CONSTRAINED); + buffer = av_malloc(buffer_size); + if (!buffer) { + ret = AVERROR(ENOMEM); + goto end; } - av_bprintf(&bp, "" - "m=video 9 UDP/TLS/RTP/SAVPF %u\r\n" - "c=IN IP4 0.0.0.0\r\n" - "a=ice-ufrag:%s\r\n" - "a=ice-pwd:%s\r\n" - "a=fingerprint:sha-256 %s\r\n" - "a=setup:passive\r\n" - "a=mid:1\r\n" - "a=sendonly\r\n" - "a=msid:FFmpeg video\r\n" - "a=rtcp-mux\r\n" - "a=rtcp-rsize\r\n" - "a=rtpmap:%u %s/90000\r\n" - "a=fmtp:%u level-asymmetry-allowed=1;packetization-mode=1;profile-level-id=%02x%02x%02x\r\n" - "a=ssrc:%u cname:FFmpeg\r\n" - "a=ssrc:%u msid:FFmpeg video\r\n", - whip->video_payload_type, - whip->ice_ufrag_local, - whip->ice_pwd_local, - whip->dtls_ctx.dtls_fingerprint, - whip->video_payload_type, - vcodec_name, - whip->video_payload_type, - profile, - profile_iop, - level, - whip->video_ssrc, - whip->video_ssrc); - } + rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, on_rtp_write_packet, NULL); + if (!rtp_ctx->pb) { + ret = AVERROR(ENOMEM); + goto end; + } + rtp_ctx->pb->max_packet_size = max_packet_size; + rtp_ctx->pb->av_class = &ff_avio_class; - if (!av_bprint_is_complete(&bp)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Offer exceed max %d, %s\n", MAX_SDP_SIZE, bp.str); - ret = AVERROR(EIO); - goto end; - } + is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; + snprintf(buf, sizeof(buf), "%d", is_video? whip->video_payload_type : whip->audio_payload_type); + av_dict_set(&opts, "payload_type", buf, 0); + snprintf(buf, sizeof(buf), "%d", is_video? 
whip->video_ssrc : whip->audio_ssrc); + av_dict_set(&opts, "ssrc", buf, 0); - whip->sdp_offer = av_strdup(bp.str); - if (!whip->sdp_offer) { - ret = AVERROR(ENOMEM); - goto end; + ret = avformat_write_header(rtp_ctx, &opts); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n"); + goto end; + } + + ff_format_set_url(rtp_ctx, av_strdup(s->url)); + s->streams[i]->time_base = rtp_ctx->streams[0]->time_base; + s->streams[i]->priv_data = rtp_ctx; + rtp_ctx = NULL; } - if (whip->state < WHIP_STATE_OFFER) - whip->state = WHIP_STATE_OFFER; - whip->whip_offer_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); + if (whip->state < WHIP_STATE_READY) + whip->state = WHIP_STATE_READY; + av_log(whip, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " + "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", + whip->state, buffer_size, max_packet_size, ELAPSED(whip->whip_starttime, av_gettime()), + ELAPSED(whip->whip_starttime, whip->whip_init_time), + ELAPSED(whip->whip_init_time, whip->whip_offer_time), + ELAPSED(whip->whip_offer_time, whip->whip_answer_time), + ELAPSED(whip->whip_answer_time, whip->whip_udp_time), + ELAPSED(whip->whip_udp_time, whip->whip_ice_time), + ELAPSED(whip->whip_ice_time, whip->whip_dtls_time), + ELAPSED(whip->whip_dtls_time, whip->whip_srtp_time)); end: - av_bprint_finalize(&bp, NULL); + if (rtp_ctx) + avio_context_free(&rtp_ctx->pb); + avformat_free_context(rtp_ctx); + av_dict_free(&opts); return ret; } /** - * Exchange SDP offer with WebRTC peer to get the answer. - * - * @return 0 if OK, AVERROR_xxx on error + * RTC is connectionless, for it's based on UDP, so it check whether sesison is + * timeout. In such case, publishers can't republish the stream util the session + * is timeout. 
+ * This function is called to notify the server that the stream is ended, server + * should expire and close the session immediately, so that publishers can republish + * the stream quickly. */ -static int exchange_sdp(AVFormatContext *s) +static int dispose_session(AVFormatContext *s) { int ret; char buf[MAX_URL_SIZE]; - AVBPrint bp; - WHIPContext *whip = s->priv_data; - /* The URL context is an HTTP transport layer for the WHIP protocol. */ URLContext *whip_uc = NULL; AVDictionary *opts = NULL; - char *hex_data = NULL; - - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&bp, 1, MAX_SDP_SIZE); + WHIPContext *whip = s->priv_data; - if (!whip->sdp_offer || !strlen(whip->sdp_offer)) { - av_log(whip, AV_LOG_ERROR, "WHIP: No offer to exchange\n"); - ret = AVERROR(EINVAL); - goto end; - } + if (!whip->whip_resource_url) + return 0; - ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\nContent-Type: application/sdp\r\n"); + ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n"); if (whip->authorization) ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); if (ret <= 0 || ret >= sizeof(buf)) { @@ -1575,1106 +1536,1196 @@ static int exchange_sdp(AVFormatContext *s) av_dict_set(&opts, "headers", buf, 0); av_dict_set_int(&opts, "chunked_post", 0, 0); - - hex_data = av_mallocz(2 * strlen(whip->sdp_offer) + 1); - if (!hex_data) { - ret = AVERROR(ENOMEM); - goto end; - } - ff_data_to_hex(hex_data, whip->sdp_offer, strlen(whip->sdp_offer), 0); - av_dict_set(&opts, "post_data", hex_data, 0); - - ret = ffurl_open_whitelist(&whip_uc, s->url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, + av_dict_set(&opts, "method", "DELETE", 0); + ret = ffurl_open_whitelist(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to request url=%s, offer: 
%s\n", s->url, whip->sdp_offer); + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", whip->whip_resource_url); goto end; } - if (ff_http_get_new_location(whip_uc)) { - whip->whip_resource_url = av_strdup(ff_http_get_new_location(whip_uc)); - if (!whip->whip_resource_url) { - ret = AVERROR(ENOMEM); - goto end; - } - } - while (1) { ret = ffurl_read(whip_uc, buf, sizeof(buf)); if (ret == AVERROR_EOF) { - /* Reset the error because we read all response as answer util EOF. */ ret = 0; break; } - if (ret <= 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from url=%s, offer is %s, answer is %s\n", - s->url, whip->sdp_offer, whip->sdp_answer); - goto end; - } - - av_bprintf(&bp, "%.*s", ret, buf); - if (!av_bprint_is_complete(&bp)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Answer exceed max size %d, %.*s, %s\n", MAX_SDP_SIZE, ret, buf, bp.str); - ret = AVERROR(EIO); + if (ret < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE url=%s\n", whip->whip_resource_url); goto end; } } - if (!av_strstart(bp.str, "v=", NULL)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Invalid answer: %s\n", bp.str); - ret = AVERROR(EINVAL); - goto end; - } - - whip->sdp_answer = av_strdup(bp.str); - if (!whip->sdp_answer) { - ret = AVERROR(ENOMEM); - goto end; - } - - if (whip->state < WHIP_STATE_ANSWER) - whip->state = WHIP_STATE_ANSWER; - av_log(whip, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", whip->state, whip->sdp_answer); + av_log(whip, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", whip->whip_resource_url); end: ffurl_closep(&whip_uc); - av_bprint_finalize(&bp, NULL); av_dict_free(&opts); - av_freep(&hex_data); return ret; } /** - * Parses the ICE ufrag, pwd, and candidates from the SDP answer. - * - * This function is used to extract the ICE ufrag, pwd, and candidates from the SDP answer. - * It returns an error if any of these fields is NULL. The function only uses the first - * candidate if there are multiple candidates. 
However, support for multiple candidates - * will be added in the future. - * - * @param s Pointer to the AVFormatContext - * @returns Returns 0 if successful or AVERROR_xxx if an error occurs. + * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses + * the annexb format, it is necessary to manually insert encoder metadata before each + * IDR when dealing with annexb format packets. For instance, in the case of H.264, + * we must insert SPS and PPS before the IDR frame. */ -static int parse_answer(AVFormatContext *s) +static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt) { int ret = 0; - AVIOContext *pb; - char line[MAX_URL_SIZE]; - const char *ptr; - int i; - WHIPContext *whip = s->priv_data; + AVPacket *in = NULL; + AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar; + uint32_t nal_size = 0, out_size = par ? par->extradata_size : 0; + uint8_t unit_type, sps_seen = 0, pps_seen = 0, idr_seen = 0, *out; + const uint8_t *buf, *buf_end, *r1; - if (!whip->sdp_answer || !strlen(whip->sdp_answer)) { - av_log(whip, AV_LOG_ERROR, "WHIP: No answer to parse\n"); - ret = AVERROR(EINVAL); - goto end; + if (!pkt || !pkt->data || pkt->size <= 0) + return ret; + if (!par || !par->extradata || par->extradata_size <= 0) + return ret; + + /* Discover NALU type from packet. 
*/ + buf_end = pkt->data + pkt->size; + for (buf = ff_avc_find_startcode(pkt->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_avc_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + unit_type = *buf & 0x1f; + if (unit_type == H264_NAL_SPS) { + sps_seen = 1; + } else if (unit_type == H264_NAL_PPS) { + pps_seen = 1; + } else if (unit_type == H264_NAL_IDR_SLICE) { + idr_seen = 1; + } + + out_size += 3 + nal_size; + } } - pb = avio_alloc_context(whip->sdp_answer, strlen(whip->sdp_answer), 0, NULL, NULL, NULL, NULL); - if (!pb) + if (!idr_seen || (sps_seen && pps_seen)) + return ret; + + /* See av_bsf_send_packet */ + in = av_packet_alloc(); + if (!in) return AVERROR(ENOMEM); - for (i = 0; !avio_feof(pb); i++) { - ff_get_chomp_line(pb, line, sizeof(line)); - if (av_strstart(line, "a=ice-ufrag:", &ptr) && !whip->ice_ufrag_remote) { - whip->ice_ufrag_remote = av_strdup(ptr); - if (!whip->ice_ufrag_remote) { - ret = AVERROR(ENOMEM); - goto end; - } - } else if (av_strstart(line, "a=ice-pwd:", &ptr) && !whip->ice_pwd_remote) { - whip->ice_pwd_remote = av_strdup(ptr); - if (!whip->ice_pwd_remote) { - ret = AVERROR(ENOMEM); - goto end; - } - } else if (av_strstart(line, "a=candidate:", &ptr) && !whip->ice_protocol) { - ptr = av_stristr(ptr, "udp"); - if (ptr && av_stristr(ptr, "host")) { - char protocol[17], host[129]; - int priority, port; - ret = sscanf(ptr, "%16s %d %128s %d typ host", protocol, &priority, host, &port); - if (ret != 4) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed %d to parse line %d %s from %s\n", - ret, i, line, whip->sdp_answer); - ret = AVERROR(EIO); - goto end; - } + ret = av_packet_make_refcounted(pkt); + if (ret < 0) + goto fail; + + av_packet_move_ref(in, pkt); + + /* Create a new packet with sps/pps inserted. 
*/ + ret = av_new_packet(pkt, out_size); + if (ret < 0) + goto fail; + + ret = av_packet_copy_props(pkt, in); + if (ret < 0) + goto fail; + + memcpy(pkt->data, par->extradata, par->extradata_size); + out = pkt->data + par->extradata_size; + buf_end = in->data + in->size; + for (buf = ff_avc_find_startcode(in->data, buf_end); buf < buf_end; buf += nal_size) { + while (!*(buf++)); + r1 = ff_avc_find_startcode(buf, buf_end); + if ((nal_size = r1 - buf) > 0) { + AV_WB24(out, 0x00001); + memcpy(out + 3, buf, nal_size); + out += 3 + nal_size; + } + } + +fail: + if (ret < 0) + av_packet_unref(pkt); + av_packet_free(&in); + + return ret; +} + +static av_cold int whip_init(AVFormatContext *s) +{ + int ret; + WHIPContext *whip = s->priv_data; + + if ((ret = initialize(s)) < 0) + goto end; + + if ((ret = parse_codec(s)) < 0) + goto end; + + if ((ret = generate_sdp_offer(s)) < 0) + goto end; + + if ((ret = exchange_sdp(s)) < 0) + goto end; + + if ((ret = parse_answer(s)) < 0) + goto end; - if (av_strcasecmp(protocol, "udp")) { - av_log(whip, AV_LOG_ERROR, "WHIP: Protocol %s is not supported by RTC, choose udp, line %d %s of %s\n", - protocol, i, line, whip->sdp_answer); - ret = AVERROR(EIO); - goto end; - } + if ((ret = udp_connect(s)) < 0) + goto end; - whip->ice_protocol = av_strdup(protocol); - whip->ice_host = av_strdup(host); - whip->ice_port = port; - if (!whip->ice_protocol || !whip->ice_host) { - ret = AVERROR(ENOMEM); - goto end; - } + if ((ret = ice_dtls_handshake(s)) < 0) + goto end; + + if ((ret = setup_srtp(s)) < 0) + goto end; + + if ((ret = create_rtp_muxer(s)) < 0) + goto end; + +end: + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + return ret; +} + +static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) +{ + int ret; + WHIPContext *whip = s->priv_data; + AVStream *st = s->streams[pkt->stream_index]; + AVFormatContext 
*rtp_ctx = st->priv_data; + + /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + + /** + * Receive packets from the server such as ICE binding requests, DTLS messages, + * and RTCP like PLI requests, then respond to them. + */ + ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); + if (ret > 0) { + if (dtls_can_handle_packet(whip->buf, ret)) { + if ((ret = dtls_context_write(whip->dtls_ctx, whip->buf, ret)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); + goto end; } } - } - - if (!whip->ice_pwd_remote || !strlen(whip->ice_pwd_remote)) { - av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice pwd parsed from %s\n", whip->sdp_answer); - ret = AVERROR(EINVAL); + } else if (ret != AVERROR(EAGAIN)) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); goto end; } - if (!whip->ice_ufrag_remote || !strlen(whip->ice_ufrag_remote)) { - av_log(whip, AV_LOG_ERROR, "WHIP: No remote ice ufrag parsed from %s\n", whip->sdp_answer); - ret = AVERROR(EINVAL); - goto end; + if (whip->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { + if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); + goto end; + } } - if (!whip->ice_protocol || !whip->ice_host || !whip->ice_port) { - av_log(whip, AV_LOG_ERROR, "WHIP: No ice candidate parsed from %s\n", whip->sdp_answer); - ret = AVERROR(EINVAL); + ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); + if (ret < 0) { + if (ret == AVERROR(EINVAL)) { + av_log(whip, AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); + ret = 0; + } else + av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); goto end; } - if (whip->state < WHIP_STATE_NEGOTIATED) - whip->state = WHIP_STATE_NEGOTIATED; - whip->whip_answer_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, 
transport=%s://%s:%d, elapsed=%dms\n", - whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), - whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); - end: - avio_context_free(&pb); + if (ret < 0 && whip->state < WHIP_STATE_FAILED) + whip->state = WHIP_STATE_FAILED; + if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) + ret = whip->dtls_ret; + if (ret >= 0 && whip->dtls_closed) + ret = AVERROR(EIO); return ret; } -/** - * Creates and marshals an ICE binding request packet. - * - * This function creates and marshals an ICE binding request packet. The function only - * generates the username attribute and does not include goog-network-info, ice-controlling, - * use-candidate, and priority. However, some of these attributes may be added in the future. - * - * @param s Pointer to the AVFormatContext - * @param buf Pointer to memory buffer to store the request packet - * @param buf_size Size of the memory buffer - * @param request_size Pointer to an integer that receives the size of the request packet - * @return Returns 0 if successful or AVERROR_xxx if an error occurs. 
- */ -static int ice_create_request(AVFormatContext *s, uint8_t *buf, int buf_size, int *request_size) +static av_cold void whip_deinit(AVFormatContext *s) { - int ret, size, crc32; - char username[128]; - AVIOContext *pb = NULL; - AVHMAC *hmac = NULL; + int i, ret; WHIPContext *whip = s->priv_data; - pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); + ret = dispose_session(s); + if (ret < 0) + av_log(whip, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); - hmac = av_hmac_alloc(AV_HMAC_SHA1); - if (!hmac) { - ret = AVERROR(ENOMEM); - goto end; + for (i = 0; i < s->nb_streams; i++) { + AVFormatContext* rtp_ctx = s->streams[i]->priv_data; + if (!rtp_ctx) + continue; + + av_write_trailer(rtp_ctx); + /** + * Keep in mind that it is necessary to free the buffer of pb since we allocate + * it and pass it to pb using avio_alloc_context, while avio_context_free does + * not perform this action. + */ + av_freep(&rtp_ctx->pb->buffer); + avio_context_free(&rtp_ctx->pb); + avformat_free_context(rtp_ctx); + s->streams[i]->priv_data = NULL; } - /* Write 20 bytes header */ - avio_wb16(pb, 0x0001); /* STUN binding request */ - avio_wb16(pb, 0); /* length */ - avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ - avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ - avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ - avio_wb32(pb, av_lfg_get(&whip->rnd)); /* transaction ID */ + av_freep(&whip->sdp_offer); + av_freep(&whip->sdp_answer); + av_freep(&whip->whip_resource_url); + av_freep(&whip->ice_ufrag_remote); + av_freep(&whip->ice_pwd_remote); + av_freep(&whip->ice_protocol); + av_freep(&whip->ice_host); + av_freep(&whip->authorization); + av_freep(&whip->cert_file); + av_freep(&whip->key_file); + ffurl_closep(&whip->udp_uc); + ff_srtp_free(&whip->srtp_audio_send); + ff_srtp_free(&whip->srtp_video_send); + ff_srtp_free(&whip->srtp_rtcp_send); + ff_srtp_free(&whip->srtp_recv); + 
dtls_context_deinit(whip->dtls_ctx); +} - /* The username is the concatenation of the two ICE ufrag */ - ret = snprintf(username, sizeof(username), "%s:%s", whip->ice_ufrag_remote, whip->ice_ufrag_local); - if (ret <= 0 || ret >= sizeof(username)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to build username %s:%s, max=%lu, ret=%d\n", - whip->ice_ufrag_remote, whip->ice_ufrag_local, sizeof(username), ret); - ret = AVERROR(EIO); - goto end; +static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) +{ + int ret = 1, extradata_isom = 0; + uint8_t *b = pkt->data; + WHIPContext *whip = s->priv_data; + + if (st->codecpar->codec_id == AV_CODEC_ID_H264) { + extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; + if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { + ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); + av_log(whip, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", + b[0], b[1], b[2], b[3], b[4], extradata_isom); + } else + whip->h264_annexb_insert_sps_pps = 1; } - /* Write the username attribute */ - avio_wb16(pb, STUN_ATTR_USERNAME); /* attribute type username */ - avio_wb16(pb, ret); /* size of username */ - avio_write(pb, username, ret); /* bytes of username */ - ffio_fill(pb, 0, (4 - (ret % 4)) % 4); /* padding */ + return ret; +} - /* Write the use-candidate attribute */ - avio_wb16(pb, STUN_ATTR_USE_CANDIDATE); /* attribute type use-candidate */ - avio_wb16(pb, 0); /* size of use-candidate */ +#if 1 +/** + * The DTLS content type. 
+ * See https://tools.ietf.org/html/rfc2246#section-6.2.1 + * change_cipher_spec(20), alert(21), handshake(22), application_data(23) + */ +#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 - /* Build and update message integrity */ - avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ - avio_wb16(pb, 20); /* size of message integrity */ - ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, whip->ice_pwd_remote, strlen(whip->ice_pwd_remote)); - av_hmac_update(hmac, buf, size - 24); - av_hmac_final(hmac, buf + size - 20, 20); +/** + * The DTLS record layer header has a total size of 13 bytes, consisting of + * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), + * SequenceNumber (6 bytes), and Length (2 bytes). + * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 + */ +#define DTLS_RECORD_LAYER_HEADER_LEN 13 + +/** + * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. + * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer + */ +#define DTLS_VERSION_10 0xfeff +#define DTLS_VERSION_12 0xfefd + +/** + * Maximum size limit of a certificate and private key size. + */ +#define DTLS_MAX_CERTIFICATE_SIZE 8192 + +typedef struct DTLSContext { + AVClass *av_class; + + /* For callback. */ + dtls_fn_on_state on_state; + dtls_fn_on_write on_write; + void* opaque; + + /* For logging. */ + AVClass *log_avcl; + + /* The DTLS context. */ + SSL_CTX *dtls_ctx; + SSL *dtls; + /* The DTLS BIOs. */ + BIO *bio_in; + + /* The private key for DTLS handshake. */ + EVP_PKEY *dtls_pkey; + /* The EC key for DTLS handshake. */ + EC_KEY* dtls_eckey; + /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ + X509 *dtls_cert; + /* The fingerprint of certificate, used in SDP offer. 
*/ + char *dtls_fingerprint; + + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; + + /* Whether the DTLS is done at least for us. */ + int dtls_done_for_us; + /* Whether the SRTP key is exported. */ + int dtls_srtp_key_exported; + /* The number of packets retransmitted for DTLS. */ + int dtls_arq_packets; + /** + * This is the last DTLS content type and handshake type that is used to detect + * the ARQ packet. + */ + uint8_t dtls_last_content_type; + uint8_t dtls_last_handshake_type; - /* Write the fingerprint attribute */ - avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ - avio_wb16(pb, 4); /* size of fingerprint */ - ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ - crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; - avio_skip(pb, -4); - avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + /* These variables represent timestamps used for calculating and tracking the cost. */ + int64_t dtls_init_starttime; + int64_t dtls_init_endtime; + int64_t dtls_handshake_starttime; + int64_t dtls_handshake_endtime; - *request_size = size; + /* Helper for get error code and message. */ + int error_code; + char error_message[256]; -end: - avio_context_free(&pb); - av_hmac_free(hmac); - return ret; -} + /* The certificate and private key used for DTLS handshake. */ + char* cert_file; + char* key_file; + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. 
+ */ + int mtu; +} DTLSContext; /** - * Create an ICE binding response. - * - * This function generates an ICE binding response and writes it to the provided - * buffer. The response is signed using the local password for message integrity. - * - * @param s Pointer to the AVFormatContext structure. - * @param tid Pointer to the transaction ID of the binding request. The tid_size should be 12. - * @param tid_size The size of the transaction ID, should be 12. - * @param buf Pointer to the buffer where the response will be written. - * @param buf_size The size of the buffer provided for the response. - * @param response_size Pointer to an integer that will store the size of the generated response. - * @return Returns 0 if successful or AVERROR_xxx if an error occurs. + * Read all data from the given URL url and store it in the given buffer bp. */ -static int ice_create_response(AVFormatContext *s, char *tid, int tid_size, uint8_t *buf, int buf_size, int *response_size) +static int dtls_url_read_all(AVFormatContext *s, const char *url, AVBPrint *bp) { - int ret = 0, size, crc32; - AVIOContext *pb = NULL; - AVHMAC *hmac = NULL; - WHIPContext *whip = s->priv_data; - - if (tid_size != 12) { - av_log(whip, AV_LOG_ERROR, "WHIP: Invalid transaction ID size. 
Expected 12, got %d\n", tid_size); - return AVERROR(EINVAL); - } - - pb = avio_alloc_context(buf, buf_size, 1, NULL, NULL, NULL, NULL); - if (!pb) - return AVERROR(ENOMEM); + int ret = 0; + AVDictionary *opts = NULL; + URLContext *uc = NULL; + char buf[MAX_URL_SIZE]; - hmac = av_hmac_alloc(AV_HMAC_SHA1); - if (!hmac) { - ret = AVERROR(ENOMEM); + ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to open url %s\n", url); goto end; } - /* Write 20 bytes header */ - avio_wb16(pb, 0x0101); /* STUN binding response */ - avio_wb16(pb, 0); /* length */ - avio_wb32(pb, STUN_MAGIC_COOKIE); /* magic cookie */ - avio_write(pb, tid, tid_size); /* transaction ID */ - - /* Build and update message integrity */ - avio_wb16(pb, STUN_ATTR_MESSAGE_INTEGRITY); /* attribute type message integrity */ - avio_wb16(pb, 20); /* size of message integrity */ - ffio_fill(pb, 0, 20); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - av_hmac_init(hmac, whip->ice_pwd_local, strlen(whip->ice_pwd_local)); - av_hmac_update(hmac, buf, size - 24); - av_hmac_final(hmac, buf + size - 20, 20); - - /* Write the fingerprint attribute */ - avio_wb16(pb, STUN_ATTR_FINGERPRINT); /* attribute type fingerprint */ - avio_wb16(pb, 4); /* size of fingerprint */ - ffio_fill(pb, 0, 4); /* fill with zero to directly write and skip it */ - size = avio_tell(pb); - buf[2] = (size - 20) >> 8; - buf[3] = (size - 20) & 0xFF; - /* Refer to the av_hash_alloc("CRC32"), av_hash_init and av_hash_final */ - crc32 = av_crc(av_crc_get_table(AV_CRC_32_IEEE_LE), 0xFFFFFFFF, buf, size - 8) ^ 0xFFFFFFFF; - avio_skip(pb, -4); - avio_wb32(pb, crc32 ^ 0x5354554E); /* xor with "STUN" */ + while (1) { + ret = ffurl_read(uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* Reset the error because we read all 
response as answer util EOF. */ + ret = 0; + break; + } + if (ret <= 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to read from url=%s, key is %s\n", url, bp->str); + goto end; + } - *response_size = size; + av_bprintf(bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(bp)) { + av_log(s, AV_LOG_ERROR, "WHIP: Exceed max size %.*s, %s\n", ret, buf, bp->str); + ret = AVERROR(EIO); + goto end; + } + } end: - avio_context_free(&pb); - av_hmac_free(hmac); + ffurl_closep(&uc); + av_dict_free(&opts); return ret; } -/** - * A Binding request has class=0b00 (request) and method=0b000000000001 (Binding) - * and is encoded into the first 16 bits as 0x0001. - * See https://datatracker.ietf.org/doc/html/rfc5389#section-6 - */ -static int ice_is_binding_request(uint8_t *b, int size) +static char* dtls_get_fingerprint(void *pctx) { - return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0001; + DTLSContext *ctx = pctx; + return ctx->dtls_fingerprint; } -/** - * A Binding response has class=0b10 (success response) and method=0b000000000001, - * and is encoded into the first 16 bits as 0x0101. - */ -static int ice_is_binding_response(uint8_t *b, int size) +static uint8_t* dtls_get_srtp_client_key(void *pctx) { - return size >= ICE_STUN_HEADER_SIZE && AV_RB16(&b[0]) == 0x0101; + DTLSContext *ctx = pctx; + return ctx->dtls_srtp_materials; } -/** - * In RTP packets, the first byte is represented as 0b10xxxxxx, where the initial - * two bits (0b10) indicate the RTP version, - * see https://www.rfc-editor.org/rfc/rfc3550#section-5.1 - * The RTCP packet header is similar to RTP, - * see https://www.rfc-editor.org/rfc/rfc3550#section-6.4.1 - */ -static int media_is_rtp_rtcp(uint8_t *b, int size) +static uint8_t* dtls_get_srtp_server_key(void *pctx) { - return size >= WHIP_RTP_HEADER_SIZE && (b[0] & 0xC0) == 0x80; + return dtls_get_srtp_client_key(pctx) + DTLS_SRTP_KEY_LEN; } -/* Whether the packet is RTCP. 
*/ -static int media_is_rtcp(uint8_t *b, int size) +static uint8_t* dtls_get_srtp_client_salt(void *pctx) { - return size >= WHIP_RTP_HEADER_SIZE && b[1] >= WHIP_RTCP_PT_START && b[1] <= WHIP_RTCP_PT_END; + return dtls_get_srtp_server_key(pctx) + DTLS_SRTP_KEY_LEN; +} + +static uint8_t* dtls_get_srtp_server_salt(void *pctx) +{ + return dtls_get_srtp_client_salt(pctx) + DTLS_SRTP_SALT_LEN; } /** - * This function handles incoming binding request messages by responding to them. - * If the message is not a binding request, it will be ignored. + * Whether the packet is a DTLS packet. */ -static int ice_handle_binding_request(AVFormatContext *s, char *buf, int buf_size) +static int dtls_can_handle_packet(uint8_t *b, int size) { - int ret = 0, size; - char tid[12]; - WHIPContext *whip = s->priv_data; - - /* Ignore if not a binding request. */ - if (!ice_is_binding_request(buf, buf_size)) - return ret; - - if (buf_size < ICE_STUN_HEADER_SIZE) { - av_log(whip, AV_LOG_ERROR, "WHIP: Invalid STUN message, expected at least %d, got %d\n", - ICE_STUN_HEADER_SIZE, buf_size); - return AVERROR(EINVAL); - } - - /* Parse transaction id from binding request in buf. */ - memcpy(tid, buf + 8, 12); - - /* Build the STUN binding response. */ - ret = ice_create_response(s, tid, sizeof(tid), whip->buf, sizeof(whip->buf), &size); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding response, size=%d\n", size); - return ret; - } - - ret = ffurl_write(whip->udp_uc, whip->buf, size); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding response, size=%d\n", size); - return ret; - } - - return 0; + uint16_t version = AV_RB16(&b[1]); + return size > DTLS_RECORD_LAYER_HEADER_LEN && + b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && + (version == DTLS_VERSION_10 || version == DTLS_VERSION_12); } /** - * To establish a connection with the UDP server, we utilize ICE-LITE in a Client-Server - * mode. 
In this setup, FFmpeg acts as the UDP client, while the peer functions as the - * UDP server. + * Retrieves the error message for the latest OpenSSL error. + * + * This function retrieves the error code from the thread's error queue, converts it + * to a human-readable string, and stores it in the DTLSContext's error_message field. + * The error queue is then cleared using ERR_clear_error(). */ -static int udp_connect(AVFormatContext *s) +static const char* openssl_get_error(DTLSContext *ctx) { - int ret = 0; - char url[256]; - AVDictionary *opts = NULL; - WHIPContext *whip = s->priv_data; - - /* Build UDP URL and create the UDP context as transport. */ - ff_url_join(url, sizeof(url), "udp", NULL, whip->ice_host, whip->ice_port, NULL); - - av_dict_set_int(&opts, "connect", 1, 0); - av_dict_set_int(&opts, "fifo_size", 0, 0); - /* Set the max packet size to the buffer size. */ - av_dict_set_int(&opts, "pkt_size", whip->pkt_size, 0); + int r2 = ERR_get_error(); + if (r2) + ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); + else + ctx->error_message[0] = '\0'; - ret = ffurl_open_whitelist(&whip->udp_uc, url, AVIO_FLAG_WRITE, &s->interrupt_callback, - &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to connect udp://%s:%d\n", whip->ice_host, whip->ice_port); - goto end; - } + ERR_clear_error(); + return ctx->error_message; +} - /* Make the socket non-blocking, set to READ and WRITE mode after connected */ - ff_socket_nonblock(ffurl_get_file_handle(whip->udp_uc), 1); - whip->udp_uc->flags |= AVIO_FLAG_READ | AVIO_FLAG_NONBLOCK; +/** + * Get the error code for the given SSL operation result. + * + * This function retrieves the error code for the given SSL operation result + * and stores the error message in the DTLS context if an error occurred. + * It also clears the error queue. 
+ */ +static int openssl_ssl_get_error(DTLSContext *ctx, int ret) +{ + SSL *dtls = ctx->dtls; + int r1 = SSL_ERROR_NONE; - if (whip->state < WHIP_STATE_UDP_CONNECTED) - whip->state = WHIP_STATE_UDP_CONNECTED; - whip->whip_udp_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: UDP state=%d, elapsed=%dms, connected to udp://%s:%d\n", - whip->state, ELAPSED(whip->whip_starttime, av_gettime()), whip->ice_host, whip->ice_port); + if (ret <= 0) + r1 = SSL_get_error(dtls, ret); -end: - av_dict_free(&opts); - return ret; + openssl_get_error(ctx); + return r1; } -static int ice_dtls_handshake(AVFormatContext *s) +/** + * Callback function to print the OpenSSL SSL status. + */ +static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) { - int ret = 0, size, i; - int64_t starttime = av_gettime(), now; - WHIPContext *whip = s->priv_data; + int w, r1, is_fatal, is_warning, is_close_notify; + const char *method = "undefined", *alert_type, *alert_desc; + int state; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - if (whip->state < WHIP_STATE_UDP_CONNECTED || !whip->udp_uc) { - av_log(whip, AV_LOG_ERROR, "WHIP: UDP not connected, state=%d, udp_uc=%p\n", whip->state, whip->udp_uc); - return AVERROR(EINVAL); - } + w = where & ~SSL_ST_MASK; + if (w & SSL_ST_CONNECT) + method = "SSL_connect"; + else if (w & SSL_ST_ACCEPT) + method = "SSL_accept"; - while (1) { - if (whip->state <= WHIP_STATE_ICE_CONNECTING) { - /* Build the STUN binding request. */ - ret = ice_create_request(s, whip->buf, sizeof(whip->buf), &size); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to create STUN binding request, size=%d\n", size); - goto end; - } + r1 = openssl_ssl_get_error(ctx, r0); + if (where & SSL_CB_LOOP) { + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? 
"read":"write"; - ret = ffurl_write(whip->udp_uc, whip->buf, size); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to send STUN binding request, size=%d\n", size); - goto end; - } + alert_type = SSL_alert_type_string_long(r0); + alert_desc = SSL_alert_desc_string(r0); - if (whip->state < WHIP_STATE_ICE_CONNECTING) - whip->state = WHIP_STATE_ICE_CONNECTING; + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) + av_log(ctx, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); + else + av_log(ctx, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1, ctx->error_message); + + /** + * Notify the DTLS to handle the ALERT message, which maybe means media connection disconnect. + * CN(Close Notify) is sent when peer close the PeerConnection. fatal, IP(Illegal Parameter) + * is sent when DTLS failed. + */ + is_fatal = !av_strncasecmp(alert_type, "fatal", 5); + is_warning = !av_strncasecmp(alert_type, "warning", 7); + is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); + state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? 
DTLS_STATE_CLOSED : DTLS_STATE_NONE); + if (state != DTLS_STATE_NONE && ctx->on_state) { + av_log(ctx, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", + ctx, state, is_fatal, is_warning, is_close_notify); + ctx->on_state(ctx, ctx->opaque, state, alert_type, alert_desc); } + } else if (where & SSL_CB_EXIT) { + if (!r0) + av_log(ctx, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + else if (r0 < 0) + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) + av_log(ctx, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1, ctx->error_message); + else + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } +} -next_packet: - if (whip->state >= WHIP_STATE_DTLS_FINISHED) - /* DTLS handshake is done, exit the loop. */ - break; +static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming) +{ + uint8_t content_type = 0; + uint16_t size = 0; + uint8_t handshake_type = 0; - now = av_gettime(); - if (now - starttime >= whip->handshake_timeout * 1000) { - av_log(whip, AV_LOG_ERROR, "WHIP: DTLS handshake timeout=%dms, cost=%dms, elapsed=%dms, state=%d\n", - whip->handshake_timeout, ELAPSED(starttime, now), ELAPSED(whip->whip_starttime, now), whip->state); - ret = AVERROR(ETIMEDOUT); - goto end; - } + /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + if (length >= 1) + content_type = AV_RB8(&data[0]); + if (length >= 13) + size = AV_RB16(&data[11]); + if (length >= 14) + handshake_type = AV_RB8(&data[13]); - /* Read the STUN or DTLS messages from peer. 
*/ - for (i = 0; i < ICE_DTLS_READ_INTERVAL / 5; i++) { - ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); - if (ret > 0) - break; - if (ret == AVERROR(EAGAIN)) { - av_usleep(5 * 1000); - continue; - } - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read message\n"); - goto end; - } + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Trace %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", + (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, + content_type, size, handshake_type); +} - /* Got nothing, continue to process handshake. */ - if (ret <= 0) - continue; +/** + * Always return 1 to accept any certificate. This is because we allow the peer to + * use a temporary self-signed certificate for DTLS. + */ +static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ + return 1; +} - /* Handle the ICE binding response. */ - if (ice_is_binding_response(whip->buf, ret)) { - if (whip->state < WHIP_STATE_ICE_CONNECTED) { - whip->state = WHIP_STATE_ICE_CONNECTED; - whip->whip_ice_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", - whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", - whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); +/** + * DTLS BIO read callback. + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x +static long openssl_dtls_bio_out_callback(BIO* b, int oper, const char* argp, int argi, long argl, long retvalue) +#else +static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, size_t len, int argi, long argl, int retvalue, size_t *processed) +#endif +{ + int ret, req_size = argi, is_arq = 0; + uint8_t content_type, handshake_type; + uint8_t *data = (uint8_t*)argp; + DTLSContext* ctx = b ? 
(DTLSContext*)BIO_get_callback_arg(b) : NULL; - /* If got the first binding response, start DTLS handshake. */ - if ((ret = dtls_context_start(&whip->dtls_ctx)) < 0) - goto end; - } - goto next_packet; - } +#if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x + req_size = len; + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", + b, oper, argp, len, argi, argl, retvalue, processed, req_size); +#else + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", + b, oper, argp, argi, argl, retvalue, req_size); +#endif - /* When a binding request is received, it is necessary to respond immediately. */ - if (ice_is_binding_request(whip->buf, ret)) { - if ((ret = ice_handle_binding_request(s, whip->buf, ret)) < 0) - goto end; - goto next_packet; - } + if (oper != BIO_CB_WRITE || !argp || req_size <= 0) + return retvalue; - /* If got any DTLS messages, handle it. */ - if (is_dtls_packet(whip->buf, ret) && whip->state >= WHIP_STATE_ICE_CONNECTED) { - if ((ret = dtls_context_write(&whip->dtls_ctx, whip->buf, ret)) < 0) - goto end; - goto next_packet; - } + openssl_dtls_state_trace(ctx, data, req_size, 0); + ret = ctx->on_write ? ctx->on_write(ctx, ctx->opaque, data, req_size) : 0; + content_type = req_size > 0 ? AV_RB8(&data[0]) : 0; + handshake_type = req_size > 13 ? 
AV_RB8(&data[13]) : 0; + + is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; + ctx->dtls_arq_packets += is_arq; + ctx->dtls_last_content_type = content_type; + ctx->dtls_last_handshake_type = handshake_type; + + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", + oper, content_type, handshake_type, req_size, is_arq); + return ret; } -end: - return ret; + return retvalue; } -/** - * Establish the SRTP context using the keying material exported from DTLS. - * - * Create separate SRTP contexts for sending video and audio, as their sequences differ - * and should not share a single context. Generate a single SRTP context for receiving - * RTCP only. - * - * @return 0 if OK, AVERROR_xxx on error - */ -static int setup_srtp(AVFormatContext *s) +static int openssl_read_certificate(AVFormatContext *s, DTLSContext *ctx) { - int ret; - char recv_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; - char send_key[DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN]; - char buf[AV_BASE64_SIZE(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN)]; - /** - * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. - * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. - */ - const char* suite = "SRTP_AES128_CM_HMAC_SHA1_80"; - WHIPContext *whip = s->priv_data; - - /** - * This represents the material used to build the SRTP master key. It is - * generated by DTLS and has the following layout: - * 16B 16B 14B 14B - * client_key | server_key | client_salt | server_salt - */ - char *client_key = whip->dtls_ctx.dtls_srtp_materials; - char *server_key = whip->dtls_ctx.dtls_srtp_materials + DTLS_SRTP_KEY_LEN; - char *client_salt = server_key + DTLS_SRTP_KEY_LEN; - char *server_salt = client_salt + DTLS_SRTP_SALT_LEN; - - /* As DTLS server, the recv key is client master key plus salt. 
*/ - memcpy(recv_key, client_key, DTLS_SRTP_KEY_LEN); - memcpy(recv_key + DTLS_SRTP_KEY_LEN, client_salt, DTLS_SRTP_SALT_LEN); + int ret = 0; + BIO *key_b = NULL, *cert_b = NULL; + AVBPrint key_bp, cert_bp; - /* As DTLS server, the send key is server master key plus salt. */ - memcpy(send_key, server_key, DTLS_SRTP_KEY_LEN); - memcpy(send_key + DTLS_SRTP_KEY_LEN, server_salt, DTLS_SRTP_SALT_LEN); + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&key_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); + av_bprint_init(&cert_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); - /* Setup SRTP context for outgoing packets */ - if (!av_base64_encode(buf, sizeof(buf), send_key, sizeof(send_key))) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode send key\n"); - ret = AVERROR(EIO); + /* Read key file. */ + ret = dtls_url_read_all(s, ctx->key_file, &key_bp); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); goto end; } - ret = ff_srtp_set_crypto(&whip->srtp_audio_send, suite, buf); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for audio send\n"); + if ((key_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); goto end; } - ret = ff_srtp_set_crypto(&whip->srtp_video_send, suite, buf); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for video send\n"); + BIO_write(key_b, key_bp.str, key_bp.len); + ctx->dtls_pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); + if (!ctx->dtls_pkey) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); + ret = AVERROR(EIO); goto end; } - ret = ff_srtp_set_crypto(&whip->srtp_rtcp_send, suite, buf); + /* Read certificate. 
*/ + ret = dtls_url_read_all(s, ctx->cert_file, &cert_bp); if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "Failed to set crypto for rtcp send\n"); + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open cert file %s\n", ctx->cert_file); goto end; } - /* Setup SRTP context for incoming packets */ - if (!av_base64_encode(buf, sizeof(buf), recv_key, sizeof(recv_key))) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to encode recv key\n"); - ret = AVERROR(EIO); + if ((cert_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); goto end; } - ret = ff_srtp_set_crypto(&whip->srtp_recv, suite, buf); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to set crypto for recv\n"); + BIO_write(cert_b, cert_bp.str, cert_bp.len); + ctx->dtls_cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); + if (!ctx->dtls_cert) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); + ret = AVERROR(EIO); goto end; } - if (whip->state < WHIP_STATE_SRTP_FINISHED) - whip->state = WHIP_STATE_SRTP_FINISHED; - whip->whip_srtp_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: SRTP setup done, state=%d, suite=%s, key=%luB, elapsed=%dms\n", - whip->state, suite, sizeof(send_key), ELAPSED(whip->whip_starttime, av_gettime())); - end: + BIO_free(key_b); + av_bprint_finalize(&key_bp, NULL); + BIO_free(cert_b); + av_bprint_finalize(&cert_bp, NULL); return ret; } -/** - * Callback triggered by the RTP muxer when it creates and sends out an RTP packet. - * - * This function modifies the video STAP packet, removing the markers, and updating the - * NRI of the first NALU. Additionally, it uses the corresponding SRTP context to encrypt - * the RTP packet, where the video packet is handled by the video SRTP context. 
- */ -static int on_rtp_write_packet(void *opaque, uint8_t *buf, int buf_size) +static int openssl_dtls_gen_private_key(DTLSContext *ctx) { - int ret, cipher_size, is_rtcp, is_video; - uint8_t payload_type; - AVFormatContext *s = opaque; - WHIPContext *whip = s->priv_data; - SRTPContext *srtp; + int ret = 0; - /* Ignore if not RTP or RTCP packet. */ - if (!media_is_rtp_rtcp(buf, buf_size)) - return 0; + /** + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string. + * + * TODO: Should choose the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP *ecgroup = NULL; + int curve = NID_X9_62_prime256v1; +#else + const char *curve = SN_X9_62_prime256v1; +#endif - /* Only support audio, video and rtcp. */ - is_rtcp = media_is_rtcp(buf, buf_size); - payload_type = buf[1] & 0x7f; - is_video = payload_type == whip->video_payload_type; - if (!is_rtcp && payload_type != whip->video_payload_type && payload_type != whip->audio_payload_type) - return 0; +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + ctx->dtls_pkey = EVP_PKEY_new(); + ctx->dtls_eckey = EC_KEY_new(); + ecgroup = EC_GROUP_new_by_curve_name(curve); + if (!ecgroup) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Create EC group by curve=%d failed, %s", curve, openssl_get_error(ctx)); + goto einval_end; + } - /* Get the corresponding SRTP context. */ - srtp = is_rtcp ? &whip->srtp_rtcp_send : (is_video? &whip->srtp_video_send : &whip->srtp_audio_send); +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x + /* For openssl 1.0, we must set the group parameters, so that cert is ok. */ + EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); +#endif - /* Encrypt by SRTP and send out. 
*/ - cipher_size = ff_srtp_encrypt(srtp, buf, buf_size, whip->buf, sizeof(whip->buf)); - if (cipher_size <= 0 || cipher_size < buf_size) { - av_log(whip, AV_LOG_WARNING, "WHIP: Failed to encrypt packet=%dB, cipher=%dB\n", buf_size, cipher_size); - return 0; + if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_set_group failed, %s\n", openssl_get_error(ctx)); + goto einval_end; } - ret = ffurl_write(whip->udp_uc, whip->buf, cipher_size); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet=%dB, ret=%d\n", cipher_size, ret); - return ret; + if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_generate_key failed, %s\n", openssl_get_error(ctx)); + goto einval_end; } + if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", openssl_get_error(ctx)); + goto einval_end; + } +#else + ctx->dtls_pkey = EVP_EC_gen(curve); + if (!ctx->dtls_pkey) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, openssl_get_error(ctx)); + goto einval_end; + } +#endif + goto end; + +einval_end: + ret = AVERROR(EINVAL); +end: +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP_free(ecgroup); +#endif return ret; } -/** - * Creates dedicated RTP muxers for each stream in the AVFormatContext to build RTP - * packets from the encoded frames. - * - * The corresponding SRTP context is utilized to encrypt each stream's RTP packets. For - * example, a video SRTP context is used for the video stream. Additionally, the - * "on_rtp_write_packet" callback function is set as the write function for each RTP - * muxer to send out encrypted RTP packets. 
- * - * @return 0 if OK, AVERROR_xxx on error - */ -static int create_rtp_muxer(AVFormatContext *s) +static int openssl_dtls_gen_certificate(DTLSContext *ctx) { - int ret, i, is_video, buffer_size, max_packet_size; - AVFormatContext *rtp_ctx = NULL; - AVDictionary *opts = NULL; - uint8_t *buffer = NULL; - char buf[64]; - WHIPContext *whip = s->priv_data; + int ret = 0, serial, expire_day, i, n = 0; + AVBPrint fingerprint; + unsigned char md[EVP_MAX_MD_SIZE]; + const char *aor = "lavf"; + X509_NAME* subject = NULL; + X509 *dtls_cert = NULL; - const AVOutputFormat *rtp_format = av_guess_format("rtp", NULL, NULL); - if (!rtp_format) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to guess rtp muxer\n"); - ret = AVERROR(ENOSYS); - goto end; - } + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); - /* The UDP buffer size, may greater than MTU. */ - buffer_size = MAX_UDP_BUFFER_SIZE; - /* The RTP payload max size. Reserved some bytes for SRTP checksum and padding. */ - max_packet_size = whip->pkt_size - DTLS_SRTP_CHECKSUM_LEN; + dtls_cert = ctx->dtls_cert = X509_new(); + if (!dtls_cert) { + goto enomem_end; + } - for (i = 0; i < s->nb_streams; i++) { - rtp_ctx = avformat_alloc_context(); - if (!rtp_ctx) { - ret = AVERROR(ENOMEM); - goto end; - } + // TODO: Support non-self-signed certificate, for example, load from a file. + subject = X509_NAME_new(); + if (!subject) { + goto enomem_end; + } - rtp_ctx->oformat = rtp_format; - if (!avformat_new_stream(rtp_ctx, NULL)) { - ret = AVERROR(ENOMEM); - goto end; - } - /* Pass the interrupt callback on */ - rtp_ctx->interrupt_callback = s->interrupt_callback; - /* Copy the max delay setting; the rtp muxer reads this. */ - rtp_ctx->max_delay = s->max_delay; - /* Copy other stream parameters. 
*/ - rtp_ctx->streams[0]->sample_aspect_ratio = s->streams[i]->sample_aspect_ratio; - rtp_ctx->flags |= s->flags & AVFMT_FLAG_BITEXACT; - rtp_ctx->strict_std_compliance = s->strict_std_compliance; + serial = (int)av_get_random_seed(); + if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set serial, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - /* Set the synchronized start time. */ - rtp_ctx->start_time_realtime = s->start_time_realtime; + if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set CN, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - avcodec_parameters_copy(rtp_ctx->streams[0]->codecpar, s->streams[i]->codecpar); - rtp_ctx->streams[0]->time_base = s->streams[i]->time_base; + if (X509_set_issuer_name(dtls_cert, subject) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set issuer, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + if (X509_set_subject_name(dtls_cert, subject) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set subject name, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - /** - * For H.264, consistently utilize the annexb format through the Bitstream Filter (BSF); - * therefore, we deactivate the extradata detection for the RTP muxer. 
- */ - if (s->streams[i]->codecpar->codec_id == AV_CODEC_ID_H264) { - av_freep(&rtp_ctx->streams[i]->codecpar->extradata); - rtp_ctx->streams[i]->codecpar->extradata_size = 0; - } + expire_day = 365; + if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notBefore, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notAfter, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - buffer = av_malloc(buffer_size); - if (!buffer) { - ret = AVERROR(ENOMEM); - goto end; - } + if (X509_set_version(dtls_cert, 2) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set version, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - rtp_ctx->pb = avio_alloc_context(buffer, buffer_size, 1, s, NULL, on_rtp_write_packet, NULL); - if (!rtp_ctx->pb) { - ret = AVERROR(ENOMEM); - goto end; - } - rtp_ctx->pb->max_packet_size = max_packet_size; - rtp_ctx->pb->av_class = &ff_avio_class; + if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set public key, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - is_video = s->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO; - snprintf(buf, sizeof(buf), "%d", is_video? whip->video_payload_type : whip->audio_payload_type); - av_dict_set(&opts, "payload_type", buf, 0); - snprintf(buf, sizeof(buf), "%d", is_video? whip->video_ssrc : whip->audio_ssrc); - av_dict_set(&opts, "ssrc", buf, 0); + if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to sign certificate, %s\n", openssl_get_error(ctx)); + goto einval_end; + } - ret = avformat_write_header(rtp_ctx, &opts); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write rtp header\n"); - goto end; - } + /* Generate the fingerpint of certficate. 
*/ + if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate fingerprint, %s\n", openssl_get_error(ctx)); + goto eio_end; + } + for (i = 0; i < n; i++) { + av_bprintf(&fingerprint, "%02X", md[i]); + if (i < n - 1) + av_bprintf(&fingerprint, ":"); + } + if (!fingerprint.str || !strlen(fingerprint.str)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Fingerprint is empty\n"); + goto einval_end; + } - ff_format_set_url(rtp_ctx, av_strdup(s->url)); - s->streams[i]->time_base = rtp_ctx->streams[0]->time_base; - s->streams[i]->priv_data = rtp_ctx; - rtp_ctx = NULL; + ctx->dtls_fingerprint = av_strdup(fingerprint.str); + if (!ctx->dtls_fingerprint) { + goto enomem_end; } - if (whip->state < WHIP_STATE_READY) - whip->state = WHIP_STATE_READY; - av_log(whip, AV_LOG_INFO, "WHIP: Muxer state=%d, buffer_size=%d, max_packet_size=%d, " - "elapsed=%dms(init:%d,offer:%d,answer:%d,udp:%d,ice:%d,dtls:%d,srtp:%d)\n", - whip->state, buffer_size, max_packet_size, ELAPSED(whip->whip_starttime, av_gettime()), - ELAPSED(whip->whip_starttime, whip->whip_init_time), - ELAPSED(whip->whip_init_time, whip->whip_offer_time), - ELAPSED(whip->whip_offer_time, whip->whip_answer_time), - ELAPSED(whip->whip_answer_time, whip->whip_udp_time), - ELAPSED(whip->whip_udp_time, whip->whip_ice_time), - ELAPSED(whip->whip_ice_time, whip->whip_dtls_time), - ELAPSED(whip->whip_dtls_time, whip->whip_srtp_time)); - + goto end; +enomem_end: + ret = AVERROR(ENOMEM); + goto end; +eio_end: + ret = AVERROR(EIO); + goto end; +einval_end: + ret = AVERROR(EINVAL); end: - if (rtp_ctx) - avio_context_free(&rtp_ctx->pb); - avformat_free_context(rtp_ctx); - av_dict_free(&opts); + X509_NAME_free(subject); + av_bprint_finalize(&fingerprint, NULL); return ret; } /** - * RTC is connectionless, for it's based on UDP, so it check whether sesison is - * timeout. In such case, publishers can't republish the stream util the session - * is timeout. 
- * This function is called to notify the server that the stream is ended, server - * should expire and close the session immediately, so that publishers can republish - * the stream quickly. + * Initializes DTLS context using ECDHE. */ -static int dispose_session(AVFormatContext *s) +static av_cold int openssl_dtls_init_context(DTLSContext *ctx) { - int ret; - char buf[MAX_URL_SIZE]; - URLContext *whip_uc = NULL; - AVDictionary *opts = NULL; - WHIPContext *whip = s->priv_data; + int ret = 0; + EVP_PKEY *dtls_pkey = ctx->dtls_pkey; + X509 *dtls_cert = ctx->dtls_cert; + SSL_CTX *dtls_ctx = NULL; + SSL *dtls = NULL; + BIO *bio_in = NULL, *bio_out = NULL; + const char* ciphers = "ALL"; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* profiles = "SRTP_AES128_CM_SHA1_80"; - if (!whip->whip_resource_url) - return 0; + /* Refer to the test cases regarding these curves in the WebRTC code. */ +#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ + const char* curves = "X25519:P-256:P-384:P-521"; +#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + const char* curves = "P-256:P-384:P-521"; +#endif - ret = snprintf(buf, sizeof(buf), "Cache-Control: no-cache\r\n"); - if (whip->authorization) - ret += snprintf(buf + ret, sizeof(buf) - ret, "Authorization: Bearer %s\r\n", whip->authorization); - if (ret <= 0 || ret >= sizeof(buf)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to generate headers, size=%d, %s\n", ret, buf); +#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); +#else + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); +#endif + if (!dtls_ctx) { + ret = AVERROR(ENOMEM); + goto end; + } + +#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + /* For ECDSA, we could set the curves list. 
*/ + if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", + curves, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x + #if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 + if (ctx->dtls_eckey) + SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); +#else + SSL_CTX_set_ecdh_auto(dtls_ctx, 1); +#endif +#endif + + /** + * We activate "ALL" cipher suites to align with the peer's capabilities, + * ensuring maximum compatibility. + */ + if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", + ciphers, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + /* Setup the certificate. */ + if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); ret = AVERROR(EINVAL); + return ret; + } + + /* Server will send Certificate Request. */ + SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); + /* The depth count is "level 0:peer certificate", "level 1: CA certificate", + * "level 2: higher level CA certificate", and so on. */ + SSL_CTX_set_verify_depth(dtls_ctx, 4); + /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. 
*/ + SSL_CTX_set_read_ahead(dtls_ctx, 1); + /* Setup the SRTP context */ + if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", + profiles, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + + /* The dtls should not be created unless the dtls_ctx has been initialized. */ + dtls = ctx->dtls = SSL_new(dtls_ctx); + if (!dtls) { + ret = AVERROR(ENOMEM); goto end; } - av_dict_set(&opts, "headers", buf, 0); - av_dict_set_int(&opts, "chunked_post", 0, 0); - av_dict_set(&opts, "method", "DELETE", 0); - ret = ffurl_open_whitelist(&whip_uc, whip->whip_resource_url, AVIO_FLAG_READ_WRITE, &s->interrupt_callback, - &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to DELETE url=%s\n", whip->whip_resource_url); + /* Setup the callback for logging. */ + SSL_set_ex_data(dtls, 0, ctx); + SSL_set_info_callback(dtls, openssl_dtls_on_info); + + /** + * We have set the MTU to fragment the DTLS packet. It is important to note that the + * packet is split to ensure that each handshake packet is smaller than the MTU. 
+ */ + SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); + SSL_set_mtu(dtls, ctx->mtu); +#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ + DTLS_set_link_mtu(dtls, ctx->mtu); +#endif + + bio_in = BIO_new(BIO_s_mem()); + if (!bio_in) { + ret = AVERROR(ENOMEM); goto end; } - while (1) { - ret = ffurl_read(whip_uc, buf, sizeof(buf)); - if (ret == AVERROR_EOF) { - ret = 0; - break; - } - if (ret < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read response from DELETE url=%s\n", whip->whip_resource_url); - goto end; - } + bio_out = BIO_new(BIO_s_mem()); + if (!bio_out) { + ret = AVERROR(ENOMEM); + goto end; } - av_log(whip, AV_LOG_INFO, "WHIP: Dispose resource %s ok\n", whip->whip_resource_url); + /** + * Please be aware that it is necessary to use a callback to obtain the packet to be written out. It is + * imperative that BIO_get_mem_data is not used to retrieve the packet, as it returns all the bytes that + * need to be sent out. + * For example, if MTU is set to 1200, and we got two DTLS packets to sendout: + * ServerHello, 95bytes. + * Certificate, 1105+143=1248bytes. + * If use BIO_get_mem_data, it will return 95+1248=1343bytes, which is larger than MTU 1200. + * If use callback, it will return two UDP packets: + * ServerHello+Certificate(Frament) = 95+1105=1200bytes. + * Certificate(Fragment) = 143bytes. + * Note that there should be more packets in real world, like ServerKeyExchange, CertificateRequest, + * and ServerHelloDone. Here we just use two packets for example. + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x + BIO_set_callback(bio_out, openssl_dtls_bio_out_callback); +#else + BIO_set_callback_ex(bio_out, openssl_dtls_bio_out_callback_ex); +#endif + BIO_set_callback_arg(bio_out, (char*)ctx); + + ctx->bio_in = bio_in; + SSL_set_bio(dtls, bio_in, bio_out); + /* Now the bio_in and bio_out are owned by dtls, so we should set them to NULL. 
*/ + bio_in = bio_out = NULL; end: - ffurl_closep(&whip_uc); - av_dict_free(&opts); + BIO_free(bio_in); + BIO_free(bio_out); return ret; } +static void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file) +{ /* Allocates a zeroed DTLSContext, stores the callbacks, opaque pointer and MTU, and copies the optional cert/key paths. Returns NULL if the context cannot be allocated. */ + DTLSContext *ctx = av_mallocz(sizeof(DTLSContext)); + if (!ctx) return NULL; /* Allocation failed: report it instead of dereferencing ctx below. */ + ctx->av_class = av_class; + ctx->mtu = pkt_size; + ctx->opaque = opaque; + ctx->on_state = on_state; + ctx->on_write = on_write; + if (cert_file) + ctx->cert_file = av_strdup(cert_file); + if (key_file) + ctx->key_file = av_strdup(key_file); + + return ctx; +} + /** - * Since the h264_mp4toannexb filter only processes the MP4 ISOM format and bypasses - * the annexb format, it is necessary to manually insert encoder metadata before each - * IDR when dealing with annexb format packets. For instance, in the case of H.264, - * we must insert SPS and PPS before the IDR frame. + * Generate a self-signed certificate and private key for DTLS. Please note that the + * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore, + * there is no need to call it again. */ -static int h264_annexb_insert_sps_pps(AVFormatContext *s, AVPacket *pkt) +static av_cold int dtls_context_init(AVFormatContext *s, void *pctx) { - int ret = 0; - AVPacket *in = NULL; - AVCodecParameters *par = s->streams[pkt->stream_index]->codecpar; - uint32_t nal_size = 0, out_size = par ? par->extradata_size : 0; - uint8_t unit_type, sps_seen = 0, pps_seen = 0, idr_seen = 0, *out; - const uint8_t *buf, *buf_end, *r1; - - if (!pkt || !pkt->data || pkt->size <= 0) - return ret; - if (!par || !par->extradata || par->extradata_size <= 0) - return ret; - - /* Discover NALU type from packet. 
*/ - buf_end = pkt->data + pkt->size; - for (buf = ff_avc_find_startcode(pkt->data, buf_end); buf < buf_end; buf += nal_size) { - while (!*(buf++)); - r1 = ff_avc_find_startcode(buf, buf_end); - if ((nal_size = r1 - buf) > 0) { - unit_type = *buf & 0x1f; - if (unit_type == H264_NAL_SPS) { - sps_seen = 1; - } else if (unit_type == H264_NAL_PPS) { - pps_seen = 1; - } else if (unit_type == H264_NAL_IDR_SLICE) { - idr_seen = 1; - } - - out_size += 3 + nal_size; - } - } - - if (!idr_seen || (sps_seen && pps_seen)) - return ret; - - /* See av_bsf_send_packet */ - in = av_packet_alloc(); - if (!in) - return AVERROR(ENOMEM); - - ret = av_packet_make_refcounted(pkt); - if (ret < 0) - goto fail; + DTLSContext *ctx = pctx; - av_packet_move_ref(in, pkt); + int ret = 0; - /* Create a new packet with sps/pps inserted. */ - ret = av_new_packet(pkt, out_size); - if (ret < 0) - goto fail; + ctx->dtls_init_starttime = av_gettime(); - ret = av_packet_copy_props(pkt, in); - if (ret < 0) - goto fail; + if (ctx->cert_file && ctx->key_file) { + /* Read the private key and file from the file. */ + if ((ret = openssl_read_certificate(s, ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", + ctx->cert_file, ctx->key_file); + return ret; + } + } else { + /* Generate a private key to ctx->dtls_pkey. */ + if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); + return ret; + } - memcpy(pkt->data, par->extradata, par->extradata_size); - out = pkt->data + par->extradata_size; - buf_end = in->data + in->size; - for (buf = ff_avc_find_startcode(in->data, buf_end); buf < buf_end; buf += nal_size) { - while (!*(buf++)); - r1 = ff_avc_find_startcode(buf, buf_end); - if ((nal_size = r1 - buf) > 0) { - AV_WB24(out, 0x00001); - memcpy(out + 3, buf, nal_size); - out += 3 + nal_size; + /* Generate a self-signed certificate. 
*/ + if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); + return ret; } } -fail: - if (ret < 0) - av_packet_unref(pkt); - av_packet_free(&in); + if ((ret = openssl_dtls_init_context(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to initialize DTLS context\n"); + return ret; + } + + ctx->dtls_init_endtime = av_gettime(); + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Setup ok, MTU=%d, fingerprint %s\n", + ctx->mtu, ctx->dtls_fingerprint); return ret; } -static av_cold int whip_init(AVFormatContext *s) +/** + * Once the DTLS role has been negotiated - active for the DTLS client or passive for the + * DTLS server - we proceed to set up the DTLS state and initiate the handshake. + */ +static int dtls_context_start(void *pctx) { - int ret; - WHIPContext *whip = s->priv_data; - - if ((ret = initialize(s)) < 0) - goto end; - - if ((ret = parse_codec(s)) < 0) - goto end; - - if ((ret = generate_sdp_offer(s)) < 0) - goto end; - - if ((ret = exchange_sdp(s)) < 0) - goto end; - - if ((ret = parse_answer(s)) < 0) - goto end; + DTLSContext *ctx = pctx; - if ((ret = udp_connect(s)) < 0) - goto end; + int ret = 0, r0, r1; + SSL *dtls = ctx->dtls; - if ((ret = ice_dtls_handshake(s)) < 0) - goto end; + ctx->dtls_handshake_starttime = av_gettime(); - if ((ret = setup_srtp(s)) < 0) - goto end; + /* Setup DTLS as passive, which is server role. */ + SSL_set_accept_state(dtls); - if ((ret = create_rtp_muxer(s)) < 0) - goto end; + /** + * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes + * the handshake message if the handshake is incomplete. + * To simplify maintenance, we initiate the handshake for both the DTLS server and client after + * sending out the ICE response in the start_active_handshake function. 
It's worth noting that + * although the DTLS server may receive the ClientHello immediately after sending out the ICE + * response, this shouldn't be an issue as the handshake function is called before any DTLS + * packets are received. + */ + r0 = SSL_do_handshake(dtls); + r1 = openssl_ssl_get_error(ctx, r0); + // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. + if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); + return AVERROR(EIO); + } -end: - if (ret < 0 && whip->state < WHIP_STATE_FAILED) - whip->state = WHIP_STATE_FAILED; - if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) - ret = whip->dtls_ret; return ret; } -static int whip_write_packet(AVFormatContext *s, AVPacket *pkt) +/** + * DTLS handshake with server, as a server in passive mode, using openssl. + * + * This function initializes the SSL context as the client role using OpenSSL and + * then performs the DTLS handshake until success. Upon successful completion, it + * exports the SRTP material key. + * + * @return 0 if OK, AVERROR_xxx on error + */ +static int dtls_context_write(void *pctx, char* buf, int size) { - int ret; - WHIPContext *whip = s->priv_data; - AVStream *st = s->streams[pkt->stream_index]; - AVFormatContext *rtp_ctx = st->priv_data; + DTLSContext *ctx = pctx; - /* TODO: Send binding request every 1s as WebRTC heartbeat. */ + int ret = 0, res_ct, res_ht, r0, r1, do_callback; + SSL *dtls = ctx->dtls; + const char* dst = "EXTRACTOR-dtls_srtp"; + BIO *bio_in = ctx->bio_in; - /** - * Receive packets from the server such as ICE binding requests, DTLS messages, - * and RTCP like PLI requests, then respond to them. 
- */ - ret = ffurl_read(whip->udp_uc, whip->buf, sizeof(whip->buf)); - if (ret > 0) { - if (is_dtls_packet(whip->buf, ret)) { - if ((ret = dtls_context_write(&whip->dtls_ctx, whip->buf, ret)) < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to handle DTLS message\n"); - goto end; - } - } - } else if (ret != AVERROR(EAGAIN)) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to read from UDP socket\n"); + /* Got DTLS response successfully. */ + openssl_dtls_state_trace(ctx, buf, size, 1); + if ((r0 = BIO_write(bio_in, buf, size)) <= 0) { + res_ct = size > 0 ? buf[0]: 0; + res_ht = size > 13 ? buf[13] : 0; + av_log(ctx, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", + res_ct, res_ht, size, r0); + ret = AVERROR(EIO); goto end; } - if (whip->h264_annexb_insert_sps_pps && st->codecpar->codec_id == AV_CODEC_ID_H264) { - if ((ret = h264_annexb_insert_sps_pps(s, pkt)) < 0) { - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to insert SPS/PPS before IDR\n"); + /** + * If there is data available in bio_in, use SSL_read to allow SSL to process it. + * We limit the MTU to 1200 for DTLS handshake, which ensures that the buffer is large enough for reading. buf is a pointer parameter here, so sizeof(buf) would only be the pointer size; the read is bounded by size, the byte count the caller handed in. + */ + r0 = SSL_read(dtls, buf, size); + r1 = openssl_ssl_get_error(ctx, r0); + if (r0 <= 0) { + if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); + ret = AVERROR(EIO); goto end; } + } else { + av_log(ctx, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); } - ret = ff_write_chained(rtp_ctx, 0, pkt, s, 0); - if (ret < 0) { - if (ret == AVERROR(EINVAL)) { - av_log(whip, AV_LOG_WARNING, "WHIP: Ignore failed to write packet=%dB, ret=%d\n", pkt->size, ret); - ret = 0; - } else - av_log(whip, AV_LOG_ERROR, "WHIP: Failed to write packet, size=%d\n", pkt->size); + /* Check whether the DTLS is completed. 
*/ + if (SSL_is_init_finished(dtls) != 1) goto end; - } - -end: - if (ret < 0 && whip->state < WHIP_STATE_FAILED) - whip->state = WHIP_STATE_FAILED; - if (ret >= 0 && whip->state >= WHIP_STATE_FAILED && whip->dtls_ret < 0) - ret = whip->dtls_ret; - if (ret >= 0 && whip->dtls_closed) - ret = AVERROR(EIO); - return ret; -} - -static av_cold void whip_deinit(AVFormatContext *s) -{ - int i, ret; - WHIPContext *whip = s->priv_data; - ret = dispose_session(s); - if (ret < 0) - av_log(whip, AV_LOG_WARNING, "WHIP: Failed to dispose resource, ret=%d\n", ret); + do_callback = ctx->on_state && !ctx->dtls_done_for_us; + ctx->dtls_done_for_us = 1; + ctx->dtls_handshake_endtime = av_gettime(); - for (i = 0; i < s->nb_streams; i++) { - AVFormatContext* rtp_ctx = s->streams[i]->priv_data; - if (!rtp_ctx) - continue; + /* Export SRTP master key after DTLS done */ + if (!ctx->dtls_srtp_key_exported) { + ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_materials, sizeof(ctx->dtls_srtp_materials), + dst, strlen(dst), NULL, 0, 0); + r1 = openssl_ssl_get_error(ctx, ret); /* Inspect this call's result, not the stale r0 left over from SSL_read. */ + if (ret != 1) { /* Only 1 means success; both 0 and -1 are failures. */ + av_log(ctx, AV_LOG_ERROR, "DTLS: SSL export key ret=%d, r1=%d %s\n", ret, r1, ctx->error_message); + ret = AVERROR(EIO); + goto end; + } - av_write_trailer(rtp_ctx); - /** - * Keep in mind that it is necessary to free the buffer of pb since we allocate - * it and pass it to pb using avio_alloc_context, while avio_context_free does - * not perform this action. 
- */ - av_freep(&rtp_ctx->pb->buffer); - avio_context_free(&rtp_ctx->pb); - avformat_free_context(rtp_ctx); - s->streams[i]->priv_data = NULL; + ctx->dtls_srtp_key_exported = 1; } - av_freep(&whip->sdp_offer); - av_freep(&whip->sdp_answer); - av_freep(&whip->whip_resource_url); - av_freep(&whip->ice_ufrag_remote); - av_freep(&whip->ice_pwd_remote); - av_freep(&whip->ice_protocol); - av_freep(&whip->ice_host); - av_freep(&whip->authorization); - av_freep(&whip->cert_file); - av_freep(&whip->key_file); - ffurl_closep(&whip->udp_uc); - ff_srtp_free(&whip->srtp_audio_send); - ff_srtp_free(&whip->srtp_video_send); - ff_srtp_free(&whip->srtp_rtcp_send); - ff_srtp_free(&whip->srtp_recv); - dtls_context_deinit(&whip->dtls_ctx); + if (do_callback && (ret = ctx->on_state(ctx, ctx->opaque, DTLS_STATE_FINISHED, NULL, NULL)) < 0) + goto end; + +end: + return ret; } -static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket *pkt) +/** + * Cleanup the DTLS context. + */ +static av_cold void dtls_context_deinit(void *pctx) { - int ret = 1, extradata_isom = 0; - uint8_t *b = pkt->data; - WHIPContext *whip = s->priv_data; - - if (st->codecpar->codec_id == AV_CODEC_ID_H264) { - extradata_isom = st->codecpar->extradata_size > 0 && st->codecpar->extradata[0] == 1; - if (pkt->size >= 5 && AV_RB32(b) != 0x0000001 && (AV_RB24(b) != 0x000001 || extradata_isom)) { - ret = ff_stream_add_bitstream_filter(st, "h264_mp4toannexb", NULL); - av_log(whip, AV_LOG_VERBOSE, "WHIP: Enable BSF h264_mp4toannexb, packet=[%x %x %x %x %x ...], extradata_isom=%d\n", - b[0], b[1], b[2], b[3], b[4], extradata_isom); - } else - whip->h264_annexb_insert_sps_pps = 1; - } + DTLSContext *ctx = pctx; - return ret; + SSL_free(ctx->dtls); + SSL_CTX_free(ctx->dtls_ctx); + X509_free(ctx->dtls_cert); + EVP_PKEY_free(ctx->dtls_pkey); + av_freep(&ctx->dtls_fingerprint); + av_freep(&ctx->cert_file); + av_freep(&ctx->key_file); +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + 
EC_KEY_free(ctx->dtls_eckey); +#endif } +#endif #define OFFSET(x) offsetof(WHIPContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM From 172d34b47accb51608ec664189ae40e9d3272592 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 17 Oct 2023 11:01:36 +0800 Subject: [PATCH 58/60] Sync with FFmpeg master 5ddab49d48 --- libavformat/whip.c | 1 + 1 file changed, 1 insertion(+) diff --git a/libavformat/whip.c b/libavformat/whip.c index d76bda48ec296..416c486105c09 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -23,6 +23,7 @@ #include #include "libavcodec/avcodec.h" +#include "libavcodec/codec_desc.h" #include "libavcodec/h264.h" #include "libavcodec/startcode.h" #include "libavutil/base64.h" From d3561e6113aba1dff6e4a8e4d9658885d46e9214 Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 17 Oct 2023 11:23:13 +0800 Subject: [PATCH 59/60] Extract DTLS to dtls.c --- libavformat/Makefile | 2 +- libavformat/dtls.c | 954 ++++++++++++++++++++++++++++++++++++++++++ libavformat/dtls.h | 62 +++ libavformat/whip.c | 967 +------------------------------------------ 4 files changed, 1018 insertions(+), 967 deletions(-) create mode 100644 libavformat/dtls.c create mode 100644 libavformat/dtls.h diff --git a/libavformat/Makefile b/libavformat/Makefile index dd429944234eb..c97f861cd786e 100644 --- a/libavformat/Makefile +++ b/libavformat/Makefile @@ -621,7 +621,7 @@ OBJS-$(CONFIG_WEBM_CHUNK_MUXER) += webm_chunk.o OBJS-$(CONFIG_WEBP_MUXER) += webpenc.o OBJS-$(CONFIG_WEBVTT_DEMUXER) += webvttdec.o subtitles.o OBJS-$(CONFIG_WEBVTT_MUXER) += webvttenc.o -OBJS-$(CONFIG_WHIP_MUXER) += whip.o avc.o http.o srtp.o +OBJS-$(CONFIG_WHIP_MUXER) += whip.o dtls.o avc.o http.o srtp.o OBJS-$(CONFIG_WSAUD_DEMUXER) += westwood_aud.o OBJS-$(CONFIG_WSAUD_MUXER) += westwood_audenc.o OBJS-$(CONFIG_WSD_DEMUXER) += wsddec.o rawdec.o diff --git a/libavformat/dtls.c b/libavformat/dtls.c new file mode 100644 index 0000000000000..777110697855e --- /dev/null +++ b/libavformat/dtls.c @@ -0,0 +1,954 @@ +/* + * 
WebRTC-HTTP ingestion protocol (WHIP) muxer + * Copyright (c) 2023 The FFmpeg Project + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include + +#include "dtls.h" +#include "libavutil/bprint.h" +#include "libavutil/intreadwrite.h" +#include "libavutil/random_seed.h" +#include "libavutil/time.h" +#include "internal.h" +#include "network.h" + +/** + * The DTLS content type. + * See https://tools.ietf.org/html/rfc2246#section-6.2.1 + * change_cipher_spec(20), alert(21), handshake(22), application_data(23) + */ +#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 + +/** + * The DTLS record layer header has a total size of 13 bytes, consisting of + * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), + * SequenceNumber (6 bytes), and Length (2 bytes). + * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 + */ +#define DTLS_RECORD_LAYER_HEADER_LEN 13 + +/** + * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. + * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer + */ +#define DTLS_VERSION_10 0xfeff +#define DTLS_VERSION_12 0xfefd + +/** + * Maximum size limit of a certificate and private key size. 
+ */ +#define DTLS_MAX_CERTIFICATE_SIZE 8192 + +typedef struct DTLSContext { + AVClass *av_class; + + /* For callback. */ + dtls_fn_on_state on_state; + dtls_fn_on_write on_write; + void* opaque; + + /* For logging. */ + AVClass *log_avcl; + + /* The DTLS context. */ + SSL_CTX *dtls_ctx; + SSL *dtls; + /* The DTLS BIOs. */ + BIO *bio_in; + + /* The private key for DTLS handshake. */ + EVP_PKEY *dtls_pkey; + /* The EC key for DTLS handshake. */ + EC_KEY* dtls_eckey; + /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ + X509 *dtls_cert; + /* The fingerprint of certificate, used in SDP offer. */ + char *dtls_fingerprint; + + /** + * This represents the material used to build the SRTP master key. It is + * generated by DTLS and has the following layout: + * 16B 16B 14B 14B + * client_key | server_key | client_salt | server_salt + */ + uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; + + /* Whether the DTLS is done at least for us. */ + int dtls_done_for_us; + /* Whether the SRTP key is exported. */ + int dtls_srtp_key_exported; + /* The number of packets retransmitted for DTLS. */ + int dtls_arq_packets; + /** + * This is the last DTLS content type and handshake type that is used to detect + * the ARQ packet. + */ + uint8_t dtls_last_content_type; + uint8_t dtls_last_handshake_type; + + /* These variables represent timestamps used for calculating and tracking the cost. */ + int64_t dtls_init_starttime; + int64_t dtls_init_endtime; + int64_t dtls_handshake_starttime; + int64_t dtls_handshake_endtime; + + /* Helper for get error code and message. */ + int error_code; + char error_message[256]; + + /* The certificate and private key used for DTLS handshake. */ + char* cert_file; + char* key_file; + /** + * The size of RTP packet, should generally be set to MTU. + * Note that pion requires a smaller value, for example, 1200. 
+ */ + int mtu; +} DTLSContext; + +/** + * Read all data from the given URL url and store it in the given buffer bp. Returns 0 once EOF is reached; otherwise the result of the failing open/read call, or AVERROR(EIO) when av_bprint_is_complete() reports that bp is no longer complete. + */ +static int dtls_url_read_all(AVFormatContext *s, const char *url, AVBPrint *bp) +{ + int ret = 0; + AVDictionary *opts = NULL; + URLContext *uc = NULL; + char buf[MAX_URL_SIZE]; + + ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, &s->interrupt_callback, + &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); + if (ret < 0) { + av_log(s, AV_LOG_ERROR, "WHIP: Failed to open url %s\n", url); + goto end; + } + + while (1) { + ret = ffurl_read(uc, buf, sizeof(buf)); + if (ret == AVERROR_EOF) { + /* EOF means the whole resource has been read, so it is not an error. */ + ret = 0; + break; + } + if (ret <= 0) { /* Do not log bp->str: it holds what was read so far, which the previous message itself labelled as the key. */ + av_log(s, AV_LOG_ERROR, "WHIP: Failed to read from url=%s\n", url); + goto end; + } + + av_bprintf(bp, "%.*s", ret, buf); + if (!av_bprint_is_complete(bp)) { /* Same reason: keep the chunk and the accumulated content out of the log. */ + av_log(s, AV_LOG_ERROR, "WHIP: Exceed max size when reading url=%s\n", url); + ret = AVERROR(EIO); + goto end; + } + } + + end: + ffurl_closep(&uc); + av_dict_free(&opts); + return ret; +} + +char* dtls_get_fingerprint(void *pctx) +{ + DTLSContext *ctx = pctx; + return ctx->dtls_fingerprint; +} + +uint8_t* dtls_get_srtp_client_key(void *pctx) +{ + DTLSContext *ctx = pctx; + return ctx->dtls_srtp_materials; +} + +uint8_t* dtls_get_srtp_server_key(void *pctx) +{ + return dtls_get_srtp_client_key(pctx) + DTLS_SRTP_KEY_LEN; +} + +uint8_t* dtls_get_srtp_client_salt(void *pctx) +{ + return dtls_get_srtp_server_key(pctx) + DTLS_SRTP_KEY_LEN; +} + +uint8_t* dtls_get_srtp_server_salt(void *pctx) +{ + return dtls_get_srtp_client_salt(pctx) + DTLS_SRTP_SALT_LEN; +} + +/** + * Whether the packet is a DTLS packet. 
+ */ +int dtls_can_handle_packet(uint8_t *b, int size) +{ + /* Test size first: && short-circuits, so b[] is only read when more than a full record header is present. */ + return size > DTLS_RECORD_LAYER_HEADER_LEN && + b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && + (AV_RB16(&b[1]) == DTLS_VERSION_10 || AV_RB16(&b[1]) == DTLS_VERSION_12); +} + +/** + * Retrieves the error message for the latest OpenSSL error. + * + * This function retrieves the error code from the thread's error queue, converts it + * to a human-readable string, and stores it in the DTLSContext's error_message field. + * The error queue is then cleared using ERR_clear_error(). The code is kept as unsigned long, the type ERR_get_error() returns, so it is not truncated. + */ +static const char* openssl_get_error(DTLSContext *ctx) +{ + unsigned long r2 = ERR_get_error(); + if (r2) + ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); + else + ctx->error_message[0] = '\0'; + + ERR_clear_error(); + return ctx->error_message; +} + +/** + * Get the error code for the given SSL operation result. + * + * This function retrieves the error code for the given SSL operation result + * and stores the error message in the DTLS context if an error occurred. + * It also clears the error queue. + */ +static int openssl_ssl_get_error(DTLSContext *ctx, int ret) +{ + SSL *dtls = ctx->dtls; + int r1 = SSL_ERROR_NONE; + + if (ret <= 0) + r1 = SSL_get_error(dtls, ret); + + openssl_get_error(ctx); + return r1; +} + +/** + * Callback function to print the OpenSSL SSL status. 
+ */ +static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) +{ /* where is tested against the SSL_ST_ and SSL_CB_ bit masks below; r0 is the value that accompanies the event. */ + int w, r1, is_fatal, is_warning, is_close_notify; + const char *method = "undefined", *alert_type, *alert_desc; + int state; + DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); + + w = where & ~SSL_ST_MASK; + if (w & SSL_ST_CONNECT) + method = "SSL_connect"; + else if (w & SSL_ST_ACCEPT) + method = "SSL_accept"; + + r1 = openssl_ssl_get_error(ctx, r0); /* Also refreshes ctx->error_message and clears the error queue. */ + if (where & SSL_CB_LOOP) { + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } else if (where & SSL_CB_ALERT) { + method = (where & SSL_CB_READ) ? "read":"write"; + + alert_type = SSL_alert_type_string_long(r0); + alert_desc = SSL_alert_desc_string(r0); + + if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) /* A warning-level CN alert is logged as a warning; every other alert as an error. */ + av_log(ctx, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); + else + av_log(ctx, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1, ctx->error_message); + + /** + * Notify the owner of the DTLS context about the ALERT message, which may mean the media connection is gone. + * CN (Close Notify) is sent when the peer closes the PeerConnection. A fatal alert such as IP (Illegal Parameter) + * is sent when DTLS failed. Fatal alerts map to DTLS_STATE_FAILED, a warning-level CN to DTLS_STATE_CLOSED. + */ + is_fatal = !av_strncasecmp(alert_type, "fatal", 5); + is_warning = !av_strncasecmp(alert_type, "warning", 7); + is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); + state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? 
DTLS_STATE_CLOSED : DTLS_STATE_NONE); + if (state != DTLS_STATE_NONE && ctx->on_state) { + av_log(ctx, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", + ctx, state, is_fatal, is_warning, is_close_notify); + ctx->on_state(ctx, ctx->opaque, state, alert_type, alert_desc); + } + } else if (where & SSL_CB_EXIT) { + if (!r0) + av_log(ctx, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + else if (r0 < 0) /* The else further down pairs with the inner if: with r0 < 0, NONE/WANT_READ/WANT_WRITE are logged at verbose level. */ + if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) + av_log(ctx, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d %s\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1, ctx->error_message); + else + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", + method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); + } +} + +static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming) +{ /* Logs the fields parsed from the start of data at verbose level; incoming selects the RECV or SEND label. */ + uint8_t content_type = 0; + uint16_t size = 0; + uint8_t handshake_type = 0; + + /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ + if (length >= 1) + content_type = AV_RB8(&data[0]); + if (length >= 13) /* Bytes 11-12 are read as a 16-bit value and logged as size. */ + size = AV_RB16(&data[11]); + if (length >= 14) /* Byte 13 is logged as the handshake type. */ + handshake_type = AV_RB8(&data[13]); + + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Trace %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", + (incoming? "RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, + content_type, size, handshake_type); +} + +/** + * Always return 1 to accept any certificate. This is because we allow the peer to + * use a temporary self-signed certificate for DTLS. + */ +static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) +{ /* Both arguments are ignored. */ + return 1; +} + +/** + * DTLS BIO read callback. 
+ */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x +static long openssl_dtls_bio_out_callback(BIO* b, int oper, const char* argp, int argi, long argl, long retvalue) +#else +static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, size_t len, int argi, long argl, int retvalue, size_t *processed) +#endif +{ + int ret, req_size = argi, is_arq = 0; + uint8_t content_type, handshake_type; + uint8_t *data = (uint8_t*)argp; + DTLSContext* ctx = b ? (DTLSContext*)BIO_get_callback_arg(b) : NULL; + +#if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x + req_size = len; + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", + b, oper, argp, len, argi, argl, retvalue, processed, req_size); +#else + av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", + b, oper, argp, argi, argl, retvalue, req_size); +#endif + + if (oper != BIO_CB_WRITE || !argp || req_size <= 0) + return retvalue; + + openssl_dtls_state_trace(ctx, data, req_size, 0); + ret = ctx->on_write ? ctx->on_write(ctx, ctx->opaque, data, req_size) : 0; + content_type = req_size > 0 ? AV_RB8(&data[0]) : 0; + handshake_type = req_size > 13 ? 
AV_RB8(&data[13]) : 0; + + is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; + ctx->dtls_arq_packets += is_arq; + ctx->dtls_last_content_type = content_type; + ctx->dtls_last_handshake_type = handshake_type; + + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", + oper, content_type, handshake_type, req_size, is_arq); + return ret; + } + + return retvalue; +} + +static int openssl_read_certificate(AVFormatContext *s, DTLSContext *ctx) +{ + int ret = 0; + BIO *key_b = NULL, *cert_b = NULL; + AVBPrint key_bp, cert_bp; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&key_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); + av_bprint_init(&cert_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); + + /* Read key file. */ + ret = dtls_url_read_all(s, ctx->key_file, &key_bp); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); + goto end; + } + + if ((key_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); + goto end; + } + + BIO_write(key_b, key_bp.str, key_bp.len); + ctx->dtls_pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); + if (!ctx->dtls_pkey) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); + ret = AVERROR(EIO); + goto end; + } + + /* Read certificate. 
*/ + ret = dtls_url_read_all(s, ctx->cert_file, &cert_bp); + if (ret < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open cert file %s\n", ctx->cert_file); + goto end; + } + + if ((cert_b = BIO_new(BIO_s_mem())) == NULL) { + ret = AVERROR(ENOMEM); + goto end; + } + + BIO_write(cert_b, cert_bp.str, cert_bp.len); + ctx->dtls_cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); + if (!ctx->dtls_cert) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); + ret = AVERROR(EIO); + goto end; + } + + end: + BIO_free(key_b); + av_bprint_finalize(&key_bp, NULL); + BIO_free(cert_b); + av_bprint_finalize(&cert_bp, NULL); + return ret; +} + +static int openssl_dtls_gen_private_key(DTLSContext *ctx) +{ + int ret = 0; + + /** + * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, + * not NID_secp256k1 or secp256k1 in string. + * + * TODO: Should choose the curves in ClientHello.supported_groups, for example: + * Supported Group: x25519 (0x001d) + * Supported Group: secp256r1 (0x0017) + * Supported Group: secp384r1 (0x0018) + */ +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP *ecgroup = NULL; + int curve = NID_X9_62_prime256v1; +#else + const char *curve = SN_X9_62_prime256v1; +#endif + +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + ctx->dtls_pkey = EVP_PKEY_new(); + ctx->dtls_eckey = EC_KEY_new(); + ecgroup = EC_GROUP_new_by_curve_name(curve); + if (!ecgroup) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Create EC group by curve=%d failed, %s", curve, openssl_get_error(ctx)); + goto einval_end; + } + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x + /* For openssl 1.0, we must set the group parameters, so that cert is ok. 
*/ + EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); +#endif + + if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_set_group failed, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_generate_key failed, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", openssl_get_error(ctx)); + goto einval_end; + } +#else + ctx->dtls_pkey = EVP_EC_gen(curve); + if (!ctx->dtls_pkey) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, openssl_get_error(ctx)); + goto einval_end; + } +#endif + goto end; + + einval_end: + ret = AVERROR(EINVAL); + end: +#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ + EC_GROUP_free(ecgroup); +#endif + return ret; +} + +static int openssl_dtls_gen_certificate(DTLSContext *ctx) +{ + int ret = 0, serial, expire_day, i, n = 0; + AVBPrint fingerprint; + unsigned char md[EVP_MAX_MD_SIZE]; + const char *aor = "lavf"; + X509_NAME* subject = NULL; + X509 *dtls_cert = NULL; + + /* To prevent a crash during cleanup, always initialize it. */ + av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); + + dtls_cert = ctx->dtls_cert = X509_new(); + if (!dtls_cert) { + goto enomem_end; + } + + // TODO: Support non-self-signed certificate, for example, load from a file. 
+ subject = X509_NAME_new(); + if (!subject) { + goto enomem_end; + } + + serial = (int)av_get_random_seed(); + if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set serial, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set CN, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (X509_set_issuer_name(dtls_cert, subject) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set issuer, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + if (X509_set_subject_name(dtls_cert, subject) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set subject name, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + expire_day = 365; + if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notBefore, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notAfter, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (X509_set_version(dtls_cert, 2) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set version, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set public key, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to sign certificate, %s\n", openssl_get_error(ctx)); + goto einval_end; + } + + /* Generate the fingerpint of certficate. 
*/ + if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate fingerprint, %s\n", openssl_get_error(ctx)); + goto eio_end; + } + for (i = 0; i < n; i++) { + av_bprintf(&fingerprint, "%02X", md[i]); + if (i < n - 1) + av_bprintf(&fingerprint, ":"); + } + if (!fingerprint.str || !strlen(fingerprint.str)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Fingerprint is empty\n"); + goto einval_end; + } + + ctx->dtls_fingerprint = av_strdup(fingerprint.str); + if (!ctx->dtls_fingerprint) { + goto enomem_end; + } + + goto end; + enomem_end: + ret = AVERROR(ENOMEM); + goto end; + eio_end: + ret = AVERROR(EIO); + goto end; + einval_end: + ret = AVERROR(EINVAL); + end: + X509_NAME_free(subject); + av_bprint_finalize(&fingerprint, NULL); + return ret; +} + +/** + * Initializes DTLS context using ECDHE. + */ +static av_cold int openssl_dtls_init_context(DTLSContext *ctx) +{ + int ret = 0; + EVP_PKEY *dtls_pkey = ctx->dtls_pkey; + X509 *dtls_cert = ctx->dtls_cert; + SSL_CTX *dtls_ctx = NULL; + SSL *dtls = NULL; + BIO *bio_in = NULL, *bio_out = NULL; + const char* ciphers = "ALL"; + /** + * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. + * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. + */ + const char* profiles = "SRTP_AES128_CM_SHA1_80"; + + /* Refer to the test cases regarding these curves in the WebRTC code. 
*/ +#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ + const char* curves = "X25519:P-256:P-384:P-521"; +#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + const char* curves = "P-256:P-384:P-521"; +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); +#else + dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); +#endif + if (!dtls_ctx) { + ret = AVERROR(ENOMEM); + goto end; + } + +#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ + /* For ECDSA, we could set the curves list. */ + if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", + curves, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } +#endif + +#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x +#if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 + if (ctx->dtls_eckey) + SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); +#else + SSL_CTX_set_ecdh_auto(dtls_ctx, 1); +#endif +#endif + + /** + * We activate "ALL" cipher suites to align with the peer's capabilities, + * ensuring maximum compatibility. + */ + if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", + ciphers, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + /* Setup the certificate. */ + if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + + /* Server will send Certificate Request. 
*/ + SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); + /* The depth count is "level 0:peer certificate", "level 1: CA certificate", + * "level 2: higher level CA certificate", and so on. */ + SSL_CTX_set_verify_depth(dtls_ctx, 4); + /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ + SSL_CTX_set_read_ahead(dtls_ctx, 1); + /* Setup the SRTP context */ + if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", + profiles, openssl_get_error(ctx)); + ret = AVERROR(EINVAL); + return ret; + } + + /* The dtls should not be created unless the dtls_ctx has been initialized. */ + dtls = ctx->dtls = SSL_new(dtls_ctx); + if (!dtls) { + ret = AVERROR(ENOMEM); + goto end; + } + + /* Setup the callback for logging. */ + SSL_set_ex_data(dtls, 0, ctx); + SSL_set_info_callback(dtls, openssl_dtls_on_info); + + /** + * We have set the MTU to fragment the DTLS packet. It is important to note that the + * packet is split to ensure that each handshake packet is smaller than the MTU. + */ + SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); + SSL_set_mtu(dtls, ctx->mtu); +#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ + DTLS_set_link_mtu(dtls, ctx->mtu); +#endif + + bio_in = BIO_new(BIO_s_mem()); + if (!bio_in) { + ret = AVERROR(ENOMEM); + goto end; + } + + bio_out = BIO_new(BIO_s_mem()); + if (!bio_out) { + ret = AVERROR(ENOMEM); + goto end; + } + + /** + * Please be aware that it is necessary to use a callback to obtain the packet to be written out. It is + * imperative that BIO_get_mem_data is not used to retrieve the packet, as it returns all the bytes that + * need to be sent out. + * For example, if MTU is set to 1200, and we got two DTLS packets to sendout: + * ServerHello, 95bytes. + * Certificate, 1105+143=1248bytes. 
+     * If use BIO_get_mem_data, it will return 95+1248=1343bytes, which is larger than MTU 1200.
+     * If use callback, it will return two UDP packets:
+     *      ServerHello+Certificate(Fragment) = 95+1105=1200bytes.
+     *      Certificate(Fragment) = 143bytes.
+     * Note that there should be more packets in real world, like ServerKeyExchange, CertificateRequest,
+     * and ServerHelloDone. Here we just use two packets for example.
+     */
+#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x
+    BIO_set_callback(bio_out, openssl_dtls_bio_out_callback);
+#else
+    BIO_set_callback_ex(bio_out, openssl_dtls_bio_out_callback_ex);
+#endif
+    BIO_set_callback_arg(bio_out, (char*)ctx);
+
+    ctx->bio_in = bio_in;
+    SSL_set_bio(dtls, bio_in, bio_out);
+    /* Now the bio_in and bio_out are owned by dtls, so we should set them to NULL. */
+    bio_in = bio_out = NULL;
+
+end:
+    BIO_free(bio_in);
+    BIO_free(bio_out);
+    return ret;
+}
+
+/**
+ * Allocate a zero-initialized DTLS context and store the caller's settings in it.
+ *
+ * No OpenSSL object is created here; that is done by dtls_context_init().
+ *
+ * @param av_class  the class used for logging with this context
+ * @param opaque    user pointer passed back as the second argument of the callbacks
+ * @param pkt_size  stored as the MTU of the context
+ * @param on_state  callback for DTLS state changes, may be NULL
+ * @param on_write  callback used to send out DTLS packets, may be NULL
+ * @param cert_file path of the certificate file, or NULL; the string is copied
+ * @param key_file  path of the private key file, or NULL; the string is copied
+ * @return the new context, or NULL if any allocation failed
+ */
+void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file)
+{
+    DTLSContext *ctx = av_mallocz(sizeof(*ctx));
+
+    if (!ctx)
+        return NULL;
+
+    ctx->av_class = av_class;
+    ctx->mtu = pkt_size;
+    ctx->opaque = opaque;
+    ctx->on_state = on_state;
+    ctx->on_write = on_write;
+
+    /* A failed copy must not be ignored: with a NULL path the init code would
+     * silently generate a self-signed certificate instead of using the files. */
+    if (cert_file && !(ctx->cert_file = av_strdup(cert_file)))
+        goto fail;
+    if (key_file && !(ctx->key_file = av_strdup(key_file)))
+        goto fail;
+
+    return ctx;
+
+fail:
+    av_freep(&ctx->cert_file);
+    av_freep(&ctx->key_file);
+    av_free(ctx);
+    return NULL;
+}
+
+/**
+ * Generate a self-signed certificate and private key for DTLS. Please note that the
+ * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore,
+ * there is no need to call it again.
+ */
+av_cold int dtls_context_init(AVFormatContext *s, void *pctx)
+{
+    DTLSContext *ctx = pctx;
+
+    int ret = 0;
+
+    ctx->dtls_init_starttime = av_gettime();
+
+    if (ctx->cert_file && ctx->key_file) {
+        /* Read the private key and certificate from the files.
*/ + if ((ret = openssl_read_certificate(s, ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", + ctx->cert_file, ctx->key_file); + return ret; + } + } else { + /* Generate a private key to ctx->dtls_pkey. */ + if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); + return ret; + } + + /* Generate a self-signed certificate. */ + if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); + return ret; + } + } + + if ((ret = openssl_dtls_init_context(ctx)) < 0) { + av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to initialize DTLS context\n"); + return ret; + } + + ctx->dtls_init_endtime = av_gettime(); + av_log(ctx, AV_LOG_VERBOSE, "DTLS: Setup ok, MTU=%d, fingerprint %s\n", + ctx->mtu, ctx->dtls_fingerprint); + + return ret; +} + +/** + * Once the DTLS role has been negotiated - active for the DTLS client or passive for the + * DTLS server - we proceed to set up the DTLS state and initiate the handshake. + */ +int dtls_context_start(void *pctx) +{ + DTLSContext *ctx = pctx; + + int ret = 0, r0, r1; + SSL *dtls = ctx->dtls; + + ctx->dtls_handshake_starttime = av_gettime(); + + /* Setup DTLS as passive, which is server role. */ + SSL_set_accept_state(dtls); + + /** + * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes + * the handshake message if the handshake is incomplete. + * To simplify maintenance, we initiate the handshake for both the DTLS server and client after + * sending out the ICE response in the start_active_handshake function. It's worth noting that + * although the DTLS server may receive the ClientHello immediately after sending out the ICE + * response, this shouldn't be an issue as the handshake function is called before any DTLS + * packets are received. 
+     */
+    r0 = SSL_do_handshake(dtls);
+    r1 = openssl_ssl_get_error(ctx, r0);
+    // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2.
+    if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) {
+        av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message);
+        return AVERROR(EIO);
+    }
+
+    return ret;
+}
+
+/**
+ * Feed one received DTLS packet to OpenSSL and drive the handshake.
+ *
+ * The packet is written to the incoming BIO and SSL_read() is called so that
+ * OpenSSL processes it. Once the handshake is finished, the SRTP keying material
+ * is exported to ctx->dtls_srtp_materials (only once) and, the first time
+ * completion is observed, the on_state callback is invoked with
+ * DTLS_STATE_FINISHED.
+ *
+ * @param pctx the DTLSContext created by dtls_context_new()
+ * @param buf  the received packet; it is also reused as the output buffer of
+ *             SSL_read(), so its content is undefined after the call
+ * @param size the number of bytes in buf
+ * @return 0 if OK, AVERROR_xxx on error, or the value returned by on_state
+ */
+int dtls_context_write(void *pctx, char* buf, int size)
+{
+    DTLSContext *ctx = pctx;
+
+    int ret = 0, res_ct, res_ht, r0, r1, do_callback;
+    SSL *dtls = ctx->dtls;
+    const char* dst = "EXTRACTOR-dtls_srtp";
+    BIO *bio_in = ctx->bio_in;
+
+    /* Got DTLS response successfully. */
+    openssl_dtls_state_trace(ctx, (uint8_t *)buf, size, 1);
+    if ((r0 = BIO_write(bio_in, buf, size)) <= 0) {
+        res_ct = size > 0 ? buf[0]: 0;
+        res_ht = size > 13 ? buf[13] : 0;
+        av_log(ctx, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n",
+            res_ct, res_ht, size, r0);
+        ret = AVERROR(EIO);
+        goto end;
+    }
+
+    /**
+     * If there is data available in bio_in, use SSL_read to allow SSL to process it.
+     * The read is bounded by size, the number of bytes the caller provided in buf.
+     * Note that sizeof(buf) must not be used here, because buf is a pointer and
+     * sizeof would only yield the size of the pointer itself.
+     */
+    r0 = SSL_read(dtls, buf, size);
+    r1 = openssl_ssl_get_error(ctx, r0);
+    if (r0 <= 0) {
+        if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) {
+            av_log(ctx, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message);
+            ret = AVERROR(EIO);
+            goto end;
+        }
+    } else {
+        av_log(ctx, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1);
+    }
+
+    /* Check whether the DTLS is completed. */
+    if (SSL_is_init_finished(dtls) != 1)
+        goto end;
+
+    do_callback = ctx->on_state && !ctx->dtls_done_for_us;
+    ctx->dtls_done_for_us = 1;
+    ctx->dtls_handshake_endtime = av_gettime();
+
+    /* Export SRTP master key after DTLS done */
+    if (!ctx->dtls_srtp_key_exported) {
+        /* Keep the OpenSSL result in r0 so that ret stays 0 on success, and
+         * query the error state for this call rather than for SSL_read. */
+        r0 = SSL_export_keying_material(dtls, ctx->dtls_srtp_materials, sizeof(ctx->dtls_srtp_materials),
+            dst, strlen(dst), NULL, 0, 0);
+        r1 = openssl_ssl_get_error(ctx, r0);
+        /* Only 1 means success; failures are reported as 0 or -1. */
+        if (r0 != 1) {
+            av_log(ctx, AV_LOG_ERROR, "DTLS: SSL export key ret=%d, r1=%d %s\n", r0, r1, ctx->error_message);
+            ret = AVERROR(EIO);
+            goto end;
+        }
+
+        ctx->dtls_srtp_key_exported = 1;
+    }
+
+    if (do_callback && (ret = ctx->on_state(ctx, ctx->opaque, DTLS_STATE_FINISHED, NULL, NULL)) < 0)
+        goto end;
+
+end:
+    return ret;
+}
+
+/**
+ * Cleanup the DTLS context.
+ *
+ * Releases the OpenSSL objects and the strings owned by the context and resets
+ * the pointers, so calling it again is harmless. The context structure itself
+ * is not freed here. A NULL context is ignored.
+ */
+av_cold void dtls_context_deinit(void *pctx)
+{
+    DTLSContext *ctx = pctx;
+
+    if (!ctx)
+        return;
+
+    /* bio_in is owned by dtls after SSL_set_bio(), so it is released with it. */
+    SSL_free(ctx->dtls);
+    ctx->dtls = NULL;
+    ctx->bio_in = NULL;
+    SSL_CTX_free(ctx->dtls_ctx);
+    ctx->dtls_ctx = NULL;
+    X509_free(ctx->dtls_cert);
+    ctx->dtls_cert = NULL;
+    EVP_PKEY_free(ctx->dtls_pkey);
+    ctx->dtls_pkey = NULL;
+    av_freep(&ctx->dtls_fingerprint);
+    av_freep(&ctx->cert_file);
+    av_freep(&ctx->key_file);
+#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */
+    EC_KEY_free(ctx->dtls_eckey);
+    ctx->dtls_eckey = NULL;
+#endif
+}
+
diff --git a/libavformat/dtls.h b/libavformat/dtls.h
new file mode 100644
index 0000000000000..52b168ecfad78
--- /dev/null
+++ b/libavformat/dtls.h
@@ -0,0 +1,62 @@
+/*
+ * WebRTC-HTTP ingestion protocol (WHIP) muxer
+ * Copyright (c) 2023 The FFmpeg Project
+ *
+ * This file is part of FFmpeg.
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avformat.h" + +/** + * The size of the Secure Real-time Transport Protocol (SRTP) master key material + * that is exported by Secure Sockets Layer (SSL) after a successful Datagram + * Transport Layer Security (DTLS) handshake. This material consists of a key + * of 16 bytes and a salt of 14 bytes. + */ +#define DTLS_SRTP_KEY_LEN 16 +#define DTLS_SRTP_SALT_LEN 14 +/** + * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum + * and padding that is appended to the end of the packet. To calculate the maximum + * size of the User Datagram Protocol (UDP) packet that can be sent out, subtract + * this size from the `pkt_size`. + */ +#define DTLS_SRTP_CHECKSUM_LEN 16 +/* DTLS init state. */ +#define DTLS_STATE_NONE 0 +/* Whether DTLS handshake is finished. */ +#define DTLS_STATE_FINISHED 1 +/* Whether DTLS session is closed. */ +#define DTLS_STATE_CLOSED 2 +/* Whether DTLS handshake is failed. 
*/ +#define DTLS_STATE_FAILED 3 +typedef int (*dtls_fn_on_state)(void *ctx, void *opaque, int state, const char* type, const char* desc); +typedef int (*dtls_fn_on_write)(void *ctx, void *opaque, char* data, int size); + +void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file); +av_cold int dtls_context_init(AVFormatContext *s, void *ctx); +int dtls_context_start(void *ctx); +int dtls_context_write(void *ctx, char* buf, int size); +av_cold void dtls_context_deinit(void *ctx); + +int dtls_can_handle_packet(uint8_t *b, int size); +char* dtls_get_fingerprint(void *ctx); +uint8_t* dtls_get_srtp_client_key(void *ctx); +uint8_t* dtls_get_srtp_server_key(void *ctx); +uint8_t* dtls_get_srtp_client_salt(void *ctx); +uint8_t* dtls_get_srtp_server_salt(void *ctx); + diff --git a/libavformat/whip.c b/libavformat/whip.c index 416c486105c09..9bcab69db33cb 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -19,9 +19,6 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include -#include - #include "libavcodec/avcodec.h" #include "libavcodec/codec_desc.h" #include "libavcodec/h264.h" @@ -42,45 +39,7 @@ #include "mux.h" #include "network.h" #include "srtp.h" - -/** - * The size of the Secure Real-time Transport Protocol (SRTP) master key material - * that is exported by Secure Sockets Layer (SSL) after a successful Datagram - * Transport Layer Security (DTLS) handshake. This material consists of a key - * of 16 bytes and a salt of 14 bytes. - */ -#define DTLS_SRTP_KEY_LEN 16 -#define DTLS_SRTP_SALT_LEN 14 -/** - * The maximum size of the Secure Real-time Transport Protocol (SRTP) HMAC checksum - * and padding that is appended to the end of the packet. To calculate the maximum - * size of the User Datagram Protocol (UDP) packet that can be sent out, subtract - * this size from the `pkt_size`. 
- */ -#define DTLS_SRTP_CHECKSUM_LEN 16 -/* DTLS init state. */ -#define DTLS_STATE_NONE 0 -/* Whether DTLS handshake is finished. */ -#define DTLS_STATE_FINISHED 1 -/* Whether DTLS session is closed. */ -#define DTLS_STATE_CLOSED 2 -/* Whether DTLS handshake is failed. */ -#define DTLS_STATE_FAILED 3 -typedef int (*dtls_fn_on_state)(void *ctx, void *opaque, int state, const char* type, const char* desc); -typedef int (*dtls_fn_on_write)(void *ctx, void *opaque, char* data, int size); - -static void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file); -static av_cold int dtls_context_init(AVFormatContext *s, void *ctx); -static int dtls_context_start(void *ctx); -static int dtls_context_write(void *ctx, char* buf, int size); -static av_cold void dtls_context_deinit(void *ctx); - -static int dtls_can_handle_packet(uint8_t *b, int size); -static char* dtls_get_fingerprint(void *ctx); -static uint8_t* dtls_get_srtp_client_key(void *ctx); -static uint8_t* dtls_get_srtp_server_key(void *ctx); -static uint8_t* dtls_get_srtp_client_salt(void *ctx); -static uint8_t* dtls_get_srtp_server_salt(void *ctx); +#include "dtls.h" /** * Maximum size limit of a Session Description Protocol (SDP), @@ -1804,930 +1763,6 @@ static int whip_check_bitstream(AVFormatContext *s, AVStream *st, const AVPacket return ret; } -#if 1 -/** - * The DTLS content type. - * See https://tools.ietf.org/html/rfc2246#section-6.2.1 - * change_cipher_spec(20), alert(21), handshake(22), application_data(23) - */ -#define DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC 20 - -/** - * The DTLS record layer header has a total size of 13 bytes, consisting of - * ContentType (1 byte), ProtocolVersion (2 bytes), Epoch (2 bytes), - * SequenceNumber (6 bytes), and Length (2 bytes). 
- * See https://datatracker.ietf.org/doc/html/rfc9147#section-4 - */ -#define DTLS_RECORD_LAYER_HEADER_LEN 13 - -/** - * The DTLS version number, which is 0xfeff for DTLS 1.0, or 0xfefd for DTLS 1.2. - * See https://datatracker.ietf.org/doc/html/rfc9147#name-the-dtls-record-layer - */ -#define DTLS_VERSION_10 0xfeff -#define DTLS_VERSION_12 0xfefd - -/** - * Maximum size limit of a certificate and private key size. - */ -#define DTLS_MAX_CERTIFICATE_SIZE 8192 - -typedef struct DTLSContext { - AVClass *av_class; - - /* For callback. */ - dtls_fn_on_state on_state; - dtls_fn_on_write on_write; - void* opaque; - - /* For logging. */ - AVClass *log_avcl; - - /* The DTLS context. */ - SSL_CTX *dtls_ctx; - SSL *dtls; - /* The DTLS BIOs. */ - BIO *bio_in; - - /* The private key for DTLS handshake. */ - EVP_PKEY *dtls_pkey; - /* The EC key for DTLS handshake. */ - EC_KEY* dtls_eckey; - /* The SSL certificate used for fingerprint in SDP and DTLS handshake. */ - X509 *dtls_cert; - /* The fingerprint of certificate, used in SDP offer. */ - char *dtls_fingerprint; - - /** - * This represents the material used to build the SRTP master key. It is - * generated by DTLS and has the following layout: - * 16B 16B 14B 14B - * client_key | server_key | client_salt | server_salt - */ - uint8_t dtls_srtp_materials[(DTLS_SRTP_KEY_LEN + DTLS_SRTP_SALT_LEN) * 2]; - - /* Whether the DTLS is done at least for us. */ - int dtls_done_for_us; - /* Whether the SRTP key is exported. */ - int dtls_srtp_key_exported; - /* The number of packets retransmitted for DTLS. */ - int dtls_arq_packets; - /** - * This is the last DTLS content type and handshake type that is used to detect - * the ARQ packet. - */ - uint8_t dtls_last_content_type; - uint8_t dtls_last_handshake_type; - - /* These variables represent timestamps used for calculating and tracking the cost. 
*/ - int64_t dtls_init_starttime; - int64_t dtls_init_endtime; - int64_t dtls_handshake_starttime; - int64_t dtls_handshake_endtime; - - /* Helper for get error code and message. */ - int error_code; - char error_message[256]; - - /* The certificate and private key used for DTLS handshake. */ - char* cert_file; - char* key_file; - /** - * The size of RTP packet, should generally be set to MTU. - * Note that pion requires a smaller value, for example, 1200. - */ - int mtu; -} DTLSContext; - -/** - * Read all data from the given URL url and store it in the given buffer bp. - */ -static int dtls_url_read_all(AVFormatContext *s, const char *url, AVBPrint *bp) -{ - int ret = 0; - AVDictionary *opts = NULL; - URLContext *uc = NULL; - char buf[MAX_URL_SIZE]; - - ret = ffurl_open_whitelist(&uc, url, AVIO_FLAG_READ, &s->interrupt_callback, - &opts, s->protocol_whitelist, s->protocol_blacklist, NULL); - if (ret < 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to open url %s\n", url); - goto end; - } - - while (1) { - ret = ffurl_read(uc, buf, sizeof(buf)); - if (ret == AVERROR_EOF) { - /* Reset the error because we read all response as answer util EOF. 
*/ - ret = 0; - break; - } - if (ret <= 0) { - av_log(s, AV_LOG_ERROR, "WHIP: Failed to read from url=%s, key is %s\n", url, bp->str); - goto end; - } - - av_bprintf(bp, "%.*s", ret, buf); - if (!av_bprint_is_complete(bp)) { - av_log(s, AV_LOG_ERROR, "WHIP: Exceed max size %.*s, %s\n", ret, buf, bp->str); - ret = AVERROR(EIO); - goto end; - } - } - -end: - ffurl_closep(&uc); - av_dict_free(&opts); - return ret; -} - -static char* dtls_get_fingerprint(void *pctx) -{ - DTLSContext *ctx = pctx; - return ctx->dtls_fingerprint; -} - -static uint8_t* dtls_get_srtp_client_key(void *pctx) -{ - DTLSContext *ctx = pctx; - return ctx->dtls_srtp_materials; -} - -static uint8_t* dtls_get_srtp_server_key(void *pctx) -{ - return dtls_get_srtp_client_key(pctx) + DTLS_SRTP_KEY_LEN; -} - -static uint8_t* dtls_get_srtp_client_salt(void *pctx) -{ - return dtls_get_srtp_server_key(pctx) + DTLS_SRTP_KEY_LEN; -} - -static uint8_t* dtls_get_srtp_server_salt(void *pctx) -{ - return dtls_get_srtp_client_salt(pctx) + DTLS_SRTP_SALT_LEN; -} - -/** - * Whether the packet is a DTLS packet. - */ -static int dtls_can_handle_packet(uint8_t *b, int size) -{ - uint16_t version = AV_RB16(&b[1]); - return size > DTLS_RECORD_LAYER_HEADER_LEN && - b[0] >= DTLS_CONTENT_TYPE_CHANGE_CIPHER_SPEC && - (version == DTLS_VERSION_10 || version == DTLS_VERSION_12); -} - -/** - * Retrieves the error message for the latest OpenSSL error. - * - * This function retrieves the error code from the thread's error queue, converts it - * to a human-readable string, and stores it in the DTLSContext's error_message field. - * The error queue is then cleared using ERR_clear_error(). - */ -static const char* openssl_get_error(DTLSContext *ctx) -{ - int r2 = ERR_get_error(); - if (r2) - ERR_error_string_n(r2, ctx->error_message, sizeof(ctx->error_message)); - else - ctx->error_message[0] = '\0'; - - ERR_clear_error(); - return ctx->error_message; -} - -/** - * Get the error code for the given SSL operation result. 
- * - * This function retrieves the error code for the given SSL operation result - * and stores the error message in the DTLS context if an error occurred. - * It also clears the error queue. - */ -static int openssl_ssl_get_error(DTLSContext *ctx, int ret) -{ - SSL *dtls = ctx->dtls; - int r1 = SSL_ERROR_NONE; - - if (ret <= 0) - r1 = SSL_get_error(dtls, ret); - - openssl_get_error(ctx); - return r1; -} - -/** - * Callback function to print the OpenSSL SSL status. - */ -static void openssl_dtls_on_info(const SSL *dtls, int where, int r0) -{ - int w, r1, is_fatal, is_warning, is_close_notify; - const char *method = "undefined", *alert_type, *alert_desc; - int state; - DTLSContext *ctx = (DTLSContext*)SSL_get_ex_data(dtls, 0); - - w = where & ~SSL_ST_MASK; - if (w & SSL_ST_CONNECT) - method = "SSL_connect"; - else if (w & SSL_ST_ACCEPT) - method = "SSL_accept"; - - r1 = openssl_ssl_get_error(ctx, r0); - if (where & SSL_CB_LOOP) { - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); - } else if (where & SSL_CB_ALERT) { - method = (where & SSL_CB_READ) ? "read":"write"; - - alert_type = SSL_alert_type_string_long(r0); - alert_desc = SSL_alert_desc_string(r0); - - if (!av_strcasecmp(alert_type, "warning") && !av_strcasecmp(alert_desc, "CN")) - av_log(ctx, AV_LOG_WARNING, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1); - else - av_log(ctx, AV_LOG_ERROR, "DTLS: SSL3 alert method=%s type=%s, desc=%s(%s), where=%d, ret=%d, r1=%d %s\n", - method, alert_type, alert_desc, SSL_alert_desc_string_long(r0), where, r0, r1, ctx->error_message); - - /** - * Notify the DTLS to handle the ALERT message, which maybe means media connection disconnect. - * CN(Close Notify) is sent when peer close the PeerConnection. 
fatal, IP(Illegal Parameter) - * is sent when DTLS failed. - */ - is_fatal = !av_strncasecmp(alert_type, "fatal", 5); - is_warning = !av_strncasecmp(alert_type, "warning", 7); - is_close_notify = !av_strncasecmp(alert_desc, "CN", 2); - state = is_fatal ? DTLS_STATE_FAILED : (is_warning && is_close_notify ? DTLS_STATE_CLOSED : DTLS_STATE_NONE); - if (state != DTLS_STATE_NONE && ctx->on_state) { - av_log(ctx, AV_LOG_INFO, "DTLS: Notify ctx=%p, state=%d, fatal=%d, warning=%d, cn=%d\n", - ctx, state, is_fatal, is_warning, is_close_notify); - ctx->on_state(ctx, ctx->opaque, state, alert_type, alert_desc); - } - } else if (where & SSL_CB_EXIT) { - if (!r0) - av_log(ctx, AV_LOG_WARNING, "DTLS: Fail method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); - else if (r0 < 0) - if (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE) - av_log(ctx, AV_LOG_ERROR, "DTLS: Error method=%s state=%s(%s), where=%d, ret=%d, r1=%d %s\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1, ctx->error_message); - else - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Info method=%s state=%s(%s), where=%d, ret=%d, r1=%d\n", - method, SSL_state_string(dtls), SSL_state_string_long(dtls), where, r0, r1); - } -} - -static void openssl_dtls_state_trace(DTLSContext *ctx, uint8_t *data, int length, int incoming) -{ - uint8_t content_type = 0; - uint16_t size = 0; - uint8_t handshake_type = 0; - - /* Change_cipher_spec(20), alert(21), handshake(22), application_data(23) */ - if (length >= 1) - content_type = AV_RB8(&data[0]); - if (length >= 13) - size = AV_RB16(&data[11]); - if (length >= 14) - handshake_type = AV_RB8(&data[13]); - - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Trace %s, done=%u, arq=%u, len=%u, cnt=%u, size=%u, hs=%u\n", - (incoming? 
"RECV":"SEND"), ctx->dtls_done_for_us, ctx->dtls_arq_packets, length, - content_type, size, handshake_type); -} - -/** - * Always return 1 to accept any certificate. This is because we allow the peer to - * use a temporary self-signed certificate for DTLS. - */ -static int openssl_dtls_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) -{ - return 1; -} - -/** - * DTLS BIO read callback. - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x -static long openssl_dtls_bio_out_callback(BIO* b, int oper, const char* argp, int argi, long argl, long retvalue) -#else -static long openssl_dtls_bio_out_callback_ex(BIO *b, int oper, const char *argp, size_t len, int argi, long argl, int retvalue, size_t *processed) -#endif -{ - int ret, req_size = argi, is_arq = 0; - uint8_t content_type, handshake_type; - uint8_t *data = (uint8_t*)argp; - DTLSContext* ctx = b ? (DTLSContext*)BIO_get_callback_arg(b) : NULL; - -#if OPENSSL_VERSION_NUMBER >= 0x30000000L // v3.0.x - req_size = len; - av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, len=%ld, argi=%d, argl=%ld, retvalue=%d, processed=%p, req_size=%d\n", - b, oper, argp, len, argi, argl, retvalue, processed, req_size); -#else - av_log(ctx, AV_LOG_DEBUG, "DTLS: BIO callback b=%p, oper=%d, argp=%p, argi=%d, argl=%ld, retvalue=%ld, req_size=%d\n", - b, oper, argp, argi, argl, retvalue, req_size); -#endif - - if (oper != BIO_CB_WRITE || !argp || req_size <= 0) - return retvalue; - - openssl_dtls_state_trace(ctx, data, req_size, 0); - ret = ctx->on_write ? ctx->on_write(ctx, ctx->opaque, data, req_size) : 0; - content_type = req_size > 0 ? AV_RB8(&data[0]) : 0; - handshake_type = req_size > 13 ? 
AV_RB8(&data[13]) : 0; - - is_arq = ctx->dtls_last_content_type == content_type && ctx->dtls_last_handshake_type == handshake_type; - ctx->dtls_arq_packets += is_arq; - ctx->dtls_last_content_type = content_type; - ctx->dtls_last_handshake_type = handshake_type; - - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Send request failed, oper=%d, content=%d, handshake=%d, size=%d, is_arq=%d\n", - oper, content_type, handshake_type, req_size, is_arq); - return ret; - } - - return retvalue; -} - -static int openssl_read_certificate(AVFormatContext *s, DTLSContext *ctx) -{ - int ret = 0; - BIO *key_b = NULL, *cert_b = NULL; - AVBPrint key_bp, cert_bp; - - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&key_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); - av_bprint_init(&cert_bp, 1, DTLS_MAX_CERTIFICATE_SIZE); - - /* Read key file. */ - ret = dtls_url_read_all(s, ctx->key_file, &key_bp); - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open key file %s\n", ctx->key_file); - goto end; - } - - if ((key_b = BIO_new(BIO_s_mem())) == NULL) { - ret = AVERROR(ENOMEM); - goto end; - } - - BIO_write(key_b, key_bp.str, key_bp.len); - ctx->dtls_pkey = PEM_read_bio_PrivateKey(key_b, NULL, NULL, NULL); - if (!ctx->dtls_pkey) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read private key from %s\n", ctx->key_file); - ret = AVERROR(EIO); - goto end; - } - - /* Read certificate. 
*/ - ret = dtls_url_read_all(s, ctx->cert_file, &cert_bp); - if (ret < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to open cert file %s\n", ctx->cert_file); - goto end; - } - - if ((cert_b = BIO_new(BIO_s_mem())) == NULL) { - ret = AVERROR(ENOMEM); - goto end; - } - - BIO_write(cert_b, cert_bp.str, cert_bp.len); - ctx->dtls_cert = PEM_read_bio_X509(cert_b, NULL, NULL, NULL); - if (!ctx->dtls_cert) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read certificate from %s\n", ctx->cert_file); - ret = AVERROR(EIO); - goto end; - } - -end: - BIO_free(key_b); - av_bprint_finalize(&key_bp, NULL); - BIO_free(cert_b); - av_bprint_finalize(&cert_bp, NULL); - return ret; -} - -static int openssl_dtls_gen_private_key(DTLSContext *ctx) -{ - int ret = 0; - - /** - * Note that secp256r1 in openssl is called NID_X9_62_prime256v1 or prime256v1 in string, - * not NID_secp256k1 or secp256k1 in string. - * - * TODO: Should choose the curves in ClientHello.supported_groups, for example: - * Supported Group: x25519 (0x001d) - * Supported Group: secp256r1 (0x0017) - * Supported Group: secp384r1 (0x0018) - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_GROUP *ecgroup = NULL; - int curve = NID_X9_62_prime256v1; -#else - const char *curve = SN_X9_62_prime256v1; -#endif - -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - ctx->dtls_pkey = EVP_PKEY_new(); - ctx->dtls_eckey = EC_KEY_new(); - ecgroup = EC_GROUP_new_by_curve_name(curve); - if (!ecgroup) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Create EC group by curve=%d failed, %s", curve, openssl_get_error(ctx)); - goto einval_end; - } - -#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x - /* For openssl 1.0, we must set the group parameters, so that cert is ok. 
*/ - EC_GROUP_set_asn1_flag(ecgroup, OPENSSL_EC_NAMED_CURVE); -#endif - - if (EC_KEY_set_group(ctx->dtls_eckey, ecgroup) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_set_group failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (EC_KEY_generate_key(ctx->dtls_eckey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EC_KEY_generate_key failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (EVP_PKEY_set1_EC_KEY(ctx->dtls_pkey, ctx->dtls_eckey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_PKEY_set1_EC_KEY failed, %s\n", openssl_get_error(ctx)); - goto einval_end; - } -#else - ctx->dtls_pkey = EVP_EC_gen(curve); - if (!ctx->dtls_pkey) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Generate private key, EVP_EC_gen curve=%s failed, %s\n", curve, openssl_get_error(ctx)); - goto einval_end; - } -#endif - goto end; - -einval_end: - ret = AVERROR(EINVAL); -end: -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_GROUP_free(ecgroup); -#endif - return ret; -} - -static int openssl_dtls_gen_certificate(DTLSContext *ctx) -{ - int ret = 0, serial, expire_day, i, n = 0; - AVBPrint fingerprint; - unsigned char md[EVP_MAX_MD_SIZE]; - const char *aor = "lavf"; - X509_NAME* subject = NULL; - X509 *dtls_cert = NULL; - - /* To prevent a crash during cleanup, always initialize it. */ - av_bprint_init(&fingerprint, 1, MAX_URL_SIZE); - - dtls_cert = ctx->dtls_cert = X509_new(); - if (!dtls_cert) { - goto enomem_end; - } - - // TODO: Support non-self-signed certificate, for example, load from a file. 
- subject = X509_NAME_new(); - if (!subject) { - goto enomem_end; - } - - serial = (int)av_get_random_seed(); - if (ASN1_INTEGER_set(X509_get_serialNumber(dtls_cert), serial) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set serial, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC, aor, strlen(aor), -1, 0) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set CN, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (X509_set_issuer_name(dtls_cert, subject) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set issuer, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - if (X509_set_subject_name(dtls_cert, subject) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set subject name, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - expire_day = 365; - if (!X509_gmtime_adj(X509_get_notBefore(dtls_cert), 0)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notBefore, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - if (!X509_gmtime_adj(X509_get_notAfter(dtls_cert), 60*60*24*expire_day)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set notAfter, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (X509_set_version(dtls_cert, 2) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set version, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (X509_set_pubkey(dtls_cert, ctx->dtls_pkey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to set public key, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - if (!X509_sign(dtls_cert, ctx->dtls_pkey, EVP_sha1())) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to sign certificate, %s\n", openssl_get_error(ctx)); - goto einval_end; - } - - /* Generate the fingerpint of certficate. 
*/ - if (X509_digest(dtls_cert, EVP_sha256(), md, &n) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate fingerprint, %s\n", openssl_get_error(ctx)); - goto eio_end; - } - for (i = 0; i < n; i++) { - av_bprintf(&fingerprint, "%02X", md[i]); - if (i < n - 1) - av_bprintf(&fingerprint, ":"); - } - if (!fingerprint.str || !strlen(fingerprint.str)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Fingerprint is empty\n"); - goto einval_end; - } - - ctx->dtls_fingerprint = av_strdup(fingerprint.str); - if (!ctx->dtls_fingerprint) { - goto enomem_end; - } - - goto end; -enomem_end: - ret = AVERROR(ENOMEM); - goto end; -eio_end: - ret = AVERROR(EIO); - goto end; -einval_end: - ret = AVERROR(EINVAL); -end: - X509_NAME_free(subject); - av_bprint_finalize(&fingerprint, NULL); - return ret; -} - -/** - * Initializes DTLS context using ECDHE. - */ -static av_cold int openssl_dtls_init_context(DTLSContext *ctx) -{ - int ret = 0; - EVP_PKEY *dtls_pkey = ctx->dtls_pkey; - X509 *dtls_cert = ctx->dtls_cert; - SSL_CTX *dtls_ctx = NULL; - SSL *dtls = NULL; - BIO *bio_in = NULL, *bio_out = NULL; - const char* ciphers = "ALL"; - /** - * The profile for OpenSSL's SRTP is SRTP_AES128_CM_SHA1_80, see ssl/d1_srtp.c. - * The profile for FFmpeg's SRTP is SRTP_AES128_CM_HMAC_SHA1_80, see libavformat/srtp.c. - */ - const char* profiles = "SRTP_AES128_CM_SHA1_80"; - - /* Refer to the test cases regarding these curves in the WebRTC code. 
*/ -#if OPENSSL_VERSION_NUMBER >= 0x10100000L /* OpenSSL 1.1.0 */ - const char* curves = "X25519:P-256:P-384:P-521"; -#elif OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ - const char* curves = "P-256:P-384:P-521"; -#endif - -#if OPENSSL_VERSION_NUMBER < 0x10002000L /* OpenSSL v1.0.2 */ - dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLSv1_method()); -#else - dtls_ctx = ctx->dtls_ctx = SSL_CTX_new(DTLS_method()); -#endif - if (!dtls_ctx) { - ret = AVERROR(ENOMEM); - goto end; - } - -#if OPENSSL_VERSION_NUMBER >= 0x10002000L /* OpenSSL 1.0.2 */ - /* For ECDSA, we could set the curves list. */ - if (SSL_CTX_set1_curves_list(dtls_ctx, curves) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set1_curves_list failed, curves=%s, %s\n", - curves, openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; - } -#endif - -#if OPENSSL_VERSION_NUMBER < 0x10100000L // v1.1.x - #if OPENSSL_VERSION_NUMBER < 0x10002000L // v1.0.2 - if (ctx->dtls_eckey) - SSL_CTX_set_tmp_ecdh(dtls_ctx, ctx->dtls_eckey); -#else - SSL_CTX_set_ecdh_auto(dtls_ctx, 1); -#endif -#endif - - /** - * We activate "ALL" cipher suites to align with the peer's capabilities, - * ensuring maximum compatibility. - */ - if (SSL_CTX_set_cipher_list(dtls_ctx, ciphers) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_cipher_list failed, ciphers=%s, %s\n", - ciphers, openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; - } - /* Setup the certificate. */ - if (SSL_CTX_use_certificate(dtls_ctx, dtls_cert) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_certificate failed, %s\n", openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; - } - if (SSL_CTX_use_PrivateKey(dtls_ctx, dtls_pkey) != 1) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_use_PrivateKey failed, %s\n", openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; - } - - /* Server will send Certificate Request. 
*/ - SSL_CTX_set_verify(dtls_ctx, SSL_VERIFY_PEER | SSL_VERIFY_CLIENT_ONCE, openssl_dtls_verify_callback); - /* The depth count is "level 0:peer certificate", "level 1: CA certificate", - * "level 2: higher level CA certificate", and so on. */ - SSL_CTX_set_verify_depth(dtls_ctx, 4); - /* Whether we should read as many input bytes as possible (for non-blocking reads) or not. */ - SSL_CTX_set_read_ahead(dtls_ctx, 1); - /* Setup the SRTP context */ - if (SSL_CTX_set_tlsext_use_srtp(dtls_ctx, profiles)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Init SSL_CTX_set_tlsext_use_srtp failed, profiles=%s, %s\n", - profiles, openssl_get_error(ctx)); - ret = AVERROR(EINVAL); - return ret; - } - - /* The dtls should not be created unless the dtls_ctx has been initialized. */ - dtls = ctx->dtls = SSL_new(dtls_ctx); - if (!dtls) { - ret = AVERROR(ENOMEM); - goto end; - } - - /* Setup the callback for logging. */ - SSL_set_ex_data(dtls, 0, ctx); - SSL_set_info_callback(dtls, openssl_dtls_on_info); - - /** - * We have set the MTU to fragment the DTLS packet. It is important to note that the - * packet is split to ensure that each handshake packet is smaller than the MTU. - */ - SSL_set_options(dtls, SSL_OP_NO_QUERY_MTU); - SSL_set_mtu(dtls, ctx->mtu); -#if OPENSSL_VERSION_NUMBER >= 0x100010b0L /* OpenSSL 1.0.1k */ - DTLS_set_link_mtu(dtls, ctx->mtu); -#endif - - bio_in = BIO_new(BIO_s_mem()); - if (!bio_in) { - ret = AVERROR(ENOMEM); - goto end; - } - - bio_out = BIO_new(BIO_s_mem()); - if (!bio_out) { - ret = AVERROR(ENOMEM); - goto end; - } - - /** - * Please be aware that it is necessary to use a callback to obtain the packet to be written out. It is - * imperative that BIO_get_mem_data is not used to retrieve the packet, as it returns all the bytes that - * need to be sent out. - * For example, if MTU is set to 1200, and we got two DTLS packets to sendout: - * ServerHello, 95bytes. - * Certificate, 1105+143=1248bytes. 
- * If use BIO_get_mem_data, it will return 95+1248=1343bytes, which is larger than MTU 1200. - * If use callback, it will return two UDP packets: - * ServerHello+Certificate(Frament) = 95+1105=1200bytes. - * Certificate(Fragment) = 143bytes. - * Note that there should be more packets in real world, like ServerKeyExchange, CertificateRequest, - * and ServerHelloDone. Here we just use two packets for example. - */ -#if OPENSSL_VERSION_NUMBER < 0x30000000L // v3.0.x - BIO_set_callback(bio_out, openssl_dtls_bio_out_callback); -#else - BIO_set_callback_ex(bio_out, openssl_dtls_bio_out_callback_ex); -#endif - BIO_set_callback_arg(bio_out, (char*)ctx); - - ctx->bio_in = bio_in; - SSL_set_bio(dtls, bio_in, bio_out); - /* Now the bio_in and bio_out are owned by dtls, so we should set them to NULL. */ - bio_in = bio_out = NULL; - -end: - BIO_free(bio_in); - BIO_free(bio_out); - return ret; -} - -static void* dtls_context_new(AVClass *av_class, void *opaque, int pkt_size, dtls_fn_on_state on_state, dtls_fn_on_write on_write, const char* cert_file, const char* key_file) -{ - DTLSContext *ctx = av_mallocz(sizeof(DTLSContext)); - - ctx->av_class = av_class; - ctx->mtu = pkt_size; - ctx->opaque = opaque; - ctx->on_state = on_state; - ctx->on_write = on_write; - if (cert_file) - ctx->cert_file = av_strdup(cert_file); - if (key_file) - ctx->key_file = av_strdup(key_file); - - return ctx; -} - -/** - * Generate a self-signed certificate and private key for DTLS. Please note that the - * ff_openssl_init in tls_openssl.c has already called SSL_library_init(), and therefore, - * there is no need to call it again. - */ -static av_cold int dtls_context_init(AVFormatContext *s, void *pctx) -{ - DTLSContext *ctx = pctx; - - int ret = 0; - - ctx->dtls_init_starttime = av_gettime(); - - if (ctx->cert_file && ctx->key_file) { - /* Read the private key and file from the file. 
*/ - if ((ret = openssl_read_certificate(s, ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to read DTLS certificate from cert=%s, key=%s\n", - ctx->cert_file, ctx->key_file); - return ret; - } - } else { - /* Generate a private key to ctx->dtls_pkey. */ - if ((ret = openssl_dtls_gen_private_key(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS private key\n"); - return ret; - } - - /* Generate a self-signed certificate. */ - if ((ret = openssl_dtls_gen_certificate(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to generate DTLS certificate\n"); - return ret; - } - } - - if ((ret = openssl_dtls_init_context(ctx)) < 0) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to initialize DTLS context\n"); - return ret; - } - - ctx->dtls_init_endtime = av_gettime(); - av_log(ctx, AV_LOG_VERBOSE, "DTLS: Setup ok, MTU=%d, fingerprint %s\n", - ctx->mtu, ctx->dtls_fingerprint); - - return ret; -} - -/** - * Once the DTLS role has been negotiated - active for the DTLS client or passive for the - * DTLS server - we proceed to set up the DTLS state and initiate the handshake. - */ -static int dtls_context_start(void *pctx) -{ - DTLSContext *ctx = pctx; - - int ret = 0, r0, r1; - SSL *dtls = ctx->dtls; - - ctx->dtls_handshake_starttime = av_gettime(); - - /* Setup DTLS as passive, which is server role. */ - SSL_set_accept_state(dtls); - - /** - * During initialization, we only need to call SSL_do_handshake once because SSL_read consumes - * the handshake message if the handshake is incomplete. - * To simplify maintenance, we initiate the handshake for both the DTLS server and client after - * sending out the ICE response in the start_active_handshake function. It's worth noting that - * although the DTLS server may receive the ClientHello immediately after sending out the ICE - * response, this shouldn't be an issue as the handshake function is called before any DTLS - * packets are received. 
- */ - r0 = SSL_do_handshake(dtls); - r1 = openssl_ssl_get_error(ctx, r0); - // Fatal SSL error, for example, no available suite when peer is DTLS 1.0 while we are DTLS 1.2. - if (r0 < 0 && (r1 != SSL_ERROR_NONE && r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE)) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Failed to drive SSL context, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); - return AVERROR(EIO); - } - - return ret; -} - -/** - * DTLS handshake with server, as a server in passive mode, using openssl. - * - * This function initializes the SSL context as the client role using OpenSSL and - * then performs the DTLS handshake until success. Upon successful completion, it - * exports the SRTP material key. - * - * @return 0 if OK, AVERROR_xxx on error - */ -static int dtls_context_write(void *pctx, char* buf, int size) -{ - DTLSContext *ctx = pctx; - - int ret = 0, res_ct, res_ht, r0, r1, do_callback; - SSL *dtls = ctx->dtls; - const char* dst = "EXTRACTOR-dtls_srtp"; - BIO *bio_in = ctx->bio_in; - - /* Got DTLS response successfully. */ - openssl_dtls_state_trace(ctx, buf, size, 1); - if ((r0 = BIO_write(bio_in, buf, size)) <= 0) { - res_ct = size > 0 ? buf[0]: 0; - res_ht = size > 13 ? buf[13] : 0; - av_log(ctx, AV_LOG_ERROR, "DTLS: Feed response failed, content=%d, handshake=%d, size=%d, r0=%d\n", - res_ct, res_ht, size, r0); - ret = AVERROR(EIO); - goto end; - } - - /** - * If there is data available in bio_in, use SSL_read to allow SSL to process it. - * We limit the MTU to 1200 for DTLS handshake, which ensures that the buffer is large enough for reading. 
- */ - r0 = SSL_read(dtls, buf, sizeof(buf)); - r1 = openssl_ssl_get_error(ctx, r0); - if (r0 <= 0) { - if (r1 != SSL_ERROR_WANT_READ && r1 != SSL_ERROR_WANT_WRITE && r1 != SSL_ERROR_ZERO_RETURN) { - av_log(ctx, AV_LOG_ERROR, "DTLS: Read failed, r0=%d, r1=%d %s\n", r0, r1, ctx->error_message); - ret = AVERROR(EIO); - goto end; - } - } else { - av_log(ctx, AV_LOG_TRACE, "DTLS: Read %d bytes, r0=%d, r1=%d\n", r0, r0, r1); - } - - /* Check whether the DTLS is completed. */ - if (SSL_is_init_finished(dtls) != 1) - goto end; - - do_callback = ctx->on_state && !ctx->dtls_done_for_us; - ctx->dtls_done_for_us = 1; - ctx->dtls_handshake_endtime = av_gettime(); - - /* Export SRTP master key after DTLS done */ - if (!ctx->dtls_srtp_key_exported) { - ret = SSL_export_keying_material(dtls, ctx->dtls_srtp_materials, sizeof(ctx->dtls_srtp_materials), - dst, strlen(dst), NULL, 0, 0); - r1 = openssl_ssl_get_error(ctx, r0); - if (!ret) { - av_log(ctx, AV_LOG_ERROR, "DTLS: SSL export key ret=%d, r1=%d %s\n", ret, r1, ctx->error_message); - ret = AVERROR(EIO); - goto end; - } - - ctx->dtls_srtp_key_exported = 1; - } - - if (do_callback && (ret = ctx->on_state(ctx, ctx->opaque, DTLS_STATE_FINISHED, NULL, NULL)) < 0) - goto end; - -end: - return ret; -} - -/** - * Cleanup the DTLS context. 
- */ -static av_cold void dtls_context_deinit(void *pctx) -{ - DTLSContext *ctx = pctx; - - SSL_free(ctx->dtls); - SSL_CTX_free(ctx->dtls_ctx); - X509_free(ctx->dtls_cert); - EVP_PKEY_free(ctx->dtls_pkey); - av_freep(&ctx->dtls_fingerprint); - av_freep(&ctx->cert_file); - av_freep(&ctx->key_file); -#if OPENSSL_VERSION_NUMBER < 0x30000000L /* OpenSSL 3.0 */ - EC_KEY_free(ctx->dtls_eckey); -#endif -} -#endif - #define OFFSET(x) offsetof(WHIPContext, x) #define DEC AV_OPT_FLAG_DECODING_PARAM static const AVOption options[] = { From 3b15ba1692e7ac37008d7bb1156ad7d9a57a4a8b Mon Sep 17 00:00:00 2001 From: winlin Date: Tue, 17 Oct 2023 11:53:42 +0800 Subject: [PATCH 60/60] Change some important step log level to info. --- libavformat/whip.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/libavformat/whip.c b/libavformat/whip.c index 9bcab69db33cb..a9cb64645959f 100644 --- a/libavformat/whip.c +++ b/libavformat/whip.c @@ -275,7 +275,7 @@ static int dtls_context_on_state(void *pctx, void *opaque, int state, const char if (state == DTLS_STATE_FINISHED && whip->state < WHIP_STATE_DTLS_FINISHED) { whip->state = WHIP_STATE_DTLS_FINISHED; whip->whip_dtls_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: DTLS handshake, elapsed=%dms\n", ELAPSED(whip->whip_starttime, av_gettime())); + av_log(whip, AV_LOG_INFO, "WHIP: DTLS handshake ok, elapsed=%dms\n", ELAPSED(whip->whip_starttime, av_gettime())); return ret; } @@ -619,7 +619,7 @@ static int generate_sdp_offer(AVFormatContext *s) if (whip->state < WHIP_STATE_OFFER) whip->state = WHIP_STATE_OFFER; whip->whip_offer_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); + av_log(whip, AV_LOG_INFO, "WHIP: Generated state=%d, offer: %s\n", whip->state, whip->sdp_offer); end: av_bprint_finalize(&bp, NULL); @@ -721,7 +721,8 @@ static int exchange_sdp(AVFormatContext *s) if (whip->state < WHIP_STATE_ANSWER) whip->state = 
WHIP_STATE_ANSWER; - av_log(whip, AV_LOG_VERBOSE, "WHIP: Got state=%d, answer: %s\n", whip->state, whip->sdp_answer); + av_log(whip, AV_LOG_INFO, "WHIP: Got state=%d, answer: %s, location=%s\n", whip->state, whip->sdp_answer, + whip->whip_resource_url ? whip->whip_resource_url : ""); end: ffurl_closep(&whip_uc); @@ -827,7 +828,7 @@ static int parse_answer(AVFormatContext *s) if (whip->state < WHIP_STATE_NEGOTIATED) whip->state = WHIP_STATE_NEGOTIATED; whip->whip_answer_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", + av_log(whip, AV_LOG_INFO, "WHIP: SDP state=%d, offer=%luB, answer=%luB, ufrag=%s, pwd=%luB, transport=%s://%s:%d, elapsed=%dms\n", whip->state, strlen(whip->sdp_offer), strlen(whip->sdp_answer), whip->ice_ufrag_remote, strlen(whip->ice_pwd_remote), whip->ice_protocol, whip->ice_host, whip->ice_port, ELAPSED(whip->whip_starttime, av_gettime())); @@ -1181,9 +1182,9 @@ static int ice_dtls_handshake(AVFormatContext *s) if (whip->state < WHIP_STATE_ICE_CONNECTED) { whip->state = WHIP_STATE_ICE_CONNECTED; whip->whip_ice_time = av_gettime(); - av_log(whip, AV_LOG_VERBOSE, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, location=%s, username=%s:%s, res=%dB, elapsed=%dms\n", - whip->state, whip->ice_host, whip->ice_port, whip->whip_resource_url ? whip->whip_resource_url : "", - whip->ice_ufrag_remote, whip->ice_ufrag_local, ret, ELAPSED(whip->whip_starttime, av_gettime())); + av_log(whip, AV_LOG_INFO, "WHIP: ICE STUN ok, state=%d, url=udp://%s:%d, username=%s:%s, res=%dB, elapsed=%dms\n", + whip->state, whip->ice_host, whip->ice_port, whip->ice_ufrag_remote, whip->ice_ufrag_local, + ret, ELAPSED(whip->whip_starttime, av_gettime())); /* If got the first binding response, start DTLS handshake. */ if ((ret = dtls_context_start(whip->dtls_ctx)) < 0)