Home Reference Source

src/loader/m3u8-parser.ts

  1. import * as URLToolkit from 'url-toolkit';
  2.  
  3. import Fragment from './fragment';
  4. import Level from './level';
  5. import LevelKey from './level-key';
  6.  
  7. import AttrList from '../utils/attr-list';
  8. import { logger } from '../utils/logger';
  9. import { isCodecType, CodecType } from '../utils/codecs';
  10. import { MediaPlaylist, AudioGroup, MediaPlaylistType } from '../types/media-playlist';
  11. import { PlaylistLevelType } from '../types/loader';
  12.  
  13. /**
  14. * M3U8 parser
  15. * @module
  16. */
  17.  
  18. // https://regex101.com is your friend
  19. const MASTER_PLAYLIST_REGEX = /(?:#EXT-X-STREAM-INF:([^\n\r]*)[\r\n]+([^\r\n]+)|#EXT-X-SESSION-DATA:([^\n\r]*)[\r\n]+)/g;
  20. const MASTER_PLAYLIST_MEDIA_REGEX = /#EXT-X-MEDIA:(.*)/g;
  21.  
  22. const LEVEL_PLAYLIST_REGEX_FAST = new RegExp([
  23. /#EXTINF:\s*(\d*(?:\.\d+)?)(?:,(.*)\s+)?/.source, // duration (#EXTINF:<duration>,<title>), group 1 => duration, group 2 => title
  24. /|(?!#)([\S+ ?]+)/.source, // segment URI, group 3 => the URI (note newline is not eaten)
  25. /|#EXT-X-BYTERANGE:*(.+)/.source, // next segment's byterange, group 4 => range spec (x@y)
  26. /|#EXT-X-PROGRAM-DATE-TIME:(.+)/.source, // next segment's program date/time group 5 => the datetime spec
  27. /|#.*/.source // All other non-segment oriented tags will match with all groups empty
  28. ].join(''), 'g');
  29.  
  30. const LEVEL_PLAYLIST_REGEX_SLOW = /(?:(?:#(EXTM3U))|(?:#EXT-X-(PLAYLIST-TYPE):(.+))|(?:#EXT-X-(MEDIA-SEQUENCE): *(\d+))|(?:#EXT-X-(TARGETDURATION): *(\d+))|(?:#EXT-X-(KEY):(.+))|(?:#EXT-X-(START):(.+))|(?:#EXT-X-(ENDLIST))|(?:#EXT-X-(DISCONTINUITY-SEQ)UENCE:(\d+))|(?:#EXT-X-(DIS)CONTINUITY))|(?:#EXT-X-(VERSION):(\d+))|(?:#EXT-X-(MAP):(.+))|(?:(#)([^:]*):(.*))|(?:(#)(.*))(?:.*)\r?\n?/;
  31.  
  32. const MP4_REGEX_SUFFIX = /\.(mp4|m4s|m4v|m4a)$/i;
  33.  
  34. export default class M3U8Parser {
  35. static findGroup (groups: Array<AudioGroup>, mediaGroupId: string): AudioGroup | undefined {
  36. for (let i = 0; i < groups.length; i++) {
  37. const group = groups[i];
  38. if (group.id === mediaGroupId) {
  39. return group;
  40. }
  41. }
  42. }
  43.  
  44. static convertAVC1ToAVCOTI (codec) {
  45. let avcdata = codec.split('.');
  46. let result;
  47. if (avcdata.length > 2) {
  48. result = avcdata.shift() + '.';
  49. result += parseInt(avcdata.shift()).toString(16);
  50. result += ('000' + parseInt(avcdata.shift()).toString(16)).substr(-4);
  51. } else {
  52. result = codec;
  53. }
  54. return result;
  55. }
  56.  
  57. static resolve (url, baseUrl) {
  58. return URLToolkit.buildAbsoluteURL(baseUrl, url, { alwaysNormalize: true });
  59. }
  60.  
  61. static parseMasterPlaylist (string: string, baseurl: string) {
  62. // TODO(typescript-level)
  63. let levels: Array<any> = [];
  64. let sessionData: Record<string, AttrList> = {};
  65. let hasSessionData = false;
  66. MASTER_PLAYLIST_REGEX.lastIndex = 0;
  67.  
  68. // TODO(typescript-level)
  69. function setCodecs (codecs: Array<string>, level: any) {
  70. ['video', 'audio'].forEach((type: CodecType) => {
  71. const filtered = codecs.filter((codec) => isCodecType(codec, type));
  72. if (filtered.length) {
  73. const preferred = filtered.filter((codec) => {
  74. return codec.lastIndexOf('avc1', 0) === 0 || codec.lastIndexOf('mp4a', 0) === 0;
  75. });
  76. level[`${type}Codec`] = preferred.length > 0 ? preferred[0] : filtered[0];
  77.  
  78. // remove from list
  79. codecs = codecs.filter((codec) => filtered.indexOf(codec) === -1);
  80. }
  81. });
  82.  
  83. level.unknownCodecs = codecs;
  84. }
  85.  
  86. let result: RegExpExecArray | null;
  87. while ((result = MASTER_PLAYLIST_REGEX.exec(string)) != null) {
  88. if (result[1]) {
  89. // '#EXT-X-STREAM-INF' is found, parse level tag in group 1
  90.  
  91. // TODO(typescript-level)
  92. const level: any = {};
  93.  
  94. const attrs = level.attrs = new AttrList(result[1]);
  95. level.url = M3U8Parser.resolve(result[2], baseurl);
  96.  
  97. const resolution = attrs.decimalResolution('RESOLUTION');
  98. if (resolution) {
  99. level.width = resolution.width;
  100. level.height = resolution.height;
  101. }
  102. level.bitrate = attrs.decimalInteger('AVERAGE-BANDWIDTH') || attrs.decimalInteger('BANDWIDTH');
  103. level.name = attrs.NAME;
  104.  
  105. setCodecs([].concat((attrs.CODECS || '').split(/[ ,]+/)), level);
  106.  
  107. if (level.videoCodec && level.videoCodec.indexOf('avc1') !== -1) {
  108. level.videoCodec = M3U8Parser.convertAVC1ToAVCOTI(level.videoCodec);
  109. }
  110.  
  111. levels.push(level);
  112. } else if (result[3]) {
  113. // '#EXT-X-SESSION-DATA' is found, parse session data in group 3
  114. let sessionAttrs = new AttrList(result[3]);
  115. if (sessionAttrs['DATA-ID']) {
  116. hasSessionData = true;
  117. sessionData[sessionAttrs['DATA-ID']] = sessionAttrs;
  118. }
  119. }
  120. }
  121. return {
  122. levels,
  123. sessionData: hasSessionData ? sessionData : null
  124. };
  125. }
  126.  
  127. static parseMasterPlaylistMedia (string: string, baseurl: string, type: MediaPlaylistType, audioGroups: Array<AudioGroup> = []): Array<MediaPlaylist> {
  128. let result: RegExpExecArray | null;
  129. let medias: Array<MediaPlaylist> = [];
  130. let id = 0;
  131. MASTER_PLAYLIST_MEDIA_REGEX.lastIndex = 0;
  132. while ((result = MASTER_PLAYLIST_MEDIA_REGEX.exec(string)) !== null) {
  133. const attrs = new AttrList(result[1]);
  134. if (attrs.TYPE === type) {
  135. const media: MediaPlaylist = {
  136. attrs,
  137. id: id++,
  138. groupId: attrs['GROUP-ID'],
  139. instreamId: attrs['INSTREAM-ID'],
  140. name: attrs.NAME || attrs.LANGUAGE,
  141. type,
  142. default: (attrs.DEFAULT === 'YES'),
  143. autoselect: (attrs.AUTOSELECT === 'YES'),
  144. forced: (attrs.FORCED === 'YES'),
  145. lang: attrs.LANGUAGE
  146. };
  147.  
  148. if (attrs.URI) {
  149. media.url = M3U8Parser.resolve(attrs.URI, baseurl);
  150. }
  151.  
  152. if (audioGroups.length) {
  153. // If there are audio groups signalled in the manifest, let's look for a matching codec string for this track
  154. const groupCodec = M3U8Parser.findGroup(audioGroups, media.groupId as string);
  155.  
  156. // If we don't find the track signalled, lets use the first audio groups codec we have
  157. // Acting as a best guess
  158. media.audioCodec = groupCodec ? groupCodec.codec : audioGroups[0].codec;
  159. }
  160.  
  161. medias.push(media);
  162. }
  163. }
  164. return medias;
  165. }
  166.  
  167. static parseLevelPlaylist (string: string, baseurl: string, id: number, type: PlaylistLevelType, levelUrlId: number) {
  168. let currentSN = 0;
  169. let totalduration = 0;
  170. let level = new Level(baseurl);
  171. let discontinuityCounter = 0;
  172. let prevFrag: Fragment | null = null;
  173. let frag: Fragment | null = new Fragment();
  174. let result: RegExpExecArray | RegExpMatchArray | null;
  175. let i: number;
  176. let levelkey: LevelKey | undefined;
  177.  
  178. let firstPdtIndex = null;
  179.  
  180. LEVEL_PLAYLIST_REGEX_FAST.lastIndex = 0;
  181.  
  182. while ((result = LEVEL_PLAYLIST_REGEX_FAST.exec(string)) !== null) {
  183. const duration = result[1];
  184. if (duration) { // INF
  185. frag.duration = parseFloat(duration);
  186. // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
  187. const title = (' ' + result[2]).slice(1);
  188. frag.title = title || null;
  189. frag.tagList.push(title ? [ 'INF', duration, title ] : [ 'INF', duration ]);
  190. } else if (result[3]) { // url
  191. if (Number.isFinite(frag.duration)) {
  192. const sn = currentSN++;
  193. frag.type = type;
  194. frag.start = totalduration;
  195. if (levelkey) {
  196. frag.levelkey = levelkey;
  197. }
  198. frag.sn = sn;
  199. frag.level = id;
  200. frag.cc = discontinuityCounter;
  201. frag.urlId = levelUrlId;
  202. frag.baseurl = baseurl;
  203. // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
  204. frag.relurl = (' ' + result[3]).slice(1);
  205. assignProgramDateTime(frag, prevFrag);
  206.  
  207. level.fragments.push(frag);
  208. prevFrag = frag;
  209. totalduration += frag.duration;
  210.  
  211. frag = new Fragment();
  212. }
  213. } else if (result[4]) { // X-BYTERANGE
  214. const data = (' ' + result[4]).slice(1);
  215. if (prevFrag) {
  216. frag.setByteRange(data, prevFrag);
  217. } else {
  218. frag.setByteRange(data);
  219. }
  220. } else if (result[5]) { // PROGRAM-DATE-TIME
  221. // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
  222. frag.rawProgramDateTime = (' ' + result[5]).slice(1);
  223. frag.tagList.push(['PROGRAM-DATE-TIME', frag.rawProgramDateTime]);
  224. if (firstPdtIndex === null) {
  225. firstPdtIndex = level.fragments.length;
  226. }
  227. } else {
  228. result = result[0].match(LEVEL_PLAYLIST_REGEX_SLOW);
  229. if (!result) {
  230. logger.warn('No matches on slow regex match for level playlist!');
  231. continue;
  232. }
  233. for (i = 1; i < result.length; i++) {
  234. if (typeof result[i] !== 'undefined') {
  235. break;
  236. }
  237. }
  238.  
  239. // avoid sliced strings https://github.com/video-dev/hls.js/issues/939
  240. const value1 = (' ' + result[i + 1]).slice(1);
  241. const value2 = (' ' + result[i + 2]).slice(1);
  242.  
  243. switch (result[i]) {
  244. case '#':
  245. frag.tagList.push(value2 ? [ value1, value2 ] : [ value1 ]);
  246. break;
  247. case 'PLAYLIST-TYPE':
  248. level.type = value1.toUpperCase();
  249. break;
  250. case 'MEDIA-SEQUENCE':
  251. currentSN = level.startSN = parseInt(value1);
  252. break;
  253. case 'TARGETDURATION':
  254. level.targetduration = parseFloat(value1);
  255. break;
  256. case 'VERSION':
  257. level.version = parseInt(value1);
  258. break;
  259. case 'EXTM3U':
  260. break;
  261. case 'ENDLIST':
  262. level.live = false;
  263. break;
  264. case 'DIS':
  265. discontinuityCounter++;
  266. frag.tagList.push(['DIS']);
  267. break;
  268. case 'DISCONTINUITY-SEQ':
  269. discontinuityCounter = parseInt(value1);
  270. break;
  271. case 'KEY': {
  272. // https://tools.ietf.org/html/rfc8216#section-4.3.2.4
  273. const decryptparams = value1;
  274. const keyAttrs = new AttrList(decryptparams);
  275. const decryptmethod = keyAttrs.enumeratedString('METHOD');
  276. const decrypturi = keyAttrs.URI;
  277. const decryptiv = keyAttrs.hexadecimalInteger('IV');
  278. // From RFC: This attribute is OPTIONAL; its absence indicates an implicit value of "identity".
  279. const decryptkeyformat = keyAttrs.KEYFORMAT || 'identity';
  280.  
  281. if (decryptkeyformat === 'com.apple.streamingkeydelivery') {
  282. logger.warn('Keyformat com.apple.streamingkeydelivery is not supported');
  283. continue;
  284. }
  285.  
  286. if (decryptmethod) {
  287. levelkey = new LevelKey(baseurl, decrypturi);
  288. if ((decrypturi) && (['AES-128', 'SAMPLE-AES', 'SAMPLE-AES-CENC'].indexOf(decryptmethod) >= 0)) {
  289. levelkey.method = decryptmethod;
  290. levelkey.key = null;
  291. // Initialization Vector (IV)
  292. levelkey.iv = decryptiv;
  293. }
  294. }
  295. break;
  296. }
  297. case 'START': {
  298. const startAttrs = new AttrList(value1);
  299. const startTimeOffset = startAttrs.decimalFloatingPoint('TIME-OFFSET');
  300. // TIME-OFFSET can be 0
  301. if (Number.isFinite(startTimeOffset)) {
  302. level.startTimeOffset = startTimeOffset;
  303. }
  304. break;
  305. }
  306. case 'MAP': {
  307. const mapAttrs = new AttrList(value1);
  308. frag.relurl = mapAttrs.URI;
  309. if (mapAttrs.BYTERANGE) {
  310. frag.setByteRange(mapAttrs.BYTERANGE);
  311. }
  312. frag.baseurl = baseurl;
  313. frag.level = id;
  314. frag.type = type;
  315. frag.sn = 'initSegment';
  316. level.initSegment = frag;
  317. frag = new Fragment();
  318. frag.rawProgramDateTime = level.initSegment.rawProgramDateTime;
  319. break;
  320. }
  321. default:
  322. logger.warn(`line parsed but not handled: ${result}`);
  323. break;
  324. }
  325. }
  326. }
  327. frag = prevFrag;
  328. // logger.log('found ' + level.fragments.length + ' fragments');
  329. if (frag && !frag.relurl) {
  330. level.fragments.pop();
  331. totalduration -= frag.duration;
  332. }
  333. level.totalduration = totalduration;
  334. level.averagetargetduration = totalduration / level.fragments.length;
  335. level.endSN = currentSN - 1;
  336. level.startCC = level.fragments[0] ? level.fragments[0].cc : 0;
  337. level.endCC = discontinuityCounter;
  338.  
  339. if (!level.initSegment && level.fragments.length) {
  340. // this is a bit lurky but HLS really has no other way to tell us
  341. // if the fragments are TS or MP4, except if we download them :/
  342. // but this is to be able to handle SIDX.
  343. if (level.fragments.every((frag) => MP4_REGEX_SUFFIX.test(frag.relurl))) {
  344. logger.warn('MP4 fragments found but no init segment (probably no MAP, incomplete M3U8), trying to fetch SIDX');
  345.  
  346. frag = new Fragment();
  347. frag.relurl = level.fragments[0].relurl;
  348. frag.baseurl = baseurl;
  349. frag.level = id;
  350. frag.type = type;
  351. frag.sn = 'initSegment';
  352.  
  353. level.initSegment = frag;
  354. level.needSidxRanges = true;
  355. }
  356. }
  357.  
  358. /**
  359. * Backfill any missing PDT values
  360. "If the first EXT-X-PROGRAM-DATE-TIME tag in a Playlist appears after
  361. one or more Media Segment URIs, the client SHOULD extrapolate
  362. backward from that tag (using EXTINF durations and/or media
  363. timestamps) to associate dates with those segments."
  364. * We have already extrapolated forward, but all fragments up to the first instance of PDT do not have their PDTs
  365. * computed.
  366. */
  367. if (firstPdtIndex) {
  368. backfillProgramDateTimes(level.fragments, firstPdtIndex);
  369. }
  370.  
  371. return level;
  372. }
  373. }
  374.  
  375. function backfillProgramDateTimes (fragments, startIndex) {
  376. let fragPrev = fragments[startIndex];
  377. for (let i = startIndex - 1; i >= 0; i--) {
  378. const frag = fragments[i];
  379. frag.programDateTime = fragPrev.programDateTime - (frag.duration * 1000);
  380. fragPrev = frag;
  381. }
  382. }
  383.  
  384. function assignProgramDateTime (frag, prevFrag) {
  385. if (frag.rawProgramDateTime) {
  386. frag.programDateTime = Date.parse(frag.rawProgramDateTime);
  387. } else if (prevFrag?.programDateTime) {
  388. frag.programDateTime = prevFrag.endProgramDateTime;
  389. }
  390.  
  391. if (!Number.isFinite(frag.programDateTime)) {
  392. frag.programDateTime = null;
  393. frag.rawProgramDateTime = null;
  394. }
  395. }