valid_rtmp 源代码

# encoding: UTF-8
# 基于conda env:py35(opencv)
"""
寻找过滤有效的rtmp,or,rtsp直播地址

帮助:python valid_rtmp.py -h

使用示例:python valid_rtmp.py -u https://blog.csdn.net/osle123/article/details/52757886

步骤:

1,下载页面:https://blog.csdn.net/osle123/article/details/52757886(避免使用csdn等,需点击触发显示全部的网页)

2,正则匹配:rtmp://, rtsp://等地址

3,[rtmp://xx.com,rtsp://yy.com]使用ping+cv2.read()验证有效性
"""
import argparse
import logging
import re
from contextlib import suppress
from multiprocessing import cpu_count, Pool
from typing import List

import cv2
import requests

logger = logging.getLogger()


class ValidRtmp():
    """Collect rtmp / rtsp stream addresses from a web page and keep the playable ones."""

    # Browser-like request headers sent with the page download.
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'Sec-Fetch-Dest': 'empty',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,tr;q=0.6,fr;q=0.5,zh-TW;q=0.4',
    }

    # Compiled once at class creation instead of on every call.
    # Group 1 is the complete address; the inner groups only structure the pattern.
    _STREAM_URL_RE = re.compile(
        r"((rtsp|rtmp):\/\/[\w\-_]+(\.[\w\-_]+)+([\w\-\.,@?^=%&:/~\+#]*[\w\-\@?^=%&/~\+#])?)",
        re.I | re.S | re.M,
    )

    @classmethod
    def _extract_stream_urls(cls, content: str) -> List[str]:
        """
        Return every rtsp / rtmp address found in ``content``.

        :param str content: text to scan (typically the body of a downloaded page)
        :return list: matched addresses in order of appearance, duplicates included
        """
        # All spaces are removed before matching.
        # NOTE(review): this can glue text that followed a URL onto the URL
        # itself (``\w`` also matches non-ASCII letters) -- confirm it is intended.
        compact = content.replace(" ", "")
        return [match.group(1) for match in cls._STREAM_URL_RE.finditer(compact)]

    def get_rtmp_url(self, url: str) -> List[str]:
        """
        Download ``url`` and return the rtsp / rtmp addresses found in its content.

        Best-effort: any failure while downloading or scanning the page is
        logged and an empty list is returned, so this method never raises.

        :param str url: seed page address to download and scan
        :return list: rtsp / rtmp addresses found in the page, empty on failure
        """
        try:
            # timeout is (connect, read) in seconds
            response = requests.get(url, headers=self.headers, timeout=(3, 7))
            return self._extract_stream_urls(response.text)
        except Exception:
            # Deliberately broad: callers rely on getting a list back no matter
            # what, but the reason for the failure is no longer discarded.
            logger.warning('failed to collect stream urls from %s', url, exc_info=True)
            return []

    def valid(self, url: str) -> str:
        """
        Check whether ``url`` is a stream from which a frame can be read.

        Best-effort: any error raised while probing is logged and treated as
        "not valid", so this method never raises.

        :param str url: rtsp / rtmp address to verify
        :return str: ``url`` itself when a frame could be read, otherwise ``''``
        """
        cap = None
        try:
            cap = cv2.VideoCapture(url)
            ret, _frame = cap.read()
            return url if ret else ''
        except Exception:
            logger.warning('failed to probe %s', url, exc_info=True)
            return ''
        finally:
            # Always release the capture; previously the handle was left open
            # for every probed address.
            if cap is not None:
                with suppress(Exception):
                    cap.release()
def main() -> None:
    """
    Command-line entry point.

    Downloads the page given with ``-u/--url``, extracts the rtsp / rtmp
    addresses it contains, probes them in a process pool and logs the ones
    that turned out to be playable.
    """
    # The root logger defaults to WARNING; without this the INFO lines below,
    # which are the only output of the script, would never be shown.
    logging.basicConfig(level=logging.INFO)

    parser = argparse.ArgumentParser()
    # Required: without a url there is nothing to download.
    parser.add_argument('-u', '--url', required=True, help='url address')
    args = parser.parse_args()

    valid_rtmp = ValidRtmp()
    candidate_urls = valid_rtmp.get_rtmp_url(args.url)
    logger.info('candidate_urls len:%d', len(candidate_urls))

    # Probe each distinct address only once; dict.fromkeys keeps first-seen order.
    unique_candidates = list(dict.fromkeys(candidate_urls))

    # Leave one core free; the context manager tears the pool down even if
    # map() raises.
    with Pool(processes=max(1, cpu_count() - 1)) as pool:
        checked = pool.map(valid_rtmp.valid, unique_candidates)

    # valid() returns '' for unusable addresses; drop those and sort so the
    # logged result is deterministic.
    valid_urls = sorted({x for x in checked if x})
    logger.info('valid_urls: len %d \n %s', len(valid_urls), valid_urls)


if __name__ == '__main__':
    main()