工具介绍
主要是针对哔哩哔哩的多种类型视频进行接口的封装,实现多种类型视频的爬取。界面虽说是丑了点,但是功能很齐全,啊哈哈哈。
功能介绍
文章插图
这个工具包括单个视频的爬取、多个视频的爬取、up主的所有视频爬取、关键字搜索的视频爬取,以及合集视频的爬取。我猜来B站学习的人大部分时间看的都是合集视频,所以我特意实现了合集视频的爬取。
效果展示
代码展示
【我的哔哩哔哩视频小工具】
import json
import os
import re
import subprocess
import sys
from pprint import pprint

import requests
from tqdm import tqdm

# Root folder for every download, relative to the current working directory.
# The published listing wrote files to relative ``videos/...`` paths but then
# ran ffmpeg / os.remove on hard-coded absolute ``E://...`` paths (and used a
# different project folder for mode 3), so it only worked on the author's PC.
VIDEO_ROOT = 'videos'

# Name of the ffmpeg executable; it must be reachable through PATH.
FFMPEG = 'ffmpeg'

# Seconds to wait for a connection / between received bytes before giving up.
REQUEST_TIMEOUT = 30

VIDEO_URL_PREFIX = 'https://www.bilibili.com/video/'

# NOTE(review): the HTML tags inside the three patterns below were stripped
# from the published listing (it showed r'(.*?)_哔哩哔哩_bilibili', r'' and
# r'(.*?)').  They are reconstructed from the way the matches are consumed:
# the page title, the JSON assigned to ``window.__playinfo__`` (its
# ['data']['dash'] keys are read below), and the collection title.  Confirm
# against a live video page.
_TITLE_RE = re.compile(r'<title[^>]*>(.*?)_哔哩哔哩_bilibili', re.S)
_PLAYINFO_RE = re.compile(
    r'<script>window\.__playinfo__=(.*?)</script>', re.S)
_COLLECTION_TITLE_RE = re.compile(r'<h1[^>]*?\btitle="(.*?)"', re.S)

# Characters removed from titles before they are used as file names.
# NOTE(review): the published listing is cut off after ``'/:*?"``; the rest
# of the set (``<>|`` plus the backslash, i.e. the characters Windows forbids
# in file names) is an assumption.
_FORBIDDEN_CHARS = str.maketrans('', '', '\\/:*?"<>|')

# NOTE(security): this is a logged-in session cookie that was hard-coded in
# the published listing.  It is kept only as a fallback so behaviour stays
# the same; set the BILIBILI_COOKIE environment variable to supply your own
# instead of committing credentials to source.
_DEFAULT_COOKIE = (
    "buvid3=074A23DD-E4AE-5D1F-8971-EA89720687F779779infoc; "
    "b_nut=1691576779; i-wanna-go-back=-1; b_ut=7; "
    "_uuid=8D3FCF24-9E97-75910-4852-2DBC923F9109C80000infoc; "
    "buvid4=C27576F5-FF5E-A576-639E-EB05A9FF819380829-023080918-"
    "s0gqE0o3nh4DnG2wvekU1g%3D%3D; "
    "header_theme_version=CLOSE; CURRENT_FNVAL=4048; "
    "rpdid=|(k)~YRY|lRR0J'uYmk|Y~)~k; "
    "fingerprint=0764ade32b8670c148f76046f631044d; "
    "buvid_fp_plain=undefined; DedeUserID=1798617872; "
    "DedeUserID__ckMd5=c8a985b394955cd0; CURRENT_QUALITY=80; "
    "home_feed_column=5; browser_resolution=1482-754; "
    "LIVE_BUVID=AUTO4016924132587756; "
    "buvid_fp=0764ade32b8670c148f76046f631044d; PVID=2; "
    "SESSDATA=0aadf5b6%2C1708141392%2C2b2e3%2A82E4uLHQ8ncsUYntQiyte-"
    "rrUGrgJoME82fX56N2xFREDBIy6O7rv2p4kvOQz2U9w3qq03DwAASAA; "
    "bili_jct=2214ba6db57fce89e7960924ebabe3ee; "
    "bp_video_offset_1798617872=832158533866749973; innersign=0; "
    "b_lsid=B861098109_18A1BC08373; bsource=search_baidu; sid=6x7kdn79"
)


class Spider:
    """Interactive command-line downloader for Bilibili videos.

    Instantiating the class starts the menu loop.  Four modes are offered:
    keyword search ('1'), all videos of an uploader ('2'), a single video
    URL ('3') and a multi-part collection ('4').  Audio and video streams
    are downloaded separately and merged with ffmpeg.
    """

    # Headers sent with every page / media request.
    HEADERS = {
        'Accept-Encoding': '',
        'referer': "https://www.bilibili.com/",
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203',
    }

    def __init__(self):
        """Show the menu forever; ``check_choice`` exits the process."""
        while True:
            print("--------------------------------------")
            print("------欢迎使用哔哩哔哩第三方链接平台--------")
            print("--关键字搜索爬取,请输入1------------------")
            print("--Up主视频爬取,请输入2-------------------")
            print("--单个视频爬取,请输入3--------------------")
            print("--合集视频爬取,请输入4--------------------")
            print("--退出程序,请输入0-----------------------")
            print("--------------------------------------")
            sort = input('请选择您想要爬取的类型:')
            self.check_choice(sort=sort)

    def check_choice(self, sort):
        """Run the crawl selected by *sort*.

        Args:
            sort: Menu choice, '1' to '4'.  Any other value (including the
                documented '0') terminates the program.

        Any failure while crawling ends the program, as before, but the
        reason is now reported and the exit status is non-zero instead of
        exiting silently.
        """
        sort = str(sort)
        if sort not in ('1', '2', '3', '4'):
            sys.exit()
        try:
            if sort == '1':
                kw = input("请输入您想搜索的关键词:")
                pages = int(input("您想爬取多少页"))
                url = 'https://api.bilibili.com/x/web-interface/wbi/search/type'
                for page in range(1, pages + 1):
                    json_data = self.get_multiple_html_data(
                        url=url, kw=kw, page=page, mode=sort)
                    bvids, author = self.parse_data(data=json_data, mode=sort)
                    self.download_video(bvids=bvids, kw=kw)
            elif sort == '2':
                mid = input("请输入您想爬取的up主id:")
                pages = int(input("您想爬取几页:"))
                url = 'https://api.bilibili.com/x/space/wbi/arc/search'
                for page in range(1, pages + 1):
                    json_data = self.get_multiple_html_data(
                        url=url, kw=mid, page=page, mode=sort)
                    bvids, author = self.parse_data(data=json_data, mode=sort)
                    self.download_video(bvids=bvids, kw=author)
            elif sort == '3':
                url = input("请输入您想爬取的链接:")
                html_data = self.get_simply_html_data(url=url, mode=sort)
                self.parse_data(data=html_data, mode=sort)
            else:
                bvid = input("请输入您要爬取的合集视频的bvid:")
                pages = int(input("请输入您想要爬取的页数或者总的合集数:"))
                url = f'https://www.bilibili.com/video/{bvid}/'
                for page in range(1, pages + 1):
                    html_data = self.get_simply_html_data(
                        url=url, mode=sort, page=page)
                    self.parse_data(data=html_data, mode=sort)
        except Exception as e:
            # Top-level boundary of the tool: report, then stop.
            sys.exit(f'爬取失败:{e}')

    def parse_data(self, data, mode):
        """Interpret a response according to *mode*.

        Args:
            data: Decoded JSON (modes '1' and '2') or page HTML (modes '3'
                and '4') as returned by the ``get_*_html_data`` methods.
            mode: Menu choice, '1' to '4'.

        Returns:
            A ``(bvids, author)`` tuple.  Modes '1' and '2' fill ``bvids``
            (mode '2' also fills ``author``); modes '3' and '4' download the
            video as a side effect and return ``([], '')``.  On any error
            the error is printed and ``([], '')`` is returned, so callers
            that unpack the result no longer crash on ``None``.
        """
        mode = str(mode)
        bvids = []
        author = ''
        try:
            if mode == '1':
                bvids = [item['bvid'] for item in data['data']['result']]
            elif mode == '2':
                vlist = data['data']['list']['vlist']
                author = vlist[0]['author']
                bvids = [item['bvid'] for item in vlist]
            elif mode == '3':
                title, play_info = self._extract_page(data)
                print(f'正在下载:{title}')
                self._download_and_merge(
                    play_info, os.path.join(VIDEO_ROOT, 'simply'), title)
            elif mode == '4':
                title, play_info = self._extract_page(data)
                found = _COLLECTION_TITLE_RE.search(data)
                # Fall back to the part title when the heading is missing so
                # the files never land in an unnamed folder.
                total_title = (
                    self.deal_special_words(found.group(1)) if found else ''
                ) or title
                print(f'正在下载:{title}')
                self._download_and_merge(
                    play_info,
                    os.path.join(VIDEO_ROOT, 'multiple', total_title),
                    title,
                )
        except Exception as e:
            print(e)
            return [], ''
        return bvids, author

    def download_video(self, bvids, kw):
        """Download every video in *bvids* into ``videos/multiple/<kw>``.

        Args:
            bvids: Iterable of BV ids; nothing happens when it is empty.
            kw: Folder name (search keyword or uploader name).  It is passed
                through ``deal_special_words`` so it is a valid folder name.

        A failure on one video is printed and the loop moves on to the next.
        """
        if not bvids:
            return
        out_dir = os.path.join(
            VIDEO_ROOT, 'multiple', self.deal_special_words(str(kw)))
        for bvid in bvids:
            try:
                page = requests.get(
                    url=VIDEO_URL_PREFIX + str(bvid),
                    headers=self.HEADERS,
                    timeout=REQUEST_TIMEOUT,
                )
                title, play_info = self._extract_page(page.text)
                print(f'正在下载:{title}')
                self._download_and_merge(play_info, out_dir, title)
            except Exception as e:
                print(e)
                continue

    def get_simply_html_data(self, url, mode, page=0):
        """Fetch a video page and return its HTML.

        Args:
            url: Page URL.
            mode: Menu choice; for '4' the part number is sent as ``p``.
            page: Part number inside a collection (mode '4' only).

        Returns:
            The response text on HTTP 200, otherwise ``None`` (also on a
            network error, which is printed).
        """
        params = {}
        if str(mode) == '4':
            params = {
                'p': page,
                'vd_source': '6a36e7aa5ec79d2b549f8573b84677f4',
            }
        try:
            res = requests.get(
                url=url, headers=self.HEADERS, params=params,
                timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print(e)
            return None
        return res.text if res.status_code == 200 else None

    def get_multiple_html_data(self, url, kw, page, mode):
        """Query a Bilibili JSON API (search or uploader listing).

        Args:
            url: API endpoint.
            kw: Search keyword (mode '1') or uploader id (mode '2').
            page: 1-based page number.
            mode: Menu choice, '1' or '2'.

        Returns:
            The decoded JSON on HTTP 200, otherwise ``None`` (also on a
            network error, which is printed).
        """
        headers = {
            'Accept-Encoding': '',
            'Cookie': os.environ.get('BILIBILI_COOKIE', _DEFAULT_COOKIE),
            'Referer': 'https://search.bilibili.com/video?keyword=%E8%88%9E%E8%B9%88&from_source=webtop_search&spm_id_from=333.1007&search_source=5',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36 Edg/115.0.1901.203',
        }
        # NOTE(review): ``w_rid`` / ``wts`` below are a captured request
        # signature and timestamp; presumably the server rejects them once
        # stale -- confirm, and compute them per request if so.
        mode = str(mode)
        params = {}
        if mode == '1':
            params = {
                '__refresh__': 'true',
                '_extra': '',
                'context': '',
                'page': page,
                'page_size': '42',
                'from_source': '',
                'from_spmid': '333.337',
                # NOTE(review): leading space kept from the original listing.
                'platform': ' pc',
                'highlight': '1',
                'single_column': '0',
                'keyword': kw,
                'qv_id': '3HEcaoPMIGjW8n5ZhrGqw3d7CSAshw4D',
                'ad_resource': '5654',
                'source_tag': '3',
                'gaia_vtoken': '',
                'category_id': '',
                'search_type': 'video',
                'dynamic_offset': '0',
                'web_location': '1430654',
                'w_rid': '225ec322a1a3566deb059c125b62a9af',
                'wts': '1692682729',
            }
        elif mode == '2':
            params = {
                'mid': kw,
                'ps': '30',
                'tid': '0',
                'pn': page,
                'keyword': '',
                'order': 'pubdate',
                'platform': 'web',
                'web_location': '1550101',
                'order_avoided': 'true',
                'w_rid': 'f1357748802f3a2b2bbeda6e85e8da60',
                'wts': '1692350238',
            }
        try:
            res = requests.get(
                url=url, headers=headers, params=params,
                timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as e:
            print(e)
            return None
        return res.json() if res.status_code == 200 else None

    def deal_special_words(self, word):
        """Return *word* without characters that are illegal in file names.

        NOTE(review): the body of this method was truncated in the published
        listing; removing the characters ``\\ / : * ? " < > |`` and trimming
        surrounding whitespace is a reconstruction -- confirm.
        """
        return word.translate(_FORBIDDEN_CHARS).strip()

    def _extract_page(self, html):
        """Return ``(sanitized_title, play_info_dict)`` parsed from *html*.

        Raises:
            ValueError: if *html* is empty or either piece cannot be found.
        """
        if not html:
            raise ValueError('empty video page')
        title_match = _TITLE_RE.search(html)
        info_match = _PLAYINFO_RE.search(html)
        if title_match is None or info_match is None:
            raise ValueError('video title or play info not found in page')
        title = self.deal_special_words(title_match.group(1))
        return title, json.loads(info_match.group(1))

    def _download_stream(self, url, dest, desc):
        """Stream *url* into the file *dest* with a progress bar."""
        with requests.get(url, headers=self.HEADERS, stream=True,
                          timeout=REQUEST_TIMEOUT) as resp:
            # Without this an error page would be saved as media.
            resp.raise_for_status()
            length = resp.headers.get('Content-Length')
            # Progress is counted in 1 KiB chunks; unknown when the server
            # does not announce a length.
            total = (int(length) + 1023) // 1024 if length else None
            with open(dest, mode='wb') as out:
                for chunk in tqdm(iterable=resp.iter_content(1024),
                                  total=total, unit='k', desc=desc):
                    out.write(chunk)

    def _download_and_merge(self, play_info, out_dir, title):
        """Download the first audio/video stream and mux them with ffmpeg.

        The result is ``<out_dir>/<title>.mp4``.  The temporary
        ``audio.mp3`` / ``video.mp4`` files are removed even on failure.

        Raises:
            subprocess.CalledProcessError: if ffmpeg exits with an error.
        """
        dash = play_info['data']['dash']
        audio_url = dash['audio'][0]['baseUrl']
        video_url = dash['video'][0]['baseUrl']

        os.makedirs(out_dir, exist_ok=True)
        audio_path = os.path.join(out_dir, 'audio.mp3')
        video_path = os.path.join(out_dir, 'video.mp4')
        target = os.path.join(out_dir, f'{title}.mp4')
        try:
            self._download_stream(audio_url, audio_path, '音频下载:')
            self._download_stream(video_url, video_path, '视频下载')
            print('下载完成')
            # Argument list instead of a shell string: titles may contain
            # spaces or shell metacharacters.
            subprocess.run(
                [FFMPEG, '-i', video_path, '-i', audio_path,
                 '-acodec', 'copy', '-vcodec', 'copy', target],
                check=True,
            )
        finally:
            for leftover in (audio_path, video_path):
                if os.path.exists(leftover):
                    os.remove(leftover)
        print('合成完成')


if __name__ == '__main__':
    # NOTE(review): the published listing is cut off before any entry point;
    # constructing Spider is what starts the menu loop.
    Spider()
- 《你是我的眼》,歌曲非常好听
- 音视频入门基础——笔记
- uni-app video 获取视频总时长
- 【重要】如何获取超1400页《深度学习之图像识别》书籍配套教学PPT与视频等完整
- win10 + centOS7双系统安装
- 0基础转行软件测试,3个月真的能学会吗?分享一波我的真实经验.....
- 用蒙以录课制作竖版的视频
- JWT-RESTful进行身份认证
- 安卓逆向小案例——某短视频APP搜索+推荐接口【rpc调用】
- 我的解放日志是什么