通过 python 读取 excel 表格内容 url 批量下载

本贴最后更新于 1900 天前,其中的信息可能已经事过景迁

现有表格内容如下:
image.png
BT 列有全部图片 url 地址:

最终版本优化

#!/usr/bin/python # -*- coding: utf-8 -*- # @Time : 2019/10/15 21:36 # @Author : cuijianzhe # @File : biaoge.py # @Software: PyCharm import xlrd import requests import os path = 'test.xls' workbook = xlrd.open_workbook(path) Data_sheet = workbook.sheets()[0] rowNum = Data_sheet.nrows #行数 colNum = Data_sheet.ncols #列数 def get_sitesname(): ''' 提取并添加站点名称 ''' #第一个方法使用列表 sites_list = [] i = 1 #从1开始跳过表头数据 try: for s in range(Data_sheet.ncols): if (Data_sheet.cell_value(0, s)) == '站点名称': while i <= rowNum: if Data_sheet.cell_value(i,s) != "": sites_list.append(Data_sheet.cell_value(i,s)+"_"+str(i)) i += 1 except: pass return sites_list #第二个方法使用列表推导式直接返回 # return [Data_sheet.cell_value(i, 3) + "_" + str(i) for i in range(1,2) if i not in [] for i in range(1,rowNum-2) if Data_sheet.cell_value(i, 3)!= ""] def get_url(): ''' 提取链接所有内容 提取对应行列的内容 ''' url_list = [] try: for b in range(Data_sheet.ncols): if (Data_sheet.cell_value(0,b)) == '图片地址': for row in range(1,rowNum-2): if Data_sheet.cell_value(row,b) != "": url_list.append(Data_sheet.cell_value(row, b).replace(';', '\n')) else: url_list.append('https://file.cjzshilong.cn/pictures_file/guohui-e67e7b3b.png\n') except: pass return url_list def Download(url,site_name): ''' 构建目录 批量下载图片家目录有问题。待解决优化 ''' for num in range(len(url)): name = site_name[num] os.mkdir(name) os.chdir(name) file_list = url[num].split('\n') with open('./' + name[:-2] + '图片url_共计'+ str(len(file_list)-1) + '条.txt', 'w', encoding='utf-8') as f: f.write(url[num]) d = 0 for url_1 in file_list: if url_1 != '': res = requests.get(url_1) with open(name[:-2] +'_'+ str(d+1) + '.jpg', 'wb') as p: p.write(res.content) print('%s下载完毕' %name) d += 1 os.chdir('..') if __name__ == '__main__': url_ss = get_url() site_name = get_sitesname() Download(url_ss,site_name)

第三版:

  • 根据图片找到对应站点名称
  • 每个站点生成单独的目录
#!/usr/bin/python # -*- coding: utf-8 -*- # @Time : 2019/10/15 21:36 # @Author : cuijianzhe # @File : biaoge.py # @Software: PyCharm import xlrd import requests import os path = 'test.xls' workbook = xlrd.open_workbook(path) Data_sheet = workbook.sheets()[0] #代表表格中sheet 0表格内容 rowNum = Data_sheet.nrows #行数 colNum = Data_sheet.ncols #列数 ''' 提取单元格所有内容 ''' # print(Data_sheet.cell_value(1,4)) #第1行第4列单元格内容 # print(Data_sheet.ncols) #有效列数 sites_list = [] def get_sitesname(): ''' 提取并添加站点名称 ''' i = 0 try: while i <= rowNum: sites_list.append(Data_sheet.cell_value(i,3)) i += 1 except: pass del sites_list[0] del sites_list[-1] del sites_list[-1] ''' 所有站点名称加上后缀,达到不重复效果 ''' s = 0 for i in sites_list: s += 1 weizhi = sites_list.index(i) sites_list[weizhi] = i + '_' + str(s) return sites_list def get_url(): ''' 提取链接所有内容 提取对应行列的内容 ''' name_col = '图片地址' url_list = [] try: for b in range(Data_sheet.ncols): if (Data_sheet.cell_value(0,b)) == name_col: for col in range(colNum): url_data = Data_sheet.cell_value(col,b) url_data2 = url_data.replace(';','\n') url_list.append(url_data2) except: pass del url_list[0] del url_list[-1] del url_list[-1] ''' 把列表中元素为空的值替换成自己定义的 ''' for i in range(int(len(url_list))): if url_list[i] == '': url_list[i] = 'https://file.cjzshilong.cn/pictures_file/guohui-e67e7b3b.png' return url_list def Download(url,site_name): ''' 构建目录 批量下载图片家目录有问题。待解决优化 ''' c = 0 for num in range(len(url)): name = site_name[num] os.mkdir(name) os.chdir(name) with open('./url_' + str(c) + '.txt', 'w', encoding='utf-8') as f: f.write(url[num]) with open('./url_' + str(c) + '.txt', 'r', encoding='utf-8') as d: file = d.read() file_list = file.split('\n') name = site_name[num] c += 1 d = 0 for url_1 in file_list: if url_1 == '': break res = requests.get(url_1) print(('%s下载完毕') % (name)) with open(name + str(d) + '.jpg', 'wb') as p: p.write(res.content) d += 1 os.chdir('..') if __name__ == '__main__': url_List = get_url() site_name = get_sitesname() Download(url_List,site_name)

1.2 异常捕获

表格中一个单元格中有一个 url 或者多个 url 或者空白,解决不识别情况,str 转 list 解决

#!/usr/bin/python # -*- coding: utf-8 -*- # @Time : 2019/10/15 21:36 # @Author : cuijianzhe # @File : biaoge.py # @Software: PyCharm import xlrd import requests import random import string import os import time path = 'test.xls' date = time.strftime('%Y-%m-%d',time.localtime()) workbook = xlrd.open_workbook(path) # print(workbook.sheet_names()) Data_sheet = workbook.sheets()[0] # print(Data_sheet.name) rowNum = Data_sheet.nrows #行数 colNum = Data_sheet.ncols #列数 ''' 提取单元格所有内容 ''' list = [] for i in range(rowNum): rowlist = [] for j in range(colNum): rowlist.append(Data_sheet.cell_value(i,j)) list.append(rowlist) ''' 提取对应行列的内容 ''' #print(Data_sheet.cell_value(0,43)) #列名称 #print(Data_sheet.ncols) #有效列数 name_col = '图片地址' url_list = [] try: for b in range(Data_sheet.ncols): if (Data_sheet.cell_value(0,b)) == name_col: for col in range(colNum): url_data = Data_sheet.cell_value(col,b) url_data2 = url_data.replace(';','\n') url_list.append(url_data2) except: pass del url_list[0] with open('./url.txt','w',encoding='utf-8') as f: for url in url_list: f.write(str(url)) # f.write(',') with open('./url.txt','r',encoding='utf-8') as d: file = d.read() file_list = file.split('\n') del file_list[-1] headers = { 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36' } name = 'pictures' os.mkdir(name) os.chdir(name) for url_1 in file_list: name_1 = ''.join(random.sample(string.ascii_letters + string.digits, 8)) #name_1 = url_1.replace('/','') res = requests.get(url_1,headers=headers) print(('%s下载完毕')%(name_1)) with open(name_1 + '.jpg','wb') as p: p.write(res.content)

如图:
image.png

打包 exe 文件

pip install pyinstaller
  • pyinstaller 的使用
pyinstaller -F biaoge.py
  • 找到文件
    image.png

参考:
pyinstaller 官方文档:
xlrd 官方文档:
https://www.cnblogs.com/insane-Mr-Li/p/9092619.html

  • Python

    Python 是一种面向对象、直译式电脑编程语言,具有近二十年的发展历史,成熟且稳定。它包含了一组完善而且容易理解的标准库,能够轻松完成很多常见的任务。它的语法简捷和清晰,尽量使用无异义的英语单词,与其它大多数程序设计语言使用大括号不一样,它使用缩进来定义语句块。

    556 引用 • 675 回帖

相关帖子

欢迎来到这里!

我们正在构建一个小众社区,大家在这里相互信任,以平等 • 自由 • 奔放的价值观进行分享交流。最终,希望大家能够找到与自己志同道合的伙伴,共同成长。

注册 关于
请输入回帖内容 ...