飘云阁

 找回密码
 加入我们

QQ登录

只需一步,快速开始

查看: 1344|回复: 1

[Python] python脚本获取桌酷壁纸

[复制链接]
  • TA的每日心情
    开心
    2024-12-14 17:15
  • 签到天数: 1339 天

    [LV.10]以坛为家III

    发表于 2022-2-27 21:58:34 | 显示全部楼层 |阅读模式
    本帖最后由 zyjsuper 于 2022-2-27 22:01 编辑

    [Python] 纯文本查看 复制代码
    """
    ************************************
        @version: 0.1
        @author: zayki
        @contact: [url=mailto:[email protected]][email protected][/url]
        @file: zhuoku_spider.py
        @time: 2022/2/27 15:24
    ************************************
    """
    
    import os
    import requests
    from lxml import etree
    from os.path import splitext,dirname,basename
    import re
    
    # Front page of the wallpaper site; all album hrefs scraped below are
    # relative to this URL.
    target_url = "http://www.zhuoku.com/"
    # Browser-like request headers so the site serves its normal HTML page.
    headers = {
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "Accept-Encoding": "gzip, deflate",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Connection": "close"
    }
    
    # Fetch the front page.  NOTE(review): verify=False disables TLS
    # certificate checks; harmless for this plain-HTTP site but worth noting.
    response = requests.get(target_url, headers = headers,verify = False,timeout = 5)
    # The site serves GBK-encoded HTML, not UTF-8.
    result = etree.HTML(response.content.decode('GBK'))
    # Album links and their anchor nodes from the "latest wallpapers" box.
    last_bizhi_urls = result.xpath('//div[@id="bizhititle"]//li//@href')
    last_bizhi_titles = result.xpath('//div[@id="bizhititle"]//li//a')
    pic_urls = []  # album page URLs visited so far; appended by get_pic_urls()
    # Download destination: the current user's desktop (Windows-only;
    # assumes the USERPROFILE environment variable is set -- TODO confirm).
    savepath = os.getenv("USERPROFILE") + "\\desktop\\"
    
    def get_proxy():
        """Fetch one proxy record from a local proxy-pool service.

        Returns the service's JSON payload as a dict; callers read the
        'proxy' key ("host:port").  See https://github.com/jhao104/proxy_pool
        for how to run the service this endpoint belongs to.
        """
        # FIX: a timeout keeps a dead/slow proxy-pool service from hanging
        # the whole crawl indefinitely (every other request here uses one).
        return requests.get("http://118.24.52.95:5010/get/", timeout=5).json()
    
    def get_pic_urls(url):
        """Download every image of the album whose relative link is *url*.

        *url* is an href scraped from the front page (relative to
        ``target_url``).  The album's pager is parsed to learn the image
        count, then each picture page is fetched through a fresh proxy and
        the full-size image is saved under ``savepath/<album>/``.

        Side effects: appends the album URL to the module-level ``pic_urls``
        list, creates the album directory, writes image files, and prints
        progress.  Network errors for a single image are reported and the
        loop continues with the next one.
        """
        sub_url = target_url + url
        pic_urls.append(sub_url)
        proxy_ip = get_proxy()['proxy']
        print(proxy_ip)
        # FIX: requests expects scheme names ("http"/"https") as proxies
        # keys; the original "http://"/"https://" keys were silently ignored,
        # so every request actually went out without a proxy.
        proxies = {"http": proxy_ip, "https": proxy_ip}
        print(sub_url)
        subresponse = requests.get(sub_url, headers=headers, verify=False, proxies=proxies, timeout=5)
        subresult = etree.HTML(subresponse.content.decode('GBK'))
        # The pager <select> lists every page of the album; its last option's
        # value looks like "<album>_<count>.htm".
        last_link_name = subresult.xpath('//div[@class="turn"]//select//@value')[-1]
        last_link = dirname(sub_url) + '/' + last_link_name
        stem = splitext(last_link_name)[0]
        link_name, countlinks = stem.split('_')[0], stem.split('_')[-1]
        last_link_response = requests.get(last_link, headers=headers, verify=False, proxies=proxies, timeout=5)
        last_link_result = etree.HTML(last_link_response.content.decode('GBK'))
        # Last picture link on the last pager page is named "<album>(<n>).htm";
        # the number in parentheses is the total image count.
        last_pic_link = last_link_result.xpath('//div[@class="bizhiin"]//a/@href')[-1]

        p1 = re.compile(r'[(](.*?)[)]', re.S)
        pic_links_count = re.findall(p1, last_pic_link)[0]

        # FIX: create the destination folder once, before the loop, instead
        # of a bare try/except os.mkdir on every iteration.
        os.makedirs(savepath + link_name, exist_ok=True)

        for index in range(1, int(pic_links_count) + 1):
            try:
                # Fresh proxy per image to spread the load / dodge bans.
                proxy_ip = get_proxy()['proxy']
                print(proxy_ip)
                proxies = {"http": proxy_ip, "https": proxy_ip}
                pic_url_link = dirname(sub_url) + "/" + link_name + "(" + str(index) + ")" + ".htm"
                pic_url_link_resq = requests.get(pic_url_link, headers=headers, verify=False, proxies=proxies, timeout=5)
                pic_url_link_html = etree.HTML(pic_url_link_resq.content.decode('GBK'))
                target_pic_link = pic_url_link_html.xpath('//img[@id="imageview"]//@src')[0]

                # The image host checks the Referer; send the picture page.
                headers_pic = {
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36",
                    "Accept": "*/*",
                    "Referer": pic_url_link,
                    "Accept-Encoding": "gzip, deflate",
                    "Accept-Language": "zh-CN,zh;q=0.9",
                    "Connection": "close"
                }

                target_pic_link_resq = requests.get(target_pic_link, headers=headers_pic, verify=False, proxies=proxies, timeout=5)
                file_name = basename(target_pic_link)
                try:
                    with open(savepath + link_name + "\\" + file_name, 'wb') as file:
                        file.write(target_pic_link_resq.content)
                        print(target_pic_link)
                        print("The image {0} has been saved in local path {1}".format(file_name, savepath + link_name))
                except Exception as e:
                    # FIX: original concatenated the int *index* to a str,
                    # raising TypeError and masking the real write error.
                    print("index:" + str(index) + " " + str(e))
            except Exception as e:
                # Best-effort: report and move on to the next image.
                print(e)
    
    # Entry point: crawl the newest album listed on the front page.
    # FIX: guard against an empty scrape result -- the original
    # last_bizhi_urls[0] raised IndexError when the xpath matched nothing.
    if last_bizhi_urls:
        url = last_bizhi_urls[0]
        get_pic_urls(url)
    else:
        print("No album links found on the front page.")

    PYG19周年生日快乐!
  • TA的每日心情
    无聊
    2024-10-29 14:35
  • 签到天数: 26 天

    [LV.4]偶尔看看III

    发表于 2022-4-13 19:39:25 | 显示全部楼层
    PYG17周年生日快乐!
    PYG19周年生日快乐!
    回复 支持 反对

    使用道具 举报

    您需要登录后才可以回帖 登录 | 加入我们

    本版积分规则

    快速回复 返回顶部 返回列表