Open
Description
import requests
import re
# Page requested by the functions in this script.
url = 'https://jytyj.shangqiu.gov.cn/zwgk/fdzdgknr/zfcg31sqsjytyj/zbgg31sqsjytyj_2'

# Default request headers: a desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}
def first_cookie():
    """Fetch the page and extract the cookie set by its JavaScript.

    The response body is searched for the first ``return "<text>"``
    statement. The quoted text is treated as a cookie string, and only the
    segment before the first ``;`` is kept (dropping trailing attributes
    such as the path).

    Returns:
        The stripped leading segment of the cookie string, or ``None`` when
        the response contains no matching ``return "..."`` statement.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # A timeout keeps the script from hanging forever on a stalled server.
    res = requests.get(url, headers=headers, timeout=10)

    # Pull the quoted cookie string out of the JavaScript in the response.
    match = re.search(r'return\s*"([^"]+)"', res.text)
    if match is None:
        return None

    # Keep only the part before the first ';'.
    return match.group(1).split(';')[0].strip()
def xpath_with_response(res):
    """Parse the response body as HTML and select the list entries.

    Args:
        res: Object whose ``text`` attribute holds the HTML markup.

    Returns:
        The result of evaluating ``//ul[@class="infoList"]/li`` against the
        parsed document.
    """
    from lxml import etree

    # Build the element tree, then run the XPath query on it directly.
    document = etree.HTML(res.text)
    return document.xpath('//ul[@class="infoList"]/li')
def request_with_cookie(cookie):
    """Request the page with the extracted cookie and print each entry.

    Every ``<li>`` returned by ``xpath_with_response`` is expected to hold
    an ``<a>`` element; its first text node and ``href`` are printed as the
    title and link. Entries missing either one are skipped.

    Args:
        cookie: Value sent in the ``Cookie`` request header.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Start from the module-level headers so this request carries the same
    # User-Agent as the one that obtained the cookie, then add the cookie.
    request_headers = {**headers, 'Cookie': cookie}

    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, headers=request_headers, timeout=10)

    for item in xpath_with_response(response):
        titles = item.xpath('./a/text()')
        links = item.xpath('./a/@href')
        if not titles or not links:
            # No link or no link text in this entry; indexing [0] would
            # raise IndexError, so skip it instead.
            continue
        print(f"标题: {titles[0]}, 链接: {links[0]}")
# Usage example: fetch the cookie first, then request the page with it.
if __name__ == "__main__":
    cookie = first_cookie()
    print(f"提取的Cookie: {cookie}")
    if cookie:
        request_with_cookie(cookie)
    else:
        # No cookie could be extracted, so the follow-up request is skipped.
        print("未能提取到Cookie")
Metadata
Metadata
Assignees
Labels
No labels