import requests
from bs4 import BeautifulSoup
import csv
'''
<thead>
<tr>
<th height="40" bgcolor="#0867b7">公示编号</th>
<th height="40" bgcolor="#0867b7">姓名</th>
<th height="40" bgcolor="#0867b7">出生年月</th>
<th height="40" bgcolor="#0867b7">单位名称</th>
<th height="40" bgcolor="#0867b7">积分分值</th>
<th height="40" bgcolor="#0867b7">操作</th>
</tr>
</thead>
<tbody>
<tr>
<td height="35" align="center">202300001</td>
<td height="35" align="center">张浩</td>
<td height="35" align="center">1977-02</td>
<td height="35" align="center">北京首钢股份有限公司</td>
<td height="35" align="center">140.05</td>
<td height="35" align="center">
<a style="font-size:16px;" href="javascript:void(0);" onclick="showDetails('375202')">查看</a>
</td>
</tr>
'''
# Request configuration shared by every HTTP call in this script.
# `headers` carries a Chrome-style User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'}
# Listing-endpoint template; the two `{}` placeholders are filled positionally
# by `site.format(rows, page)`. The literal must stay on a single line: a
# single-quoted string cannot span physical lines.
site = 'http://fuwu.rsj.beijing.gov.cn/jfgs2023integralpublic/settlePerson/tablePage?name=&rows={}&page={}'
def getDetail(id):
    """Fetch one detail page and return the last cell of each data row.

    Args:
        id: Value substituted into the ``id`` query parameter of the
            ``settlePersonDetails`` URL. (The name shadows the builtin but is
            kept so existing callers are unaffected.)

    Returns:
        A list of strings: the stripped text of the last ``<td>`` of every
        ``<tr>`` in the first ``<table>`` of the page, skipping the first
        row. An empty list is returned when the page contains no table.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = 'http://fuwu.rsj.beijing.gov.cn/jfgs2023integralpublic/settlePerson/settlePersonDetails?id={}'
    url = url.format(id)
    # Without a timeout a stalled connection would block the scrape forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        # No table in the response (e.g. an error page): nothing to extract.
        return []
    detail = []
    # The first <tr> is skipped, as in the original ([1:]).
    for row in table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if not cells:
            # Rows without any <td> have no "last cell" to read.
            continue
        # Stripped so the values match the stripped listing cells in the CSV.
        detail.append(cells[-1].text.strip())
    return detail
def _detail_id_from_onclick(onclick):
    """Return the argument inside ``name(...)`` with quotes removed, or ''."""
    # "showDetails('375202')" -> "'375202'" -> "375202"
    inner = onclick.partition('(')[2].partition(')')[0]
    return inner.strip().strip('\'"')


def main():
    """Scrape the public listing plus per-person details into ``jflh.csv``.

    Requests 61 listing pages of 100 rows each. For every ``<tr>`` in a
    page's ``<tbody>`` the stripped text of each ``<td>`` becomes one CSV
    column. When the last cell holds an ``<a onclick="...">`` link, the
    argument of that handler is passed to ``getDetail`` and the returned
    values are appended to the same row. The header row is the text of the
    ``<thead>`` cells followed by ten fixed detail column names.

    The file is written once, after all pages are processed, as UTF-8 with a
    BOM so the Chinese text is encodable on any platform and spreadsheet
    tools can detect the encoding.
    """
    people = []
    hdrs = []
    for page in range(61):
        # NOTE(review): the value sent as `page` is page * 100, i.e. it grows
        # in steps equal to `rows` -- presumably the endpoint treats it as a
        # row offset; confirm against the service.
        url = site.format(100, page * 100)
        response = requests.get(url, headers=headers, timeout=30)
        soup = BeautifulSoup(response.content, 'html.parser')
        thead = soup.find('thead')
        tbody = soup.find('tbody')
        if not hdrs and thead is not None:
            # Build the header row once instead of on every page.
            hdrs = [th.text for th in thead.find_all('th')]
            hdrs += ['合法稳定就业', '合法稳定住所', '教育背景', '扣除取得学历(学位)期间累计的居住及就业分值', '职住区域', '创新创业', '纳税', '年龄', '荣誉表彰', '守法记录']
        if tbody is None:
            # Page without a table body: nothing to collect from it.
            continue
        for row in tbody.find_all('tr'):
            cells = row.find_all('td')
            if not cells:
                continue
            line = [cell.text.strip() for cell in cells]
            link = cells[-1].find('a')
            onclick = link.get('onclick') if link else None
            if onclick:
                param = _detail_id_from_onclick(onclick)
                if param:
                    line += getDetail(param)
            people.append(line)
    with open('jflh.csv', 'w', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file)
        writer.writerow(hdrs)
        writer.writerows(people)
# Run the scrape only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
# ---- Forum reply trailer (not part of the script) ----
# 【 在 lc21999 的大作中提到: 】
# : 能提供一下您抓出来的清单吗?谢谢!
# --
# FROM 61.48.133.*