# Gerapy Auto Extractor is really handy.
import json
import requests
from gerapy_auto_extractor.extractors.list import extract_list
# Fetch a dapenti.com blog listing page, run gerapy_auto_extractor's
# extract_list() on its markup, and print the result as pretty-printed JSON.
#
# To work from a saved copy of the page instead of the live site, replace the
# request below with something like (adjust the encoding to match the file):
#     with open('list.html', encoding='utf-8') as f:
#         html = f.read()
LIST_URL = 'http://www.dapenti.com/blog/blog.asp?subjectid=70&name=xilei'

# NOTE: despite its name, html1 is the requests Response object, not markup.
# Without a timeout, requests waits indefinitely on an unresponsive server.
html1 = requests.get(LIST_URL, timeout=30)
# Fail loudly on a 4xx/5xx reply instead of feeding an error page to the
# extractor.
html1.raise_for_status()
# Set the charset explicitly before reading .text so the body is decoded as
# GB2312 rather than with whatever encoding requests guesses.
html1.encoding = 'gb2312'
html = html1.text

# ensure_ascii=False keeps non-ASCII text readable in the output; default=str
# stringifies any value the json module cannot serialize natively.
print(json.dumps(extract_list(html), indent=2, ensure_ascii=False, default=str))