# Collect the inner text of every <p>...</p> pair found in html[start:end]
# and append it to `content`.
# `html`, `start`, `end` and `content` must already be defined; this assumes
# `start` and `end` are non-negative indices into `html` -- TODO confirm.
a = html.find("<p>", start, end)
# str.find returns -1 when nothing is found, so test for that explicitly
# instead of comparing against `end` (-1 < end would keep the loop running).
while a != -1:
    b = html.find("</p>", a + 3)
    # Stop on an unterminated paragraph or one that closes past the region.
    if b == -1 or b >= end:
        break
    content.append(html[a + 3:b])
    # Resume just after the 4-character closing tag.
    a = html.find("<p>", b + 4, end)
print(content)
每次只能保存一章，所以继续找找有没有下一章的链接。
1 2 3 4
# If the page contains a "url_next:" entry, extract and print its value.
# `html` must already be defined.
a = html.find("url_next:")
if a != -1:
    # The value is expected to end at the first comma within 128 characters.
    b = html.find(",", a, a + 128)
    if b != -1:
        # a + 10 skips the 9-character key plus one more character, and
        # b - 1 drops the character before the comma -- presumably the quotes
        # around the value; confirm against the page source.
        print(html[a + 10:b - 1])
保存文件
1 2 3 4 5
# Write the chapter title, then each paragraph, to a UTF-8 text file.
# `file_name`, `title` and `content_list` must already be defined.
with open(file_name, 'w', encoding="UTF-8") as f:
    f.write(title + "\n\n")
    for content in content_list:
        f.write(" " + content + "\n\n")
# The `with` statement closes the file, so no explicit f.close() is needed.
a = html.find("<p>", start, end) while a < end: b = html.find("</p>", a, -1) if b < end: content.append(html[a + 3: b]) else: break a = html.find("<p>", b + 3, -1) return content
def save_file(file_name, title, content_list):
    """Write a title and a sequence of paragraphs to a UTF-8 text file.

    The title is written first, followed by a blank line. Each item of
    ``content_list`` is then written prefixed with a single space and
    followed by a blank line. An existing file is overwritten.

    Args:
        file_name: Path of the file to create or overwrite.
        title: Heading text written at the top of the file.
        content_list: Iterable of paragraph strings, written in order.
    """
    # The ``with`` statement closes the file on exit (even on error), so no
    # explicit close() call is required.
    with open(file_name, 'w', encoding="UTF-8") as f:
        f.write(title + "\n\n")
        for content in content_list:
            f.write(" " + content + "\n\n")