urllib

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from urllib import request
 
def extract_urls(html):
    """Return the double-quote-terminated ``http:`` URLs in *html* ending in '/'.

    The text is scanned left to right for the literal ``http:``; each match
    runs up to (but not including) the next double quote. Only candidates
    that are longer than five characters and end with a slash are kept.
    Because the literal searched for is ``http:``, ``https:`` links are not
    matched.

    Args:
        html: The page text to scan.

    Returns:
        A list of the matching URL strings, in order of appearance.
    """
    urls = []
    start = html.find('http:')
    while start >= 0:
        end = html.find('"', start)
        if end < 0:
            # No closing quote after this match: there is no complete URL
            # left, so stop instead of slicing with a -1 index.
            break
        candidate = html[start:end]
        # Drop candidates that are too short or do not end with a slash.
        if len(candidate) > 5 and candidate.endswith('/'):
            urls.append(candidate)
        # Resume from the closing quote; a result of -1 ends the loop.
        start = html.find('http:', end)
    return urls


def main():
    """Download the Baidu home page and write its URLs to ``url.txt``."""
    with request.urlopen('http://www.baidu.com') as response:
        # Decode the bytes; str(bytes) would yield the "b'...'" repr instead.
        html = response.read().decode('utf-8', errors='replace')
    with open('url.txt', 'w', encoding='utf-8') as f:
        # One URL per line.
        f.writelines(url + '\n' for url in extract_urls(html))


if __name__ == '__main__':
    main()
DX

Explorer

urllib

Graph View