使用Pandas爬取简单表格数据

单个表格的爬取

示例代码（Python）：
import pandas as pd

# read_html yields one DataFrame per <table> found on the page; header=0 makes
# the first table row the column names. Only the first table is kept here.
df = pd.read_html("http://www.air-level.com/air/beijing/", encoding='utf-8', header=0)[0]
print(df.head())

# The ranking page carries two tables. Download and parse it once, then pick
# both tables out of the result instead of requesting the same URL twice.
rank_tables = pd.read_html("http://www.air-level.com/rank", encoding='utf-8', header=0)

df1 = rank_tables[0]  # left-hand table
print(df1.head())

df2 = rank_tables[1]  # right-hand table
print(df2.head())

批量爬取表格

示例代码（Python）：
import pandas as pd

# Scrape the first table from each paginated listing page (p=1 .. p=47).
# The frames are gathered in a list and concatenated once at the end, which
# avoids re-copying the accumulated data on every iteration.
pages = []
for i in range(1, 48):
    url = f'http://vip.stock.finance.sina.com.cn/q/go.php/vComStockHold/kind/jgcg/index.phtml?p={i}'
    pages.append(pd.read_html(url)[0])

# Row labels are kept as returned by each page (no ignore_index), matching
# what repeated pairwise concatenation would produce.
df = pd.concat(pages)

# Remove the '明细' column before inspecting the result.
df.drop('明细', axis=1, inplace=True)
print(df.columns)
print(df.head())