1. ★ Print the 9×9 multiplication table.
for i in range(1, 10):
    for j in range(1, 10):
        if j <= i:    # only the terms on or below the diagonal
            print("%s×%s = %s" % (i, j, i * j), end=" ")
    print()           # start a new row
2. ★ Store all odd numbers under 100 in a list L and print the list.
li = []
for i in range(100):
    if i % 2 != 0:
        li.append(i)
print(li)
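Since range accepts a step argument, the same list can be built in one line (an equivalent sketch using the list name from the answer above):
li = list(range(1, 100, 2))   # 1, 3, 5, ..., 99 — all odd numbers below 100
print(li)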
3. ★ Visit the Shanghai Stock Exchange website, open "Latest Announcements" under the "Disclosure" menu, analyze the page http://www.sse.com.cn/disclosure/listedinfo/announcement/ to find the URL that actually serves the company announcements, parse the data with the json library, and extract and print each announcement's security code, security short name, title, and time.
import requests
import json

link = 'http://www.sse.com.cn/disclosure/listedinfo/announcement/json/stock_bulletin_publish_order.json?v=0.8118570659224216'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
r = requests.get(link, headers=headers)
# print(r.text)
jsdata = json.loads(r.content.decode('utf-8'))
# print(jsdata['publishData'])
for ff in jsdata['publishData']:
    print(ff['securityCode'], ff['publishTime'], ff['bulletinTitle'], 'http://www.sse.com.cn/' + ff['bulletinUrl'])
4. ★ Visit https://pic.sogou.com/pics?query=%E9%A3%8E%E6%99%AF, analyze the page to find the URL that actually serves the scenery images, parse the data with the json library, extract the real image addresses, and paste a screenshot of the result here.
import requests
import json

link = 'http://www.szse.cn/api/disc/announcement/detailinfo?random=0.904812871168784&pageSize=50&pageNum=1&plateCode=szse'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
r = requests.get(link, headers=headers)
jsdata = json.loads(r.text)
# print(jsdata['data'])
for ff in jsdata['data']:
    print(ff['secCode'], ff['secName'])
    for i in ff['announList']:
        print(i['title'], i['attachPath'])
5. Visit the Shenzhen Stock Exchange website, open "Listed Company Announcements" under the "Information Disclosure" menu, analyze the page http://www.szse.cn/disclosure/listed/notice/index.html to find the URL that actually serves the company announcements, parse the data with the json library, and extract and print each announcement's security code, security short name, title, and time.
import requests
import json

link = 'http://www.szse.cn/api/disc/announcement/detailinfo?random=0.0347913140950078&pageSize=50&pageNum=1&plateCode=szse'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
r = requests.get(link, headers=headers)
# print(r.text)
jsdata = json.loads(r.text)
# print(jsdata['data'])
for ff in jsdata['data']:
    print(ff['secCode'], ff['secName'], end=' ')
    for i in ff['announList']:
        print(i['title'], i['attachPath'])
6. ★ Open the school home page, find the Nanshan College news page at https://www.nanshan.edu.cn/nyyw.htm, and fetch the news titles and publication dates.
import requests
from bs4 import BeautifulSoup

link = 'https://www.nanshan.edu.cn/nyyw.htm'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36'}
r = requests.get(link, headers=headers)
soup = BeautifulSoup(r.content.decode('utf-8'), 'html.parser')
for kk in range(20):
    fflist = soup.find_all('li', id='line_u6_' + str(kk))
    for i in fflist:
        print(i.a.text, i.span.text, 'https://www.nanshan.edu.cn/' + i.a['href'])
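Instead of looping over the twenty numbered id values, BeautifulSoup's CSS-selector support can match every li whose id starts with line_u6_ in one call (a sketch that assumes the same page structure used above):
# reuses the soup object built above
for li in soup.select('li[id^="line_u6_"]'):   # any <li> whose id begins with "line_u6_"
    print(li.a.text, li.span.text, 'https://www.nanshan.edu.cn/' + li.a['href'])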
7. Open the Huya Live search page https://www.huya.com/search.php?hsk=%E7%BE%8E%E5%A5%B3, crawl the image URLs and names, and print them.
import requests
import json

link = 'https://search.cdn.huya.com/?callback=jQuery111305759255056851386_1625014578206&m=Search&do=getSearchContent&q=%E7%BE%8E%E5%A5%B3&uid=0&v=4&typ=-5&livestate=0&rows=40&start=0&_=1625014578208'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:89.0) Gecko/20100101 Firefox/89.0'}
r = requests.get(link, headers=headers)
aaa = r.text
aaa = aaa[aaa.find('{'):-2]
bbb = json.loads(aaa)
ccc = bbb['response']['3']['docs']
for i in ccc:
    b = i['game_screenshot'], i['game_nick']
    print(b)
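The slice aaa[aaa.find('{'):-2] removes the JSONP wrapper (the jQuery... callback name, the opening parenthesis, and the trailing ');') so that json.loads receives plain JSON. The same pattern appears again in problems 8 and 9; a small helper with a hypothetical name makes the intent explicit:
import json

def jsonp_to_dict(text):
    """Strip a JSONP wrapper such as callback({...}); and parse the JSON body."""
    start = text.find('{')          # first character of the JSON object
    end = text.rfind('}') + 1       # last character of the JSON object
    return json.loads(text[start:end])

# e.g. bbb = jsonp_to_dict(r.text) instead of the two slicing lines above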
8. Open http://www.gov.cn/zhengce/xxgk/index.htm, crawl the first 30 policies' titles, document numbers, drafting dates, publication dates, and policy URLs, and print them.
import requests
import json

for i in range(1, 4):    # 3 pages × 10 results per page = the first 30 policies
    link = 'http://xxgk.www.gov.cn/search-zhengce/?callback=jQuery1124008138061423001552_1624936658952&mode=smart&sort=relevant&page_index=' + str(i) + '&page_size=10&title=&_=1624936658982'
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:89.0) Gecko/20100101 Firefox/89.0'}
    r = requests.get(link, headers=headers)
    json_string = r.content.decode()
    json_string = json_string[json_string.find("{"):-2]    # strip the JSONP wrapper
    json_data = json.loads(json_string)
    comm = json_data['data']
    for a in comm:
        b = a['title'], a['tagno'], a['writetime'], a['pubtime'], a['url']
        print(b)
9. Find the real address behind the comments on Tang Song's blog, fetch the user names and comment contents, print them, and write them to a CSV file.
import requests
import json
import csv

list1 = []
link = 'https://api-zero.livere.com/v1/comments/list?callback=jQuery112405944722401681988_1624932359762&limit=10&repSeq=4272904&requestPath=%2Fv1%2Fcomments%2Flist&consumerSeq=1020&livereSeq=28583&smartloginSeq=5154&code=&_=1624932359764'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:89.0) Gecko/20100101 Firefox/89.0'}
r = requests.get(link, headers=headers)
json_string = r.text
json_string = json_string[json_string.find("{"):-2]    # strip the JSONP wrapper
json_data = json.loads(json_string)
comm = json_data['results']['parents']
for eachone in comm:
    message = eachone['content']
    aaa = eachone['name']
    list1.append(aaa)
    list1.append(message)
print(list1)
with open('e:\\111.csv', 'a+', newline='') as f:    # newline='' avoids blank rows on Windows
    w = csv.writer(f)
    w.writerow(list1)
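A variant that writes one row per comment plus a header usually produces a cleaner CSV; a sketch reusing the comm list from the answer above (the output filename is hypothetical):
import csv

with open('comments.csv', 'w', newline='', encoding='utf-8-sig') as f:
    w = csv.writer(f)
    w.writerow(['name', 'content'])                        # header row
    for eachone in comm:
        w.writerow([eachone['name'], eachone['content']])  # one comment per row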
10. Open Baidu Images at https://image.baidu.com/, search for "风景" (scenery) or something else, parse the page source to get the image addresses, and download the images into a pic folder under the current directory; print "download finished" or "cannot download" for each image.
Hint: get the image address, fetch the image bytes, and write them to a file with write(); or get the image address and download it with request.urlretrieve.
import os
import requests
import json
from urllib import request    # provides request.urlretrieve / request.urlcleanup

link = 'https://image.baidu.com/search/acjson?tn=resultjson_com&logid=9211931413365455375&ipn=rj&ct=201326592&is=&fp=result&queryWord=%E9%A3%8E%E6%99%AF&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=-1&z=&ic=0&hd=&latest=&copyright=&word=%E9%A3%8E%E6%99%AF&s=&se=&tab=&width=&height=&face=0&istype=2&qc=&nc=1&fr=&expermode=&nojc=&pn=60&rn=30&gsm=3c&1624870048152='
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36'}
r = requests.get(link, headers=headers)
json_string = r.content.decode()
json_data = json.loads(json_string)
comment_list = json_data['data']
os.makedirs('pic', exist_ok=True)    # the images go into a pic folder under the current directory
for i in comment_list:
    t1 = i.get('hoverURL')           # image URL
    t2 = i.get('fromPageTitleEnc')   # image title, used as the file name
    try:
        print(t1, t2)
        request.urlretrieve(t1, f'pic/{t2}')
        request.urlcleanup()
        print(f'{t2} download finished')
        with open('e:\\111.txt', 'a+') as f:
            f.write(str(t1))
            f.write('\t')
    except:
        print('cannot download:', t1)
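The hint also mentions the other approach: fetch the image bytes with requests and write them out with write(). A minimal sketch of that variant, reusing the t1 (URL) and t2 (title) names from the loop above; the .jpg extension is an assumption:
import requests

def save_image(url, filename):
    """Download one image by writing its raw bytes to a file."""
    resp = requests.get(url, timeout=10)
    resp.raise_for_status()             # turn HTTP errors into exceptions ("cannot download")
    with open(filename, 'wb') as f:     # 'wb' because the body is binary image data
        f.write(resp.content)

# usage inside the loop above (extension assumed to be .jpg):
# save_image(t1, f'pic/{t2}.jpg')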
11. Open the Eastmoney finance digest page http://finance.eastmoney.com/a/ccjdd.html and fetch and print the news titles, contents, and publication times for pages 1 to 5.
import requests
from bs4 import BeautifulSoup

j = 1

def paqu1(link):
    global j
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36'}
    r = requests.get(link, headers=headers)
    soup = BeautifulSoup(r.text, 'html.parser')
    fflist = soup.find_all('p', class_='title')
    for ff in fflist:
        print(j, ff.a.text.strip(), ff.a['href'])
        j += 1

for i in range(1, 6):
    link = 'https://finance.eastmoney.com/a/cywjh_' + str(i) + '.html'
    paqu1(link)