Python 詞雲圖生成源碼:用 Python 實現的一個詞雲圖生成代碼,可以生成某個論壇版塊的詞雲圖數據。只需 Python 運行環境即可運行該程序,讓你輕鬆獲得想要的詞雲圖數據。本次放出 Python 詞雲圖生成源碼資源下載,感興趣的朋友們不妨試試吧!
Python 詞雲生成代碼說明:
由吾愛論壇大神製作
輸入序號即可生成相關板塊的詞雲信息
Python 詞雲圖代碼演示:
import requests
from lxml import etree
import jieba
import numpy as np
import matplotlib.pyplot as plt
import time
from PIL import Image
from wordcloud import WordCloud
def _fetch_tree(url):
    """Download *url* and return its body parsed as an lxml HTML tree."""
    # A timeout keeps one stalled connection from hanging the whole crawl.
    response = requests.get(url, timeout=30)
    return etree.HTML(response.text)


def _page_count(tree):
    """Return the number of list pages shown by the board's top pager.

    The pager span is expected to read like ' / 12 頁'. Only the ASCII digits
    are kept, so the result does not depend on the exact surrounding text.
    Falls back to 1 when the pager is missing or contains no digits.
    """
    label = tree.xpath('//*[@id="fd_page_top"]/div/label/span/text()')
    if not label:
        return 1
    digits = ''.join(ch for ch in label[0] if '0' <= ch <= '9')
    return int(digits) if digits else 1


def GetData(forumdata, save_prefix='c:\\'):
    """Build, save and display a word cloud of thread titles per forum board.

    For every board id, walk all of the board's list pages on www.52pojie.cn,
    collect the thread titles, render them with WordCloud, write the image to
    ``save_prefix + <board name> + '.png'`` and display it with matplotlib.
    The board name is printed as soon as it is known.

    Args:
        forumdata: Iterable of board ids (the ``x`` in ``forum-x-1.html``).
        save_prefix: Text prepended verbatim to the PNG file name, so it must
            include the trailing path separator. Defaults to ``'c:\\'``.

    Returns:
        None.
    """
    url_template = 'https://www.52pojie.cn/forum-{}-{}.html'
    # Thread titles. To collect thread authors instead, use
    # '//*[@id="threadlisttableid"]/tbody/tr/td[2]/cite/a/text()'.
    title_xpath = '//*[@class="s xst"]/text()'

    for j in forumdata:
        # Page 1 supplies the board name, the page count and the first titles,
        # so it is downloaded only once.
        tree = _fetch_tree(url_template.format(j, 1))
        forumname = ''.join(
            tree.xpath('//*[@id="ct"]/div/div[1]/div[1]/h1/a/text()')
        ).replace('『', '').replace('』', '')
        print(forumname)

        total_pages = _page_count(tree)
        titles = list(tree.xpath(title_xpath))
        for page in range(2, total_pages + 1):
            time.sleep(1)  # give the server some time to breathe
            tree = _fetch_tree(url_template.format(j, page))
            titles.extend(tree.xpath(title_xpath))

        # One separator between all titles, including across page boundaries,
        # so the last title of a page never fuses with the first of the next.
        # NOTE(review): the file imports jieba but the text reaches WordCloud
        # unsegmented - confirm whether word segmentation was intended here.
        cut_text = ' '.join(titles)
        if not cut_text.strip():
            # WordCloud.generate() raises ValueError when given no words.
            print('{}: no thread titles found, skipping'.format(forumname or j))
            continue

        wordcloud = WordCloud(
            font_path='simhei.ttf',
            background_color='white',
            width=800,
            height=500,
        ).generate(cut_text)
        wordcloud.to_file(save_prefix + forumname + '.png')
        plt.imshow(wordcloud, interpolation="bilinear")
        plt.axis("off")
        plt.show()
if __name__ == '__main__':
    # Board ids passed to GetData (the "x" in forum-x-1.html on www.52pojie.cn).
    # Each comment gives the board name and its index in this list.
    forumdata = [
        "2",   # Original Releases, index 0
        "5",   # Unpacking & Cracking, index 1
        "65",  # Mobile Security, index 2
        "59",  # Software Debugging, index 3
        "24",  # Programming Languages, index 4
        "6",   # Animation Releases, index 5
        "4",   # Reverse-Engineering Resources, index 6
        "16",  # Featured Software, index 7
        "8",   # Bounty Q&A, index 8
        "32",  # Virus Analysis, index 9
        "50",  # Virus Rescue, index 10
        "41",  # Security Tools, index 11
    ]
    print('開始獲取,請稍候...')
    GetData(forumdata)