本文共 1124 字,大约阅读时间需要 3 分钟。
话不多说,上代码:
class get_article_content(object):
    """Fetch article content.

    Original author's note: the only inputs are the start value and the end
    position of the range to crawl.
    """

    # NOTE: the original article omits part of this class at this point.
    # read_csv(), crawl_article() and save_keyword_retrieval(), which run()
    # calls below, are not shown in the article.

    @staticmethod
    def run(initial_value, last_value):
        """Crawl one range of article URLs and save the results.

        Args:
            initial_value: Start position passed through to crawl_article().
            last_value: End position passed through to crawl_article().
                (Presumably a half-open range into the URL list -- confirm
                against crawl_article(), which is not shown.)
        """
        # Read the original URLs.
        article_urls = get_article_content.read_csv()
        # Crawl the requested range.
        datas = get_article_content.crawl_article(
            article_urls, initial_value, last_value
        )
        # Save the crawled data.
        get_article_content.save_keyword_retrieval(datas)


class thread_crawl(object):
    """Thread helper that fans the crawl out over several worker threads."""

    # Number of worker threads started by five_threads().
    THREAD_COUNT = 5

    def __init__(self):
        pass

    @staticmethod
    def _split_ranges(total, parts):
        """Split ``range(total)`` into ``parts`` consecutive (start, end) pairs.

        Every pair spans ``total // parts`` items except the last one, which
        extends to ``total`` so the remainder is not dropped.
        """
        step = total // parts
        bounds = [(i * step, (i + 1) * step) for i in range(parts - 1)]
        bounds.append(((parts - 1) * step, total))
        return bounds

    @staticmethod
    def five_threads():
        """Crawl all articles with five threads, then sort the saved results.

        Each thread runs get_article_content.run() on its own range. Once
        every worker has finished, 'keyword_retrieval.csv' is sorted.
        """
        all_article_num = len(list(get_article_content.read_csv()))
        workers = [
            threading.Thread(target=get_article_content.run, args=bounds)
            for bounds in thread_crawl._split_ranges(
                all_article_num, thread_crawl.THREAD_COUNT
            )
        ]
        for worker in workers:
            worker.start()
        # Join the workers instead of polling the global thread count: this
        # waits only for our own threads and returns as soon as they finish.
        for worker in workers:
            worker.join()
        # All workers are done: sort the articles.
        sort_article.sort_it('keyword_retrieval.csv')
转载地址:http://rfkrn.baihongyu.com/