发布时间:2019-09-19 08:00:54 | 编辑:auto | 阅读(1903)
生成激活码
#!/usr/bin/env python
# encoding: utf-8
# Author: sean
"""Generate random activation codes made of dash-separated groups."""
import random
import string

# Alphabet the codes are drawn from: ASCII letters plus digits.
# string.ascii_letters is used instead of string.letters, which is
# locale-dependent on Python 2 and does not exist on Python 3.
field = string.ascii_letters + string.digits


def getRandom():
    """Return one group of 4 characters drawn from ``field``.

    random.sample draws without replacement, so a character never repeats
    within a single group.
    """
    # NOTE(review): the random module is not a cryptographically secure
    # source; use random.SystemRandom().sample if codes must be unguessable.
    return ''.join(random.sample(field, 4))


def concatenate(group):
    """Return one activation code: ``group`` random groups joined by '-'.

    ``group`` is the number of 4-character groups; 0 yields an empty string.
    """
    return '-'.join(getRandom() for _ in range(group))


def generate(n):
    """Return a list of ``n`` activation codes, each made of 4 groups."""
    return [concatenate(4) for _ in range(n)]


if __name__ == '__main__':
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(generate(10))
统计单词
#!/usr/bin/env python
# encoding: utf-8
"""Count how often each word occurs in an English plain-text file."""
import re
from collections import Counter

# Default input file used when the module is run as a script.
FileSource = './media/abc.txt'


def getMostCommonWord(articlefilesource):
    """Return the token frequencies of an English plain-text file.

    A token is either a run of ASCII letters or a number with an optional
    leading '$' and an optional trailing '%'. Matching is case-sensitive.

    :param articlefilesource: path of the text file to read.
    :return: list of ``(token, count)`` pairs, most frequent first.
    """
    # The numeric alternative used to end in a '$' anchor, so numbers were
    # only matched at the very end of the file; without it they are counted
    # wherever they occur.
    pattern = r'[A-Za-z]+|\$?\d+%?'
    with open(articlefilesource) as f:
        tokens = re.findall(pattern, f.read())
    return Counter(tokens).most_common()


if __name__ == '__main__':
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(getMostCommonWord(FileSource))
提取网页正文
#!/usr/bin/env python
# encoding: utf-8
"""Extract the main article text of a web page with goose."""
from goose import Goose
from goose.text import StopWordsChinese
import sys

# URL of the page to analyse. The original value is missing from the source
# (the literal was left unterminated), so it defaults to empty; set it here
# or pass a URL on the command line.
url = ''


def extract(url):
    """Return the cleaned main text of the page at ``url``.

    Goose is configured with the Chinese stop-word class, so this is
    presumably meant for Chinese-language pages.
    """
    g = Goose({'stopwords_class': StopWordsChinese})
    article = g.extract(url=url)
    return article.cleaned_text


if __name__ == '__main__':
    # Prefer a URL given on the command line; fall back to the module default.
    target = sys.argv[1] if len(sys.argv) > 1 else url
    if not target:
        sys.exit('usage: %s <url>' % sys.argv[0])
    # Parenthesised single-argument print works on both Python 2 and 3.
    print(extract(target))
上一篇: 构建python的运行环境
下一篇: python dict 与list比较
51302
50751
41349
38159
32631
29529
28376
23250
23217
21540
1615°
2348°
1953°
1895°
2225°
1935°
2621°
4401°
4242°
3014°