#! /usr/bin/env python
# -*- coding: UTF-8 -*-
"""Count Devanagari characters and character pairs in a folder of text files.

Every regular file directly inside ``textfolder`` is scanned.  For each group
of search strings (consonants, vowels, joiners, other symbols,
consonant+joiner pairs and consonant+consonant pairs) one tab-separated
"string, count" line per search string is appended to that group's result
file, and the same pair is echoed to standard output.
"""
import io
import os

consonants = u"कखगघङचछजझञटठडढणतथदधनपफबभमयरलवशषसह"
vowels = u"अआइईउऊऋएऐओऔ"
joiners = u"ािीुूृेैोौ्ँंः"
othersymbols = u"ॐ"

# file and folder path
textfolder = "./nepaliarticles/"  # folder in which the corpus files are kept
resultfile = "frequency.txt"  # default file to which frequencies are appended


def _corpus_files(folder):
    """Yield the path of every regular file directly inside *folder*."""
    for name in os.listdir(folder):
        path = os.path.join(folder, name)
        # Sub-directories cannot be read as text; skip them instead of crashing.
        if os.path.isfile(path):
            yield path


def _count_all(needles, folder):
    """Return ``{needle: total}`` over all files, reading each file only once.

    Counting is done on the raw UTF-8 bytes, so a file that is not valid
    UTF-8 is still scanned instead of raising a decode error.
    """
    totals = dict.fromkeys(needles, 0)
    # Encode once up front; iterating the dict keys also drops duplicates so a
    # repeated needle is not counted twice.
    encoded = [(needle, needle.encode("utf-8")) for needle in totals]
    for path in _corpus_files(folder):
        with io.open(path, "rb") as in_file:
            data = in_file.read()
        for needle, raw in encoded:
            totals[needle] += data.count(raw)
    return totals


def _append_results(outfile, rows):
    """Print each ``(needle, count)`` row and append it to *outfile*."""
    with io.open(outfile, "a", encoding="utf-8") as out_file:
        for needle, count in rows:
            # Single-argument form behaves the same on Python 2 and Python 3.
            print(u"%s %d" % (needle, count))
            out_file.write(u"%s\t%d\n" % (needle, count))


def calculateAndUpdate(findString, folder=None, outfile=None):
    """Count *findString* in every corpus file and append the total to a file.

    findString -- unicode string to search for.
    folder     -- folder holding the corpus; defaults to the module-level
                  ``textfolder``.
    outfile    -- file the result line is appended to; defaults to the
                  module-level ``resultfile``.

    Returns the total number of non-overlapping occurrences.  Raises
    ``OSError``/``IOError`` if the folder or a file cannot be read.
    """
    # Globals are resolved at call time so callers that rebind them still work.
    if folder is None:
        folder = textfolder
    if outfile is None:
        outfile = resultfile
    count = _count_all([findString], folder)[findString]
    _append_results(outfile, [(findString, count)])
    return count


def main(folder=None):
    """Compute every frequency table and append each to its own result file.

    All search strings are counted in a single pass over the corpus so each
    file is read once rather than once per search string.
    """
    if folder is None:
        folder = textfolder
    groups = [
        ("frequency-consonants.txt", list(consonants)),
        ("frequency-vowels.txt", list(vowels)),
        ("frequency-joiners.txt", list(joiners)),
        ("frequency-othersymbols.txt", list(othersymbols)),
        ("frequency-consonant-joiner.txt",
         [c + j for c in consonants for j in joiners]),
        ("frequency-consonant-consonant.txt",
         [a + b for a in consonants for b in consonants]),
    ]
    every_needle = [needle for _, needles in groups for needle in needles]
    totals = _count_all(every_needle, folder)
    for outfile, needles in groups:
        _append_results(outfile, [(needle, totals[needle]) for needle in needles])


if __name__ == "__main__":
    main()