Custom Search

Saturday, March 12, 2011

python find differences between two text files

python find differences between two text files

==================== Example-1

# Example 1: compare f1 and f2 position by position and save to f3 every
# line of f1 that differs from the line at the same index in f2.
#
# The single-argument print(...) calls produce the same text under both
# Python 2 and Python 3.

# The context managers close the files even if reading raises.
with open('f1', 'r') as fp1:
    lines1 = fp1.readlines()
print("-----lines1----- %r" % (lines1,))

with open('f2', 'r') as fp2:
    lines2 = fp2.readlines()
print("-----lines2----- %r" % (lines2,))

newlist = []
for i, x in enumerate(lines1):
    if i >= len(lines2):
        # f2 has no line at this index: the remaining f1 lines are
        # collected as differences but, as before, not echoed.
        newlist.append(x)
    elif x != lines2[i]:
        print(x)
        newlist.append(x)

print("----newlist---- %r" % (newlist,))

# Write the differing lines out unchanged (they keep their own newlines).
with open('f3', 'w') as fp3:
    fp3.writelines(newlist)


OUTPUT
========

$ python test.py

-----lines1----- ['hello\n', 'testing1\n', 'testing3\n', 'hi\n']
-----lines2----- ['hello\n', 'testing2\n', '\n']
testing1

testing3

----newlist---- ['testing1\n', 'testing3\n', 'hi\n']

==================== Example-2

# Example 2: write to f3 every line of f1 that does not occur anywhere in
# f2 (membership test, independent of line position).
#
# The single-argument print(...) calls produce the same text under both
# Python 2 and Python 3.

# The context managers close the files even if reading raises.
with open('f1', 'r') as fp1:
    lines1 = fp1.readlines()
print("-----lines1----- %r" % (lines1,))

with open('f2', 'r') as fp2:
    lines2 = fp2.readlines()
print("-----lines2----- %r" % (lines2,))

# Build the lookup set once so each membership test is O(1) instead of a
# linear scan of lines2 for every line of lines1.
lines2_set = set(lines2)

# A separate handle name for the output file avoids rebinding fp2, and the
# with-block guarantees f3 is flushed and closed.
with open('f3', 'w') as fp3:
    fp3.writelines(x for x in lines1 if x not in lines2_set)

#fp2.writelines([x for x in lines1 if x not in lines2]) # <--- OR
fp2.close()

==================== Example-3

# python find differences between two text files
#http://www.daniweb.com/software-development/python/threads/96638

import difflib

# Example 3: use difflib.Differ to produce a line-by-line delta of f1 and
# f2, then split it into the lines unique to each file.
#
# The single-argument print(...) calls produce the same text under both
# Python 2 and Python 3; the "\n" inside each header puts the value on its
# own line, as the original print statements did.

# The context managers close the files even if reading raises.
with open('f1', 'r') as fp1:
    lines1 = fp1.readlines()
print("-----lines1-----\n%r" % (lines1,))

with open('f2', 'r') as fp2:
    lines2 = fp2.readlines()
print("-----lines2-----\n%r" % (lines2,))

# Each delta entry starts with a two-character code: '- ' (only in lines1),
# '+ ' (only in lines2), '  ' (in both) or '? ' (intraline hint).
result_list = list(difflib.Differ().compare(lines1, lines2))

print("-----result_list-------\n%r" % (result_list,))

# Entries keep their '- ' / '+ ' prefix, exactly as Differ emitted them.
lines_diff1 = [entry for entry in result_list if entry.startswith('-')]
lines_diff2 = [entry for entry in result_list if entry.startswith('+')]

print("-----lines which are in lines1 and not in lines2 -----\n%r" % (lines_diff1,))
print("-----lines which are in lines2 and not in lines1 -----\n%r" % (lines_diff2,))


OUTPUT
========

-----lines1-----
['hello\n', 'testing1\n', 'testing3\n', 'hi\n']
-----lines2-----
['hello\n', 'testing2\n']
-----result_list-------
['  hello\n', '- testing1\n', '?        ^\n', '+ testing2\n', '?        ^\n',
'- testing3\n', '- hi\n']
-----lines which are in lines1 and not in lines2 -----
['- testing1\n', '- testing3\n', '- hi\n']
-----lines which are in lines2 and not in lines1 -----
['+ testing2\n']


No comments:

Post a Comment