#!/usr/bin/env python
# -*- coding: utf-8 -*-
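'''
Clean up HTML files: put every tag and every text run on its own line and
indent each line according to its tag nesting depth.
'''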
import sys
from optparse import OptionParser
import re
import os
# All patterns and constants are bytes: the input file is read in binary mode
# and processed without decoding, so any ASCII-compatible encoding passes
# through unchanged.  (On Python 2, bytes literals are plain str.)

# One or more consecutive CR/LF characters; collapsed to a single '\n'.
newline = re.compile(br'[\r\n]+')

# A line that consists of exactly one tag.  Groups, in order: a leading '/'
# (closing tag), the tag name, a trailing '/' (self-closing tag).  Doctypes
# and comments start with '<!' and therefore never match.
tag = re.compile(br'^\s*<(/?)(\w+).*?(/?)>$', re.DOTALL)

# Number of spaces added per nesting level.
sw = 2

# Tags that must not change the nesting level: the HTML void elements, which
# never have a closing tag.  Compared against the lower-cased tag name.
noindent = (
    b'area', b'base', b'br', b'col', b'embed', b'hr', b'img', b'input',
    b'link', b'meta', b'param', b'source', b'track', b'wbr',
)


def _write_stdout(payload):
    '''Write the bytes *payload* to standard output without re-encoding it.'''
    raw = getattr(sys.stdout, 'buffer', None)
    if raw is not None:
        # Python 3 text stream: flush text written earlier so the output
        # order is preserved, then bypass the text encoder.
        sys.stdout.flush()
        raw.write(payload)
    elif isinstance(payload, str):
        # Python 2: bytes is str and sys.stdout accepts it directly.
        sys.stdout.write(payload)
    else:
        # Text-only replacement stream (e.g. io.StringIO) on Python 3.
        sys.stdout.write(payload.decode('utf-8', 'replace'))


def clean(filename):
    '''
    Re-indent the HTML file *filename* and write the result to stdout.

    The file is split at every '<' and '>' so that each tag and each run of
    text ends up on its own line; blank lines are dropped and surrounding
    whitespace is stripped.  Every line is then indented by ``sw`` spaces per
    nesting level: an opening tag increases the level for the following
    lines, a closing tag decreases it (never below zero) and is printed at
    the decreased level.  Self-closing tags, tags listed in ``noindent`` and
    lines that are not a tag leave the level unchanged.

    The split is purely textual, so '<' and '>' inside scripts, comments or
    attribute values are treated like tag delimiters too.

    Raises whatever ``open`` raises when the file cannot be read.
    '''
    with open(filename, 'rb') as fo:
        data = fo.read()

    # Normalise line endings, then force a line break around every tag.
    data = newline.sub(b'\n', data)
    data = data.replace(b'>', b'>\n').replace(b'<', b'\n<')

    ilevel = 0
    out = []
    for piece in data.splitlines():
        piece = piece.strip()
        if not piece:
            continue
        depth = ilevel
        found = tag.search(piece)
        if found is not None:
            close1, tagname, close2 = found.groups()
            if close2 or tagname.lower() in noindent:
                # Self-closing or void element: nesting is unaffected.
                pass
            elif close1:
                # Closing tag: step out first so it lines up with its opener.
                if ilevel:
                    ilevel -= 1
                depth = ilevel
            else:
                # Opening tag: printed at the current level, children deeper.
                ilevel += 1
        out.append(b' ' * (sw * depth) + piece)

    _write_stdout(b'\n'.join(out) + b'\n')
def main():
    '''
    Command-line entry point: clean every file named on the command line.

    Each positional argument is passed to ``clean`` in order.  A file that
    cannot be read is reported on stderr and the remaining files are still
    processed.

    Returns 0 when every file was processed, 1 when at least one failed.
    '''
    op = OptionParser(usage='%prog FILE [FILE ...]')
    _, args = op.parse_args()
    status = 0
    for filename in args:
        try:
            clean(filename)
        except (IOError, OSError) as err:
            # The exception text already names the offending file.
            sys.stderr.write('%s: %s\n' % (op.get_prog_name(), err))
            status = 1
    return status
# Executed as a script: use main()'s return value as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
Rekomendasi Game Android Bahasa Indonesia
3 years ago
0 comments:
Post a Comment