#! /usr/bin/env python3 # This file is under GNU General Public License 3.0 # see LICENSE.txt from argparse import ArgumentParser from fileinput import FileInput, hook_encoded import re, itertools, sys space = re.compile(r'^\s') p = ArgumentParser(description="re-write re-order csv and strip lines with too long words") p.add_argument('--input', '-i', type=str, default="somefile.cvs", help='input file') p.add_argument('--length', '-l', type=int, default=100, help='min word length to stripp a line') args = p.parse_args() try: from icu import UnicodeString, Locale except ImportError: print("warning: PyICU not installed, using fallback", file=sys.stderr) else: locale = Locale("utf-8") _all = ( line.split(',') for line in FileInput( args.input, openhook=hook_encoded("utf-8") ) if not space.match(line) ) _some = (line for line in _all if len(line[2]) < args.length) for i, w in enumerate(_some): print("{l},{i},{w},0".format(l=w[0], i=i, w=w[2]))