# File: class/Extras/Other/_old-Tools/packaging/Scripts/fixeoln_one.py

#########################################################
# Use: "python fixeoln_one.py [tounix|todos] filename".
# convert end-lines in the single text file whose name 
# is passed in on the command line, to the target form
# (unix or dos).  The _one, _dir, and _all converters 
# reuse the convert function here; we could implement 
# this by inspecting command-line argument patterns 
# instead of writing 3 separate scripts, but that can 
# become complex for the user.  convertEndlines changes 
# endlines only if necessary--lines that are already in
# the target format are left unchanged, so it's okay to 
# convert a file > once in any of the 3 fixeoln scripts.
#########################################################
# Warning: must use binary mode read/write for this to 
# work on windows, else default text mode auto deletes
# the \r on read, and auto writes an extra \r for \n,
# such that text mode may yield one extra \r per line:
#
# C:\...>python
# >>> open('ttt1.txt', 'w').writelines(['a\n', 'b\n'])
# >>> open('ttt2.txt', 'wb').writelines(['a\n', 'b\n'])
# >>>
# C:\...>python
# >>> open('ttt1.txt', 'rb').read() # \r added on write
# 'a\015\012b\015\012'              
# >>> open('ttt1.txt', 'r').read()  # \r dropped on read 
# 'a\012b\012'
# >>> open('ttt2.txt', 'rb').read() # no \r auto added 
# 'a\012b\012'                      # still in unix form
#
# >>> open('ttt.txt', 'wb').writelines(['a\r\n', 'b\r\n'])
# >>> open('ttt.txt', 'rb').read()
# 'a\015\012b\015\012'              # no mapping if all b
# >>> open('ttt.txt', 'r').read()
# 'a\012b\012'
#
# >>> open('ttt.txt', 'w').writelines(['a\r\n', 'b\r\n'])
# >>> open('ttt.txt', 'rb').read()
# 'a\015\015\012b\015\015\012'      # one extra \r here
# >>> open('ttt.txt', 'r').read()
# 'a\015\012b\015\012'              # really are 2 \r's!
#
# Because of this behaviour, we might only need to read 
# and write files in text mode to perform the todos eoln
# mapping on dos (it will auto add the \r if missing); 
# but that won't work for the tounix mode when running 
# on Windows (we get the \r) or the todos mode on Unix.
# This was a bug in a prior version; fixed in loop with:
#        if string.find(line, '\r\r') != -1:
#            line = string.replace(line, '\r\r', '\r')
#########################################################

import os
listonly = 0   # 1=show file to be changed, don't rewrite


def convertEndlines(format, fname):
    r"""Convert the end-of-line markers of a single file, in place.

    format -- 'todos' maps a bare \n to \r\n; 'tounix' maps \r\n to \n.
              Any other value leaves every line as it is.
    fname  -- path of the file to convert.  If it does not name a regular
              file (e.g. it is a directory), a message is printed and
              nothing else happens.

    Lines that are already in the target form are left alone, and the file
    is rewritten only if at least one line changed, so converting the same
    file more than once is harmless.  When the module-level 'listonly' flag
    is true the file is reported but not rewritten.

    Returns None.  An IOError raised while writing is reported and skipped;
    an error raised while reading propagates to the caller.
    """
    if not os.path.isfile(fname):                # skip directory names
        print('Not a text file %s' % fname)
        return

    # Binary mode is required so that no \r is added or dropped by the
    # platform's text-mode translation.  The 'with' block closes the file
    # as soon as it has been read instead of leaving that to garbage
    # collection.
    with open(fname, 'rb') as infile:
        lines = infile.readlines()

    # b'' literals are plain strings on Python 2.6+ and match the bytes
    # that binary-mode files yield on Python 3.
    newlines = []
    changed = False
    for line in lines:
        if format == 'todos':
            # bare \n only: a line already ending in \r\n is left alone
            if line.endswith(b'\n') and not line.endswith(b'\r\n'):
                line = line[:-1] + b'\r\n'
                changed = True
        elif format == 'tounix':
            if line.endswith(b'\r\n'):
                line = line[:-2] + b'\n'
                changed = True
        newlines.append(line)

    if changed:
        try:                                     # file might be read-only
            print('Changing %s' % fname)
            if not listonly:
                # closing on exit from 'with' guarantees the data is flushed
                with open(fname, 'wb') as outfile:
                    outfile.writelines(newlines)
        except IOError as why:
            print('Error writing to file %s: skipped (%s)' % (fname, why))

if __name__ == '__main__':
    # Command-line driver: python fixeoln_one.py [tounix|todos] filename
    import sys
    errmsg = 'Required arguments missing: ["todos"|"tounix"] filename'
    # Validate explicitly instead of with assert: assert statements are
    # removed under "python -O", which would let bad arguments through.
    # sys.exit with a string writes it to stderr and exits with status 1.
    if len(sys.argv) != 3 or sys.argv[1] not in ('todos', 'tounix'):
        sys.exit(errmsg)
    convertEndlines(sys.argv[1], sys.argv[2])
    # NOTE: printed unconditionally, even when convertEndlines skipped the
    # file or found nothing to change.
    print('Converted %s' % sys.argv[2])



# [Home page] Books Code Blog Python Author Train Find ©M.Lutz