"""
test various read and write file I/O modes for speed in the version
of Python that is running this script; runs most common and valid
read/write coding patterns; tests ascii and binary, but not wide-char
unicode files; printed results can be parsed later for comparisons
"""

######################################################################
# generic timer
######################################################################

import time

# time.clock() was deprecated in 3.3 and removed in 3.8, and on Unix it
# reported CPU time, which hides the time spent waiting on the disk;
# prefer the wall-clock perf_counter() where it exists (3.3+) and fall
# back to clock() only on older Pythons such as 2.6 and 3.0
_clock = getattr(time, 'perf_counter', None) or time.clock


def timeOnce(func, *args):
    """
    Call func(*args) one time and return the elapsed seconds as a float;
    any value returned by func is ignored
    """
    start = _clock()
    func(*args)                          # ignore any return value
    return _clock() - start


def timerAvg(func, *args):
    """
    Return the average elapsed seconds of 3 timed calls of func(*args),
    made after one extra untimed-in-the-result warm-up call
    """
    timeOnce(func, *args)                # warm-up call: result discarded
    reps = 3
    total = 0                            # take average of 3 runs
    for _ in range(reps):
        total += timeOnce(func, *args)
    return total / reps


# CHANGED: take low = "best"
# The following may be a bit better, but isn't directly comparable

def timerBest(func, *args):
    """
    Return the smallest elapsed seconds of 3 timed calls of func(*args),
    made after one extra warm-up call whose time is discarded
    """
    timeOnce(func, *args)                # make sure disk caches active
    reps = 3                             # take min of N runs
    return min(timeOnce(func, *args) for _ in range(reps))


timer = timerBest    # CHANGED


######################################################################
# file read tests
######################################################################

#=====================================================================
# all of the following are probably valid use cases for 2.6 and 3.0,
# though lines/text and blocks/binary combos seem more typical in 3.0
# (programs will pick str xor bytes for text or binary data in 3.0);
# truly binary files can only be read in binary mode in 3.0, because
# they cannot be decoded into characters in text mode, and it makes
# no sense to read truly binary files by lines: they have no lineends;
# the allAtOnce modes may fail for pathologically large files;
#
# 3.0 has str + bytes; 2.6 has just str, plus binary files
# open mode default = 'r' = 'rt' in 3.0, and 'r' in 2.6
# (both mean text mode input when the mode argument is omitted)
#=====================================================================

blocksize = 1024 * 32


# Each reader opens its file in a "with" block so the handle is closed
# deterministically inside the timed call, instead of being left for the
# garbage collector (which emits ResourceWarning on newer 3.X and closes
# at an unpredictable time on non-refcounting Pythons).

def read_byLines_textMode(filename):
    """
    Iterate over every line of filename in default (text) mode, discarding
    the lines; 2.6 text mode returns str and does not decode (use
    codecs.open), 3.0 text mode returns str after decoding content
    """
    with open(filename) as infile:
        for line in infile:
            pass


def read_byLines_binaryMode(filename):             # less common in 3.0?
    """
    Iterate over every line of filename in 'rb' mode, discarding the lines;
    2.6 binary mode returns str, 3.0 binary mode returns bytes, and neither
    decodes
    """
    with open(filename, 'rb') as infile:
        for line in infile:
            pass


def read_byBlocks_textMode(filename, size=blocksize):
    """
    Read filename in default (text) mode in chunks of up to size items
    until an empty result signals end of file; chunks are discarded
    """
    with open(filename) as infile:
        while True:                                # less common in 3.0?
            block = infile.read(size)
            if not block:
                break


def read_byBlocks_binaryMode(filename, size=blocksize):
    """
    Read filename in 'rb' mode in chunks of up to size bytes until an
    empty result signals end of file; chunks are discarded
    """
    with open(filename, 'rb') as infile:
        while True:
            block = infile.read(size)
            if not block:
                break


def read_allAtOnce_textMode(filename):             # not for very large files
    """
    Load the entire content of filename with one read() call in default
    (text) mode and discard it
    """
    with open(filename) as infile:
        infile.read()


def read_allAtOnce_binaryMode(filename):           # not for very large files
    """
    Load the entire content of filename with one read() call in 'rb' mode
    and discard it
    """
    with open(filename, 'rb') as infile:
        infile.read()


######################################################################
# file write tests
######################################################################

#=====================================================================
# all the following work, but tests "write_byLines_binaryMode" and
# "write_byBlocks_textMode" are probably invalid use cases for 3.0,
# where programs are more likely to pick str xor bytes for text
# or binary data, and not convert to str or bytes just to write in
# text or binary mode; portability issues: 3.0's encoding arg required
# by 3.0's bytes() converter is not allowed in 2.6's bytes(), and
# 2.6's str.decode() creates a unicode object which adds some cost;
#
# hoist set-up ops out to avoid charging to test funcs
# 'xx' / b'xx' are str / bytes in 3.0, both are str in 2.6
# 'xx' == b'xx' and bytes(x) == str(X) in 2.6
# 2.6: str is a seq of bytes, unicode a distinct type
# 3.0: str is seq of Unicode chars, bytes is seq of ints
#=====================================================================

oneMeg   = 1024 * 1024
halfMeg  = oneMeg // 2               # use truncating division in both 2.6 and 3.0
repsList = list(range(halfMeg))      # force list in both 2.6 and 3.0

aLine    = '*' * 49 + '\n'           # 25M in file ((50+\r?) * ((1024 * 1024) / 2))
aBlock   = b'1\x0234\x05' * 10       # 25M in file ((5 * 10) * (1M / 2))

aFileStr = aLine  * halfMeg          # 25M characters
aFileBin = aBlock * halfMeg          # 25M bytes

print('\nOutput data sizes: %s %s %s %s %s' %
      (len(repsList), len(aLine), len(aBlock), len(aFileStr), len(aFileBin)))


# Every writer uses a "with" block so that the file is flushed and closed
# inside the timed call on any Python implementation, and is still closed
# if a write raises.

def write_byLines_textMode(filename):      # writing by blocks in text mode is similar
    """
    Write aLine to filename once per item of repsList, in 'w' mode;
    3.0 text mode takes str, encodes content, xlates newlines (and takes
    an open() flag to control lineends); 2.6 text mode takes str and
    xlates newlines
    """
    with open(filename, 'w') as outfile:
        for i in repsList:
            outfile.write(aLine)


def write_byLines_binaryMode(filename):    # less common in 3.0?
    """
    Write aLine, converted with encode() on every iteration, to filename
    once per item of repsList, in 'wb' mode; 3.0 binary mode takes bytes
    and does not decode or xlate, 2.6 binary mode takes str and does not
    xlate newlines
    """
    with open(filename, 'wb') as outfile:
        for i in repsList:
            # the per-write conversion is part of what this test times:
            # encode() makes bytes in 3.0, same str in 2.6
            outfile.write(aLine.encode())


def write_byBlocks_textMode(filename):     # less common in 3.0?
    """
    Write aBlock, converted with decode() on every iteration, to filename
    once per item of repsList, in 'w' mode
    """
    with open(filename, 'w') as outfile:
        for i in repsList:
            # the per-write conversion is part of what this test times:
            # decode() makes str in 3.0, unicode in 2.6
            outfile.write(aBlock.decode())


def write_byBlocks_binaryMode(filename):   # writing by lines in binary mode is similar
    """
    Write aBlock unchanged to filename once per item of repsList, in
    'wb' mode
    """
    with open(filename, 'wb') as outfile:
        for i in repsList:
            outfile.write(aBlock)


def write_allAtOnce_textMode(filename):    # not for very large files
    """
    Write the prebuilt aFileStr to filename with a single write() call,
    in 'w' mode
    """
    with open(filename, 'w') as outfile:
        outfile.write(aFileStr)


def write_allAtOnce_binaryMode(filename):  # not for very large files
    """
    Write the prebuilt aFileBin to filename with a single write() call,
    in 'wb' mode
    """
    with open(filename, 'wb') as outfile:
        outfile.write(aFileBin)


######################################################################
# run, collect test data for Python running me
######################################################################

def timePython():
    """
    Time every read test on the two input files named by the first two
    command-line arguments, and every write test on 'timeIO.out', using
    the module-level timer; prints one header line per file and one
    result (or *fail*) line per test.

    Exits through sys.exit() with a usage message when fewer than two
    file names are given on the command line.
    """
    import sys, os

    def sizeOf(path):
        # size in bytes, or 0 for a file that does not exist (yet)
        return os.path.getsize(path) if os.path.exists(path) else 0

    outputfile = 'timeIO.out'                    # hard-code: I create this
    cmdargs = sys.argv[1:3]                      # input files vary, command line
    if len(cmdargs) != 2:
        sys.exit('usage: %s textfile binaryfile' % os.path.basename(sys.argv[0]))
    textfile, binaryfile = cmdargs

    # a sequence of pairs rather than a dict keyed by filename, so that
    # naming the same file twice on the command line cannot drop a test set
    tests = (
        (textfile,   (read_byLines_textMode,
                      read_byLines_binaryMode,       # less common in 3.0?
                      read_byBlocks_textMode,        # less common in 3.0?
                      read_byBlocks_binaryMode,
                      read_allAtOnce_textMode,       # not for very large files
                      read_allAtOnce_binaryMode)),   # not for very large files

        (binaryfile, (read_byBlocks_binaryMode,      # other read modes not valid,
                      read_allAtOnce_binaryMode)),   # for truly binary data files

        (outputfile, (write_byLines_textMode,
                      write_byLines_binaryMode,      # less common in 3.0?
                      write_byBlocks_textMode,       # less common in 3.0?
                      write_byBlocks_binaryMode,
                      write_allAtOnce_textMode,      # not for very large files
                      write_allAtOnce_binaryMode)),  # not for very large files
    )

    version = sys.version.split()[0]
    for (filename, funcs) in tests:
        filesize = sizeOf(filename)              # CHANGED
        print('\n[Python {0}: {1}, {2} bytes]'.format(version, filename, filesize))
        for func in funcs:
            try:
                testtime = timer(func, filename)
            except Exception:
                # report the failure and go on to the next test, but let
                # KeyboardInterrupt and SystemExit end the run
                print('%-26s => %s, %s' %
                      (func.__name__, '*fail*', sys.exc_info()[0]))
            else:
                # re-read the size: a write test has just (re)created the
                # file, so the size sampled before the test may be stale
                filesize = sizeOf(filename)
                # int/int=float+remainder in 3.0, but not 2.6
                filemegs = float(filesize) / oneMeg
                testid = '%-26s (%s=%.2fM)' % (func.__name__, filename, filemegs)
                print('%-46s => %f' % (testid, testtime))


if __name__ == '__main__':
    timePython()       # the version running me