#!/usr/bin/env python
# Demo program for zlib; it compresses or decompresses files, but *doesn't*
# delete the original. This doesn't support all of gzip's options.
#
# The 'gzip' module in the standard library provides a more complete
# implementation of gzip-format files.
import os
import struct
import sys
import zlib
# Bit masks for the flag byte of a gzip member header (RFC 1952):
# text hint, header CRC present, extra field present, original file name
# present, comment present.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
def write32(output, value):
    """Write the low 32 bits of *value* to *output*, least significant byte first.

    output -- binary file-like object with a write() method
    value  -- integer; bits above the low 32 are discarded and negative
              values come out in two's-complement form
    """
    # Mask before packing: struct rejects out-of-range values, and callers
    # may hand in a negative CRC (zlib.crc32 is signed on Python 2) or a
    # size of 4 GiB or more.
    output.write(struct.pack('<L', value & 0xFFFFFFFF))
def read32(input):
    """Read four bytes from *input* and return them as an unsigned integer.

    The first byte read is the least significant one (little-endian), which
    is the inverse of write32().

    input -- binary file-like object with a read() method

    Raises EOFError if fewer than four bytes are available.
    """
    data = input.read(4)
    if len(data) != 4:
        # Fail with a clear error instead of the TypeError that ord('')
        # would produce on a short read.
        raise EOFError('expected 4 bytes, got %d' % len(data))
    return struct.unpack('<L', data)[0]
def compress(filename, input, output):
    """Write everything read from *input* to *output* as a single gzip member.

    filename -- path of the file being compressed; it is stat()ed for the
                modification time, and its base name is stored in the header
    input    -- binary file-like object supplying the uncompressed data
    output   -- binary file-like object receiving the gzip stream
    """
    mtime = int(os.stat(filename).st_mtime)

    # RFC 1952 wants the name without directory components, as a
    # zero-terminated Latin-1 string.
    name = os.path.basename(filename)
    if not isinstance(name, bytes):
        name = name.encode('latin-1', 'replace')

    output.write(b'\037\213\010')           # Write the header, ...
    output.write(struct.pack('B', FNAME))   # ... flag byte ...
    write32(output, mtime)                  # ... modification time ...
    output.write(b'\002')                   # ... slowest compression alg. ...
    output.write(b'\377')                   # ... OS (=unknown) ...
    output.write(name + b'\000')            # ... original filename ...

    crcval = zlib.crc32(b'')
    size = 0
    # A negative window size asks zlib for a raw deflate stream, without the
    # zlib header and trailer; the gzip framing is written by hand here.
    compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
                               zlib.DEF_MEM_LEVEL, 0)
    while True:
        data = input.read(1024)
        if not data:
            break
        crcval = zlib.crc32(data, crcval)
        # Count what was actually read rather than trusting stat(), so the
        # trailer always matches the data that went into the stream.
        size += len(data)
        output.write(compobj.compress(data))
    output.write(compobj.flush())
    write32(output, crcval)                 # ... the CRC ...
    write32(output, size)                   # and the file size (low 32 bits).
def _read_exact(stream, count):
    """Read exactly *count* bytes from *stream*; raise EOFError if it ends early."""
    data = stream.read(count)
    if len(data) != count:
        raise EOFError('gzip header is truncated')
    return data


def _skip_cstring(stream):
    """Read and discard bytes from *stream* up to and including a zero byte."""
    # _read_exact raises at end of file, so a missing terminator cannot
    # turn this into an endless loop.
    while _read_exact(stream, 1) != b'\0':
        pass


def decompress(input, output):
    """Expand the gzip member in *input* and write the data to *output*.

    input  -- seekable binary file-like object positioned at the start of
              the gzip data (the trailer is located with seek())
    output -- binary file-like object receiving the decompressed data

    A bad magic number or an unknown compression method is reported on
    standard output and ends the program through sys.exit(1) before any
    data is written.  A CRC or length mismatch is only reported, after all
    the data has been written.

    Raises EOFError if the input ends inside the header or the trailer.
    """
    magic = input.read(2)
    if magic != b'\037\213':
        print('Not a gzipped file')
        sys.exit(1)
    if input.read(1) != b'\010':
        print('Unknown compression method')
        sys.exit(1)

    flag = ord(_read_exact(input, 1))
    _read_exact(input, 4 + 1 + 1)       # Discard modification time,
                                        # extra flags, and OS byte.
    if flag & FEXTRA:
        # Read & discard the extra field, if present; its length is a
        # 16-bit little-endian integer.
        xlen = struct.unpack('<H', _read_exact(input, 2))[0]
        _read_exact(input, xlen)
    if flag & FNAME:
        # Read and discard a null-terminated string containing the filename
        _skip_cstring(input)
    if flag & FCOMMENT:
        # Read and discard a null-terminated string containing a comment
        _skip_cstring(input)
    if flag & FHCRC:
        _read_exact(input, 2)           # Read & discard the 16-bit header CRC

    # Negative window size: the payload is a raw deflate stream.
    decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
    crcval = zlib.crc32(b'')
    length = 0
    while True:
        data = input.read(1024)
        if not data:
            break
        decompdata = decompobj.decompress(data)
        output.write(decompdata)
        length += len(decompdata)
        crcval = zlib.crc32(decompdata, crcval)
    decompdata = decompobj.flush()
    output.write(decompdata)
    length += len(decompdata)
    crcval = zlib.crc32(decompdata, crcval)

    # We've read to the end of the file, so we have to rewind in order
    # to reread the 8 bytes containing the CRC and the file size.  The
    # decompressor is smart and knows when to stop, so feeding it
    # extra data is harmless.
    input.seek(-8, 2)
    crc32 = read32(input)
    isize = read32(input)
    # The stored values are unsigned 32-bit quantities: mask the computed
    # CRC (zlib.crc32 can be negative on Python 2) and reduce the length
    # modulo 2**32 before comparing.
    if crc32 != (crcval & 0xFFFFFFFF):
        print('CRC check failed.')
    if isize != (length & 0xFFFFFFFF):
        print('Incorrect length of data produced')
def main():
    """Compress or decompress the single file named on the command line.

    A name ending in '.gz' is decompressed into the same name without the
    suffix; any other name is compressed into '<name>.gz'.  The original
    file is left in place.  With the wrong number of arguments a usage
    message is printed and the program exits with status 2.
    """
    if len(sys.argv) != 2:
        print('Usage: minigzip.py <filename>')
        print(' The file will be compressed or decompressed.')
        sys.exit(2)

    filename = sys.argv[1]
    if filename.endswith('.gz'):
        compressing = False
        outputname = filename[:-3]
    else:
        compressing = True
        outputname = filename + '.gz'

    # The with blocks close both files even when compress() or decompress()
    # raises or calls sys.exit().
    with open(filename, 'rb') as infile:
        with open(outputname, 'wb') as outfile:
            if compressing:
                compress(filename, infile, outfile)
            else:
                decompress(infile, outfile)
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    main()