python分割文件为指定大小

本文最后更新于:4 年前

本文参考python学习–大文件分割与合并

一、说明

1、使用python将文件分割成指定大小,便于传输。例如,当文件大小超过U盘容量时,可使用该程序对数据进行切割。
2、本程序在每一个切割文件的前四个字节中,依次写入了该文件的序号和分割后的文件总数(各占两个字节,高位字节在前)。

二、代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# coding=utf-8

import sys,os

# Size units used to express the default part size.
kilobytes = 1024  # bytes in one kilobyte
# NOTE: this is 1000 kilobytes (1,024,000 bytes), not 1024 * 1024 bytes.
megabytes = 1000 * kilobytes
# Default number of data bytes stored in each part file.
chunksize = 200 * megabytes

def getPartSum(fromfile, chunksize):
    """Return the number of parts needed to split ``fromfile``.

    Args:
        fromfile: Path of the file that is going to be split.
        chunksize: Maximum number of data bytes per part; must be positive.

    Returns:
        The size of ``fromfile`` divided by ``chunksize``, rounded up, as an
        ``int``. An empty file yields 0.

    Raises:
        ValueError: If ``chunksize`` is not positive.
        OSError: If the size of ``fromfile`` cannot be determined.
    """
    if chunksize <= 0:
        raise ValueError('chunksize must be a positive number of bytes')
    # Query the size only once, and round up with exact integer arithmetic:
    # float division loses precision once the size no longer fits a double.
    filesize = os.path.getsize(fromfile)
    whole_parts, remainder = divmod(filesize, chunksize)
    return int(whole_parts) + (1 if remainder else 0)

def split(fromfile, todir, chunksize=chunksize):
    """Split ``fromfile`` into numbered part files inside ``todir``.

    Parts are named ``part0001``, ``part0002``, ... Each part starts with a
    4-byte header: the serial number of the part followed by the total
    number of parts, each stored as a 2-byte big-endian unsigned integer.
    The header is followed by up to ``chunksize`` bytes read from
    ``fromfile``.

    WARNING: if ``todir`` already exists, every entry in it is removed
    before the parts are written, so ``fromfile`` must not live in ``todir``.

    Args:
        fromfile: Path of the file to split.
        todir: Directory that receives the part files; created if missing.
        chunksize: Maximum number of data bytes per part; must be positive.

    Returns:
        The number of part files written.

    Raises:
        ValueError: If ``chunksize`` is not positive, or if the file would
            need more parts than the 2-byte header fields can represent.
        OSError: If a file or directory cannot be read, created or removed.
    """
    if chunksize <= 0:
        raise ValueError('chunksize must be a positive number of bytes')

    # Validate the request before touching todir, so that a bad source path
    # or an oversized part count does not wipe an existing directory first.
    partsum = getPartSum(fromfile, chunksize)  # total number of parts
    if partsum > 0xFFFF:
        # Each header field is 2 bytes wide; a larger count would not fit.
        raise ValueError(
            'too many parts (%d): the header stores at most 65535; '
            'use a larger chunksize' % partsum)

    if not os.path.exists(todir):
        os.makedirs(todir)  # also creates missing parent directories
    else:
        # Remove leftovers so stale parts never mix with the new ones.
        for fname in os.listdir(todir):
            os.remove(os.path.join(todir, fname))

    partnum = 0  # serial number of the most recently written part
    with open(fromfile, 'rb') as inputfile:
        while True:
            chunk = inputfile.read(chunksize)
            if not chunk:  # end of the input file
                break
            partnum += 1
            filename = os.path.join(todir, 'part%04d' % partnum)
            # The with-blocks close both files even if a write fails.
            with open(filename, 'wb') as fileobj:
                fileobj.write(partnum.to_bytes(2, 'big'))  # serial number
                fileobj.write(partsum.to_bytes(2, 'big'))  # total parts
                fileobj.write(chunk)
    return partnum
if __name__ == '__main__':
    # Interactive entry point: ask for the source file, the target directory
    # and the part size in bytes, then run the split and report the result.
    fromfile = input('File to be split?')
    todir = input('Directory to store part files?')
    chunksize = int(input('Chunksize to be split?'))
    absfrom, absto = map(os.path.abspath, [fromfile, todir])
    print('Splitting', absfrom, 'to', absto, 'by', chunksize)
    try:
        parts = split(fromfile, todir, chunksize)
    except Exception as exc:
        # Catch Exception rather than using a bare except, so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still terminate the program.
        print('Error during split:')
        print(type(exc), exc)
    else:
        print('split finished:', parts, 'parts are in', absto)