"""For relatively large files, split the data into chunks first and then
compress and encrypt the chunks with multiple threads — this improves
security while also improving throughput."""
import os
from concurrent.futures import ThreadPoolExecutor
from struct import pack
from zlib import compress
# Compress-then-encrypt one chunk of the data set.
def get_compress_encrypt_chunk(chunk_data, cipher_suite, level: int = 9):
    """Compress *chunk_data* with zlib, then encrypt the result.

    Args:
        chunk_data: Raw bytes of a single chunk.
        cipher_suite: Object exposing ``encrypt(bytes) -> bytes``
            (e.g. ``cryptography.fernet.Fernet``).
        level: zlib compression level, 0-9 (default 9 — maximum;
            measured to differ little from the default level 6 for
            this workload).

    Returns:
        The encrypted, compressed bytes.
    """
    compressed_data = compress(chunk_data, level)
    encrypted_data = cipher_suite.encrypt(compressed_data)
    return encrypted_data
def get_model_compress_encrypt_data(
    file_path: str,
    cipher_suite,
    chunk_size: int = 1024 * 1024 * 4,
):
    """Read a file, split it into chunks, and compress+encrypt the
    chunks concurrently with a thread pool.

    Args:
        file_path: Path of the (model) file to read.
        cipher_suite: Object exposing ``encrypt(bytes) -> bytes``.
        chunk_size: Chunk size in bytes (default 4 MiB).

    Returns:
        List of encrypted chunk byte strings, in file order.
    """
    with open(file_path, 'rb') as fr:
        model_bytes = fr.read()
    # Ceiling division: number of chunks needed to cover the data.
    num_chunks = (len(model_bytes) + chunk_size - 1) // chunk_size
    print(f"==>> num_chunks: {num_chunks}")
    # Bug fix: an empty file yields num_chunks == 0, and
    # ThreadPoolExecutor(max_workers=0) raises ValueError.
    if num_chunks == 0:
        return []
    # Slicing past the end of bytes is safely truncated by Python.
    chunks = [
        model_bytes[i * chunk_size:(i + 1) * chunk_size]
        for i in range(num_chunks)
    ]
    # Cap the worker count: the original spawned one thread per chunk
    # (max_workers=num_chunks), which does not scale for large files.
    max_workers = min(num_chunks, 4 * (os.cpu_count() or 1))
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # map() over two parallel iterables replaces the original
        # param_list / *zip(*param_list) round trip; order is preserved.
        encrypted_chunks = list(
            executor.map(
                get_compress_encrypt_chunk,
                chunks,
                [cipher_suite] * num_chunks,
            )
        )
    return encrypted_chunks
if __name__ == '__main__':
    import time

    from cryptography.fernet import Fernet

    # Model base names to process.
    # Bug fix: `model_test` was an undefined bare name (NameError);
    # it must be a string literal.
    model_name_prefix_list = [
        "model_test",
    ]
    model_name_postfix = ".onnx"
    for model_name_prefix in model_name_prefix_list:
        model_name = f"{model_name_prefix}{model_name_postfix}"
        print(f"==>> model_name: {model_name}")
        model_path = f"{model_name}"
        # chunk_param_list = [1, 2, 4, 8, 16, 32, 64]
        chunk_param_list = [4]
        for param in chunk_param_list:
            print(f"==>> chunk_size: {param} MB")
            chunk_size = 1048576 * param  # 1024 * 1024 bytes per MiB
            tic = time.time()
            # Bug fix: the original used key = b'' — Fernet rejects an
            # empty key at construction.  Generate a valid one instead.
            # NOTE(review): persist this key somewhere safe, otherwise
            # the encrypted output can never be decrypted.
            key = Fernet.generate_key()
            cipher_suite = Fernet(key)
            encrypted_chunks = get_model_compress_encrypt_data(
                model_path,
                cipher_suite,
                chunk_size=chunk_size,
            )
            toc = time.time()
            print(f"==>> encrypt time: {toc - tic}")
            tic = time.time()
            model_encrypted_path = f"{model_name_prefix}_{param}mb.enc"
            with open(model_encrypted_path, 'wb') as fw:
                # Length-prefixed framing: each chunk is preceded by its
                # size as a little-endian unsigned 32-bit int, so the
                # reader can re-split the stream.
                for chunk in encrypted_chunks:
                    fw.write(pack('<I', len(chunk)))
                    fw.write(chunk)
            toc = time.time()
            print(f"==>> write time: {toc - tic}")
# NOTE(review): a scraped-article footer ("truncated for length, click
# to see the full content") was removed here — the source may be an
# incomplete excerpt of the original post.