zip

# 使用 zipfile 解压含有中文文件名的 zip 文件

参考：【Python】使用 zipfile 解压含有中文文件名的 zip 文件 —— secsilm 的博客（CSDN 博客）
# Stream the archive member `file` into a new file named `new_zip_file` under
# `file_father_dir` (all three names come from the surrounding extract_zip code).
with open(os.path.join(file_father_dir, new_zip_file), 'wb')as output_file:  # create and open the destination file
    with zip_file.open(file, 'r')as origin_file:  # open the archived member for reading
        shutil.copyfileobj(origin_file, output_file)# copy the member's bytes into the destination file
def extract_zip(file_path, file_father_dir, thread_queue):
    """Extract a zip archive, repairing legacy-encoded (e.g. GBK) member names.

    Each member is streamed to a file whose name has been re-decoded, instead
    of extracting under the mis-decoded name and renaming afterwards.

    Args:
        file_path: Path of the zip archive; it is deleted after a fully
            successful extraction.
        file_father_dir: Directory the members are written into.
        thread_queue: Queue that receives ``my_response`` progress messages
            (``progress_percent`` runs from 0 to 10 during extraction).

    Returns:
        The decoded names of the members that were written. If an error
        occurs it is logged and the names processed so far are returned.
    """
    new_zip_list = []
    try:
        extract_root = os.path.realpath(file_father_dir)
        # Trailing separator so that "/data/x" is not treated as inside "/data/xy".
        root_prefix = os.path.join(extract_root, '')
        with zipfile.ZipFile(file_path) as zip_file:
            members = zip_file.infolist()
            files_count = len(members)
            finished_count = 0
            percent = 0
            for info in members:
                new_zip_file = _decode_member_name(info)
                target = os.path.realpath(os.path.join(extract_root, new_zip_file))
                if target != extract_root and not target.startswith(root_prefix):
                    # Archives are user uploads: never write outside the target directory.
                    logging.error('skip zip member outside target directory: %s', new_zip_file)
                elif info.is_dir():
                    os.makedirs(target, exist_ok=True)
                    new_zip_list.append(new_zip_file)
                else:
                    # Archives often omit explicit directory entries.
                    os.makedirs(os.path.dirname(target), exist_ok=True)
                    with zip_file.open(info, 'r') as origin_file, open(target, 'wb') as output_file:
                        shutil.copyfileobj(origin_file, output_file)
                    new_zip_list.append(new_zip_file)

                finished_count += 1
                # Extraction accounts for the first 10% of overall progress;
                # only report when the rounded-down value changes.
                current = finished_count / files_count * 100 // 10
                if percent != current:
                    percent = current
                    thread_queue.put(my_response(data={'progress_percent': percent}))
        thread_queue.put(my_response(data={'progress_percent': 10}))
        os.remove(file_path)
    except Exception as e:
        logging.exception(e)
    return new_zip_list


def _decode_member_name(info):
    """Return the member name of a ``ZipInfo`` with legacy encodings repaired."""
    name = info.filename
    # Flag bit 11 marks the stored name as UTF-8; zipfile already decoded it correctly.
    if info.flag_bits & 0x800:
        return name
    try:
        # zipfile decodes unflagged names as cp437; undo that to get the raw bytes.
        raw = name.encode('cp437')
    except UnicodeEncodeError:
        return name
    for encoding in ('gbk', 'utf-8'):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return name
import os
import logging

# ====================================== 本地调试时注释掉以下代码 ======================================
import shutil
import queue
import threading
import yaml

# Without CABITS_ENV_PATH we assume a local run and read the env.yml next to the repo.
DEBUG = not os.environ.get('CABITS_ENV_PATH')
if DEBUG:
    env_path = r'../env.yml'
else:  # read the environment variables from the configmap
    env_path = os.environ['CABITS_ENV_PATH']
with open(env_path, 'r', encoding='utf-8') as f:
    file_content = f.read()
# safe_load only builds plain YAML types; an empty file parses to None.
content = yaml.safe_load(file_content) or {}
for key, value in content.items():
    if value is not None:
        # NOTE(review): this logs every configured value at ERROR level, which
        # may expose secrets in the logs - confirm this is intended.
        logging.error('{} =======> {}'.format(key, value))
        # os.environ only accepts strings for both keys and values.
        os.environ[str(key)] = str(value)
# ===================================
import re
import zipfile
import tarfile
import rarfile
import py7zr
from flask import Flask, request
from flask_socketio import SocketIO, emit

from common.response import my_response
from common import stat
from common.utils import get_file_md5, get_file_suffix, get_file_size
from libs.S3_client import s3_client

# Flask application and the Socket.IO server mounted on it.
app = Flask(__name__)
app.config.update(SECRET_KEY='secret!')
socketio = SocketIO(
    app,
    cors_allowed_origins="*",
    async_mode='eventlet',
    logger=True,
    engineio_logger=True,
    path='/compressed_files',
)

@app.route('/api/v1/test', methods=['GET'])
def test() -> dict:
    """Test endpoint: always answers with a fixed success payload."""
    payload = dict(code=1000, message='测试成功')
    return payload

def extract_zip(file_path, file_father_dir, thread_queue):
    """Extract a zip archive and return the list of extracted member names.

    Member names stored in a legacy encoding (typically GBK) are re-decoded,
    and every member is written directly under its repaired name.

    Args:
        file_path: Path of the zip archive; it is deleted after a fully
            successful extraction.
        file_father_dir: Directory the members are written into.
        thread_queue: Queue that receives ``my_response`` progress messages
            (``progress_percent`` runs from 0 to 10 during extraction).

    Returns:
        The decoded names of the members that were written. Errors (including
        an unreadable archive) are logged and whatever was processed so far is
        returned.
    """
    new_zip_list = []
    try:
        extract_root = os.path.realpath(file_father_dir)
        # Trailing separator so that "/data/x" is not treated as inside "/data/xy".
        root_prefix = os.path.join(extract_root, '')
        # The context manager closes the archive even when a member fails.
        with zipfile.ZipFile(file_path) as zip_file:
            members = zip_file.infolist()
            # Total member count and completed count for progress reporting.
            files_count = len(members)
            finished_count = 0
            percent = 0
            for info in members:
                new_zip_file = _decode_zip_member_name(info)
                target = os.path.realpath(os.path.join(extract_root, new_zip_file))
                if target != extract_root and not target.startswith(root_prefix):
                    # Archives are user uploads: never write outside the target directory.
                    logging.error('skip zip member outside target directory: %s', new_zip_file)
                elif info.is_dir():
                    os.makedirs(target, exist_ok=True)
                    new_zip_list.append(new_zip_file)
                else:
                    # Archives often omit explicit directory entries.
                    os.makedirs(os.path.dirname(target), exist_ok=True)
                    with zip_file.open(info) as origin_file, open(target, 'wb') as output_file:
                        shutil.copyfileobj(origin_file, output_file)
                    new_zip_list.append(new_zip_file)
                finished_count += 1
                # Extraction is the first 10% of overall progress (the upload
                # is the remaining 90%); report only when the value changes.
                current = finished_count / files_count * 100 // 10
                if percent != current:
                    percent = current
                    thread_queue.put(my_response(data={'progress_percent': percent}))
        thread_queue.put(my_response(data={'progress_percent': 10}))
        os.remove(file_path)
    except Exception as e:
        logging.exception(e)
    return new_zip_list


def _decode_zip_member_name(info):
    """Return the member name of a ``ZipInfo`` with legacy encodings repaired."""
    name = info.filename
    # Flag bit 11 marks the stored name as UTF-8; zipfile already decoded it correctly.
    if info.flag_bits & 0x800:
        return name
    try:
        # zipfile decodes unflagged names as cp437; undo that to get the raw bytes.
        raw = name.encode('cp437')
    except UnicodeEncodeError:
        return name
    for encoding in ('gbk', 'utf-8'):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    return name

def extract_rar(file_path, file_father_dir, thread_queue):
    """Extract a rar archive and return the names of its members.

    Args:
        file_path: Path of the rar archive; it is deleted after a successful
            extraction.
        file_father_dir: Directory to extract into; created if missing.
        thread_queue: Queue that receives a ``my_response`` message with
            ``progress_percent`` 10 once extraction has finished.

    Returns:
        The member names reported by the archive, or an empty list when the
        archive could not be opened. Errors are logged, not raised.
    """
    rar_list = []
    try:
        os.makedirs(file_father_dir, exist_ok=True)
        # The context manager closes the archive even if extraction fails.
        with rarfile.RarFile(file_path) as rar_file:
            # Collect every member name before extracting.
            rar_list = rar_file.namelist()
            rar_file.extractall(file_father_dir)
        thread_queue.put(my_response(data={'progress_percent': 10}))
        os.remove(file_path)
    except Exception as e:
        logging.exception(e)
    return rar_list

def extract_7z(file_path, file_father_dir, thread_queue):
    """Extract a 7z archive and return the names of its members.

    Args:
        file_path: Path of the 7z archive; it is deleted after a successful
            extraction.
        file_father_dir: Directory to extract into.
        thread_queue: Queue that receives a ``my_response`` message with
            ``progress_percent`` 10 once extraction has finished.

    Returns:
        The member names reported by the archive, or an empty list when the
        archive could not be opened. Errors are logged, not raised.
    """
    sevenz_list = []
    try:
        # The context manager closes the archive even if extraction fails.
        with py7zr.SevenZipFile(file_path) as sevenz_file:
            sevenz_list = sevenz_file.getnames()
            sevenz_file.extractall(file_father_dir)
        thread_queue.put(my_response(data={'progress_percent': 10}))
        os.remove(file_path)
    except Exception as e:
        logging.exception(e)
    return sevenz_list

def extract_tar(file_path, file_father_dir, thread_queue):
    """Extract a tar archive and return the names of the extracted members.

    Args:
        file_path: Path of the tar archive; it is deleted after a successful
            extraction.
        file_father_dir: Directory to extract into.
        thread_queue: Queue that receives ``my_response`` progress messages
            (``progress_percent`` runs from 0 to 10 during extraction).

    Returns:
        The names of the members selected for extraction (members whose path
        would leave ``file_father_dir`` are skipped), or an empty list when
        the archive could not be opened. Errors are logged, not raised.
    """
    tar_list = []
    try:
        extract_root = os.path.realpath(file_father_dir)
        # Trailing separator so that "/data/x" is not treated as inside "/data/xy".
        root_prefix = os.path.join(extract_root, '')
        # The context manager closes the archive even if a member fails.
        with tarfile.open(file_path) as tar_file:
            # Archives are user uploads: drop members whose name resolves
            # outside the target directory.
            # NOTE(review): link members are not inspected here - confirm
            # whether symlinks/hardlinks inside uploads must be rejected too.
            safe_members = []
            for member in tar_file.getmembers():
                target = os.path.realpath(os.path.join(extract_root, member.name))
                if target == extract_root or target.startswith(root_prefix):
                    safe_members.append(member)
                else:
                    logging.error('skip tar member outside target directory: %s', member.name)
            tar_list = [member.name for member in safe_members]
            # Total member count and completed count for progress reporting.
            files_count = len(safe_members)
            finished_count = 0
            percent = 0
            for member in safe_members:
                tar_file.extract(member, path=file_father_dir)
                finished_count += 1
                # Extraction is the first 10% of overall progress; report
                # only when the rounded-down value changes.
                current = finished_count / files_count * 100 // 10
                if percent != current:
                    percent = current
                    thread_queue.put(my_response(data={'progress_percent': percent}))
        thread_queue.put(my_response(data={'progress_percent': 10}))
        os.remove(file_path)
    except Exception as e:
        logging.exception(e)
    return tar_list

def filter_file_list(file_list, resource_type, thread_queue):
    """Keep only the files whose extension is valid for the resource type.

    Args:
        file_list: Iterable of file names (relative paths are fine).
        resource_type: ``'image'`` (png/jpg/jpeg/bmp) or ``'label'``
            (txt/xml/json).
        thread_queue: Queue that receives an error ``my_response`` when the
            resource type is not supported; unused otherwise.

    Returns:
        The matching file names in their original order, or ``False`` when
        ``resource_type`` is not supported.
    """
    # In a raw string a single backslash is enough to escape the dot; the
    # previous ``\\.`` demanded a literal backslash in the file name, so
    # ordinary names such as "a.png" never matched.
    suffix_patterns = {
        'image': r'.*\.(png|jpg|jpeg|bmp)$',
        'label': r'.*\.(txt|xml|json)$',
    }
    pattern = suffix_patterns.get(resource_type)
    if pattern is None:
        logging.error('暂不支持该格式上传')
        thread_queue.put(my_response(code=stat.ServerErr, message='暂不支持该格式的上传'))
        return False

    return [file for file in file_list if re.match(pattern, file)]

def upload_minio(file_father_dir, file_list, workspace_id, resource_type, thread_queue, host):
    """Upload the extracted files to MinIO and report progress and results.

    Args:
        file_father_dir: Directory the archive was extracted into.
        file_list: Names of the extracted files, relative to
            ``file_father_dir``; filtered by ``filter_file_list`` first.
        workspace_id: First component of the object name.
        resource_type: Second component of the object name; also selects the
            allowed file extensions.
        thread_queue: Queue that receives ``my_response`` progress messages
            (10 to 100) followed by the final ``results`` message.
        host: Not used by this function.
    """
    # Filter the file list down to the supported extensions.
    file_list = filter_file_list(file_list, resource_type, thread_queue)
    if file_list is False:
        # Unsupported resource type: filter_file_list already queued the error.
        return

    total_count = len(file_list)
    logging.error(workspace_id)

    if total_count == 0:
        thread_queue.put(my_response(data={'progress_percent': 100}))
        thread_queue.put(my_response(data={'results': [], 'count': total_count}))
        return

    # Upload progress continues from the 10% consumed by extraction.
    finished_count = 0
    percent = 10
    upload_response = []
    for file in file_list:
        local_path = os.path.join(file_father_dir, file)
        # Collect md5, suffix, size and display name of the file.
        file_info = dict()
        file_info['md5'] = get_file_md5(local_path)
        file_info['suffix'] = get_file_suffix(file)
        file_info['size'] = get_file_size(local_path)
        file_info['name'] = os.path.split(file)[1].rsplit('.', 1)[0]
        stored_name = f'{file_info["md5"]}.{file_info["suffix"]}'
        object_name = '/'.join([workspace_id, resource_type, stored_name])

        # Upload only when the object is not already stored.
        if not s3_client.object_exists(obj_name=object_name):
            s3_client.upload_file(object_name, local_path)
        # Remove the local copy once it is available remotely.
        os.remove(local_path)
        # NOTE(review): the literal "image" is passed even for 'label'
        # resources - confirm against s3_client.get_download_url.
        file_info['download_url'] = '/' + s3_client.get_download_url(workspace_id, stored_name, "image")

        # Notify the client only when the integer percentage changes.
        finished_count += 1
        current = int(finished_count / total_count * 100 * 0.9) + 10
        if percent != current:
            percent = current
            thread_queue.put(my_response(data={'progress_percent': percent}))
        upload_response.append(file_info)
    thread_queue.put(my_response(data={'results': upload_response, 'count': total_count}))

@socketio.on('connect', namespace='/compressed_files')
def connect():
    """Acknowledge a new client connection on the /compressed_files namespace."""
    greeting = my_response(code=stat.OK, message='连接成功')
    emit('response', greeting)

@socketio.on('message', namespace='/compressed_files')
def handle_json(data):
    """Start extraction and upload in a worker thread and relay its messages.

    Args:
        data: Mapping sent by the client; the keys ``suffix``, ``md5``,
            ``workspace_id`` and ``type`` are read from it.

    The worker (``loop``) pushes responses onto a queue. This handler forwards
    them to the client as ``response`` events until it sees the ``'exit'``
    sentinel or the worker has died with nothing left to forward.
    """
    emit('response', my_response(message='开始解压'))
    suffix = data.get('suffix')
    md5 = data.get('md5')
    workspace_id = data.get('workspace_id')
    resource_type = data.get('type')

    thread_queue = queue.Queue()
    thread = threading.Thread(target=loop, args=(thread_queue, suffix, md5, workspace_id, resource_type, request.host),
                              daemon=True)
    thread.start()
    while True:
        # Sample liveness BEFORE polling: a worker that finishes in between
        # still gets its last messages drained on the next pass.
        worker_alive = thread.is_alive()
        try:
            response = thread_queue.get_nowait()
        except queue.Empty:
            if not worker_alive:
                # Worker is gone without sending 'exit'; do not spin forever.
                break
            socketio.sleep(1)
            continue
        if response == 'exit':
            break
        if response is not None:
            emit('response', response, namespace='/compressed_files')
        # Yield to the server between messages without the 1s delay, so a
        # backlog of progress updates is delivered promptly.
        socketio.sleep(0)

def loop(thread_queue, suffix, md5, workspace_id, resource_type, host):
    """Worker entry point: download, extract and upload one archive.

    Args:
        thread_queue: Queue used to send ``my_response`` messages to the
            socket handler; the ``'exit'`` sentinel is always queued last.
        suffix: Archive extension (``zip``, ``jar``, ``rar``, ``7z``, ``tar``).
        md5: Identifier of the archive; used in the object name and in the
            local paths.
        workspace_id: First component of the object name.
        resource_type: ``'image'`` or ``'label'``.
        host: Forwarded to ``upload_minio``.

    Returns:
        ``False`` when the job was rejected or failed, ``None`` otherwise.
    """
    try:
        return _extract_and_upload(thread_queue, suffix, md5, workspace_id, resource_type, host)
    except Exception as e:
        # Without this the socket handler would never learn that the job died.
        logging.exception(e)
        thread_queue.put(my_response(message='解压失败', code=stat.ServerErr))
        return False
    finally:
        # Always release the socket handler, whatever happened above.
        thread_queue.put('exit')


def _extract_and_upload(thread_queue, suffix, md5, workspace_id, resource_type, host):
    """Run the download / extract / upload pipeline; errors propagate to ``loop``."""
    # md5 and suffix come from the client and are interpolated into local
    # paths (one of which is removed recursively), so reject anything that
    # could contain a path separator or "..".
    for part in (md5, suffix):
        if not isinstance(part, str) or not re.fullmatch(r'[0-9A-Za-z_-]+', part):
            thread_queue.put(my_response(code=stat.ServerErr, message='文件不存在，无法解压'))
            return False

    # Local path of the archive and the directory it is extracted into.
    file_path = f'/opt/{md5}.{suffix}'
    file_father_dir = f'/update/{md5}'
    s3_client.download_file(f'{workspace_id}/{resource_type}/{md5}.{suffix}', file_path)
    # Remove leftovers of a previous extraction; a missing directory is fine.
    shutil.rmtree(file_father_dir, ignore_errors=True)

    if not os.path.exists(file_path):
        thread_queue.put(my_response(code=stat.ServerErr, message='文件不存在，无法解压'))
        return False

    # Validate the resource type.
    if resource_type != 'image' and resource_type != 'label':
        thread_queue.put(my_response(code=stat.ServerErr, message='暂不支持该格式的上传'))
        return False

    # Pick the extractor that matches the archive type (jar is a zip).
    extractors = {
        'zip': extract_zip,
        'jar': extract_zip,
        'rar': extract_rar,
        '7z': extract_7z,
        'tar': extract_tar,
    }
    extractor = extractors.get(suffix)
    if extractor is None:
        thread_queue.put(my_response(message='解压失败', code=stat.ServerErr))
        return False
    file_list = extractor(file_path, file_father_dir, thread_queue)

    # Upload the extracted files; upload_minio queues the per-file results.
    upload_minio(file_father_dir, file_list, workspace_id, resource_type, thread_queue, host)

# Script entry point: serve the app through Socket.IO on all interfaces, port 8000.
# NOTE(review): debug=True combined with host='0.0.0.0' is risky outside local
# development - confirm this entry point is not used in production.
if __name__ == '__main__':
    socketio.run(app, host='0.0.0.0', port=8000, debug=True)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
编辑
← 10.re pymysql→