cleanup_oss.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. #!/usr/bin/env python3
  2. """
  3. 清理 OSS 存储桶中的旧文件
  4. """
  5. import boto3
  6. from botocore.config import Config
  7. import sys
  8. # OSS 配置
  9. OSS_CONFIG = {
  10. 'endpoint_url': 'http://58.213.48.57:15900',
  11. 'access_key_id': 'wvp',
  12. 'secret_access_key': '6MnZFxZxRwbvS01khA9ldiawJuc9mytyiq2kEv3k',
  13. 'bucket_name': 'wvp',
  14. 'path_prefix': 'device',
  15. }
  16. def get_s3_client():
  17. """创建 S3 客户端"""
  18. config = Config(
  19. s3={'addressing_style': 'path'},
  20. signature_version='s3v4'
  21. )
  22. return boto3.client(
  23. 's3',
  24. endpoint_url=OSS_CONFIG['endpoint_url'],
  25. aws_access_key_id=OSS_CONFIG['access_key_id'],
  26. aws_secret_access_key=OSS_CONFIG['secret_access_key'],
  27. region_name='us-east-1',
  28. config=config
  29. )
  30. def list_objects(s3_client, prefix=''):
  31. """列出对象"""
  32. objects = []
  33. continuation_token = None
  34. while True:
  35. params = {
  36. 'Bucket': OSS_CONFIG['bucket_name'],
  37. 'Prefix': prefix
  38. }
  39. if continuation_token:
  40. params['ContinuationToken'] = continuation_token
  41. response = s3_client.list_objects_v2(**params)
  42. if 'Contents' in response:
  43. for obj in response['Contents']:
  44. objects.append(obj['Key'])
  45. if response.get('IsTruncated'):
  46. continuation_token = response.get('NextContinuationToken')
  47. else:
  48. break
  49. return objects
  50. def delete_objects(s3_client, keys):
  51. """删除对象"""
  52. if not keys:
  53. print("没有需要删除的对象")
  54. return 0
  55. # 批量删除(每次最多1000个)
  56. deleted = 0
  57. use_batch = True
  58. for i in range(0, len(keys), 1000):
  59. batch = keys[i:i+1000]
  60. if use_batch:
  61. try:
  62. objects_to_delete = [{'Key': key} for key in batch]
  63. s3_client.delete_objects(
  64. Bucket=OSS_CONFIG['bucket_name'],
  65. Delete={'Objects': objects_to_delete, 'Quiet': True}
  66. )
  67. deleted += len(batch)
  68. print(f"已删除 {len(batch)} 个文件 (总计 {i+len(batch)}/{len(keys)})")
  69. continue
  70. except Exception as e:
  71. print(f"批量删除不支持,降级为逐个删除: {e}")
  72. use_batch = False
  73. for key in batch:
  74. try:
  75. s3_client.delete_object(Bucket=OSS_CONFIG['bucket_name'], Key=key)
  76. deleted += 1
  77. if deleted % 100 == 0 or deleted == len(keys):
  78. print(f" 进度: {deleted}/{len(keys)}")
  79. except Exception as e2:
  80. print(f" 删除 {key} 失败: {e2}")
  81. return deleted
  82. def cleanup_old_files(days=21, auto_confirm=False):
  83. """清理旧文件
  84. Args:
  85. days: 保留最近N天的文件,删除更早的
  86. auto_confirm: 是否自动确认
  87. """
  88. import time
  89. from datetime import datetime, timedelta
  90. s3_client = get_s3_client()
  91. # 计算截止时间
  92. cutoff_time = time.time() - (days * 24 * 3600)
  93. cutoff_date = datetime.fromtimestamp(cutoff_time).strftime('%Y-%m-%d')
  94. print(f"将删除 {cutoff_date} 之前的文件...")
  95. # 列出所有对象
  96. prefix = OSS_CONFIG['path_prefix'] + '/'
  97. print(f"正在列出 {OSS_CONFIG['bucket_name']}/{prefix} 下的对象...")
  98. all_objects = list_objects(s3_client, prefix)
  99. print(f"共找到 {len(all_objects)} 个对象")
  100. if not all_objects:
  101. print("没有对象需要删除")
  102. return 0
  103. # 筛选旧文件
  104. old_objects = []
  105. for key in all_objects:
  106. # 从 key 中提取日期 (格式: device/20260520/...)
  107. parts = key.split('/')
  108. if len(parts) >= 2 and parts[1]:
  109. try:
  110. # 尝试解析日期
  111. date_str = parts[1] # 如 20260520
  112. file_date = datetime.strptime(date_str, '%Y%m%d')
  113. file_timestamp = file_date.timestamp()
  114. if file_timestamp < cutoff_time:
  115. old_objects.append(key)
  116. except:
  117. pass
  118. print(f"将删除 {len(old_objects)} 个旧文件")
  119. if old_objects:
  120. # 显示前10个
  121. print("前10个将删除的文件:")
  122. for key in old_objects[:10]:
  123. print(f" - {key}")
  124. if len(old_objects) > 10:
  125. print(f" ... 还有 {len(old_objects) - 10} 个")
  126. # 确认
  127. if not auto_confirm:
  128. confirm = input(f"\n确认删除 {len(old_objects)} 个文件? (y/n): ")
  129. if confirm.lower() != 'y':
  130. print("已取消")
  131. return 0
  132. else:
  133. print(f"\n自动确认删除 {len(old_objects)} 个文件...")
  134. deleted = delete_objects(s3_client, old_objects)
  135. print(f"\n完成! 共删除 {deleted} 个文件")
  136. return deleted
  137. else:
  138. print("没有旧文件需要删除")
  139. return 0
  140. def clear_all():
  141. """清空所有文件"""
  142. s3_client = get_s3_client()
  143. prefix = OSS_CONFIG['path_prefix'] + '/'
  144. all_objects = list_objects(s3_client, prefix)
  145. print(f"存储桶中共有 {len(all_objects)} 个对象")
  146. if not all_objects:
  147. print("没有对象需要删除")
  148. return 0
  149. # 显示前10个
  150. print("前10个对象:")
  151. for key in all_objects[:10]:
  152. print(f" - {key}")
  153. if len(all_objects) > 10:
  154. print(f" ... 还有 {len(all_objects) - 10} 个")
  155. # 确认
  156. confirm = input(f"\n警告: 将删除 ALL {len(all_objects)} 个文件! 确认? (y/n): ")
  157. if confirm.lower() != 'y':
  158. print("已取消")
  159. return 0
  160. deleted = delete_objects(s3_client, all_objects)
  161. print(f"\n完成! 共删除 {deleted} 个文件")
  162. return deleted
  163. def main():
  164. if len(sys.argv) < 2:
  165. print("用法:")
  166. print(" python cleanup_oss.py cleanup [days] [--yes] - 清理N天前的旧文件")
  167. print(" python cleanup_oss.py clear [--yes] - 清空所有文件")
  168. print(" python cleanup_oss.py list - 列出所有文件")
  169. sys.exit(1)
  170. command = sys.argv[1]
  171. auto_confirm = '--yes' in sys.argv
  172. if command == 'cleanup':
  173. days = 7
  174. for arg in sys.argv[2:]:
  175. if arg.isdigit():
  176. days = int(arg)
  177. cleanup_old_files(days, auto_confirm)
  178. elif command == 'clear':
  179. if not auto_confirm:
  180. confirm = input("确定要清空所有文件吗? 此操作不可恢复! (输入 'yes' 确认): ")
  181. if confirm != 'yes':
  182. print("已取消")
  183. return
  184. clear_all()
  185. elif command == 'list':
  186. s3_client = get_s3_client()
  187. prefix = OSS_CONFIG['path_prefix'] + '/'
  188. objects = list_objects(s3_client, prefix)
  189. print(f"共 {len(objects)} 个对象:")
  190. for obj in objects[:50]:
  191. print(f" - {obj}")
  192. if len(objects) > 50:
  193. print(f" ... 还有 {len(objects) - 50} 个")
  194. else:
  195. print(f"未知命令: {command}")
  196. sys.exit(1)
  197. if __name__ == '__main__':
  198. main()