Upload files to "/"

This commit is contained in:
2025-02-27 11:41:00 +00:00
commit 5b7927ef2c
2 changed files with 964 additions and 0 deletions

101
extract_attachments.py Normal file
View File

@@ -0,0 +1,101 @@
import base64
from lxml import etree
import gzip
import shutil, os, io
def write_file_in_new_directory(directory, file_name, content):
    """Write *content* to *file_name* inside *directory*.

    The directory (including missing parents) is created when it does not
    exist yet. The file is opened in binary mode, so *content* must be a
    bytes-like object; an existing file of the same name is overwritten.
    A confirmation line is printed once the write has finished.
    """
    # exist_ok=True makes repeated calls for the same directory harmless.
    os.makedirs(directory, exist_ok=True)

    destination = os.path.join(directory, file_name)
    with open(destination, 'wb') as handle:
        handle.write(content)

    print(f"File '{file_name}' has been written in the directory '{directory}'")
def write_gunzip_bytestream_to_file(bytestream, output_directory, output_file):
    """Decompress a GZIP *bytestream* and store the result on disk.

    The decompressed bytes are written to *output_file* inside
    *output_directory*, which is created (with parents) when missing.
    Decompression is completed in memory before the target file is opened,
    so a stream that fails to decompress raises without creating the file.
    A confirmation line with the final path is printed on success.
    """
    os.makedirs(output_directory, exist_ok=True)
    target_path = os.path.join(output_directory, output_file)

    # Wrap the raw bytes in a file-like object so GzipFile can read from them.
    compressed_buffer = io.BytesIO(bytestream)
    with gzip.GzipFile(fileobj=compressed_buffer, mode='rb') as unzipper:
        payload = unzipper.read()

    with open(target_path, 'wb') as sink:
        sink.write(payload)

    print(f"Decompressed data has been written to '{target_path}'")
def _incident_number_for(attachment):
    """Return the <number> text of the nearest preceding <incident> sibling.

    Returns None when no <incident> element precedes *attachment* among its
    siblings, or when that incident has no <number> child.
    """
    # itersiblings(preceding=True) walks backwards, so the first match is the
    # closest <incident> before this attachment.
    incident = next(
        (sib for sib in attachment.itersiblings(preceding=True)
         if sib.tag == 'incident'),
        None,
    )
    if incident is None:
        return None
    number = incident.find('number')
    return None if number is None else number.text


def _decode_chunks(chunks):
    """Return the concatenated base64-decoded payload of *chunks*.

    Chunks are joined in the order given. An empty <data> element contributes
    no bytes. Raises ValueError when a chunk has no <data> child or its
    content is not valid base64, so a damaged attachment is never written
    in truncated form.
    """
    parts = []
    for chunk in chunks:
        data_element = chunk.find('data')
        if data_element is None:
            raise ValueError("chunk has no <data> element")
        parts.append(base64.b64decode(data_element.text or ""))
    # join once instead of repeated bytes += (which re-copies the buffer)
    return b''.join(parts)


# Parse the XML file
tree = etree.parse('incident.xml')
root = tree.getroot()

# Extract and save attachments
for attachment in root.findall(".//sys_attachment"):
    file_name_element = attachment.find('file_name')
    if file_name_element is None:
        # The search above also matches the <sys_attachment> reference
        # children of <sys_attachment_doc> (see the chunk lookup below);
        # presumably those are what this guard filters out.
        continue

    # The file name comes from the XML and is used as a path component:
    # keep only its final component so it cannot escape the incident folder.
    file_name = os.path.basename(file_name_element.text or "")
    if not file_name:
        print("Skipping attachment with an empty file name")
        continue

    sys_id_element = attachment.find('sys_id')
    sys_id = None if sys_id_element is None else sys_id_element.text
    if not sys_id:
        print(f"Skipping attachment without sys_id: {file_name}")
        continue

    incident = _incident_number_for(attachment)
    if not incident:
        # Without an incident number there is no output directory to use;
        # skip this attachment instead of aborting the whole run.
        print(f"No incident found for attachment: {file_name}, sys_id: {sys_id}")
        continue

    print(f"Processing attachment: {file_name}, sys_id: {sys_id} for incident: {incident}")

    # Find the corresponding attachment data chunks
    # NOTE(review): chunks are concatenated in document order - confirm the
    # export always lists them in the order they must be reassembled.
    attachment_data_chunks = root.findall(f".//sys_attachment_doc[sys_attachment='{sys_id}']")
    if not attachment_data_chunks:
        print(f"No data chunks found for sys_id: {sys_id}")
        continue

    try:
        decoded_data = _decode_chunks(attachment_data_chunks)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad base64 padding) is a ValueError subclass.
        print(f"Error decoding data for sys_id: {sys_id}, Error: {e}")
        continue

    # The combined payload is a GZIP stream; decompress it into a directory
    # named after the incident number.
    try:
        write_gunzip_bytestream_to_file(decoded_data, incident, file_name)
    except Exception as e:
        # Top-level best-effort boundary: report the failure and move on to
        # the next attachment.
        print(f"Error saving attachment: {file_name}, Error: {e}")

print("All attachments processed.")