102 lines
3.6 KiB
Python
102 lines
3.6 KiB
Python
import base64
|
|
from lxml import etree
|
|
import gzip
|
|
import shutil, os, io
|
|
|
|
|
|
def write_file_in_new_directory(directory, file_name, content):
    """Store binary *content* as *file_name* inside *directory*.

    Args:
        directory: Folder that receives the file; it is created (including
            any missing parents) when it does not exist yet.
        file_name: Name of the file to create or overwrite in *directory*.
        content: Bytes-like payload written verbatim (the file is opened
            in binary mode).

    Returns:
        None. A confirmation line is printed to stdout.
    """
    # exist_ok=True makes repeated calls for the same directory harmless.
    os.makedirs(directory, exist_ok=True)

    destination = os.path.join(directory, file_name)
    with open(destination, 'wb') as out_stream:
        out_stream.write(content)

    print(f"File '{file_name}' has been written in the directory '{directory}'")
|
|
|
|
def write_gunzip_bytestream_to_file(bytestream, output_directory, output_file):
    """Gunzip an in-memory byte string and save the result to disk.

    Args:
        bytestream: GZIP-compressed data held in memory as bytes.
        output_directory: Folder that receives the file; created when it
            does not exist yet.
        output_file: Name of the file to create or overwrite inside
            *output_directory*.

    Returns:
        None. A confirmation line with the final path is printed to stdout.
    """
    os.makedirs(output_directory, exist_ok=True)
    destination = os.path.join(output_directory, output_file)

    # Decompress completely before opening the destination, so a corrupt
    # stream raises before any output file is created.
    compressed_buffer = io.BytesIO(bytestream)
    with gzip.GzipFile(fileobj=compressed_buffer, mode='rb') as archive:
        plain_bytes = archive.read()

    with open(destination, 'wb') as out_stream:
        out_stream.write(plain_bytes)

    print(f"Decompressed data has been written to '{destination}'")
|
|
|
|
# Extract every attachment stored in 'incident.xml'.
#
# For each <sys_attachment> element the script looks up the closest preceding
# <incident> sibling to obtain the incident number, gathers the
# <sys_attachment_doc> chunks that reference the attachment's sys_id,
# base64-decodes and concatenates them, and writes the gunzipped result to
# '<incident number>/<file_name>'. Attachments that cannot be processed are
# reported on stdout and skipped so one bad record does not abort the run.

# Parse the XML file
tree = etree.parse('incident.xml')
root = tree.getroot()

# Index the data chunks by the sys_id of the attachment they belong to.
# A single pass over the tree replaces a full-tree search per attachment and
# avoids interpolating sys_id into a path expression.
# NOTE(review): chunks are concatenated in document order; if the export
# carries an explicit ordering field on sys_attachment_doc, confirm that it
# agrees with document order.
chunks_by_attachment = {}
for chunk in root.iterfind(".//sys_attachment_doc"):
    chunks_by_attachment.setdefault(chunk.findtext('sys_attachment'), []).append(chunk)

# Extract and save attachments
for attachment in root.findall(".//sys_attachment"):
    file_name_element = attachment.find('file_name')
    if file_name_element is None:
        continue
    file_name = file_name_element.text

    # findtext() yields None for a missing element instead of raising.
    sys_id = attachment.findtext('sys_id')
    if not sys_id:
        print(f"Skipping attachment: {file_name}, Error: no sys_id found")
        continue

    # The owning incident is the nearest <incident> sibling that precedes
    # this attachment in the document.
    incident_element = next(
        (
            sibling
            for sibling in attachment.itersiblings(preceding=True)
            if sibling.tag == 'incident'
        ),
        None,
    )
    incident = None
    if incident_element is not None:
        incident = incident_element.findtext('number')
    if not incident:
        # Without an incident number there is no target directory.
        print(f"Skipping attachment: {file_name}, sys_id: {sys_id}, Error: no preceding incident number found")
        continue

    print(f"Processing attachment: {file_name}, sys_id: {sys_id} for incident: {incident}")

    # Find the corresponding attachment data chunks
    attachment_data_chunks = chunks_by_attachment.get(sys_id, [])
    if not attachment_data_chunks:
        print(f"No data chunks found for sys_id: {sys_id}")
        continue

    # Each chunk is base64-decoded on its own and the results are joined in
    # one go. A chunk that is missing its <data> element or fails to decode
    # invalidates the whole attachment: dropping it would leave a corrupted
    # gzip stream, so the attachment is skipped instead.
    try:
        decoded_data = b"".join(
            base64.b64decode(data_chunk.findtext('data'))
            for data_chunk in attachment_data_chunks
        )
    except (ValueError, TypeError) as e:
        print(f"Error decoding data for sys_id: {sys_id}, Error: {e}")
        continue

    try:
        write_gunzip_bytestream_to_file(decoded_data, incident, file_name)
    except Exception as e:
        # Best-effort boundary: report the failure (bad gzip data, invalid
        # file name, I/O error, ...) and carry on with the next attachment.
        print(f"Error saving attachment: {file_name}, Error: {e}")

print("All attachments processed.")
|