"""Extract gzip-compressed, base64-encoded attachments from ``incident.xml``.

For every ``sys_attachment`` record in the export, the script finds the
closest preceding ``incident`` sibling, collects the ``sys_attachment_doc``
chunks that reference the attachment's ``sys_id``, base64-decodes and
concatenates them, gunzips the result and writes it to
``<incident number>/<file name>``.
"""

import base64
import gzip
import io
import os
import shutil  # not used below; kept because existing imports are never removed

from lxml import etree


def write_file_in_new_directory(directory, file_name, content):
    """Write ``content`` (bytes) to ``directory/file_name``.

    Args:
        directory: Target directory; created (with parents) if missing.
        file_name: Name of the file inside ``directory``.
        content: Bytes written verbatim; an existing file is overwritten.

    Raises:
        OSError: If the directory cannot be created or the file not written.
    """
    os.makedirs(directory, exist_ok=True)
    file_path = os.path.join(directory, file_name)

    with open(file_path, 'wb') as file:
        file.write(content)

    print(f"File '{file_name}' has been written in the directory '{directory}'")


def write_gunzip_bytestream_to_file(bytestream, output_directory, output_file):
    """Gunzip ``bytestream`` and write the result to a file.

    Args:
        bytestream: Bytes holding a complete gzip stream.
        output_directory: Target directory; created (with parents) if missing.
        output_file: Name of the file inside ``output_directory``.

    Raises:
        OSError, EOFError: If ``bytestream`` is not a valid, complete gzip
            stream, or if the directory/file cannot be written.
    """
    # Decompress before touching the filesystem so that a corrupt stream does
    # not leave an empty output directory behind.
    with gzip.GzipFile(fileobj=io.BytesIO(bytestream), mode='rb') as gzip_file:
        decompressed_data = gzip_file.read()

    os.makedirs(output_directory, exist_ok=True)
    file_path = os.path.join(output_directory, output_file)

    with open(file_path, 'wb') as file:
        file.write(decompressed_data)

    print(f"Decompressed data has been written to '{file_path}'")


def _find_incident_number(attachment):
    """Return the ``number`` text of the nearest preceding ``incident`` sibling.

    Returns ``None`` when there is no preceding ``incident`` sibling or when
    that sibling has no ``number`` child.
    """
    # itersiblings(preceding=True) walks backwards from the attachment, so the
    # first match is the closest incident.
    for sibling in attachment.itersiblings(preceding=True):
        if sibling.tag == 'incident':
            return sibling.findtext('number')
    return None


def _decode_chunks(chunks, sys_id):
    """Base64-decode every chunk's ``data`` text and concatenate the results.

    Returns the combined bytes, or ``None`` as soon as one chunk cannot be
    decoded: a partial concatenation would not be a valid gzip stream, so
    there is no point in continuing with the remaining chunks.
    """
    # NOTE(review): chunks are joined in document order. If the export carries
    # an explicit ordering field per chunk, confirm document order matches it.
    parts = []
    for chunk in chunks:
        try:
            # findtext() yields None for a missing <data> element, which makes
            # b64decode raise TypeError; malformed base64 raises ValueError
            # (binascii.Error is a ValueError subclass).
            parts.append(base64.b64decode(chunk.findtext('data')))
        except (ValueError, TypeError) as e:
            print(f"Error decoding data for sys_id: {sys_id}, Error: {e}")
            return None
    return b''.join(parts)


def extract_attachments(xml_path='incident.xml'):
    """Extract every attachment found in the XML export at ``xml_path``.

    Attachments that cannot be processed (missing metadata, no data chunks,
    undecodable or corrupt data) are reported on stdout and skipped; they do
    not abort the run.

    Raises:
        OSError, lxml.etree.XMLSyntaxError: If ``xml_path`` cannot be read or
            parsed.
    """
    root = etree.parse(xml_path).getroot()

    for attachment in root.findall(".//sys_attachment"):
        # The XPath above also matches the <sys_attachment> reference child
        # inside each <sys_attachment_doc>; those have no <file_name> and are
        # skipped silently.
        file_name_element = attachment.find('file_name')
        if file_name_element is None:
            continue

        # The name comes from the XML, so strip any directory components to
        # keep the output inside the incident's directory.
        file_name = os.path.basename(file_name_element.text or '')
        if file_name in ('', '.', '..'):
            print(f"Skipping attachment with unusable file name: {file_name_element.text!r}")
            continue

        sys_id = attachment.findtext('sys_id')
        if not sys_id:
            print(f"Skipping attachment: {file_name}, no sys_id found")
            continue

        incident = _find_incident_number(attachment)
        if not incident:
            print(f"Skipping attachment: {file_name}, sys_id: {sys_id}, no preceding incident number found")
            continue

        print(f"Processing attachment: {file_name}, sys_id: {sys_id} for incident: {incident}")

        # Find the data chunks that reference this attachment.
        attachment_data_chunks = root.findall(f".//sys_attachment_doc[sys_attachment='{sys_id}']")
        if not attachment_data_chunks:
            print(f"No data chunks found for sys_id: {sys_id}")
            continue

        decoded_data = _decode_chunks(attachment_data_chunks, sys_id)
        if decoded_data is None:
            continue

        # Best-effort per attachment: report the failure and move on.
        try:
            write_gunzip_bytestream_to_file(decoded_data, incident, file_name)
        except Exception as e:
            print(f"Error saving attachment: {file_name}, Error: {e}")


def main():
    """Process ``incident.xml`` from the current working directory."""
    extract_attachments('incident.xml')
    print("All attachments processed.")


if __name__ == "__main__":
    main()