From 6f7692586c8fe9faa22881d66b7593dc1eae76ee Mon Sep 17 00:00:00 2001
From: nabeel
Date: Sat, 3 May 2025 06:18:44 +0000
Subject: [PATCH] /

---
 test.py | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 185 insertions(+)
 create mode 100644 test.py

diff --git a/test.py b/test.py
new file mode 100644
index 0000000..e1735d4
--- /dev/null
+++ b/test.py
@@ -0,0 +1,185 @@
+import xml.etree.ElementTree as ET
+from collections import defaultdict
+import base64
+import csv
+import gzip
+import os
+
+# Load the ServiceNow XML export
+tree = ET.parse('sc_req_item (short_descriptionSTARTSWITHsolution security).xml')
+root = tree.getroot()
+
+# Output directory for attachments
+os.makedirs("attachments", exist_ok=True)
+
+# Data containers
+ritms = {}
+tasks = defaultdict(list)
+attachments = defaultdict(list)
+attachment_docs = defaultdict(list)
+
+# Parse RITMs
+for item in root.findall('sc_req_item'):
+    ritm_number = item.findtext('number')
+    sys_id = item.findtext('sys_id')
+    short_desc = item.findtext('short_description')
+    opened_by = item.find('opened_by').attrib.get('display_value', '')
+    requested_for = item.find('requested_for').attrib.get('display_value', '')
+
+    ritms[sys_id] = {
+        'ritm_number': ritm_number,
+        'short_description': short_desc,
+        'opened_by': opened_by,
+        'requested_for': requested_for,
+    }
+
+# Parse journal fields (comments / work notes) attached to each RITM
+for journal in root.findall('sys_journal_field'):
+    element_id = journal.findtext('element_id')
+    element = journal.findtext('element')
+    value = journal.findtext('value')
+    if element_id in ritms and value:
+        tasks[element_id].append({
+            'type': element,
+            'value': value.strip()
+        })
+
+# Parse attachment metadata
+attachment_info = {}
+for attach in root.findall('sys_attachment'):
+    sys_id = attach.findtext('sys_id')
+    table_sys_id = attach.findtext('table_sys_id')
+    file_name = attach.findtext('file_name')
+    content_type = attach.findtext('content_type')
+    size = attach.findtext('size_bytes')
+
+    attachment_info[sys_id] = {
+        'file_name': file_name,
+        'content_type': content_type,
+        'size_bytes': size,
+        'table_sys_id': table_sys_id
+    }
+    attachments[table_sys_id].append(sys_id)
+
+# Parse attachment data chunks, keyed by the owning attachment's sys_id
+for doc in root.findall('sys_attachment_doc'):
+    # The owning attachment is referenced by the sys_id attribute on <sys_attachment>
+    attachment_elem = doc.find('sys_attachment')
+    if attachment_elem is not None:
+        attachment_sys_id = attachment_elem.attrib.get('sys_id')
+        position = doc.findtext('position')
+        data = doc.findtext('data')
+        if attachment_sys_id and data:
+            attachment_docs[attachment_sys_id].append((int(position), data))
+
+# Save attachment files, one subfolder per RITM
+for attach_sys_id, info in attachment_info.items():
+    chunks = attachment_docs.get(attach_sys_id, [])
+    if not chunks:
+        continue
+
+    # Sort chunks by position, then base64-decode each chunk and concatenate
+    chunks.sort(key=lambda entry: entry[0])
+    raw_data = b''.join(base64.b64decode(data) for _, data in chunks)
+
+    # Attempt to decompress if the payload is gzipped
+    try:
+        raw_data = gzip.decompress(raw_data)
+    except OSError:
+        pass  # Not gzipped
+
+    # Create a subfolder named after the RITM number
+    ritm = ritms.get(info['table_sys_id'])
+    if ritm is None:
+        continue  # Attachment does not belong to a RITM in this export
+    output_dir = os.path.join("attachments", ritm['ritm_number'])
+    os.makedirs(output_dir, exist_ok=True)
+
+    output_path = os.path.join(output_dir, info['file_name'])
+    with open(output_path, 'wb') as f:
+        f.write(raw_data)
+
+    print(f"Saved attachment: {output_path}")
+
+# Combine everything into one RITM data structure
+ritm_data = []
+for sys_id, ritm in ritms.items():
+    ritm['tasks'] = tasks.get(sys_id, [])
+    ritm['attachments'] = [attachment_info[a] for a in attachments.get(sys_id, [])]
+    ritm_data.append(ritm)
+
+# Print results
+for r in ritm_data:
+    print(f"\nRITM: {r['ritm_number']}")
+    print(f"  Description: {r['short_description']}")
+    print(f"  Opened By: {r['opened_by']}")
+    print(f"  Requested For: {r['requested_for']}")
+    print("  Tasks / Notes:")
+    for t in r['tasks']:
+        print(f"    - [{t['type']}] {t['value']}")
+    print("  Attachments:")
+    for a in r['attachments']:
+        print(f"    - {a['file_name']} ({a['content_type']}, {a['size_bytes']} bytes)")
+
+# Write RITMs to CSV
+with open('ritms.csv', 'w', newline='') as csvfile:
+    fieldnames = ['ritm_number', 'short_description', 'opened_by', 'requested_for']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+    for r in ritm_data:
+        writer.writerow({
+            'ritm_number': r['ritm_number'],
+            'short_description': r['short_description'],
+            'opened_by': r['opened_by'],
+            'requested_for': r['requested_for']
+        })
+
+# Write attachments to CSV
+with open('attachments.csv', 'w', newline='') as csvfile:
+    fieldnames = ['ritm_number', 'file_name', 'content_type', 'size_bytes']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+    for r in ritm_data:
+        for a in r['attachments']:
+            writer.writerow({
+                'ritm_number': r['ritm_number'],
+                'file_name': a['file_name'],
+                'content_type': a['content_type'],
+                'size_bytes': a['size_bytes']
+            })
+
+# Write tasks / journal entries to CSV
+with open('tasks.csv', 'w', newline='') as csvfile:
+    fieldnames = ['ritm_number', 'type', 'value']
+    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
+
+    writer.writeheader()
+    for r in ritm_data:
+        for t in r['tasks']:
+            writer.writerow({
+                'ritm_number': r['ritm_number'],
+                'type': t['type'],
+                'value': t['value']
+            })
+
+print("CSV files generated successfully.")