/
This commit is contained in:
185
test.py
Normal file
185
test.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
from collections import defaultdict
|
||||||
|
import base64
|
||||||
|
import os,csv
|
||||||
|
import gzip
|
||||||
|
from io import BytesIO
|
||||||
|
|
||||||
|
# Read the exported XML document and keep a handle on its root element.
tree = ET.parse(
    'sc_req_item (short_descriptionSTARTSWITHsolution security).xml'
)
root = tree.getroot()

# Extracted attachment files are written beneath this directory.
os.makedirs("attachments", exist_ok=True)

# Lookup tables filled in by the parsing passes that follow.
ritms = {}                           # RITM sys_id -> dict of RITM fields
tasks = defaultdict(list)            # RITM sys_id -> journal entries
attachments = defaultdict(list)      # table_sys_id -> attachment sys_ids
attachment_docs = defaultdict(list)  # attachment sys_id -> base64 data chunks
|
||||||
|
|
||||||
|
# Parse RITMs: one entry per <sc_req_item> child of the root, keyed by sys_id.


def _display_value(record, tag):
    """Return the display_value attribute of child *tag*, or '' if absent.

    ``Element.find`` returns None when *record* has no such child, so the
    lookup is guarded instead of failing with AttributeError on ``.attrib``.
    """
    child = record.find(tag)
    return '' if child is None else child.attrib.get('display_value', '')


for item in root.findall('sc_req_item'):
    sys_id = item.findtext('sys_id')
    ritms[sys_id] = {
        'ritm_number': item.findtext('number'),
        'short_description': item.findtext('short_description'),
        # These two fields are read from the element's display_value
        # attribute rather than from its text.
        'opened_by': _display_value(item, 'opened_by'),
        'requested_for': _display_value(item, 'requested_for'),
    }
|
||||||
|
|
||||||
|
# Parse Journal Fields: attach each journal entry to the RITM whose sys_id
# matches the entry's element_id.
for journal in root.findall('sys_journal_field'):
    element_id = journal.findtext('element_id')
    if element_id not in ritms:
        continue  # entry refers to a record that was not parsed as a RITM

    tasks[element_id].append({
        'type': journal.findtext('element'),
        # findtext() would return None for a missing <value>; default to ''
        # so that strip() cannot fail.
        'value': journal.findtext('value', default='').strip(),
    })
|
||||||
|
|
||||||
|
# Parse Attachment Metadata: record each <sys_attachment> by its sys_id and
# remember which record (table_sys_id) it is attached to.
attachment_info = {}
for meta in root.findall('sys_attachment'):
    attachment_id = meta.findtext('sys_id')
    owner_id = meta.findtext('table_sys_id')

    attachment_info[attachment_id] = {
        'file_name': meta.findtext('file_name'),
        'content_type': meta.findtext('content_type'),
        'size_bytes': meta.findtext('size_bytes'),
        'table_sys_id': owner_id,
    }
    attachments[owner_id].append(attachment_id)
|
||||||
|
|
||||||
|
# Parse Attachment Data Chunks: collect the <data> text of every
# <sys_attachment_doc>, grouped by the attachment it belongs to.
for doc in root.findall('sys_attachment_doc'):
    # The owning attachment is identified by the sys_id attribute of the
    # <sys_attachment> child element.
    parent_ref = doc.find('sys_attachment')
    if parent_ref is None:
        continue

    parent_id = parent_ref.attrib.get('sys_id')
    payload = doc.findtext('data')
    # Chunks without an owner id or without any data are ignored.
    if parent_id and payload:
        attachment_docs[parent_id].append(payload)
|
||||||
|
|
||||||
|
|
||||||
|
# Save Attachment Files: rebuild each attachment from its data chunks and
# write it to attachments/<RITM number>/<file name>.


def _safe_component(name, fallback):
    """Return *name* reduced to a single path component, else *fallback*.

    The names come from the XML document, so directory parts are dropped
    with ``os.path.basename`` to keep every write inside the output folder.
    Empty names and the special entries '.' and '..' yield *fallback*.
    """
    leaf = os.path.basename(name or '')
    return fallback if leaf in ('', '.', '..') else leaf


# Index all chunks by attachment sys_id in a single pass, instead of
# rescanning every <sys_attachment_doc> once per attachment.
chunks_by_attachment = defaultdict(list)
for doc in root.findall('sys_attachment_doc'):
    owner = doc.find('sys_attachment')
    data = doc.findtext('data')
    if owner is None or not data:
        continue  # no owner reference, or nothing to decode
    owner_id = owner.attrib.get('sys_id')
    if owner_id:
        # A missing <position> is treated as 0; the sort below is stable,
        # so such chunks keep their document order.
        position = int(doc.findtext('position') or 0)
        chunks_by_attachment[owner_id].append((position, data))

for attach_sys_id, info in attachment_info.items():
    chunk_entries = chunks_by_attachment.get(attach_sys_id)
    if not chunk_entries:
        continue

    # Attachments whose owning record is not one of the parsed RITMs have
    # no target folder; report and skip them rather than raise KeyError.
    ritm = ritms.get(info['table_sys_id'])
    if ritm is None:
        print(f"Skipped attachment (no matching RITM): {info['file_name']}")
        continue

    # Order chunks by <position>. Each chunk is base64-decoded on its own
    # and the decoded bytes are joined, rather than decoding one
    # concatenated base64 string.
    ordered = sorted(chunk_entries, key=lambda entry: entry[0])
    raw_data = b''.join(base64.b64decode(data) for _, data in ordered)

    # Attempt to decompress if it's gzipped
    try:
        raw_data = gzip.decompress(raw_data)
    except OSError:
        pass  # Not gzipped

    # Create subfolder named after the RITM number
    ritm_number = _safe_component(
        ritm['ritm_number'], str(info['table_sys_id'])
    )
    output_dir = os.path.join("attachments", ritm_number)
    os.makedirs(output_dir, exist_ok=True)

    output_path = os.path.join(
        output_dir, _safe_component(info['file_name'], str(attach_sys_id))
    )
    with open(output_path, 'wb') as f:
        f.write(raw_data)

    print(f"Saved attachment: {output_path}")
|
||||||
|
|
||||||
|
# Combine everything into the RITM data structure: each RITM dict gains its
# journal entries under 'tasks' and its attachment metadata under
# 'attachments'. The list is built once and reused for both the console
# report below and any later output.
ritm_data = []
for sys_id, ritm in ritms.items():
    ritm['tasks'] = tasks.get(sys_id, [])
    ritm['attachments'] = [
        attachment_info[a] for a in attachments.get(sys_id, [])
    ]
    ritm_data.append(ritm)

# Print results
for r in ritm_data:
    print(f"\nRITM: {r['ritm_number']}")
    print(f" Description: {r['short_description']}")
    print(f" Opened By: {r['opened_by']}")
    print(f" Requested For: {r['requested_for']}")
    print(" Tasks / Notes:")
    for t in r['tasks']:
        print(f" - [{t['type']}] {t['value']}")
    print(" Attachments:")
    for a in r['attachments']:
        print(f" - {a['file_name']} ({a['content_type']}, {a['size_bytes']} bytes)")
|
||||||
|
|
||||||
|
# CSV export. Every file is opened with an explicit UTF-8 encoding so the
# output does not depend on the platform's locale encoding, and with
# newline='' as the csv module requires for files it writes.

# Write RITMs to CSV
with open('ritms.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['ritm_number', 'short_description', 'opened_by', 'requested_for']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    # Rows are built explicitly because each RITM dict also carries
    # 'tasks' and 'attachments', which are not CSV columns here.
    writer.writerows(
        {name: r[name] for name in fieldnames}
        for r in ritm_data
    )

# Write Attachments to CSV: one row per attachment, tagged with its RITM.
with open('attachments.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['ritm_number', 'file_name', 'content_type', 'size_bytes']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(
        {
            'ritm_number': r['ritm_number'],
            'file_name': a['file_name'],
            'content_type': a['content_type'],
            'size_bytes': a['size_bytes'],
        }
        for r in ritm_data
        for a in r['attachments']
    )

# Write Tasks to CSV: one row per journal entry, tagged with its RITM.
with open('tasks.csv', 'w', newline='', encoding='utf-8') as csvfile:
    fieldnames = ['ritm_number', 'type', 'value']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    writer.writerows(
        {
            'ritm_number': r['ritm_number'],
            'type': t['type'],
            'value': t['value'],
        }
        for r in ritm_data
        for t in r['tasks']
    )

print("CSV files generated successfully.")
|
||||||
Reference in New Issue
Block a user