another.im-ios/AnotherXMPP/modules/parsing/XMLParser.swift
2024-12-18 04:51:41 +01:00

258 lines
8 KiB
Swift

import Foundation
import libxml2
/// Failures that `XMLParser.parse(data:)` reports through `XMLParserEvent.parserError`.
enum XMLParserError: Error {
/// The parser returned error code 64 and the parsed data contains the bytes `<?xml `.
/// Payload: the parser's error code, then the index of the first `<?xml ` within the parsed data.
case xmlDeclarationInside(Int, Int)
/// A parse failure identified only by the error code returned from the parser.
case xmlUnknown(Int)
}
/// Events published on `XMLParser.element` while input is being parsed.
enum XMLParserEvent {
/// An opening `<stream:stream>` tag was seen.
/// `attributes` holds the tag's attributes together with its namespace declarations keyed by prefix.
case streamStarted(attributes: [String: String])
/// A closing `</stream:stream>` tag was seen.
case streamEnded
/// An element that has no open parent was closed; it carries the child nodes added while it was open.
case element(XMLElement)
/// The underlying parser reported a failure for the chunk that was just fed in.
case parserError(XMLParserError)
}
/// Incremental (push) XML parser built on libxml2's SAX2 interface.
///
/// Bytes are fed in with `parse(data:)`; everything the parser recognises is published
/// as `XMLParserEvent` values on the `element` stream.
final class XMLParser {
    /// `element` is the stream consumers iterate over; `elementContinuation` is its producing side.
    let (element, elementContinuation) = AsyncStream.makeStream(of: XMLParserEvent.self, bufferingPolicy: .unbounded)

    /// Error code returned by `xmlParseChunk` that this parser maps to
    /// `XMLParserError.xmlDeclarationInside` (when a `<?xml ` marker is present in the chunk).
    private static let xmlDeclarationErrorCode: Int32 = 64

    /// libxml2 push-parser context; replaced by `restart()`.
    private var ctx: xmlParserCtxtPtr?
    /// Elements that have been opened but not closed yet, innermost last.
    private var stack: [XMLElement] = []
    /// Prefix-to-URI namespace declarations collected since the last (re)start.
    private var xmlnss: [String: String] = [:]

    init() {
        // the bridged `self` is handed back to the SAX callbacks as their user-data pointer
        ctx = xmlCreatePushParserCtxt(&saxHandler, Mem.bridge(self), nil, 0, nil)
    }

    deinit {
        elementContinuation.finish()
        xmlFreeParserCtxt(ctx)
    }

    /// Drops all parsing state and starts over with a fresh libxml2 context.
    func restart() {
        stack = []
        xmlnss = [:]
        xmlFreeParserCtxt(ctx)
        ctx = xmlCreatePushParserCtxt(&saxHandler, Mem.bridge(self), nil, 0, nil)
    }

    /// Feeds the next chunk of bytes to the parser.
    ///
    /// Elements recognised while parsing are published on `element`. If libxml2 rejects
    /// the chunk, exactly one `.parserError` event is published for it:
    /// `.xmlDeclarationInside` when the error code is 64 and the chunk contains `<?xml `,
    /// `.xmlUnknown` otherwise.
    ///
    /// - Parameter data: Raw bytes to parse. Empty data is ignored.
    func parse(data: Data) {
        // the closure is non-escaping and runs synchronously, so `self` is used directly
        data.withUnsafeBytes { ptr in
            guard let addr = ptr.baseAddress else {
                return
            }
            let err = xmlParseChunk(ctx, addr.assumingMemoryBound(to: CChar.self), Int32(data.count), 0)
            guard err > 0 else {
                return
            }
            if err == Self.xmlDeclarationErrorCode,
               let position = data.range(of: Data("<?xml ".utf8))?.lowerBound {
                elementContinuation.yield(.parserError(.xmlDeclarationInside(Int(err), position)))
                // the specific error has been reported; do not report the same failure a second time
                return
            }
            elementContinuation.yield(.parserError(.xmlUnknown(Int(err))))
        }
    }
}
// MARK: Private part, parsing
private extension XMLParser {
    /// Handles an opening tag reported by the SAX layer.
    ///
    /// - Parameters:
    ///   - elementName: Local name of the element (without prefix).
    ///   - prefix: Namespace prefix of the element, if it has one.
    ///   - namespaces: Namespace declarations made on this tag, keyed by prefix;
    ///     the default namespace is looked up under the empty key.
    ///   - attributes: Attributes of the tag.
    func startElement(elementName: String, prefix: String?, namespaces: [String: String]?, attributes: [String: String]) {
        // session start: `<stream:stream>` is reported right away and never put on the stack
        if elementName == "stream", prefix == "stream" {
            // namespace declarations are reported alongside the attributes and win on key clashes
            let attrs = attributes.merging(namespaces ?? [:]) { _, declared in declared }
            elementContinuation.yield(.streamStarted(attributes: attrs))
            return
        }

        // every other element: remember prefixed declarations for elements that follow
        for (key, value) in namespaces ?? [:] where !key.isEmpty {
            xmlnss[key] = value
        }

        let defaultNamespace = namespaces?[""]
        let xmlns: String?
        if let prefix {
            xmlns = xmlnss[prefix] ?? defaultNamespace
        } else {
            xmlns = defaultNamespace
        }

        // a prefix that could not be resolved to a namespace stays part of the name
        let name: String
        if let prefix, xmlns == nil {
            name = prefix + ":" + elementName
        } else {
            name = elementName
        }

        var element = XMLElement(name: name, xmlns: xmlns, attributes: attributes, content: nil, nodes: [])
        // apply the parent's namespace first, then this element's own one if it has any
        if !stack.isEmpty {
            element = element.updateXmlns(stack.last?.xmlns)
        }
        if xmlns != nil {
            element = element.updateXmlns(xmlns)
        }
        stack.append(element)
    }

    /// Handles a closing tag reported by the SAX layer.
    ///
    /// The innermost open element is closed: it is attached to its parent when there is
    /// one, otherwise it is published as a complete `.element` event.
    ///
    /// - Parameters:
    ///   - elementName: Local name of the element (without prefix).
    ///   - prefix: Namespace prefix of the element, if it has one.
    func endElement(elementName: String, prefix: String?) {
        if elementName == "stream", prefix == "stream" {
            elementContinuation.yield(.streamEnded)
            return
        }
        guard let element = stack.popLast() else {
            return
        }
        if var parent = stack.popLast() {
            // nested element: hand it to its parent and keep the parent open
            parent = parent.addNode(element)
            stack.append(parent)
        } else {
            // nothing left open above it: the element is complete
            elementContinuation.yield(.element(element))
        }
    }

    /// Stores character data on the innermost open element; ignored when no element is open.
    ///
    /// NOTE(review): a SAX parser may deliver one text node in several calls. Whether
    /// `updateContent` appends or replaces is not visible here — confirm against `XMLElement`.
    func charactersFound(_ line: String) {
        guard var current = stack.popLast() else {
            return
        }
        current = current.updateContent(line)
        stack.append(current)
    }
}
// =========================================================
// libxml2-specific stuff
/// One attribute entry as seen by `SAX_startElement`, which rebinds the attribute array
/// it receives from libxml2 to this type. The fields are read positionally, so their
/// order and count must not change.
private struct Attr {
// attribute name; keyed as `prefix:name` by `SAX_startElement` when `prefix` is set
var name: UnsafePointer<UInt8>?
// attribute prefix, nil when the attribute has none
var prefix: UnsafePointer<UInt8>?
// not read anywhere in this file
var attrUri: UnsafePointer<UInt8>?
// first byte of the attribute value
var valueBegin: UnsafePointer<UInt8>?
// end of the attribute value; the value length is `valueEnd - valueBegin`
var valueEnd: UnsafePointer<UInt8>?
}
/// One namespace declaration as seen by `SAX_startElement`, which rebinds the namespace
/// array it receives from libxml2 to this type. The fields are read positionally, so
/// their order and count must not change.
private struct Nss {
// declared prefix; nil is treated as the empty prefix by `SAX_startElement`
var prefix: UnsafePointer<UInt8>?
// namespace URI; entries with a nil URI are skipped by `SAX_startElement`
var uri: UnsafePointer<UInt8>?
}
/// Converts a NUL-terminated UTF-8 C string into a Swift `String`.
///
/// - Parameter ptr: Pointer to the first byte of the C string, or `nil`.
/// - Returns: The decoded string, or `nil` when `ptr` is `nil`.
private func strFromCUtf8(_ ptr: UnsafePointer<UInt8>?) -> String? {
    ptr.map { String(cString: $0) }
}
/// SAX2 handler table shared by every parser context created in this file.
///
/// Only character data and namespace-aware start/end element callbacks are installed;
/// every other callback (including the warning and error ones) is left unset.
/// It is a `var` because it is passed to libxml2 by address.
private var saxHandler: xmlSAXHandler = {
    // zero-initialised: every callback starts out as nil
    var handler = xmlSAXHandler()
    handler.characters = SAX_charactersFound
    handler.initialized = XML_SAX2_MAGIC
    handler.startElementNs = SAX_startElement
    handler.endElementNs = SAX_endElement
    return handler
}()
/// SAX callback for character data: decodes `len_` bytes starting at `chars_` as UTF-8
/// and forwards the text to the `XMLParser` stored in the callback's user-data pointer.
private let SAX_charactersFound: charactersSAXFunc = { ctx_, chars_, len_ in
    guard let ctx_, let chars_ else {
        return
    }
    // the bytes are not NUL-terminated here, so the explicit length is used
    let bytes = UnsafeBufferPointer(start: UnsafePointer<UInt8>(chars_), count: Int(len_))
    let text = String(decoding: bytes, as: UTF8.self)
    unsafeBitCast(ctx_, to: XMLParser.self).charactersFound(text)
}
/// SAX2 callback for an opening tag.
///
/// Collects the tag's attributes (keyed by `name`, or `prefix:name` for prefixed ones)
/// and its namespace declarations (keyed by prefix, empty key for a missing prefix),
/// then forwards everything to the `XMLParser` stored in the callback's user-data pointer.
/// `namespaces` is passed as `nil` when the tag declares none.
private let SAX_startElement: startElementNsSAX2Func = { ctx_, localName, prefix_, _, nb_namespaces, namespaces_, nb_attributes, _, attributes_ in
    guard let elementName = strFromCUtf8(localName), let ctx_ else {
        return
    }
    let parser = unsafeBitCast(ctx_, to: XMLParser.self)

    // attributes
    var attributes: [String: String] = [:]
    let attributeCount = Int(nb_attributes)
    if let attributes_, attributeCount > 0 {
        attributes_.withMemoryRebound(to: Attr.self, capacity: attributeCount) { entries in
            for offset in 0..<attributeCount {
                let entry = entries[offset]
                guard let attrName = strFromCUtf8(entry.name),
                      let valueStart = entry.valueBegin,
                      let valueStop = entry.valueEnd
                else {
                    continue
                }
                // the value is delimited by two pointers instead of a terminating NUL
                let rawValue = UnsafeBufferPointer(start: valueStart, count: valueStop - valueStart)
                let value = String(decoding: rawValue, as: UTF8.self).unescaped
                if let attrPrefix = strFromCUtf8(entry.prefix) {
                    attributes[attrPrefix + ":" + attrName] = value
                } else {
                    attributes[attrName] = value
                }
            }
        }
    }

    // namespaces
    var namespaces: [String: String]?
    let namespaceCount = Int(nb_namespaces)
    if namespaceCount > 0, let namespaces_ {
        var declared: [String: String] = [:]
        namespaces_.withMemoryRebound(to: Nss.self, capacity: namespaceCount) { entries in
            for offset in 0..<namespaceCount {
                let entry = entries[offset]
                guard let uri = strFromCUtf8(entry.uri) else {
                    continue
                }
                declared[strFromCUtf8(entry.prefix) ?? ""] = uri.unescaped
            }
        }
        namespaces = declared
    }

    parser.startElement(
        elementName: elementName,
        prefix: strFromCUtf8(prefix_),
        namespaces: namespaces,
        attributes: attributes
    )
}
/// SAX2 callback for a closing tag: forwards the element's local name and optional
/// prefix to the `XMLParser` stored in the callback's user-data pointer.
private let SAX_endElement: endElementNsSAX2Func = { ctx_, localName, prefix_, _ in
    guard let ctx_, let elementName = strFromCUtf8(localName) else {
        return
    }
    unsafeBitCast(ctx_, to: XMLParser.self)
        .endElement(elementName: elementName, prefix: strFromCUtf8(prefix_))
}