package formatter import ( "sort" "unicode" log "github.com/sirupsen/logrus" "github.com/zelenin/go-tdlib/client" ) type insertionType int const ( insertionOpening insertionType = iota insertionClosing insertionUnpaired ) type MarkupModeType int const ( MarkupModeXEP0393 MarkupModeType = iota MarkupModeMarkdown ) // insertion is a piece of text in given position type insertion struct { Offset int32 Runes []rune Type insertionType } // insertionStack contains the sequence of insertions // from the start or from the end type insertionStack []*insertion var boldRunesMarkdown = []rune("**") var boldRunesXEP0393 = []rune("*") var italicRunes = []rune("_") var strikeRunesMarkdown = []rune("~~") var strikeRunesXEP0393 = []rune("~") var codeRunes = []rune("`") var preRuneStart = []rune("```\n") var preRuneEnd = []rune("\n```") var quoteRunes = []rune("> ") var newlineRunes = []rune("\n") var doubleNewlineRunes = []rune("\n\n") var newlineCode = rune(0x0000000a) var bmpCeil = rune(0x0000ffff) // rebalance pumps all the values until the given offset to current stack (growing // from start) from given stack (growing from end); should be called // before any insertions to the current stack at the given offset func (s insertionStack) rebalance(s2 insertionStack, offset int32) (insertionStack, insertionStack) { for len(s2) > 0 && s2[len(s2)-1].Offset <= offset { s = append(s, s2[len(s2)-1]) s2 = s2[:len(s2)-1] } return s, s2 } // NewIterator is a second order function that sequentially scans and returns // stack elements; starts returning nil when elements are ended func (s insertionStack) NewIterator() func() *insertion { i := -1 return func() *insertion { i++ if i < len(s) { return s[i] } return nil } } // SortEntities arranges the entities in traversal-ready order func SortEntities(entities []*client.TextEntity) []*client.TextEntity { sortedEntities := make([]*client.TextEntity, len(entities)) copy(sortedEntities, entities) sort.Slice(sortedEntities, func(i int, j int) bool { entity1 := sortedEntities[i] entity2 := sortedEntities[j] if entity1.Offset < entity2.Offset { return true } else if entity1.Offset == entity2.Offset { return entity1.Length > entity2.Length } return false }) return sortedEntities } // MergeAdjacentEntities merges entities of a same kind func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity { mergedEntities := make([]*client.TextEntity, 0, len(entities)) excludedIndices := make(map[int]bool) for i, entity := range entities { if excludedIndices[i] || entity.Type == nil { continue } typ := entity.Type.TextEntityTypeType() start := entity.Offset end := start + entity.Length ei := make(map[int]bool) // collect continuations for j, entity2 := range entities[i+1:] { if entity2.Type != nil && entity2.Type.TextEntityTypeType() == typ && entity2.Offset == end { end += entity2.Length ei[j+i+1] = true } } // check for intersections with other entities var isIntersecting bool if len(ei) > 0 { for _, entity2 := range entities { entity2End := entity2.Offset + entity2.Length if (entity2.Offset < start && entity2End > start && entity2End < end) || (entity2.Offset > start && entity2.Offset < end && entity2End > end) { isIntersecting = true break } } } if !isIntersecting { entity.Length = end - start for j := range ei { excludedIndices[j] = true } } mergedEntities = append(mergedEntities, entity) } return mergedEntities } // ClaspDirectives to the following span as required by XEP-0393 func ClaspDirectives(doubledRunes []rune, entities []*client.TextEntity) []*client.TextEntity { alignedEntities := make([]*client.TextEntity, len(entities)) copy(alignedEntities, entities) for i, entity := range alignedEntities { var dirty bool endOffset := entity.Offset + entity.Length if unicode.IsSpace(doubledRunes[entity.Offset]) { for j, r := range doubledRunes[entity.Offset+1 : endOffset] { if !unicode.IsSpace(r) { dirty = true entity.Offset += int32(j + 1) entity.Length -= int32(j + 1) break } } } if unicode.IsSpace(doubledRunes[endOffset-1]) { for j := endOffset - 2; j >= entity.Offset; j-- { if !unicode.IsSpace(doubledRunes[j]) { dirty = true entity.Length = j + 1 - entity.Offset break } } } if dirty { alignedEntities[i] = entity } } return alignedEntities } func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) []*insertion { return []*insertion{ &insertion{ Offset: entity.Offset, Runes: lbrace, Type: insertionOpening, }, &insertion{ Offset: entity.Offset + entity.Length, Runes: rbrace, Type: insertionClosing, }, } } func quotePrependNewlines(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { if len(doubledRunes) == 0 { return []*insertion{} } startRunes := []rune("\n> ") if entity.Offset == 0 || doubledRunes[entity.Offset-1] == newlineCode { startRunes = quoteRunes } insertions := []*insertion{ &insertion{ Offset: entity.Offset, Runes: startRunes, Type: insertionUnpaired, }, } entityEnd := entity.Offset + entity.Length entityEndInt := int(entityEnd) var wasNewline bool // last newline is omitted, there's no need to put quote mark after the quote for i := entity.Offset; i < entityEnd-1; i++ { isNewline := doubledRunes[i] == newlineCode if (isNewline && markupMode == MarkupModeXEP0393) || (wasNewline && isNewline && markupMode == MarkupModeMarkdown) { insertions = append(insertions, &insertion{ Offset: i+1, Runes: quoteRunes, Type: insertionUnpaired, }) } if isNewline { wasNewline = true } else { wasNewline = false } } var rbrace []rune if len(doubledRunes) > entityEndInt { if doubledRunes[entityEnd] == newlineCode { if markupMode == MarkupModeMarkdown && len(doubledRunes) > entityEndInt+1 && doubledRunes[entityEndInt+1] != newlineCode { rbrace = newlineRunes } } else { if markupMode == MarkupModeMarkdown { rbrace = doubleNewlineRunes } else { rbrace = newlineRunes } } } insertions = append(insertions, &insertion{ Offset: entityEnd, Runes: rbrace, Type: insertionClosing, }) return insertions } // entityToMarkdown generates the wrapping Markdown tags func entityToMarkdown(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { if entity == nil || entity.Type == nil { return []*insertion{} } switch entity.Type.TextEntityTypeType() { case client.TypeTextEntityTypeBold: return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown) case client.TypeTextEntityTypeItalic: return markupBraces(entity, italicRunes, italicRunes) case client.TypeTextEntityTypeStrikethrough: return markupBraces(entity, strikeRunesMarkdown, strikeRunesMarkdown) case client.TypeTextEntityTypeCode: return markupBraces(entity, codeRunes, codeRunes) case client.TypeTextEntityTypePre: return markupBraces(entity, preRuneStart, preRuneEnd) case client.TypeTextEntityTypePreCode: preCode, _ := entity.Type.(*client.TextEntityTypePreCode) return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes) case client.TypeTextEntityTypeBlockQuote: return quotePrependNewlines(entity, doubledRunes, MarkupModeMarkdown) case client.TypeTextEntityTypeTextUrl: textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl) return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")")) } return []*insertion{} } // entityToXEP0393 generates the wrapping XEP-0393 tags func entityToXEP0393(entity *client.TextEntity, doubledRunes []rune, markupMode MarkupModeType) []*insertion { if entity == nil || entity.Type == nil { return []*insertion{} } switch entity.Type.TextEntityTypeType() { case client.TypeTextEntityTypeBold: return markupBraces(entity, boldRunesXEP0393, boldRunesXEP0393) case client.TypeTextEntityTypeItalic: return markupBraces(entity, italicRunes, italicRunes) case client.TypeTextEntityTypeStrikethrough: return markupBraces(entity, strikeRunesXEP0393, strikeRunesXEP0393) case client.TypeTextEntityTypeCode: return markupBraces(entity, codeRunes, codeRunes) case client.TypeTextEntityTypePre: return markupBraces(entity, preRuneStart, preRuneEnd) case client.TypeTextEntityTypePreCode: preCode, _ := entity.Type.(*client.TextEntityTypePreCode) return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes) case client.TypeTextEntityTypeBlockQuote: return quotePrependNewlines(entity, doubledRunes, MarkupModeXEP0393) case client.TypeTextEntityTypeTextUrl: textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl) // non-standard, Pidgin-specific return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">")) } return []*insertion{} } // transform the source text into a form with uniform runes and code points, // by duplicating anything beyond the Basic Multilingual Plane func textToDoubledRunes(text string) []rune { doubledRunes := make([]rune, 0, len(text)*2) for _, cp := range text { if cp > bmpCeil { doubledRunes = append(doubledRunes, cp, cp) } else { doubledRunes = append(doubledRunes, cp) } } return doubledRunes } // Format traverses an already sorted list of entities and wraps the text in a markup func Format( sourceText string, entities []*client.TextEntity, markupMode MarkupModeType, ) string { if len(entities) == 0 { return sourceText } var entityToMarkup func(*client.TextEntity, []rune, MarkupModeType) []*insertion if markupMode == MarkupModeXEP0393 { entityToMarkup = entityToXEP0393 } else { entityToMarkup = entityToMarkdown } doubledRunes := textToDoubledRunes(sourceText) mergedEntities := SortEntities(ClaspDirectives(doubledRunes, MergeAdjacentEntities(SortEntities(entities)))) startStack := make(insertionStack, 0, len(sourceText)) endStack := make(insertionStack, 0, len(sourceText)) // convert entities to a stack of brackets var maxEndOffset int32 for _, entity := range mergedEntities { log.Debugf("%#v", entity) if entity.Length <= 0 { continue } endOffset := entity.Offset + entity.Length if endOffset > maxEndOffset { maxEndOffset = endOffset } startStack, endStack = startStack.rebalance(endStack, entity.Offset) insertions := entityToMarkup(entity, doubledRunes, markupMode) if len(insertions) > 1 { startStack = append(startStack, insertions[0:len(insertions)-1]...) } if len(insertions) > 0 { endStack = append(endStack, insertions[len(insertions)-1]) } } // flush the closing brackets that still remain in endStack startStack, endStack = startStack.rebalance(endStack, maxEndOffset) // sort unpaired insertions sort.SliceStable(startStack, func(i int, j int) bool { ins1 := startStack[i] ins2 := startStack[j] if ins1.Type == insertionUnpaired && ins2.Type == insertionUnpaired { return ins1.Offset < ins2.Offset } if ins1.Type == insertionUnpaired { if ins1.Offset == ins2.Offset { if ins2.Type == insertionOpening { // > ** return true } else if ins2.Type == insertionClosing { // **> return false } } else { return ins1.Offset < ins2.Offset } } if ins2.Type == insertionUnpaired { if ins1.Offset == ins2.Offset { if ins1.Type == insertionOpening { // > ** return false } else if ins1.Type == insertionClosing { // **> return true } } else { return ins1.Offset < ins2.Offset } } return false }) // merge brackets into text markupRunes := make([]rune, 0, len(sourceText)) nextInsertion := startStack.NewIterator() insertion := nextInsertion() var skipNext bool for i, cp := range doubledRunes { if skipNext { skipNext = false continue } for insertion != nil && int(insertion.Offset) <= i { markupRunes = append(markupRunes, insertion.Runes...) insertion = nextInsertion() } markupRunes = append(markupRunes, cp) // skip two UTF-16 code units (not points actually!) if needed if cp > bmpCeil { skipNext = true } } for insertion != nil { markupRunes = append(markupRunes, insertion.Runes...) insertion = nextInsertion() } return string(markupRunes) }