// Package formatter converts Telegram text entities into inline markup
// (Markdown or XEP-0393 message styling).
package formatter

import (
	"reflect"
	"sort"
	"unicode"

	log "github.com/sirupsen/logrus"
	"github.com/zelenin/go-tdlib/client"
)

// Insertion is a piece of text in given position.
// Offset is counted in UTF-16 code units of the source text.
type Insertion struct {
	Offset int32
	Runes  []rune
}

// InsertionStack contains the sequence of insertions
// from the start or from the end.
type InsertionStack []*Insertion

var (
	boldRunesMarkdown   = []rune("**")
	boldRunesXEP0393    = []rune("*")
	italicRunes         = []rune("_")
	strikeRunesMarkdown = []rune("~~")
	strikeRunesXEP0393  = []rune("~")
	codeRunes           = []rune("`")
	preRuneStart        = []rune("```\n")
	preRuneEnd          = []rune("\n```")
)

// rebalance pumps all the values until the given offset to current stack (growing
// from start) from given stack (growing from end); should be called
// before any insertions to the current stack at the given offset.
// Both updated stacks are returned, since appending and truncating
// produce new slice headers.
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
	for len(s2) > 0 && s2[len(s2)-1].Offset <= offset {
		s = append(s, s2[len(s2)-1])
		s2 = s2[:len(s2)-1]
	}
	return s, s2
}

// NewIterator is a second order function that sequentially scans and returns
// stack elements; starts returning nil when elements are ended.
func (s InsertionStack) NewIterator() func() *Insertion {
	i := -1

	return func() *Insertion {
		i++
		if i < len(s) {
			return s[i]
		}
		return nil
	}
}

// SortEntities arranges the entities in traversal-ready order: ascending by
// offset, and for equal offsets the longer (outer) entity goes first.
// The input slice is not reordered; a sorted copy is returned.
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
	sortedEntities := make([]*client.TextEntity, len(entities))
	copy(sortedEntities, entities)

	// The comparator must index the slice that is being permuted, not the
	// untouched input; the stable variant keeps ties in their input order.
	sort.SliceStable(sortedEntities, func(i, j int) bool {
		entity1 := sortedEntities[i]
		entity2 := sortedEntities[j]
		if entity1.Offset != entity2.Offset {
			return entity1.Offset < entity2.Offset
		}
		return entity1.Length > entity2.Length
	})

	return sortedEntities
}

// sameEntityType reports whether two entity types are of the same kind and
// carry the same payload (URL, language, ...), so that merging the entities
// loses no information.
func sameEntityType(a, b client.TextEntityType) bool {
	if a == nil || b == nil {
		return false
	}
	if a.TextEntityTypeType() != b.TextEntityTypeType() {
		return false
	}
	return reflect.DeepEqual(a, b)
}

// intersectsPartially reports whether any entity crosses exactly one border
// of the [start, end) span, i.e. overlaps it without being nested in it or
// enclosing it.
func intersectsPartially(entities []*client.TextEntity, start, end int32) bool {
	for _, other := range entities {
		otherEnd := other.Offset + other.Length
		crossesStart := other.Offset < start && otherEnd > start && otherEnd < end
		crossesEnd := other.Offset > start && other.Offset < end && otherEnd > end
		if crossesStart || crossesEnd {
			return true
		}
	}
	return false
}

// MergeAdjacentEntities merges entities of a same kind that directly follow
// one another. It expects the entities in SortEntities order. A chain is
// left unmerged when the merged span would partially intersect another
// entity. The entities passed in are never modified: a merged entity is a
// copy with the extended length.
func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity {
	mergedEntities := make([]*client.TextEntity, 0, len(entities))
	excludedIndices := make(map[int]bool)

	for i, entity := range entities {
		if excludedIndices[i] {
			continue
		}

		start := entity.Offset
		end := start + entity.Length

		// collect continuations
		var continuations []int
		for j := i + 1; j < len(entities); j++ {
			next := entities[j]
			if next.Offset == end && sameEntityType(entity.Type, next.Type) {
				end += next.Length
				continuations = append(continuations, j)
			}
		}

		// merge only if the extended span does not break other entities
		if len(continuations) > 0 && !intersectsPartially(entities, start, end) {
			merged := *entity
			merged.Length = end - start
			entity = &merged
			for _, j := range continuations {
				excludedIndices[j] = true
			}
		}

		mergedEntities = append(mergedEntities, entity)
	}

	return mergedEntities
}

// ClaspDirectives to the following span as required by XEP-0393: leading and
// trailing whitespace is cut off from every entity so that the markup
// directives touch non-space text. Entities consisting only of whitespace,
// empty entities and entities reaching outside of the text are returned
// unchanged. The entities passed in are never modified: a trimmed entity is
// a copy.
func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextEntity {
	alignedEntities := make([]*client.TextEntity, len(entities))
	copy(alignedEntities, entities)

	// transform the source text into a form where slice indices match the
	// entity offsets, by duplicating every code point outside the Basic
	// Multilingual Plane (such code points count as two offset units)
	doubledRunes := make([]rune, 0, len(text))
	for _, cp := range text {
		if cp > 0x0000ffff {
			doubledRunes = append(doubledRunes, cp, cp)
		} else {
			doubledRunes = append(doubledRunes, cp)
		}
	}

	for i, entity := range alignedEntities {
		if entity == nil || entity.Length <= 0 || entity.Offset < 0 {
			continue
		}
		// widen before adding so that a huge length cannot wrap around
		if int64(entity.Offset)+int64(entity.Length) > int64(len(doubledRunes)) {
			continue
		}

		start := entity.Offset
		end := start + entity.Length
		newStart, newEnd := start, end

		if unicode.IsSpace(doubledRunes[start]) {
			for j := start + 1; j < end; j++ {
				if !unicode.IsSpace(doubledRunes[j]) {
					newStart = j
					break
				}
			}
		}
		if unicode.IsSpace(doubledRunes[end-1]) {
			for j := end - 2; j >= newStart; j-- {
				if !unicode.IsSpace(doubledRunes[j]) {
					newEnd = j + 1
					break
				}
			}
		}

		if newStart != start || newEnd != end {
			trimmed := *entity
			trimmed.Offset = newStart
			trimmed.Length = newEnd - newStart
			alignedEntities[i] = &trimmed
		}
	}

	return alignedEntities
}

// markupBraces builds the opening insertion at the entity start and the
// closing insertion at the entity end.
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
	return &Insertion{
			Offset: entity.Offset,
			Runes:  lbrace,
		}, &Insertion{
			Offset: entity.Offset + entity.Length,
			Runes:  rbrace,
		}
}

// preCodeBraces builds the fence for a preformatted block with a language
// tag; the closing fence is the same as for a plain preformatted block.
// If the language is not available, a plain fence is produced.
func preCodeBraces(entity *client.TextEntity) (*Insertion, *Insertion) {
	preCode, ok := entity.Type.(*client.TextEntityTypePreCode)
	if !ok || preCode == nil {
		return markupBraces(entity, preRuneStart, preRuneEnd)
	}
	return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), preRuneEnd)
}

// EntityToMarkdown generates the wrapping Markdown tags.
// It returns a pair of nils for nil input and for entity kinds that have
// no Markdown representation here.
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
	if entity == nil || entity.Type == nil {
		return nil, nil
	}

	switch entity.Type.TextEntityTypeType() {
	case client.TypeTextEntityTypeBold:
		return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown)
	case client.TypeTextEntityTypeItalic:
		return markupBraces(entity, italicRunes, italicRunes)
	case client.TypeTextEntityTypeStrikethrough:
		return markupBraces(entity, strikeRunesMarkdown, strikeRunesMarkdown)
	case client.TypeTextEntityTypeCode:
		return markupBraces(entity, codeRunes, codeRunes)
	case client.TypeTextEntityTypePre:
		return markupBraces(entity, preRuneStart, preRuneEnd)
	case client.TypeTextEntityTypePreCode:
		return preCodeBraces(entity)
	case client.TypeTextEntityTypeTextUrl:
		textURL, ok := entity.Type.(*client.TextEntityTypeTextUrl)
		if !ok || textURL == nil {
			return nil, nil
		}
		return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")"))
	}

	return nil, nil
}

// EntityToXEP0393 generates the wrapping XEP-0393 tags.
// It returns a pair of nils for nil input and for entity kinds that have
// no XEP-0393 representation here.
func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) {
	if entity == nil || entity.Type == nil {
		return nil, nil
	}

	switch entity.Type.TextEntityTypeType() {
	case client.TypeTextEntityTypeBold:
		return markupBraces(entity, boldRunesXEP0393, boldRunesXEP0393)
	case client.TypeTextEntityTypeItalic:
		return markupBraces(entity, italicRunes, italicRunes)
	case client.TypeTextEntityTypeStrikethrough:
		return markupBraces(entity, strikeRunesXEP0393, strikeRunesXEP0393)
	case client.TypeTextEntityTypeCode:
		return markupBraces(entity, codeRunes, codeRunes)
	case client.TypeTextEntityTypePre:
		return markupBraces(entity, preRuneStart, preRuneEnd)
	case client.TypeTextEntityTypePreCode:
		return preCodeBraces(entity)
	case client.TypeTextEntityTypeTextUrl:
		textURL, ok := entity.Type.(*client.TextEntityTypeTextUrl)
		if !ok || textURL == nil {
			return nil, nil
		}
		// non-standard, Pidgin-specific
		return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">"))
	}

	return nil, nil
}

// Format wraps the text in a markup. The entities may come in any order:
// they are sorted, merged and whitespace-trimmed here, and the slice and
// the entities passed in are left untouched. entityToMarkup converts an
// entity into its opening and closing insertions (either may be nil); if
// there are no entities or no converter, the text is returned as is.
func Format(
	sourceText string,
	entities []*client.TextEntity,
	entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
) string {
	if len(entities) == 0 || entityToMarkup == nil {
		return sourceText
	}

	mergedEntities := SortEntities(ClaspDirectives(sourceText, MergeAdjacentEntities(SortEntities(entities))))

	// every entity yields at most one opening and one closing insertion,
	// and all of them finally land in startStack
	startStack := make(InsertionStack, 0, 2*len(mergedEntities))
	endStack := make(InsertionStack, 0, len(mergedEntities))

	// convert entities to a stack of brackets
	var maxEndOffset int32
	for _, entity := range mergedEntities {
		log.Debugf("%#v", entity)
		if entity.Length <= 0 {
			continue
		}

		endOffset := entity.Offset + entity.Length
		if endOffset > maxEndOffset {
			maxEndOffset = endOffset
		}

		// closing brackets located before this entity go first
		startStack, endStack = startStack.rebalance(endStack, entity.Offset)

		startInsertion, endInsertion := entityToMarkup(entity)
		if startInsertion != nil {
			startStack = append(startStack, startInsertion)
		}
		if endInsertion != nil {
			endStack = append(endStack, endInsertion)
		}
	}
	// flush the closing brackets that still remain in endStack
	startStack, endStack = startStack.rebalance(endStack, maxEndOffset)

	// merge brackets into text
	markupRunes := make([]rune, 0, len(sourceText))

	nextInsertion := startStack.NewIterator()
	insertion := nextInsertion()
	var runeI int32

	for _, cp := range sourceText {
		for insertion != nil && insertion.Offset <= runeI {
			markupRunes = append(markupRunes, insertion.Runes...)
			insertion = nextInsertion()
		}

		markupRunes = append(markupRunes, cp)
		// skip two UTF-16 code units (not points actually!) if needed
		if cp > 0x0000ffff {
			runeI += 2
		} else {
			runeI++
		}
	}
	// insertions located at or after the end of the text
	for insertion != nil {
		markupRunes = append(markupRunes, insertion.Runes...)
		insertion = nextInsertion()
	}

	return string(markupRunes)
}