telegabber/telegram/formatter/formatter.go

302 lines
8.6 KiB
Go
Raw Permalink Normal View History

package formatter
import (
"sort"
"unicode"
log "github.com/sirupsen/logrus"
2022-01-17 20:45:40 +00:00
"github.com/zelenin/go-tdlib/client"
)
type (
	// Insertion is a run of markup runes that has to be spliced into the
	// formatted text at a given position.
	Insertion struct {
		// Offset is the position in the source text before which Runes are
		// emitted; Format compares it against a UTF-16 code unit counter.
		Offset int32
		// Runes is the markup to emit at Offset.
		Runes []rune
	}

	// InsertionStack holds a sequence of insertions; depending on its role it
	// is filled either from the start or from the end.
	InsertionStack []*Insertion
)
// Markup directives shared by the Markdown and XEP-0393 converters. The slices
// are handed out as Insertion.Runes and must be treated as read-only.
var (
	boldRunesMarkdown = []rune("**")
	boldRunesXEP0393  = []rune("*")

	italicRunes = []rune("_")

	strikeRunesMarkdown = []rune("~~")
	strikeRunesXEP0393  = []rune("~")

	codeRunes = []rune("`")

	// preRuneStart and preRuneEnd fence a preformatted block; the line breaks
	// keep the fences on lines of their own.
	preRuneStart = []rune("```\n")
	preRuneEnd   = []rune("\n```")
)
// rebalance moves insertions from the top of s2 (a stack growing from the end)
// onto s (a stack growing from the start) for as long as the top of s2 lies at
// or before offset. It returns both stacks after the transfer and should be
// called before anything is pushed onto s at the given offset.
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
	for {
		top := len(s2) - 1
		if top < 0 || s2[top].Offset > offset {
			return s, s2
		}
		s = append(s, s2[top])
		s2 = s2[:top]
	}
}
// NewIterator returns a closure that walks the stack from its first element:
// every call yields the next insertion, and once the elements are exhausted
// the closure keeps returning nil.
func (s InsertionStack) NewIterator() func() *Insertion {
	next := 0
	return func() *Insertion {
		if next >= len(s) {
			return nil
		}
		item := s[next]
		next++
		return item
	}
}
// SortEntities arranges the entities in traversal-ready order
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
sortedEntities := make([]*client.TextEntity, len(entities))
copy(sortedEntities, entities)
sort.Slice(sortedEntities, func(i int, j int) bool {
2022-03-13 12:55:59 +00:00
entity1 := sortedEntities[i]
entity2 := sortedEntities[j]
if entity1.Offset < entity2.Offset {
return true
} else if entity1.Offset == entity2.Offset {
return entity1.Length > entity2.Length
}
return false
})
return sortedEntities
}
// MergeAdjacentEntities merges entities of a same kind
func MergeAdjacentEntities(entities []*client.TextEntity) []*client.TextEntity {
mergedEntities := make([]*client.TextEntity, 0, len(entities))
excludedIndices := make(map[int]bool)
for i, entity := range entities {
2022-03-14 20:00:00 +00:00
if excludedIndices[i] || entity.Type == nil {
continue
}
typ := entity.Type.TextEntityTypeType()
start := entity.Offset
end := start + entity.Length
ei := make(map[int]bool)
// collect continuations
for j, entity2 := range entities[i+1:] {
2022-03-14 20:00:00 +00:00
if entity2.Type != nil && entity2.Type.TextEntityTypeType() == typ && entity2.Offset == end {
end += entity2.Length
ei[j+i+1] = true
}
}
// check for intersections with other entities
var isIntersecting bool
if len(ei) > 0 {
for _, entity2 := range entities {
entity2End := entity2.Offset + entity2.Length
if (entity2.Offset < start && entity2End > start && entity2End < end) ||
(entity2.Offset > start && entity2.Offset < end && entity2End > end) {
isIntersecting = true
break
}
}
}
if !isIntersecting {
entity.Length = end - start
for j := range ei {
excludedIndices[j] = true
}
}
mergedEntities = append(mergedEntities, entity)
}
return mergedEntities
}
// ClaspDirectives to the following span as required by XEP-0393
func ClaspDirectives(text string, entities []*client.TextEntity) []*client.TextEntity {
alignedEntities := make([]*client.TextEntity, len(entities))
copy(alignedEntities, entities)
// transform the source text into a form with uniform runes and code points,
// by duplicating the Basic Multilingual Plane
doubledRunes := make([]rune, 0, len(text)*2)
for _, cp := range text {
if cp > 0x0000ffff {
doubledRunes = append(doubledRunes, cp, cp)
} else {
doubledRunes = append(doubledRunes, cp)
}
}
for i, entity := range alignedEntities {
var dirty bool
endOffset := entity.Offset + entity.Length
if unicode.IsSpace(doubledRunes[entity.Offset]) {
2022-04-01 16:35:54 +00:00
for j, r := range doubledRunes[entity.Offset+1 : endOffset] {
if !unicode.IsSpace(r) {
dirty = true
2022-04-01 16:35:54 +00:00
entity.Offset += int32(j + 1)
entity.Length -= int32(j + 1)
break
}
}
}
if unicode.IsSpace(doubledRunes[endOffset-1]) {
2022-04-01 16:35:54 +00:00
for j := endOffset - 2; j >= entity.Offset; j-- {
if !unicode.IsSpace(doubledRunes[j]) {
dirty = true
2022-04-01 16:35:54 +00:00
entity.Length = j + 1 - entity.Offset
break
}
}
}
if dirty {
alignedEntities[i] = entity
}
}
return alignedEntities
}
// markupBraces builds the pair of insertions that enclose an entity: lbrace is
// placed at the entity's offset and rbrace at its offset plus length.
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
	opening := &Insertion{Offset: entity.Offset, Runes: lbrace}
	closing := &Insertion{Offset: entity.Offset + entity.Length, Runes: rbrace}
	return opening, closing
}
// EntityToMarkdown generates the wrapping Markdown tags
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
switch entity.Type.TextEntityTypeType() {
case client.TypeTextEntityTypeBold:
return markupBraces(entity, boldRunesMarkdown, boldRunesMarkdown)
case client.TypeTextEntityTypeItalic:
return markupBraces(entity, italicRunes, italicRunes)
2022-03-11 17:54:03 +00:00
case client.TypeTextEntityTypeStrikethrough:
return markupBraces(entity, strikeRunesMarkdown, strikeRunesMarkdown)
2022-03-11 17:01:38 +00:00
case client.TypeTextEntityTypeCode:
return markupBraces(entity, codeRunes, codeRunes)
2022-03-11 17:01:38 +00:00
case client.TypeTextEntityTypePre:
return markupBraces(entity, preRuneStart, preRuneEnd)
case client.TypeTextEntityTypePreCode:
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
case client.TypeTextEntityTypeTextUrl:
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
return markupBraces(entity, []rune("["), []rune("]("+textURL.Url+")"))
}
return nil, nil
}
// EntityToXEP0393 generates the wrapping XEP-0393 tags
func EntityToXEP0393(entity *client.TextEntity) (*Insertion, *Insertion) {
2022-02-18 23:41:08 +00:00
if entity == nil || entity.Type == nil {
return nil, nil
}
switch entity.Type.TextEntityTypeType() {
case client.TypeTextEntityTypeBold:
return markupBraces(entity, boldRunesXEP0393, boldRunesXEP0393)
case client.TypeTextEntityTypeItalic:
return markupBraces(entity, italicRunes, italicRunes)
2022-03-11 17:54:03 +00:00
case client.TypeTextEntityTypeStrikethrough:
return markupBraces(entity, strikeRunesXEP0393, strikeRunesXEP0393)
2022-03-11 17:01:38 +00:00
case client.TypeTextEntityTypeCode:
return markupBraces(entity, codeRunes, codeRunes)
2022-03-11 17:01:38 +00:00
case client.TypeTextEntityTypePre:
return markupBraces(entity, preRuneStart, preRuneEnd)
case client.TypeTextEntityTypePreCode:
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
case client.TypeTextEntityTypeTextUrl:
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
// non-standard, Pidgin-specific
return markupBraces(entity, []rune{}, []rune(" <"+textURL.Url+">"))
}
return nil, nil
}
// Format wraps sourceText in markup according to entities.
//
// The entities are sorted, merged, clasped and sorted again, then each one is
// converted by entityToMarkup into an opening and a closing insertion (either
// may be nil, in which case it is skipped). The insertions are finally woven
// into the text. Insertion offsets are matched against a counter of UTF-16
// code units, so a code point above U+FFFF advances the counter by two.
// When entities is empty, sourceText is returned as is.
func Format(
	sourceText string,
	entities []*client.TextEntity,
	entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
) string {
	if len(entities) == 0 {
		return sourceText
	}

	prepared := SortEntities(entities)
	prepared = MergeAdjacentEntities(prepared)
	prepared = ClaspDirectives(sourceText, prepared)
	prepared = SortEntities(prepared)

	// ordered receives the insertions in output order; pendingClosers keeps
	// the closing brackets of the entities that are still open
	ordered := make(InsertionStack, 0, len(sourceText))
	pendingClosers := make(InsertionStack, 0, len(sourceText))

	var furthestEnd int32
	for _, entity := range prepared {
		log.Debugf("%#v", entity)
		if entity.Length <= 0 {
			continue
		}

		if entityEnd := entity.Offset + entity.Length; entityEnd > furthestEnd {
			furthestEnd = entityEnd
		}

		// close everything that ends at or before this entity's start
		ordered, pendingClosers = ordered.rebalance(pendingClosers, entity.Offset)

		opener, closer := entityToMarkup(entity)
		if opener != nil {
			ordered = append(ordered, opener)
		}
		if closer != nil {
			pendingClosers = append(pendingClosers, closer)
		}
	}
	// move the closing brackets that are still pending
	ordered, _ = ordered.rebalance(pendingClosers, furthestEnd)

	// weave the insertions into the text
	output := make([]rune, 0, len(sourceText))
	cursor := 0
	var unitIndex int32
	for _, cp := range sourceText {
		for cursor < len(ordered) && ordered[cursor].Offset <= unitIndex {
			output = append(output, ordered[cursor].Runes...)
			cursor++
		}

		output = append(output, cp)

		// a code point above U+FFFF counts as two UTF-16 code units
		if cp > 0x0000ffff {
			unitIndex += 2
		} else {
			unitIndex++
		}
	}
	// whatever is left belongs after the last rune
	for ; cursor < len(ordered); cursor++ {
		output = append(output, ordered[cursor].Runes...)
	}

	return string(output)
}