Convert formatting entities to Markdown
This commit is contained in:
parent
b8fcac6ae2
commit
70383bee12
2
Makefile
2
Makefile
|
@ -4,7 +4,7 @@ all:
|
|||
go build -o telegabber
|
||||
|
||||
test:
|
||||
go test -v ./config ./ ./telegram ./xmpp/gateway ./persistence
|
||||
go test -v ./config ./ ./telegram ./xmpp/gateway ./persistence ./telegram/formatter
|
||||
|
||||
lint:
|
||||
$(GOPATH)/bin/golint ./...
|
||||
|
|
165
telegram/formatter/formatter.go
Normal file
165
telegram/formatter/formatter.go
Normal file
|
@ -0,0 +1,165 @@
|
|||
package formatter
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"github.com/zelenin/go-tdlib/client"
|
||||
)
|
||||
|
||||
// Insertion describes a run of runes that has to be spliced into a text
// at a particular offset.
type Insertion struct {
	// Offset is the position in the source text where Runes are inserted.
	Offset int32
	// Runes holds the markup to insert at Offset.
	Runes []rune
}
|
||||
|
||||
// InsertionStack contains the sequence of insertions
|
||||
// from the start or from the end
|
||||
type InsertionStack []*Insertion
|
||||
|
||||
// Markdown delimiters emitted around formatted spans. urlRuneL only opens
// a link; the closing part is built per entity because it embeds the URL.
var (
	boldRunes   = []rune("**")
	italicRunes = []rune("_")
	codeRunes   = []rune("\n```\n")
	urlRuneL    = []rune("[")
)
|
||||
|
||||
// rebalance pumps all the values at given offset to current stack (growing
|
||||
// from start) from given stack (growing from end); should be called
|
||||
// before any insertions to the current stack at the given offset
|
||||
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
|
||||
for len(s2) > 0 && s2[len(s2)-1].Offset <= offset {
|
||||
s = append(s, s2[len(s2)-1])
|
||||
s2 = s2[:len(s2)-1]
|
||||
}
|
||||
|
||||
return s, s2
|
||||
}
|
||||
|
||||
// NewIterator is a second order function that sequentially scans and returns
|
||||
// stack elements; starts returning nil when elements are ended
|
||||
func (s InsertionStack) NewIterator() func() *Insertion {
|
||||
i := -1
|
||||
|
||||
return func() *Insertion {
|
||||
i++
|
||||
if i < len(s) {
|
||||
return s[i]
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// SortEntities arranges the entities in traversal-ready order
|
||||
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
|
||||
sortedEntities := make([]*client.TextEntity, len(entities))
|
||||
copy(sortedEntities, entities)
|
||||
|
||||
sort.Slice(sortedEntities, func(i int, j int) bool {
|
||||
entity1 := entities[i]
|
||||
entity2 := entities[j]
|
||||
if entity1.Offset < entity2.Offset {
|
||||
return true
|
||||
} else if entity1.Offset == entity2.Offset {
|
||||
return entity1.Length > entity2.Length
|
||||
}
|
||||
return false
|
||||
})
|
||||
return sortedEntities
|
||||
}
|
||||
|
||||
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
|
||||
return &Insertion{
|
||||
Offset: entity.Offset,
|
||||
Runes: lbrace,
|
||||
}, &Insertion{
|
||||
Offset: entity.Offset + entity.Length,
|
||||
Runes: rbrace,
|
||||
}
|
||||
}
|
||||
|
||||
// EntityToMarkdown generates the wrapping Markdown tags
|
||||
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
|
||||
switch entity.Type.TextEntityTypeType() {
|
||||
case client.TypeTextEntityTypeBold:
|
||||
return markupBraces(entity, boldRunes, boldRunes)
|
||||
case client.TypeTextEntityTypeItalic:
|
||||
return markupBraces(entity, italicRunes, italicRunes)
|
||||
case client.TypeTextEntityTypeCode, client.TypeTextEntityTypePre:
|
||||
return markupBraces(entity, codeRunes, codeRunes)
|
||||
case client.TypeTextEntityTypePreCode:
|
||||
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
|
||||
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
|
||||
case client.TypeTextEntityTypeTextUrl:
|
||||
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
|
||||
return markupBraces(entity, urlRuneL, []rune("]("+textURL.Url+")"))
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Format traverses an already sorted list of entities and wraps the text in Markdown
|
||||
func Format(
|
||||
sourceText string,
|
||||
entities []*client.TextEntity,
|
||||
entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
|
||||
) string {
|
||||
if len(entities) == 0 {
|
||||
return sourceText
|
||||
}
|
||||
|
||||
startStack := make(InsertionStack, 0, len(sourceText))
|
||||
endStack := make(InsertionStack, 0, len(sourceText))
|
||||
|
||||
// convert entities to a stack of brackets
|
||||
var maxEndOffset int32
|
||||
for _, entity := range entities {
|
||||
log.Debugf("%#v", entity)
|
||||
if entity.Length <= 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
endOffset := entity.Offset + entity.Length
|
||||
if endOffset > maxEndOffset {
|
||||
maxEndOffset = endOffset
|
||||
}
|
||||
|
||||
startStack, endStack = startStack.rebalance(endStack, entity.Offset)
|
||||
|
||||
startInsertion, endInsertion := entityToMarkup(entity)
|
||||
if startInsertion != nil {
|
||||
startStack = append(startStack, startInsertion)
|
||||
}
|
||||
if endInsertion != nil {
|
||||
endStack = append(endStack, endInsertion)
|
||||
}
|
||||
}
|
||||
// flush the closing brackets that still remain in endStack
|
||||
startStack, endStack = startStack.rebalance(endStack, maxEndOffset)
|
||||
|
||||
// merge brackets into text
|
||||
markupRunes := make([]rune, 0, len(sourceText))
|
||||
|
||||
nextInsertion := startStack.NewIterator()
|
||||
insertion := nextInsertion()
|
||||
var runeI int32
|
||||
|
||||
for _, cp := range sourceText {
|
||||
for insertion != nil && insertion.Offset <= runeI {
|
||||
markupRunes = append(markupRunes, insertion.Runes...)
|
||||
insertion = nextInsertion()
|
||||
}
|
||||
|
||||
markupRunes = append(markupRunes, cp)
|
||||
// skip two UTF-16 code units (not points actually!) if needed
|
||||
if cp > 0x0000ffff {
|
||||
runeI += 2
|
||||
} else {
|
||||
runeI++
|
||||
}
|
||||
}
|
||||
for insertion != nil {
|
||||
markupRunes = append(markupRunes, insertion.Runes...)
|
||||
insertion = nextInsertion()
|
||||
}
|
||||
|
||||
return string(markupRunes)
|
||||
}
|
208
telegram/formatter/formatter_test.go
Normal file
208
telegram/formatter/formatter_test.go
Normal file
|
@ -0,0 +1,208 @@
|
|||
package formatter
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/zelenin/go-tdlib/client"
|
||||
)
|
||||
|
||||
func TestNoFormatting(t *testing.T) {
|
||||
markup := Format("abc\ndef", []*client.TextEntity{}, EntityToMarkdown)
|
||||
if markup != "abc\ndef" {
|
||||
t.Errorf("No formatting expected, but: %v", markup)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormattingSimple(t *testing.T) {
|
||||
markup := Format("👙🐧🐖", []*client.TextEntity{
|
||||
&client.TextEntity{
|
||||
Offset: 2,
|
||||
Length: 4,
|
||||
Type: &client.TextEntityTypeBold{},
|
||||
},
|
||||
}, EntityToMarkdown)
|
||||
if markup != "👙**🐧🐖**" {
|
||||
t.Errorf("Wrong simple formatting: %v", markup)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormattingAdjacent(t *testing.T) {
|
||||
markup := Format("a👙🐧🐖", []*client.TextEntity{
|
||||
&client.TextEntity{
|
||||
Offset: 3,
|
||||
Length: 2,
|
||||
Type: &client.TextEntityTypeItalic{},
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 5,
|
||||
Length: 2,
|
||||
Type: &client.TextEntityTypeTextUrl{
|
||||
Url: "https://narayana.im/",
|
||||
},
|
||||
},
|
||||
}, EntityToMarkdown)
|
||||
if markup != "a👙_🐧_[🐖](https://narayana.im/)" {
|
||||
t.Errorf("Wrong adjacent formatting: %v", markup)
|
||||
}
|
||||
}
|
||||
|
||||
func TestFormattingAdjacentAndNested(t *testing.T) {
|
||||
markup := Format("👙🐧🐖", []*client.TextEntity{
|
||||
&client.TextEntity{
|
||||
Offset: 0,
|
||||
Length: 4,
|
||||
Type: &client.TextEntityTypePre{},
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 0,
|
||||
Length: 2,
|
||||
Type: &client.TextEntityTypeBold{},
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 4,
|
||||
Length: 2,
|
||||
Type: &client.TextEntityTypeItalic{},
|
||||
},
|
||||
}, EntityToMarkdown)
|
||||
if markup != "\n```\n**👙**🐧\n```\n_🐖_" {
|
||||
t.Errorf("Wrong adjacent&nested formatting: %v", markup)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRebalanceTwoZero(t *testing.T) {
|
||||
s1 := InsertionStack{
|
||||
&Insertion{Offset: 7},
|
||||
&Insertion{Offset: 8},
|
||||
}
|
||||
s2 := InsertionStack{}
|
||||
s1, s2 = s1.rebalance(s2, 7)
|
||||
if !(len(s1) == 2 && len(s2) == 0 && s1[0].Offset == 7 && s1[1].Offset == 8) {
|
||||
t.Errorf("Wrong rebalance 2–0: %#v %#v", s1, s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRebalanceNeeded(t *testing.T) {
|
||||
s1 := InsertionStack{
|
||||
&Insertion{Offset: 7},
|
||||
&Insertion{Offset: 8},
|
||||
}
|
||||
s2 := InsertionStack{
|
||||
&Insertion{Offset: 10},
|
||||
&Insertion{Offset: 9},
|
||||
}
|
||||
s1, s2 = s1.rebalance(s2, 9)
|
||||
if !(len(s1) == 3 && len(s2) == 1 &&
|
||||
s1[0].Offset == 7 && s1[1].Offset == 8 && s1[2].Offset == 9 &&
|
||||
s2[0].Offset == 10) {
|
||||
t.Errorf("Wrong rebalance when needed: %#v %#v", s1, s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRebalanceNotNeeded(t *testing.T) {
|
||||
s1 := InsertionStack{
|
||||
&Insertion{Offset: 7},
|
||||
&Insertion{Offset: 8},
|
||||
}
|
||||
s2 := InsertionStack{
|
||||
&Insertion{Offset: 10},
|
||||
&Insertion{Offset: 9},
|
||||
}
|
||||
s1, s2 = s1.rebalance(s2, 8)
|
||||
if !(len(s1) == 2 && len(s2) == 2 &&
|
||||
s1[0].Offset == 7 && s1[1].Offset == 8 &&
|
||||
s2[0].Offset == 10 && s2[1].Offset == 9) {
|
||||
t.Errorf("Wrong rebalance when not needed: %#v %#v", s1, s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRebalanceLate(t *testing.T) {
|
||||
s1 := InsertionStack{
|
||||
&Insertion{Offset: 7},
|
||||
&Insertion{Offset: 8},
|
||||
}
|
||||
s2 := InsertionStack{
|
||||
&Insertion{Offset: 10},
|
||||
&Insertion{Offset: 9},
|
||||
}
|
||||
s1, s2 = s1.rebalance(s2, 10)
|
||||
if !(len(s1) == 4 && len(s2) == 0 &&
|
||||
s1[0].Offset == 7 && s1[1].Offset == 8 &&
|
||||
s1[2].Offset == 9 && s1[3].Offset == 10) {
|
||||
t.Errorf("Wrong rebalance when late: %#v %#v", s1, s2)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIteratorEmpty(t *testing.T) {
|
||||
s := InsertionStack{}
|
||||
g := s.NewIterator()
|
||||
v := g()
|
||||
if v != nil {
|
||||
t.Errorf("Empty iterator should return nil but returned %#v", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIterator(t *testing.T) {
|
||||
s := InsertionStack{
|
||||
&Insertion{Offset: 7},
|
||||
&Insertion{Offset: 8},
|
||||
}
|
||||
g := s.NewIterator()
|
||||
v := g()
|
||||
if v == nil || v.Offset != 7 {
|
||||
t.Errorf("Wrong insertion instead of 7: %#v", v)
|
||||
}
|
||||
v = g()
|
||||
if v == nil || v.Offset != 8 {
|
||||
t.Errorf("Wrong insertion instead of 8: %#v", v)
|
||||
}
|
||||
v = g()
|
||||
if v != nil {
|
||||
t.Errorf("nil should be returned after end, %#v instead", v)
|
||||
}
|
||||
v = g()
|
||||
if v != nil {
|
||||
t.Errorf("Further attempts should return nil too, %#v instead", v)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortEntities(t *testing.T) {
|
||||
entities := []*client.TextEntity{
|
||||
&client.TextEntity{
|
||||
Offset: 3,
|
||||
Length: 2,
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 5,
|
||||
Length: 2,
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 7,
|
||||
Length: 2,
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 6,
|
||||
Length: 1,
|
||||
},
|
||||
&client.TextEntity{
|
||||
Offset: 5,
|
||||
Length: 1,
|
||||
},
|
||||
}
|
||||
entities = SortEntities(entities)
|
||||
if !(len(entities) == 5 &&
|
||||
entities[0].Offset == 3 && entities[0].Length == 2 &&
|
||||
entities[1].Offset == 5 && entities[1].Length == 2 &&
|
||||
entities[2].Offset == 5 && entities[2].Length == 1 &&
|
||||
entities[3].Offset == 6 && entities[3].Length == 1 &&
|
||||
entities[4].Offset == 7 && entities[4].Length == 2) {
|
||||
t.Errorf("Wrong sorting order: %#v", entities)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortEmpty(t *testing.T) {
|
||||
entities := []*client.TextEntity{}
|
||||
entities = SortEntities(entities)
|
||||
if len(entities) != 0 {
|
||||
t.Errorf("Empty entities set sorting error: %#v", entities)
|
||||
}
|
||||
}
|
|
@ -15,6 +15,7 @@ import (
|
|||
"time"
|
||||
|
||||
"dev.narayana.im/narayana/telegabber/telegram/cache"
|
||||
"dev.narayana.im/narayana/telegabber/telegram/formatter"
|
||||
"dev.narayana.im/narayana/telegabber/xmpp/gateway"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
@ -281,6 +282,7 @@ func (c *Client) formatContent(file *client.File, filename string) string {
|
|||
}
|
||||
|
||||
func (c *Client) messageToText(message *client.Message) string {
|
||||
markupFunction := formatter.EntityToMarkdown
|
||||
switch message.Content.MessageContentType() {
|
||||
case client.TypeMessageSticker:
|
||||
sticker, _ := message.Content.(*client.MessageSticker)
|
||||
|
@ -318,27 +320,55 @@ func (c *Client) messageToText(message *client.Message) string {
|
|||
)
|
||||
case client.TypeMessagePhoto:
|
||||
photo, _ := message.Content.(*client.MessagePhoto)
|
||||
return photo.Caption.Text
|
||||
return formatter.Format(
|
||||
photo.Caption.Text,
|
||||
formatter.SortEntities(photo.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageAudio:
|
||||
audio, _ := message.Content.(*client.MessageAudio)
|
||||
return audio.Caption.Text
|
||||
return formatter.Format(
|
||||
audio.Caption.Text,
|
||||
formatter.SortEntities(audio.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageVideo:
|
||||
video, _ := message.Content.(*client.MessageVideo)
|
||||
return video.Caption.Text
|
||||
return formatter.Format(
|
||||
video.Caption.Text,
|
||||
formatter.SortEntities(video.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageDocument:
|
||||
document, _ := message.Content.(*client.MessageDocument)
|
||||
return document.Caption.Text
|
||||
return formatter.Format(
|
||||
document.Caption.Text,
|
||||
formatter.SortEntities(document.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageText:
|
||||
text, _ := message.Content.(*client.MessageText)
|
||||
return text.Text.Text
|
||||
return formatter.Format(
|
||||
text.Text.Text,
|
||||
formatter.SortEntities(text.Text.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageVoiceNote:
|
||||
voice, _ := message.Content.(*client.MessageVoiceNote)
|
||||
return voice.Caption.Text
|
||||
return formatter.Format(
|
||||
voice.Caption.Text,
|
||||
formatter.SortEntities(voice.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
case client.TypeMessageVideoNote:
|
||||
return ""
|
||||
case client.TypeMessageAnimation:
|
||||
animation, _ := message.Content.(*client.MessageAnimation)
|
||||
return animation.Caption.Text
|
||||
return formatter.Format(
|
||||
animation.Caption.Text,
|
||||
formatter.SortEntities(animation.Caption.Entities),
|
||||
markupFunction,
|
||||
)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("unknown message (%s)", message.Content.MessageContentType())
|
||||
|
|
Loading…
Reference in a new issue