Convert formatting entities to Markdown
This commit is contained in:
parent
b8fcac6ae2
commit
70383bee12
2
Makefile
2
Makefile
|
@ -4,7 +4,7 @@ all:
|
||||||
go build -o telegabber
|
go build -o telegabber
|
||||||
|
|
||||||
test:
|
test:
|
||||||
go test -v ./config ./ ./telegram ./xmpp/gateway ./persistence
|
go test -v ./config ./ ./telegram ./xmpp/gateway ./persistence ./telegram/formatter
|
||||||
|
|
||||||
lint:
|
lint:
|
||||||
$(GOPATH)/bin/golint ./...
|
$(GOPATH)/bin/golint ./...
|
||||||
|
|
165
telegram/formatter/formatter.go
Normal file
165
telegram/formatter/formatter.go
Normal file
|
@ -0,0 +1,165 @@
|
||||||
|
package formatter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
|
"github.com/zelenin/go-tdlib/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
type (
	// Insertion is a run of runes that has to be spliced into a text
	// at the given position.
	Insertion struct {
		// Offset is the position at which Runes are inserted.
		Offset int32
		// Runes is the text to insert.
		Runes []rune
	}

	// InsertionStack holds a sequence of insertions that is filled
	// either from the start or from the end.
	InsertionStack []*Insertion
)
|
||||||
|
|
||||||
|
// Markdown delimiters, kept as rune slices so they can be appended
// to rune buffers without conversion.
var (
	boldRunes   = []rune("**")
	italicRunes = []rune("_")
	codeRunes   = []rune("\n```\n")
	urlRuneL    = []rune("[")
)
|
||||||
|
|
||||||
|
// rebalance pumps all the values at given offset to current stack (growing
|
||||||
|
// from start) from given stack (growing from end); should be called
|
||||||
|
// before any insertions to the current stack at the given offset
|
||||||
|
func (s InsertionStack) rebalance(s2 InsertionStack, offset int32) (InsertionStack, InsertionStack) {
|
||||||
|
for len(s2) > 0 && s2[len(s2)-1].Offset <= offset {
|
||||||
|
s = append(s, s2[len(s2)-1])
|
||||||
|
s2 = s2[:len(s2)-1]
|
||||||
|
}
|
||||||
|
|
||||||
|
return s, s2
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewIterator is a second order function that sequentially scans and returns
|
||||||
|
// stack elements; starts returning nil when elements are ended
|
||||||
|
func (s InsertionStack) NewIterator() func() *Insertion {
|
||||||
|
i := -1
|
||||||
|
|
||||||
|
return func() *Insertion {
|
||||||
|
i++
|
||||||
|
if i < len(s) {
|
||||||
|
return s[i]
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SortEntities arranges the entities in traversal-ready order
|
||||||
|
func SortEntities(entities []*client.TextEntity) []*client.TextEntity {
|
||||||
|
sortedEntities := make([]*client.TextEntity, len(entities))
|
||||||
|
copy(sortedEntities, entities)
|
||||||
|
|
||||||
|
sort.Slice(sortedEntities, func(i int, j int) bool {
|
||||||
|
entity1 := entities[i]
|
||||||
|
entity2 := entities[j]
|
||||||
|
if entity1.Offset < entity2.Offset {
|
||||||
|
return true
|
||||||
|
} else if entity1.Offset == entity2.Offset {
|
||||||
|
return entity1.Length > entity2.Length
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
})
|
||||||
|
return sortedEntities
|
||||||
|
}
|
||||||
|
|
||||||
|
func markupBraces(entity *client.TextEntity, lbrace, rbrace []rune) (*Insertion, *Insertion) {
|
||||||
|
return &Insertion{
|
||||||
|
Offset: entity.Offset,
|
||||||
|
Runes: lbrace,
|
||||||
|
}, &Insertion{
|
||||||
|
Offset: entity.Offset + entity.Length,
|
||||||
|
Runes: rbrace,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// EntityToMarkdown generates the wrapping Markdown tags
|
||||||
|
func EntityToMarkdown(entity *client.TextEntity) (*Insertion, *Insertion) {
|
||||||
|
switch entity.Type.TextEntityTypeType() {
|
||||||
|
case client.TypeTextEntityTypeBold:
|
||||||
|
return markupBraces(entity, boldRunes, boldRunes)
|
||||||
|
case client.TypeTextEntityTypeItalic:
|
||||||
|
return markupBraces(entity, italicRunes, italicRunes)
|
||||||
|
case client.TypeTextEntityTypeCode, client.TypeTextEntityTypePre:
|
||||||
|
return markupBraces(entity, codeRunes, codeRunes)
|
||||||
|
case client.TypeTextEntityTypePreCode:
|
||||||
|
preCode, _ := entity.Type.(*client.TextEntityTypePreCode)
|
||||||
|
return markupBraces(entity, []rune("\n```"+preCode.Language+"\n"), codeRunes)
|
||||||
|
case client.TypeTextEntityTypeTextUrl:
|
||||||
|
textURL, _ := entity.Type.(*client.TextEntityTypeTextUrl)
|
||||||
|
return markupBraces(entity, urlRuneL, []rune("]("+textURL.Url+")"))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Format traverses an already sorted list of entities and wraps the text in Markdown
|
||||||
|
func Format(
|
||||||
|
sourceText string,
|
||||||
|
entities []*client.TextEntity,
|
||||||
|
entityToMarkup func(*client.TextEntity) (*Insertion, *Insertion),
|
||||||
|
) string {
|
||||||
|
if len(entities) == 0 {
|
||||||
|
return sourceText
|
||||||
|
}
|
||||||
|
|
||||||
|
startStack := make(InsertionStack, 0, len(sourceText))
|
||||||
|
endStack := make(InsertionStack, 0, len(sourceText))
|
||||||
|
|
||||||
|
// convert entities to a stack of brackets
|
||||||
|
var maxEndOffset int32
|
||||||
|
for _, entity := range entities {
|
||||||
|
log.Debugf("%#v", entity)
|
||||||
|
if entity.Length <= 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
endOffset := entity.Offset + entity.Length
|
||||||
|
if endOffset > maxEndOffset {
|
||||||
|
maxEndOffset = endOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
startStack, endStack = startStack.rebalance(endStack, entity.Offset)
|
||||||
|
|
||||||
|
startInsertion, endInsertion := entityToMarkup(entity)
|
||||||
|
if startInsertion != nil {
|
||||||
|
startStack = append(startStack, startInsertion)
|
||||||
|
}
|
||||||
|
if endInsertion != nil {
|
||||||
|
endStack = append(endStack, endInsertion)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// flush the closing brackets that still remain in endStack
|
||||||
|
startStack, endStack = startStack.rebalance(endStack, maxEndOffset)
|
||||||
|
|
||||||
|
// merge brackets into text
|
||||||
|
markupRunes := make([]rune, 0, len(sourceText))
|
||||||
|
|
||||||
|
nextInsertion := startStack.NewIterator()
|
||||||
|
insertion := nextInsertion()
|
||||||
|
var runeI int32
|
||||||
|
|
||||||
|
for _, cp := range sourceText {
|
||||||
|
for insertion != nil && insertion.Offset <= runeI {
|
||||||
|
markupRunes = append(markupRunes, insertion.Runes...)
|
||||||
|
insertion = nextInsertion()
|
||||||
|
}
|
||||||
|
|
||||||
|
markupRunes = append(markupRunes, cp)
|
||||||
|
// skip two UTF-16 code units (not points actually!) if needed
|
||||||
|
if cp > 0x0000ffff {
|
||||||
|
runeI += 2
|
||||||
|
} else {
|
||||||
|
runeI++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for insertion != nil {
|
||||||
|
markupRunes = append(markupRunes, insertion.Runes...)
|
||||||
|
insertion = nextInsertion()
|
||||||
|
}
|
||||||
|
|
||||||
|
return string(markupRunes)
|
||||||
|
}
|
208
telegram/formatter/formatter_test.go
Normal file
208
telegram/formatter/formatter_test.go
Normal file
|
@ -0,0 +1,208 @@
|
||||||
|
package formatter
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/zelenin/go-tdlib/client"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestNoFormatting(t *testing.T) {
|
||||||
|
markup := Format("abc\ndef", []*client.TextEntity{}, EntityToMarkdown)
|
||||||
|
if markup != "abc\ndef" {
|
||||||
|
t.Errorf("No formatting expected, but: %v", markup)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormattingSimple(t *testing.T) {
|
||||||
|
markup := Format("👙🐧🐖", []*client.TextEntity{
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 2,
|
||||||
|
Length: 4,
|
||||||
|
Type: &client.TextEntityTypeBold{},
|
||||||
|
},
|
||||||
|
}, EntityToMarkdown)
|
||||||
|
if markup != "👙**🐧🐖**" {
|
||||||
|
t.Errorf("Wrong simple formatting: %v", markup)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormattingAdjacent(t *testing.T) {
|
||||||
|
markup := Format("a👙🐧🐖", []*client.TextEntity{
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 3,
|
||||||
|
Length: 2,
|
||||||
|
Type: &client.TextEntityTypeItalic{},
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 5,
|
||||||
|
Length: 2,
|
||||||
|
Type: &client.TextEntityTypeTextUrl{
|
||||||
|
Url: "https://narayana.im/",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}, EntityToMarkdown)
|
||||||
|
if markup != "a👙_🐧_[🐖](https://narayana.im/)" {
|
||||||
|
t.Errorf("Wrong adjacent formatting: %v", markup)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestFormattingAdjacentAndNested(t *testing.T) {
|
||||||
|
markup := Format("👙🐧🐖", []*client.TextEntity{
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 0,
|
||||||
|
Length: 4,
|
||||||
|
Type: &client.TextEntityTypePre{},
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 0,
|
||||||
|
Length: 2,
|
||||||
|
Type: &client.TextEntityTypeBold{},
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 4,
|
||||||
|
Length: 2,
|
||||||
|
Type: &client.TextEntityTypeItalic{},
|
||||||
|
},
|
||||||
|
}, EntityToMarkdown)
|
||||||
|
if markup != "\n```\n**👙**🐧\n```\n_🐖_" {
|
||||||
|
t.Errorf("Wrong adjacent&nested formatting: %v", markup)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebalanceTwoZero(t *testing.T) {
|
||||||
|
s1 := InsertionStack{
|
||||||
|
&Insertion{Offset: 7},
|
||||||
|
&Insertion{Offset: 8},
|
||||||
|
}
|
||||||
|
s2 := InsertionStack{}
|
||||||
|
s1, s2 = s1.rebalance(s2, 7)
|
||||||
|
if !(len(s1) == 2 && len(s2) == 0 && s1[0].Offset == 7 && s1[1].Offset == 8) {
|
||||||
|
t.Errorf("Wrong rebalance 2–0: %#v %#v", s1, s2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebalanceNeeded(t *testing.T) {
|
||||||
|
s1 := InsertionStack{
|
||||||
|
&Insertion{Offset: 7},
|
||||||
|
&Insertion{Offset: 8},
|
||||||
|
}
|
||||||
|
s2 := InsertionStack{
|
||||||
|
&Insertion{Offset: 10},
|
||||||
|
&Insertion{Offset: 9},
|
||||||
|
}
|
||||||
|
s1, s2 = s1.rebalance(s2, 9)
|
||||||
|
if !(len(s1) == 3 && len(s2) == 1 &&
|
||||||
|
s1[0].Offset == 7 && s1[1].Offset == 8 && s1[2].Offset == 9 &&
|
||||||
|
s2[0].Offset == 10) {
|
||||||
|
t.Errorf("Wrong rebalance when needed: %#v %#v", s1, s2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebalanceNotNeeded(t *testing.T) {
|
||||||
|
s1 := InsertionStack{
|
||||||
|
&Insertion{Offset: 7},
|
||||||
|
&Insertion{Offset: 8},
|
||||||
|
}
|
||||||
|
s2 := InsertionStack{
|
||||||
|
&Insertion{Offset: 10},
|
||||||
|
&Insertion{Offset: 9},
|
||||||
|
}
|
||||||
|
s1, s2 = s1.rebalance(s2, 8)
|
||||||
|
if !(len(s1) == 2 && len(s2) == 2 &&
|
||||||
|
s1[0].Offset == 7 && s1[1].Offset == 8 &&
|
||||||
|
s2[0].Offset == 10 && s2[1].Offset == 9) {
|
||||||
|
t.Errorf("Wrong rebalance when not needed: %#v %#v", s1, s2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRebalanceLate(t *testing.T) {
|
||||||
|
s1 := InsertionStack{
|
||||||
|
&Insertion{Offset: 7},
|
||||||
|
&Insertion{Offset: 8},
|
||||||
|
}
|
||||||
|
s2 := InsertionStack{
|
||||||
|
&Insertion{Offset: 10},
|
||||||
|
&Insertion{Offset: 9},
|
||||||
|
}
|
||||||
|
s1, s2 = s1.rebalance(s2, 10)
|
||||||
|
if !(len(s1) == 4 && len(s2) == 0 &&
|
||||||
|
s1[0].Offset == 7 && s1[1].Offset == 8 &&
|
||||||
|
s1[2].Offset == 9 && s1[3].Offset == 10) {
|
||||||
|
t.Errorf("Wrong rebalance when late: %#v %#v", s1, s2)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIteratorEmpty(t *testing.T) {
|
||||||
|
s := InsertionStack{}
|
||||||
|
g := s.NewIterator()
|
||||||
|
v := g()
|
||||||
|
if v != nil {
|
||||||
|
t.Errorf("Empty iterator should return nil but returned %#v", v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIterator(t *testing.T) {
|
||||||
|
s := InsertionStack{
|
||||||
|
&Insertion{Offset: 7},
|
||||||
|
&Insertion{Offset: 8},
|
||||||
|
}
|
||||||
|
g := s.NewIterator()
|
||||||
|
v := g()
|
||||||
|
if v == nil || v.Offset != 7 {
|
||||||
|
t.Errorf("Wrong insertion instead of 7: %#v", v)
|
||||||
|
}
|
||||||
|
v = g()
|
||||||
|
if v == nil || v.Offset != 8 {
|
||||||
|
t.Errorf("Wrong insertion instead of 8: %#v", v)
|
||||||
|
}
|
||||||
|
v = g()
|
||||||
|
if v != nil {
|
||||||
|
t.Errorf("nil should be returned after end, %#v instead", v)
|
||||||
|
}
|
||||||
|
v = g()
|
||||||
|
if v != nil {
|
||||||
|
t.Errorf("Further attempts should return nil too, %#v instead", v)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSortEntities(t *testing.T) {
|
||||||
|
entities := []*client.TextEntity{
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 3,
|
||||||
|
Length: 2,
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 5,
|
||||||
|
Length: 2,
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 7,
|
||||||
|
Length: 2,
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 6,
|
||||||
|
Length: 1,
|
||||||
|
},
|
||||||
|
&client.TextEntity{
|
||||||
|
Offset: 5,
|
||||||
|
Length: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
entities = SortEntities(entities)
|
||||||
|
if !(len(entities) == 5 &&
|
||||||
|
entities[0].Offset == 3 && entities[0].Length == 2 &&
|
||||||
|
entities[1].Offset == 5 && entities[1].Length == 2 &&
|
||||||
|
entities[2].Offset == 5 && entities[2].Length == 1 &&
|
||||||
|
entities[3].Offset == 6 && entities[3].Length == 1 &&
|
||||||
|
entities[4].Offset == 7 && entities[4].Length == 2) {
|
||||||
|
t.Errorf("Wrong sorting order: %#v", entities)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestSortEmpty(t *testing.T) {
|
||||||
|
entities := []*client.TextEntity{}
|
||||||
|
entities = SortEntities(entities)
|
||||||
|
if len(entities) != 0 {
|
||||||
|
t.Errorf("Empty entities set sorting error: %#v", entities)
|
||||||
|
}
|
||||||
|
}
|
|
@ -15,6 +15,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"dev.narayana.im/narayana/telegabber/telegram/cache"
|
"dev.narayana.im/narayana/telegabber/telegram/cache"
|
||||||
|
"dev.narayana.im/narayana/telegabber/telegram/formatter"
|
||||||
"dev.narayana.im/narayana/telegabber/xmpp/gateway"
|
"dev.narayana.im/narayana/telegabber/xmpp/gateway"
|
||||||
|
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
@ -281,6 +282,7 @@ func (c *Client) formatContent(file *client.File, filename string) string {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Client) messageToText(message *client.Message) string {
|
func (c *Client) messageToText(message *client.Message) string {
|
||||||
|
markupFunction := formatter.EntityToMarkdown
|
||||||
switch message.Content.MessageContentType() {
|
switch message.Content.MessageContentType() {
|
||||||
case client.TypeMessageSticker:
|
case client.TypeMessageSticker:
|
||||||
sticker, _ := message.Content.(*client.MessageSticker)
|
sticker, _ := message.Content.(*client.MessageSticker)
|
||||||
|
@ -318,27 +320,55 @@ func (c *Client) messageToText(message *client.Message) string {
|
||||||
)
|
)
|
||||||
case client.TypeMessagePhoto:
|
case client.TypeMessagePhoto:
|
||||||
photo, _ := message.Content.(*client.MessagePhoto)
|
photo, _ := message.Content.(*client.MessagePhoto)
|
||||||
return photo.Caption.Text
|
return formatter.Format(
|
||||||
|
photo.Caption.Text,
|
||||||
|
formatter.SortEntities(photo.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageAudio:
|
case client.TypeMessageAudio:
|
||||||
audio, _ := message.Content.(*client.MessageAudio)
|
audio, _ := message.Content.(*client.MessageAudio)
|
||||||
return audio.Caption.Text
|
return formatter.Format(
|
||||||
|
audio.Caption.Text,
|
||||||
|
formatter.SortEntities(audio.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageVideo:
|
case client.TypeMessageVideo:
|
||||||
video, _ := message.Content.(*client.MessageVideo)
|
video, _ := message.Content.(*client.MessageVideo)
|
||||||
return video.Caption.Text
|
return formatter.Format(
|
||||||
|
video.Caption.Text,
|
||||||
|
formatter.SortEntities(video.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageDocument:
|
case client.TypeMessageDocument:
|
||||||
document, _ := message.Content.(*client.MessageDocument)
|
document, _ := message.Content.(*client.MessageDocument)
|
||||||
return document.Caption.Text
|
return formatter.Format(
|
||||||
|
document.Caption.Text,
|
||||||
|
formatter.SortEntities(document.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageText:
|
case client.TypeMessageText:
|
||||||
text, _ := message.Content.(*client.MessageText)
|
text, _ := message.Content.(*client.MessageText)
|
||||||
return text.Text.Text
|
return formatter.Format(
|
||||||
|
text.Text.Text,
|
||||||
|
formatter.SortEntities(text.Text.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageVoiceNote:
|
case client.TypeMessageVoiceNote:
|
||||||
voice, _ := message.Content.(*client.MessageVoiceNote)
|
voice, _ := message.Content.(*client.MessageVoiceNote)
|
||||||
return voice.Caption.Text
|
return formatter.Format(
|
||||||
|
voice.Caption.Text,
|
||||||
|
formatter.SortEntities(voice.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
case client.TypeMessageVideoNote:
|
case client.TypeMessageVideoNote:
|
||||||
return ""
|
return ""
|
||||||
case client.TypeMessageAnimation:
|
case client.TypeMessageAnimation:
|
||||||
animation, _ := message.Content.(*client.MessageAnimation)
|
animation, _ := message.Content.(*client.MessageAnimation)
|
||||||
return animation.Caption.Text
|
return formatter.Format(
|
||||||
|
animation.Caption.Text,
|
||||||
|
formatter.SortEntities(animation.Caption.Entities),
|
||||||
|
markupFunction,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return fmt.Sprintf("unknown message (%s)", message.Content.MessageContentType())
|
return fmt.Sprintf("unknown message (%s)", message.Content.MessageContentType())
|
||||||
|
|
Loading…
Reference in a new issue