From 89b9110fcb54733999813a73c8bd881ec06c6dcb Mon Sep 17 00:00:00 2001 From: Marvin W Date: Mon, 20 Mar 2023 15:37:18 -0600 Subject: [PATCH] Improve history sync - Ensure we fully fetch desired history if possible (previously, duplicates from offline message queue could hinder MAM sync) - Early drop illegal MAM messages so they don't pile up in the pending queue waiting for their query to end (which it never will if they were not requested in first place). Fixes #1386 --- libdino/src/service/history_sync.vala | 65 +++++-------------- .../xep/0313_message_archive_management.vala | 46 ++++++++++--- 2 files changed, 56 insertions(+), 55 deletions(-) diff --git a/libdino/src/service/history_sync.vala b/libdino/src/service/history_sync.vala index e83b3cb4..2444a133 100644 --- a/libdino/src/service/history_sync.vala +++ b/libdino/src/service/history_sync.vala @@ -163,7 +163,7 @@ public class Dino.HistorySync { if (current_row[db.mam_catchup.from_end]) return; debug("[%s] Fetching between ranges %s - %s", mam_server.to_string(), previous_row[db.mam_catchup.to_time].to_string(), current_row[db.mam_catchup.from_time].to_string()); - current_row = yield fetch_between_ranges(account, mam_server, previous_row, current_row); + current_row = yield fetch_between_ranges(account, mam_server, previous_row, current_row, cancellable); if (current_row == null) return; RowOption previous_row_opt = db.mam_catchup.select() @@ -214,13 +214,11 @@ public class Dino.HistorySync { return null; } - // If we get PageResult.Duplicate, we still want to update the db row to the latest message. - // Catchup finished within first page. Update latest db entry. if (latest_row_id != -1 && - page_result.page_result in new PageResult[] { PageResult.TargetReached, PageResult.NoMoreMessages, PageResult.Duplicate }) { + page_result.page_result in new PageResult[] { PageResult.TargetReached, PageResult.NoMoreMessages }) { - if (page_result.stanzas == null || page_result.stanzas.is_empty) return null; + if (page_result.stanzas == null) return null; string latest_mam_id = page_result.query_result.last; long latest_mam_time = (long) mam_times[account][latest_mam_id].to_unix(); @@ -272,7 +270,7 @@ public class Dino.HistorySync { ** Merges the `earlier_range` db row into the `later_range` db row. ** @return The resulting range comprising `earlier_range`, `later_rage`, and everything in between. null if fetching/merge failed. **/ - private async Row? fetch_between_ranges(Account account, Jid mam_server, Row earlier_range, Row later_range) { + private async Row? fetch_between_ranges(Account account, Jid mam_server, Row earlier_range, Row later_range, Cancellable? cancellable = null) { int later_range_id = (int) later_range[db.mam_catchup.id]; DateTime earliest_time = new DateTime.from_unix_utc(earlier_range[db.mam_catchup.to_time]); DateTime latest_time = new DateTime.from_unix_utc(later_range[db.mam_catchup.from_time]); @@ -282,9 +280,9 @@ public class Dino.HistorySync { earliest_time, earlier_range[db.mam_catchup.to_id], latest_time, later_range[db.mam_catchup.from_id]); - PageRequestResult page_result = yield fetch_query(account, query_params, later_range_id); + PageRequestResult page_result = yield fetch_query(account, query_params, later_range_id, cancellable); - if (page_result.page_result == PageResult.TargetReached) { + if (page_result.page_result == PageResult.TargetReached || page_result.page_result == PageResult.NoMoreMessages) { debug("[%s | %s] Merging range %i into %i", account.bare_jid.to_string(), mam_server.to_string(), earlier_range[db.mam_catchup.id], later_range_id); // Merge earlier range into later one. db.mam_catchup.update() @@ -330,9 +328,9 @@ public class Dino.HistorySync { PageRequestResult? page_result = null; do { page_result = yield get_mam_page(account, query_params, page_result, cancellable); - debug("Page result %s %b", page_result.page_result.to_string(), page_result.stanzas == null); + debug("[%s | %s] Page result %s (got stanzas: %s)", account.bare_jid.to_string(), query_params.mam_server.to_string(), page_result.page_result.to_string(), (page_result.stanzas != null).to_string()); - if (page_result.page_result == PageResult.Error || page_result.page_result == PageResult.Cancelled || page_result.stanzas == null) return page_result; + if (page_result.page_result == PageResult.Error || page_result.page_result == PageResult.Cancelled || page_result.query_result.first == null) return page_result; string earliest_mam_id = page_result.query_result.first; long earliest_mam_time = (long)mam_times[account][earliest_mam_id].to_unix(); @@ -357,7 +355,6 @@ public class Dino.HistorySync { MorePagesAvailable, TargetReached, NoMoreMessages, - Duplicate, Error, Cancelled } @@ -399,23 +396,25 @@ public class Dino.HistorySync { string query_id = query_params.query_id; string? after_id = query_params.start_id; + var stanzas_for_query = stanzas.has_key(query_id) && !stanzas[query_id].is_empty ? stanzas[query_id] : null; if (cancellable != null && cancellable.is_cancelled()) { - return new PageRequestResult(PageResult.Cancelled, query_result, stanzas[query_id]); + stanzas.unset(query_id); + return new PageRequestResult(PageResult.Cancelled, query_result, stanzas_for_query); } - if (stanzas.has_key(query_id) && !stanzas[query_id].is_empty) { + if (stanzas_for_query != null) { // Check it we reached our target (from_id) - foreach (Xmpp.MessageStanza message in stanzas[query_id]) { + foreach (Xmpp.MessageStanza message in stanzas_for_query) { Xmpp.MessageArchiveManagement.MessageFlag? mam_message_flag = Xmpp.MessageArchiveManagement.MessageFlag.get_flag(message); if (mam_message_flag != null && mam_message_flag.mam_id != null) { if (after_id != null && mam_message_flag.mam_id == after_id) { // Successfully fetched the whole range yield send_messages_back_into_pipeline(account, query_id, cancellable); if (cancellable != null && cancellable.is_cancelled()) { - return new PageRequestResult(PageResult.Cancelled, query_result, stanzas[query_id]); + return new PageRequestResult(PageResult.Cancelled, query_result, stanzas_for_query); } - return new PageRequestResult(PageResult.TargetReached, query_result, stanzas[query_id]); + return new PageRequestResult(PageResult.TargetReached, query_result, stanzas_for_query); } } } @@ -423,37 +422,9 @@ public class Dino.HistorySync { // Message got filtered out by xmpp-vala, but succesful range fetch nevertheless yield send_messages_back_into_pipeline(account, query_id); if (cancellable != null && cancellable.is_cancelled()) { - return new PageRequestResult(PageResult.Cancelled, query_result, stanzas[query_id]); + return new PageRequestResult(PageResult.Cancelled, query_result, stanzas_for_query); } - return new PageRequestResult(PageResult.TargetReached, query_result, stanzas[query_id]); - } - - // Check for duplicates. Go through all messages and build a db query. - foreach (Xmpp.MessageStanza message in stanzas[query_id]) { - Xmpp.MessageArchiveManagement.MessageFlag? mam_message_flag = Xmpp.MessageArchiveManagement.MessageFlag.get_flag(message); - if (mam_message_flag != null && mam_message_flag.mam_id != null) { - if (selection == null) { - selection = @"$(db.message.server_id) = ?"; - } else { - selection += @" OR $(db.message.server_id) = ?"; - } - selection_args += mam_message_flag.mam_id; - } - } - var duplicates_qry = db.message.select() - .with(db.message.account_id, "=", account.id) - .where(selection, selection_args); - // We don't want messages from different MAM servers to interfere with each other. - if (!query_params.mam_server.equals_bare(account.bare_jid)) { - duplicates_qry.with(db.message.counterpart_id, "=", db.get_jid_id(query_params.mam_server)); - } else { - duplicates_qry.with(db.message.type_, "=", Message.Type.CHAT); - } - var duplicates_count = duplicates_qry.count(); - if (duplicates_count > 0) { - // We got a duplicate although we thought we have to catch up. - // There was a server bug where prosody would send all messages if it didn't know the after ID that was given - page_result = PageResult.Duplicate; + return new PageRequestResult(PageResult.TargetReached, query_result, stanzas_for_query); } } @@ -461,7 +432,7 @@ public class Dino.HistorySync { if (cancellable != null && cancellable.is_cancelled()) { page_result = PageResult.Cancelled; } - return new PageRequestResult(page_result, query_result, stanzas.has_key(query_id) ? stanzas[query_id] : null); + return new PageRequestResult(page_result, query_result, stanzas_for_query); } private async void send_messages_back_into_pipeline(Account account, string query_id, Cancellable? cancellable = null) { diff --git a/xmpp-vala/src/module/xep/0313_message_archive_management.vala b/xmpp-vala/src/module/xep/0313_message_archive_management.vala index 1caa1bc3..2235e118 100644 --- a/xmpp-vala/src/module/xep/0313_message_archive_management.vala +++ b/xmpp-vala/src/module/xep/0313_message_archive_management.vala @@ -11,8 +11,8 @@ public class QueryResult { public bool error { get; set; default=false; } public bool malformed { get; set; default=false; } public bool complete { get; set; default=false; } - public string first { get; set; } - public string last { get; set; } + public string? first { get; set; } + public string? last { get; set; } } public class Module : XmppStreamModule { @@ -65,16 +65,17 @@ public class Module : XmppStreamModule { } StanzaNode query_node = new StanzaNode.build("query", NS_VER(stream)).add_self_xmlns().put_node(data_form.get_submit_node()); - if (queryid != null) { - query_node.put_attribute("queryid", queryid); - } + query_node.put_attribute("queryid", queryid); return query_node; } internal async QueryResult query_archive(XmppStream stream, string ns, Jid? mam_server, StanzaNode query_node, Cancellable? cancellable = null) { - var res = new QueryResult(); - if (stream.get_flag(Flag.IDENTITY) == null) { res.error = true; return res; } + var res = new QueryResult(); + Flag? flag = stream.get_flag(Flag.IDENTITY); + string? query_id = query_node.get_attribute("queryid"); + if (flag == null || query_id == null) { res.error = true; return res; } + flag.active_query_ids.add(query_id); // Build and send query Iq.Stanza iq = new Iq.Stanza.set(query_node) { to=mam_server }; @@ -93,6 +94,11 @@ public class Module : XmppStreamModule { if ((res.first == null) != (res.last == null)) { res.malformed = true; return res; } res.complete = fin_node.get_attribute_bool("complete", false, ns); + Idle.add(() => { + flag.active_query_ids.remove(query_id); + return Source.REMOVE; + }, Priority.LOW); + return res; } @@ -104,7 +110,8 @@ public class ReceivedPipelineListener : StanzaListener { public override string[] after_actions { get { return after_actions_const; } } public override async bool run(XmppStream stream, MessageStanza message) { - if (stream.get_flag(Flag.IDENTITY) == null) return false; + Flag? flag = stream.get_flag(Flag.IDENTITY); + if (flag == null) return false; StanzaNode? message_node = message.stanza.get_deep_subnode(NS_VER(stream) + ":result", StanzaForwarding.NS_URI + ":forwarded", Xmpp.NS_URI + ":message"); if (message_node != null) { @@ -112,6 +119,28 @@ public class ReceivedPipelineListener : StanzaListener { DateTime? datetime = DelayedDelivery.get_time_for_node(forward_node); string? mam_id = message.stanza.get_deep_attribute(NS_VER(stream) + ":result", NS_VER(stream) + ":id"); string? query_id = message.stanza.get_deep_attribute(NS_VER(stream) + ":result", NS_VER(stream) + ":queryid"); + + if (query_id == null) { + warning("Received MAM message without queryid from %s, ignoring", message.from.to_string()); + return true; + } + + if (!flag.active_query_ids.contains(query_id)) { + warning("Received MAM message from %s with unknown query id %s, ignoring", message.from.to_string(), query_id ?? ""); + return true; + } + Jid? inner_from = null; + try { + inner_from = new Jid(message_node.get_attribute("from")); + } catch (InvalidJidError e) { + warning("Received MAM message with invalid from attribute in forwarded message from %s, ignoring", message.from.to_string()); + return true; + } + if (!message.from.equals(stream.get_flag(Bind.Flag.IDENTITY).my_jid.bare_jid) && !message.from.equals_bare(inner_from)) { + warning("Received MAM message from %s illegally impersonating %s, ignoring", message.from.to_string(), inner_from.to_string()); + return true; + } + message.add_flag(new MessageFlag(message.from, datetime, mam_id, query_id)); message.stanza = message_node; @@ -124,6 +153,7 @@ public class ReceivedPipelineListener : StanzaListener { public class Flag : XmppStreamFlag { public static FlagIdentity IDENTITY = new FlagIdentity(NS_URI, "message_archive_management"); public bool cought_up { get; set; default=false; } + public Gee.Set active_query_ids { get; set; default = new HashSet(); } public string ns_ver; public Flag(string ns_ver) {