Skip to content

Commit 206ed21

Browse files
committed
Make position translation to UTF16 more robust
When diagnostics were emitted for a previous version of the file the positions may no longer be valid Fixes #1002
1 parent 42f16b8 commit 206ed21

File tree

5 files changed

+127
-69
lines changed

5 files changed

+127
-69
lines changed

apps/language_server/lib/language_server/diagnostics.ex

Lines changed: 43 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -291,76 +291,87 @@ defmodule ElixirLS.LanguageServer.Diagnostics do
291291
# https://microsoft.github.io/language-server-protocol/specifications/specification-3-16/#diagnostic
292292

293293
# position is a 1 based line number
294-
# we return a range of trimmed text in that line
295-
defp range(position, source_file)
296-
when is_integer(position) and position >= 1 and not is_nil(source_file) do
294+
# we return a 0 length range at first non whitespace character in line
295+
defp range(line_start, source_file)
296+
when is_integer(line_start) and not is_nil(source_file) do
297297
# line is 1 based
298-
line = position - 1
299-
text = Enum.at(SourceFile.lines(source_file), line) || ""
298+
lines = SourceFile.lines(source_file)
300299

301-
start_idx = String.length(text) - String.length(String.trim_leading(text)) + 1
302-
length = max(String.length(String.trim(text)), 1)
300+
{line_start_lsp, char_start_lsp} =
301+
if line_start > 0 do
302+
case Enum.at(lines, line_start - 1) do
303+
nil ->
304+
# position is outside file range - this will return end of the file
305+
SourceFile.elixir_position_to_lsp(lines, {line_start, 1})
306+
307+
line ->
308+
# find first non whitespace character in line
309+
start_idx = String.length(line) - String.length(String.trim_leading(line)) + 1
310+
{line_start - 1, SourceFile.elixir_character_to_lsp(line, start_idx)}
311+
end
312+
else
313+
# return begin of the file
314+
{0, 0}
315+
end
303316

304317
%{
305318
"start" => %{
306-
"line" => line,
307-
"character" => SourceFile.elixir_character_to_lsp(text, start_idx)
319+
"line" => line_start_lsp,
320+
"character" => char_start_lsp
308321
},
309322
"end" => %{
310-
"line" => line,
311-
"character" => SourceFile.elixir_character_to_lsp(text, start_idx + length)
323+
"line" => line_start_lsp,
324+
"character" => char_start_lsp
312325
}
313326
}
314327
end
315328

316329
# position is a 1 based line number and 0 based character cursor (UTF8)
317330
# we return a 0 length range exactly at that location
318331
defp range({line_start, char_start}, source_file)
319-
when line_start >= 1 and not is_nil(source_file) do
332+
when not is_nil(source_file) do
320333
lines = SourceFile.lines(source_file)
321-
# line is 1 based
322-
start_line = Enum.at(lines, line_start - 1)
323-
# SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here
324-
character = SourceFile.elixir_character_to_lsp(start_line, char_start + 1)
334+
# elixir_position_to_lsp will handle positions outside file range
335+
{line_start_lsp, char_start_lsp} =
336+
SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1})
325337

326338
%{
327339
"start" => %{
328-
"line" => line_start - 1,
329-
"character" => character
340+
"line" => line_start_lsp,
341+
"character" => char_start_lsp
330342
},
331343
"end" => %{
332-
"line" => line_start - 1,
333-
"character" => character
344+
"line" => line_start_lsp,
345+
"character" => char_start_lsp
334346
}
335347
}
336348
end
337349

338350
# position is a range defined by 1 based line numbers and 0 based character cursors (UTF8)
339351
# we return exactly that range
340352
defp range({line_start, char_start, line_end, char_end}, source_file)
341-
when line_start >= 1 and line_end >= 1 and not is_nil(source_file) do
353+
when not is_nil(source_file) do
342354
lines = SourceFile.lines(source_file)
343-
# line is 1 based
344-
start_line = Enum.at(lines, line_start - 1)
345-
end_line = Enum.at(lines, line_end - 1)
355+
# elixir_position_to_lsp will handle positions outside file range
356+
{line_start_lsp, char_start_lsp} =
357+
SourceFile.elixir_position_to_lsp(lines, {line_start, char_start - 1})
346358

347-
# SourceFile.elixir_character_to_lsp assumes char to be 1 based but it's 0 based here
348-
start_char = SourceFile.elixir_character_to_lsp(start_line, char_start + 1)
349-
end_char = SourceFile.elixir_character_to_lsp(end_line, char_end + 1)
359+
{line_end_lsp, char_end_lsp} =
360+
SourceFile.elixir_position_to_lsp(lines, {line_end, char_end - 1})
350361

351362
%{
352363
"start" => %{
353-
"line" => line_start - 1,
354-
"character" => start_char
364+
"line" => line_start_lsp,
365+
"character" => char_start_lsp
355366
},
356367
"end" => %{
357-
"line" => line_end - 1,
358-
"character" => end_char
368+
"line" => line_end_lsp,
369+
"character" => char_end_lsp
359370
}
360371
}
361372
end
362373

363-
# source file is unknown, position is 0 or invalid
374+
# source file is unknown
364375
# we discard any position information as it is meaningless
365376
# unfortunately LSP does not allow `null` range so we need to return something
366377
defp range(_, _) do

apps/language_server/lib/language_server/source_file.ex

Lines changed: 41 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -286,24 +286,29 @@ defmodule ElixirLS.LanguageServer.SourceFile do
286286
utf8_character + 1
287287
end
288288

289-
def lsp_position_to_elixir(_urf8_text, {lsp_line, _lsp_character}) when lsp_line < 0,
289+
def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, _lsp_character}) when lsp_line < 0,
290290
do: {1, 1}
291291

292-
def lsp_position_to_elixir(_urf8_text, {lsp_line, lsp_character}) when lsp_character <= 0,
293-
do: {max(lsp_line + 1, 1), 1}
292+
def lsp_position_to_elixir(_urf8_text_or_lines, {lsp_line, lsp_character})
293+
when lsp_character <= 0,
294+
do: {max(lsp_line + 1, 1), 1}
294295

295-
def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) do
296-
source_file_lines = lines(urf8_text)
297-
total_lines = length(source_file_lines)
296+
def lsp_position_to_elixir(urf8_text, {lsp_line, lsp_character}) when is_binary(urf8_text) do
297+
lsp_position_to_elixir(lines(urf8_text), {lsp_line, lsp_character})
298+
end
299+
300+
def lsp_position_to_elixir([_ | _] = urf8_lines, {lsp_line, lsp_character})
301+
when lsp_line >= 0 do
302+
total_lines = length(urf8_lines)
298303

299304
if lsp_line > total_lines - 1 do
300305
# sanitize to position after last char in last line
301-
{total_lines, String.length(source_file_lines |> Enum.at(total_lines - 1)) + 1}
306+
last_line = Enum.at(urf8_lines, total_lines - 1)
307+
elixir_last_character = String.length(last_line) + 1
308+
{total_lines, elixir_last_character}
302309
else
303-
utf8_character =
304-
source_file_lines
305-
|> Enum.at(lsp_line)
306-
|> lsp_character_to_elixir(lsp_character)
310+
line = Enum.at(urf8_lines, lsp_line)
311+
utf8_character = lsp_character_to_elixir(line, lsp_character)
307312

308313
{lsp_line + 1, utf8_character}
309314
end
@@ -319,17 +324,35 @@ defmodule ElixirLS.LanguageServer.SourceFile do
319324
|> div(2)
320325
end
321326

322-
def elixir_position_to_lsp(_urf8_text, {elixir_line, elixir_character})
327+
def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, _elixir_character})
328+
when elixir_line < 1,
329+
do: {0, 0}
330+
331+
def elixir_position_to_lsp(_urf8_text_or_lines, {elixir_line, elixir_character})
323332
when elixir_character <= 1,
324333
do: {max(elixir_line - 1, 0), 0}
325334

326-
def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character}) do
327-
line =
328-
lines(urf8_text)
329-
|> Enum.at(max(elixir_line - 1, 0))
335+
def elixir_position_to_lsp(urf8_text, {elixir_line, elixir_character})
336+
when is_binary(urf8_text) do
337+
elixir_position_to_lsp(lines(urf8_text), {elixir_line, elixir_character})
338+
end
330339

331-
utf16_character = elixir_character_to_lsp(line || "", elixir_character)
340+
def elixir_position_to_lsp([_ | _] = urf8_lines, {elixir_line, elixir_character})
341+
when elixir_line >= 1 do
342+
total_lines = length(urf8_lines)
332343

333-
{elixir_line - 1, utf16_character}
344+
if elixir_line > total_lines do
345+
# sanitize to position after last char in last line
346+
last_line = Enum.at(urf8_lines, total_lines - 1)
347+
elixir_last_character = String.length(last_line) + 1
348+
349+
utf16_character = elixir_character_to_lsp(last_line, elixir_last_character)
350+
{total_lines - 1, utf16_character}
351+
else
352+
line = Enum.at(urf8_lines, max(elixir_line - 1, 0))
353+
utf16_character = elixir_character_to_lsp(line, elixir_character)
354+
355+
{elixir_line - 1, utf16_character}
356+
end
334357
end
335358
end

apps/language_server/test/dialyzer_test.exs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
5959
%{
6060
"message" => error_message1,
6161
"range" => %{
62-
"end" => %{"character" => 12, "line" => 1},
62+
"end" => %{"character" => 2, "line" => 1},
6363
"start" => %{"character" => 2, "line" => 1}
6464
},
6565
"severity" => 2,
@@ -68,7 +68,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
6868
%{
6969
"message" => error_message2,
7070
"range" => %{
71-
"end" => %{"character" => 17, "line" => 2},
71+
"end" => %{"character" => 4, "line" => 2},
7272
"start" => %{"character" => 4, "line" => 2}
7373
},
7474
"severity" => 2,
@@ -171,7 +171,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
171171
%{
172172
"message" => error_message1,
173173
"range" => %{
174-
"end" => %{"character" => 12, "line" => 1},
174+
"end" => %{"character" => 2, "line" => 1},
175175
"start" => %{"character" => 2, "line" => 1}
176176
},
177177
"severity" => 2,
@@ -180,7 +180,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
180180
%{
181181
"message" => error_message2,
182182
"range" => %{
183-
"end" => %{"character" => 17, "line" => 2},
183+
"end" => %{"character" => 4, "line" => 2},
184184
"start" => %{"character" => 4, "line" => 2}
185185
},
186186
"severity" => 2,
@@ -219,7 +219,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
219219
%{
220220
"message" => error_message1,
221221
"range" => %{
222-
"end" => %{"character" => 12, "line" => 1},
222+
"end" => %{"character" => 2, "line" => 1},
223223
"start" => %{"character" => 2, "line" => 1}
224224
},
225225
"severity" => 2,
@@ -228,7 +228,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
228228
%{
229229
"message" => error_message2,
230230
"range" => %{
231-
"end" => %{"character" => 17, "line" => 2},
231+
"end" => %{"character" => 4, "line" => 2},
232232
"start" => %{"character" => 4, "line" => 2}
233233
},
234234
"severity" => 2,
@@ -257,7 +257,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
257257
%{
258258
"message" => error_message1,
259259
"range" => %{
260-
"end" => %{"character" => 12, "line" => 1},
260+
"end" => %{"character" => 2, "line" => 1},
261261
"start" => %{"character" => 2, "line" => 1}
262262
},
263263
"severity" => 2,
@@ -266,7 +266,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
266266
%{
267267
"message" => _error_message2,
268268
"range" => %{
269-
"end" => %{"character" => 17, "line" => 2},
269+
"end" => %{"character" => 4, "line" => 2},
270270
"start" => %{"character" => 4, "line" => 2}
271271
},
272272
"severity" => 2,
@@ -296,7 +296,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
296296
%{
297297
"message" => error_message1,
298298
"range" => %{
299-
"end" => %{"character" => 22, "line" => 1},
299+
"end" => %{"character" => 2, "line" => 1},
300300
"start" => %{"character" => 2, "line" => 1}
301301
},
302302
"severity" => 2,
@@ -305,7 +305,7 @@ defmodule ElixirLS.LanguageServer.DialyzerTest do
305305
%{
306306
"message" => error_message2,
307307
"range" => %{
308-
"end" => %{"character" => 22, "line" => 2},
308+
"end" => %{"character" => 4, "line" => 2},
309309
"start" => %{"character" => 4, "line" => 2}
310310
},
311311
"severity" => 2,

apps/language_server/test/server_test.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1663,7 +1663,7 @@ defmodule ElixirLS.LanguageServer.ServerTest do
16631663
%{
16641664
"message" => "undefined function does_not_exist/0" <> _,
16651665
"range" => %{
1666-
"end" => %{"character" => 20, "line" => 3},
1666+
"end" => %{"character" => 4, "line" => 3},
16671667
"start" => %{"character" => 4, "line" => 3}
16681668
},
16691669
"severity" => 1,

apps/language_server/test/source_file_test.exs

Lines changed: 32 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,14 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
655655
assert {1, 2} == SourceFile.lsp_position_to_elixir("abcde", {0, 1})
656656
end
657657

658+
test "lsp_position_to_elixir single line utf8" do
659+
assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6})
660+
end
661+
662+
test "lsp_position_to_elixir multi line" do
663+
assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1})
664+
end
665+
658666
# This is not specified in LSP but some clients fail to synchronize text properly
659667
test "lsp_position_to_elixir single line before line start" do
660668
assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde", {0, -1})
@@ -667,14 +675,6 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
667675
assert {1, 1} == SourceFile.lsp_position_to_elixir("", {0, 15})
668676
end
669677

670-
test "lsp_position_to_elixir single line utf8" do
671-
assert {1, 2} == SourceFile.lsp_position_to_elixir("🏳️‍🌈abcde", {0, 6})
672-
end
673-
674-
test "lsp_position_to_elixir multi line" do
675-
assert {2, 2} == SourceFile.lsp_position_to_elixir("abcde\n1234", {1, 1})
676-
end
677-
678678
# This is not specified in LSP but some clients fail to synchronize text properly
679679
test "lsp_position_to_elixir multi line before first line" do
680680
assert {1, 1} == SourceFile.lsp_position_to_elixir("abcde\n1234", {-1, 2})
@@ -705,6 +705,30 @@ defmodule ElixirLS.LanguageServer.SourceFileTest do
705705
assert {1, 1} == SourceFile.elixir_position_to_lsp("abcde\n1234", {2, 2})
706706
end
707707

708+
# This is not specified in LSP but some clients fail to synchronize text properly
709+
test "elixir_position_to_lsp single line before line start" do
710+
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, -1})
711+
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde", {1, 0})
712+
end
713+
714+
# LSP spec 3.17 https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#position
715+
# position character If the character value is greater than the line length it defaults back to the line length
716+
test "elixir_position_to_lsp single line after line end" do
717+
assert {0, 5} == SourceFile.elixir_position_to_lsp("abcde", {1, 15})
718+
assert {0, 0} == SourceFile.elixir_position_to_lsp("", {1, 15})
719+
end
720+
721+
# This is not specified in LSP but some clients fail to synchronize text properly
722+
test "elixir_position_to_lsp multi line before first line" do
723+
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {-1, 2})
724+
assert {0, 0} == SourceFile.elixir_position_to_lsp("abcde\n1234", {0, 2})
725+
end
726+
727+
# This is not specified in LSP but some clients fail to synchronize text properly
728+
test "elixir_position_to_lsp multi line after last line" do
729+
assert {1, 4} == SourceFile.elixir_position_to_lsp("abcde\n1234", {8, 2})
730+
end
731+
708732
test "sanity check" do
709733
text = "aąłsd🏳️‍🌈abcde"
710734

0 commit comments

Comments
 (0)