I am trying to convert from UTF 16 to UTF 8; this is a test program:
with Ada.Text_IO;
with Ada.Strings.UTF_Encoding.Conversions;
use Ada.Text_IO;
use Ada.Strings.Utf_Encoding.Conversions;
use Ada.Strings.UTF_Encoding;
procedure Main is
   --  Round-trip one character through UTF-8 -> UTF-16 -> UTF-8 and
   --  report whether the final encoding equals the initial one.
   Original   : constant UTF_8_String       := "𝄞";
   As_UTF_16  : constant UTF_16_Wide_String := Convert (Original);
   Round_Trip : constant UTF_8_String       := Convert (As_UTF_16);
begin
   Put_Line (if Original = Round_Trip then "OK" else "Bug");
end Main;
With latest GNAT community it prints "Bug". Is this a bug in the implementation of UTF conversion functions or am I doing something wrong here?
Edit: For reference, this issue has been accepted as Bug 95953 / Bug 95959.
As shown here, @DeeDee has identified a bug in the implementation of Convert for UTF-16 to UTF-8. The problem arises in byte three of the four-byte value for code points in the range U+10000 to U+10FFFF, shown here. The source documents the relevant bit fields:
-- Codes in the range 16#10000# - 16#10FFFF#
-- UTF-16: 110110zzzzyyyyyy 110111yyxxxxxxxx
-- UTF-8: 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
-- Note: zzzzz in the output is input zzzz + 1
Byte three is constructed as follows:
Result (Len + 3) :=
Character'Val
(2#10_000000# or Shift_Left (yyyyyyyy and 2#1111#, 4)
or Shift_Right (xxxxxxxx, 6));
While the low four bits of yyyyyyyy are used to construct byte three, the value only needs to be shifted two places left to make room for the top two bits of xxxxxxxx. The correct formulation should be this:
Result (Len + 3) :=
Character'Val
(2#10_000000# or Shift_Left (yyyyyyyy and 2#1111#, 2)
or Shift_Right (xxxxxxxx, 6));
For reference, the complete example below recapitulates the original implementation, with enough additions to study the problem in isolation. The output shows the code point, the expected binary representation of the UTF-8 encoding, the conversion to UTF-16, the incorrect UTF-8 conversion, and the correct UTF-8 conversion.
Codepoint: 16#1D11E#
UTF-8: 4: 2#11110000# 2#10011101# 2#10000100# 2#10011110#
UTF-16: 2: 2#1101100000110100# 2#1101110100011110#
UTF-8: 4: 2#11110000# 2#10011101# 2#10010000# 2#10011110#
UTF-8: 4: 2#11110000# 2#10011101# 2#10000100# 2#10011110#
OK
Code:
-- https://stackoverflow.com/q/62564638/230513
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Ada.Strings.UTF_Encoding; use Ada.Strings.UTF_Encoding;
with Ada.Strings.UTF_Encoding.Conversions;
use Ada.Strings.UTF_Encoding.Conversions;
with Ada.Strings.UTF_Encoding.Wide_Wide_Strings;
use Ada.Strings.UTF_Encoding.Wide_Wide_Strings;
with Interfaces; use Interfaces;
with Unchecked_Conversion;
procedure UTFTest is
-- http://www.fileformat.info/info/unicode/char/1d11e/index.htm
Clef : constant Wide_Wide_String :=
(1 => Wide_Wide_Character'Val (16#1D11E#));
Str_8 : constant UTF_8_String := Encode (Clef);
Str_16 : constant UTF_16_Wide_String := Convert (Str_8);
Str_8_New : constant UTF_8_String := Convert (Str_16);
My_Str_8 : UTF_8_String := Convert (Str_16);
function To_Unsigned_16 is new Unchecked_Conversion (Wide_Character,
Interfaces.Unsigned_16);
procedure Raise_Encoding_Error (Index : Natural) is
   --  Raise Encoding_Error with a message naming the offending position.
   --  The image of a non-negative value starts with a blank, which is
   --  dropped so the number sits directly inside the parentheses.
   Image    : constant String := Natural'Image (Index);
   Position : constant String := Image (Image'First + 1 .. Image'Last);
begin
   raise Encoding_Error with "bad input at Item (" & Position & ")";
end Raise_Encoding_Error;
-- Convert the UTF-16 encoded Item to UTF-8.
--
-- A leading BOM_16 (1) in Item is skipped. When Output_BOM is True the
-- result starts with BOM_8. Encoding_Error is raised (through
-- Raise_Encoding_Error) when a first surrogate is the last input code,
-- when a first surrogate is not followed by a second surrogate, or when
-- a second surrogate appears without a first one.
--
-- This differs from the byte-three formula quoted earlier in this file
-- only in the shift applied to the low four bits of yyyyyyyy: 2 here.
function My_Convert (Item : UTF_16_Wide_String;
Output_BOM : Boolean := False) return UTF_8_String
is
Result : UTF_8_String (1 .. 3 * Item'Length + 3);
-- Worst case is 3 output codes for each input code + BOM space
Len : Natural;
-- Number of result codes stored
Iptr : Natural;
-- Pointer to next input character
C1, C2 : Unsigned_16;
zzzzz : Unsigned_16;
yyyyyyyy : Unsigned_16;
xxxxxxxx : Unsigned_16;
-- Components of double length case
begin
Iptr := Item'First;
-- Skip BOM at start of input
if Item'Length > 0 and then Item (Iptr) = BOM_16 (1) then
Iptr := Iptr + 1;
end if;
-- Generate output BOM if required
if Output_BOM then
Result (1 .. 3) := BOM_8;
Len := 3;
else
Len := 0;
end if;
-- Loop through input
while Iptr <= Item'Last loop
C1 := To_Unsigned_16 (Item (Iptr));
Iptr := Iptr + 1;
-- Codes in the range 16#0000# - 16#007F#
-- UTF-16: 000000000xxxxxxx
-- UTF-8: 0xxxxxxx
if C1 <= 16#007F# then
Result (Len + 1) := Character'Val (C1);
Len := Len + 1;
-- Codes in the range 16#80# - 16#7FF#
-- UTF-16: 00000yyyxxxxxxxx
-- UTF-8: 110yyyxx 10xxxxxx
elsif C1 <= 16#07FF# then
Result (Len + 1) :=
Character'Val (2#110_00000# or Shift_Right (C1, 6));
Result (Len + 2) :=
Character'Val (2#10_000000# or (C1 and 2#00_111111#));
Len := Len + 2;
-- Codes in the range 16#800# - 16#D7FF# or 16#E000# - 16#FFFF#
-- UTF-16: yyyyyyyyxxxxxxxx
-- UTF-8: 1110yyyy 10yyyyxx 10xxxxxx
elsif C1 <= 16#D7FF# or else C1 >= 16#E000# then
Result (Len + 1) :=
Character'Val (2#1110_0000# or Shift_Right (C1, 12));
Result (Len + 2) :=
Character'Val
(2#10_000000# or (Shift_Right (C1, 6) and 2#00_111111#));
Result (Len + 3) :=
Character'Val (2#10_000000# or (C1 and 2#00_111111#));
Len := Len + 3;
-- Codes in the range 16#10000# - 16#10FFFF#
-- UTF-16: 110110zzzzyyyyyy 110111yyxxxxxxxx
-- UTF-8: 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx
-- Note: zzzzz in the output is input zzzz + 1
elsif C1 <= 2#110110_11_11111111# then
-- C1 is a first surrogate: a second code must follow it
if Iptr > Item'Last then
Raise_Encoding_Error (Iptr - 1);
else
C2 := To_Unsigned_16 (Item (Iptr));
Iptr := Iptr + 1;
end if;
-- The second code must carry the 110111 prefix
if (C2 and 2#111111_00_00000000#) /= 2#110111_00_00000000# then
Raise_Encoding_Error (Iptr - 1);
end if;
zzzzz := (Shift_Right (C1, 6) and 2#1111#) + 1;
-- yyyyyyyy: low six bits of C1 followed by bits 9 .. 8 of C2
yyyyyyyy :=
((Shift_Left (C1, 2) and 2#111111_00#) or
(Shift_Right (C2, 8) and 2#000000_11#));
xxxxxxxx := C2 and 2#11111111#;
Result (Len + 1) :=
Character'Val (2#11110_000# or (Shift_Right (zzzzz, 2)));
Result (Len + 2) :=
Character'Val
(2#10_000000# or Shift_Left (zzzzz and 2#11#, 4) or
Shift_Right (yyyyyyyy, 4));
-- Byte three: low four bits of yyyyyyyy shifted left by 2, leaving
-- the two low bits for the top two bits of xxxxxxxx
Result (Len + 3) :=
Character'Val
(2#10_000000# or Shift_Left (yyyyyyyy and 2#1111#, 2) or
Shift_Right (xxxxxxxx, 6));
Result (Len + 4) :=
Character'Val (2#10_000000# or (xxxxxxxx and 2#00_111111#));
Len := Len + 4;
-- Error if input in 16#DC00# - 16#DFFF# (2nd surrogate with no 1st)
else
Raise_Encoding_Error (Iptr - 2);
end if;
end loop;
-- Only the first Len codes of the worst-case buffer were written
return Result (1 .. Len);
end My_Convert;
procedure Show (S : String) is
   --  Print a label, the length of S, then each character code of S in
   --  base 2, all on one line.
begin
   Put(" UTF-8: ");
   Put (Item => S'Length, Width => 1);
   Put (":");
   for Pos in S'Range loop
      Put (Item => Character'Pos (S (Pos)), Width => 12, Base => 2);
   end loop;
   New_Line;
end Show;
procedure Show (S : Wide_String) is
   --  Print a label, the length of S, then each 16-bit code of S in
   --  base 2, all on one line.
begin
   Put("UTF-16: ");
   Put (Item => S'Length, Width => 1);
   Put (":");
   for Pos in S'Range loop
      Put (Item => Wide_Character'Pos (S (Pos)), Width => 20, Base => 2);
   end loop;
   New_Line;
end Show;
begin
Put ("Codepoint:");
Put (Wide_Wide_Character'Pos (Clef (1)), 10, 16);
New_Line;
Show (Str_8);
Show (Str_16);
Show (Str_8_New);
My_Str_8 := My_Convert (Str_16);
Show (My_Str_8);
if Str_8 = My_Str_8 then
Put_Line ("OK");
else
Put_Line ("Bug");
end if;
end UTFTest;
See also Bug 95953 / Bug 95959.
There's a mismatch between the 3rd byte of Str_8 and Str_8_New, which causes the round-trip to fail. This seems to be a bug.
main.adb
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
with Ada.Strings.UTF_Encoding.Conversions;
use Ada.Strings.UTF_Encoding;
use Ada.Strings.UTF_Encoding.Conversions;
procedure Main is
   --  UTF8 encoded Clef (U+1D11E)
   --  (e.g.) https://unicode-table.com/en/1D11E/
   Clef_Bytes : constant UTF_8_String :=
     (Character'Val (16#F0#),
      Character'Val (16#9D#),
      Character'Val (16#84#),
      Character'Val (16#9E#));
   As_UTF_16  : constant UTF_16_Wide_String := Convert (Clef_Bytes);
   Round_Trip : constant UTF_8_String       := Convert (As_UTF_16);

   --  Print every byte of S in base 16, followed by a blank line.
   procedure Dump_Bytes (S : UTF_8_String) is
   begin
      for C of S loop
         Put (Character'Pos (C), Width => 7, Base => 16);
      end loop;
      New_Line (2);
   end Dump_Bytes;
begin
   Dump_Bytes (Clef_Bytes);

   --  The intermediate UTF-16 codes, also in base 16.
   for W of As_UTF_16 loop
      Put (Wide_Character'Pos (W), Width => 9, Base => 16);
   end loop;
   New_Line (2);

   Dump_Bytes (Round_Trip);
end Main;
output
$ ./main
16#F0# 16#9D# 16#84# 16#9E#
16#D834# 16#DD1E#
16#F0# 16#9D# 16#90# 16#9E#
Related
I am trying to prove that my implementation of Selection Sort in Ada is correct. I have tried a few loop invariants, but gnatprove only proves the inner loop's invariant:
-- Body of package Selection: an in-place selection sort on Arr, written
-- with explicit while loops and SPARK loop invariants.
package body Selection with SPARK_Mode is
-- Sort A in place. I is the position being filled; J scans the
-- positions after I and Min_Idx tracks the smallest element seen.
procedure Sort (A : in out Arr) is
I: Integer := A'First;
J: Integer;
Min_Idx: Integer;
Tmp: Integer;
begin
while I < A'Last loop
-- Outer invariant: the slice up to and including I is sorted.
-- NOTE(review): it does not relate A (A'First .. I) to the
-- elements after I, which may be why the prover cannot show it
-- is preserved.
pragma Loop_Invariant
(Sorted( A (A'First .. I) ));
Min_Idx := I;
J := I + 1;
while J <= A'Last loop
if A (J) < A (Min_Idx) then
Min_Idx := J;
end if;
-- Inner invariant: A (Min_Idx) is a minimum of A (I .. J).
pragma Loop_Invariant
(for all Index in I .. J => (A (Min_Idx) <= A (Index)));
J := J + 1;
end loop;
-- Swap the minimum found into position I.
Tmp := A (Min_Idx);
A (Min_Idx) := A (I);
A (I) := Tmp;
I := I + 1;
end loop;
end Sort;
end Selection;
-- Specification of package Selection: an integer array type, a ghost
-- predicate Sorted, and the Sort procedure whose postcondition is Sorted.
package Selection with SPARK_Mode is
type Arr is array (Integer range <>) of Integer;
-- True when every element of A is <= its successor. The precondition
-- keeps A'Last - 1 from overflowing.
function Sorted (A : Arr) return Boolean
is (for all I in A'First .. A'Last - 1 => A(I) <= A(I + 1))
with
Ghost,
Pre => A'Last > Integer'First;
-- Sort A in place. Both bounds of A must lie strictly inside the
-- range of Integer.
procedure Sort (A : in out Arr)
with
Pre => A'First in Integer'First + 1 .. Integer'Last - 1 and
A'Last in Integer'First + 1 .. Integer'Last - 1,
Post => Sorted (A);
end Selection;
Gnatprove tells me
selection.adb:15:14: medium: loop invariant might not be preserved by an arbitrary iteration, cannot prove Sorted( A (A'First..I)) (e.g. when A = (-1 => 0, 0 => 0, others => 1) and A'First = -1)
Do you have any ideas how to solve this problem?
I reworked the routine a little bit, added two loop invariants to the outer loop and moved all of them to the end of the loop. The two additional loop invariants state that the element being processed is always greater than or equal to those that have already been processed and less than or equal to those yet to be processed.
I also changed the Sorted ghost function / predicate to only apply the quantified expression to arrays with length greater than 1. This is to prevent problems with overflow. For arrays of length 0 or 1, the function returns True by definition as (if False then <bool_expr>) is True (or vacuously true, if I remember correctly).
All VCs can be discharged/proved with gnatprove that ships with GNAT/SPARK CE 2020 at level 1:
$ gnatprove -Pdefault.gpr -j0 --report=all --level=1
selection.ads
-- Specification of package Selection: an integer array type, a ghost
-- predicate Sorted, and a Sort procedure with no precondition.
package Selection with SPARK_Mode is
type Arr is array (Integer range <>) of Integer;
-- True when A has at most one element, or when every element from
-- A'First + 1 onwards is >= its predecessor. Indexing with I - 1
-- starting at A'First + 1 avoids computing A'Last - 1.
function Sorted (A : Arr) return Boolean is
(if A'Length > 1 then
(for all I in A'First + 1 .. A'Last => A (I - 1) <= A (I)))
with Ghost;
-- Sort A in place; on return Sorted (A) holds.
procedure Sort (A : in out Arr)
with Post => Sorted (A);
end Selection;
selection.adb
-- Body of package Selection: an in-place selection sort annotated with
-- SPARK loop invariants.
package body Selection with SPARK_Mode is
----------
-- Sort --
----------
-- For each position I, find the index M of a minimum of A (I .. A'Last)
-- and swap it into position I. Arrays with fewer than two elements are
-- left untouched.
procedure Sort (A : in out Arr) is
M : Integer;
begin
if A'Length > 1 then
for I in A'First .. A'Last - 1 loop
M := I;
for J in I + 1 .. A'Last loop
-- "<=" moves M to the last of several equal minima.
if A (J) <= A (M) then
M := J;
end if;
-- M stays within the scanned range and indexes a minimum of it.
pragma Loop_Invariant (M in I .. J);
pragma Loop_Invariant (for all K in I .. J => A (M) <= A (K));
end loop;
-- Swap A (I) and A (M).
declare
T : constant Integer := A (I);
begin
A (I) := A (M);
A (M) := T;
end;
-- Linear incremental sorting in ascending order.
-- A (I) is >= everything before it and <= everything after it,
-- and the slice up to I is sorted.
pragma Loop_Invariant (for all K in A'First .. I => A (K) <= A (I));
pragma Loop_Invariant (for all K in I .. A'Last => A (I) <= A (K));
pragma Loop_Invariant (Sorted (A (A'First .. I)));
end loop;
end if;
end Sort;
end Selection;
I turn to Stack Overflow yet again. Having gotten help here previously, I hope to be received in an equally friendly way once more. I have an assignment where I need to draw a flag (including a box-like shape around it and a V-shape of crosses in its midst) in Ada. I've managed to make the box and roughly half of the crosses. Can anyone clue me in as to how one most easily fills in the remainder of the crosses?
Its supposed to be a V-shape, like this:
+ +
+ +
+
etc
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
procedure exercise2 is
   --  Draw a boxed flag containing a V of '+' characters. The number of
   --  cross rows is read from standard input.
   subtype Cross_Rows is Integer range 2..80;
   Rows       : Cross_Rows;
   --  Flag_Width holds Rows * 2 + 4, which reaches 164 for Rows = 80. It
   --  must therefore not be constrained to Cross_Rows: with that subtype
   --  the assignment raised Constraint_Error for every Rows above 38.
   Flag_Width : Positive;
   Left       : Positive;
   Right      : Positive;

   --  Prompt for and read the number of cross rows.
   procedure Row_Get (Rows: out Cross_Rows) is
   begin
      Put("Enter the number of cross rows (min is 3): ");
      Get(Rows);
      Skip_Line;
   end Row_Get;

   --  Set Flag_Width from Rows and print the top border.
   procedure Print_Top (Rows: in Cross_Rows) is
   begin
      Flag_Width := (Rows * 2) + 4;
      Put("+");
      for Col in 1..Flag_Width-3 loop
         Put("-");
      end loop;
      Put("+");
      New_Line;
   end Print_Top;

   --  Print the cross rows. Left moves right and Right moves left by one
   --  column per row. Uses the Flag_Width set by Print_Top.
   procedure Print_Middle (Rows: in Cross_Rows) is
   begin
      Left := 1;
      Right := Flag_Width - 5;
      for R in 1..Rows loop
         Put("! ");
         for C in 1..Flag_Width - 4 loop
            if C = Left or else C = Right then
               Put("+");
            else
               Put(" ");
            end if;
         end loop;
         Left := Left + 1;
         Right := Right - 1;
         Put_Line("!");
      end loop;
   end Print_Middle;

   --  Set Flag_Width from Rows and print the bottom border.
   procedure Print_Bottom (Rows: in Cross_Rows) is
   begin
      Flag_Width := (Rows * 2) + 4;
      Put("+");
      for C in 1..Flag_Width-3 loop
         Put("-");
      end loop;
      Put_Line("+");
   end Print_Bottom;
begin
   Row_Get(Rows);
   Print_Top(Rows);
   Print_Middle(Rows);
   Print_Bottom(Rows);
end exercise2;
EDIT: Thanks to Jim Rogers I managed to edit my program to draw the flag. Unfortunately it's not exactly the way it's meant to be, as the top crosses are supposed to touch the sides and not be spaced like they are now. Additionally, the main program and the subprograms aren't allowed to be more than 15 lines each, so I compartmentalized them.
The smallest flag is supposed to look like this. I'll try to work with his code to achieve this. But any help is of value! :)
n=1
+---+
!+ +!
! + !
+---+
n=2
+-----+
!+ +!
! + + !
! + !
+-----+
You need to keep track of the left and right columns for the '+' characters, increasing the left column position and decreasing the right column position with each iteration of your loop for printing the crosses.
The following program works for any number of rows of crosses from 3 to 80.
with Ada.Text_IO; use Ada.Text_IO;
with Ada.Integer_Text_IO; use Ada.Integer_Text_IO;
procedure Main is
   --  Draw a flag bordered by '-' characters that contains a V of '+'
   --  characters. The number of cross rows is read from standard input.
   subtype Cross_Rows is Integer range 3..80;
   Rows       : Cross_Rows;
   --  Flag_Width holds Rows * 2 + 4, which reaches 164 for Rows = 80. It
   --  must therefore not be constrained to Cross_Rows: with that subtype
   --  the assignment raised Constraint_Error for every Rows above 38.
   Flag_Width : Positive;
   Left       : Positive;
   Right      : Positive;
begin
   Put("Enter the number of cross rows (minimum is 3): ");
   Get(Rows);
   Skip_Line;
   Flag_Width := (Rows * 2) + 4;
   -- Print top row of flag boundary
   for Col in 1..Flag_Width loop
      Put("-");
   end loop;
   Put("-");
   New_Line;
   -- Print empty row below top flag boundary
   Put("- ");
   for C in 3..Flag_Width - 2 loop
      Put(" ");
   end loop;
   Put_Line(" -");
   -- Print crosses: Left moves right and Right moves left by one column
   -- per row
   Left := 1;
   Right := Flag_Width - 5;
   for R in 1..Rows loop
      Put("- ");
      for C in 1..Flag_Width - 4 loop
         if C = Left or else C = Right then
            Put("+");
         else
            Put(" ");
         end if;
      end loop;
      Left := Left + 1;
      Right := Right - 1;
      Put_Line(" -");
   end loop;
   -- Print bottom flag rows
   Put("- ");
   for C in 3..Flag_Width - 2 loop
      Put(" ");
   end loop;
   Put_Line(" -");
   for C in 1..Flag_Width loop
      Put("-");
   end loop;
   Put_Line("-");
end Main;
Example output is:
Enter the number of cross rows (minimum is 3): 7
-------------------
- -
- + + -
- + + -
- + + -
- + + -
- + + -
- + + -
- + -
- -
-------------------
Another approach uses the Set_Col procedure from Ada.Text_Io. Set_Col set the cursor to the specified column number in the current output line. For example, if the cursor starts at position 1 and you call Set_Col(10) the procedure will output 9 blank characters and set the column number to 10. You can then begin writing your non-blank output at column 10.
with Ada.Text_Io; use Ada.Text_IO;
with Ada.Integer_Text_Io; use Ada.Integer_Text_IO;
procedure V_columns is
   --  Draw a bordered flag with a V of '+' characters, positioning every
   --  mark with Set_Col instead of printing the blanks one by one.
   subtype Cross_Rows is Integer range 3..80;
   Row_Count : Cross_Rows;
   Width     : Positive;
   Left_Col  : Positive;
   Right_Col : Positive;

   --  Print one full-width line of '-' characters.
   procedure Horizontal_Border is
   begin
      for Col in 1..Width loop
         Put("-");
      end loop;
      New_Line;
   end Horizontal_Border;

   --  Print a row that holds only the two side bars.
   procedure Blank_Row is
   begin
      Set_Col(1);
      Put("|");
      Set_Col(Positive_Count(Width));
      Put_Line("|");
   end Blank_Row;
begin
   Put("Enter the number of cross rows (minimum is 3): ");
   Get(Row_Count);
   Skip_Line;
   Width := (Row_Count * 2) + 4;

   Horizontal_Border;
   Blank_Row;

   --  Cross rows: the two columns approach each other by one per row.
   Left_Col  := 3;
   Right_Col := Width - 3;
   for R in 1..Row_Count loop
      Set_Col(1);
      Put("|");
      --  The left cross is printed only while it is strictly left of
      --  the right one; the right cross is printed on every row.
      if Left_Col < Right_Col then
         Set_Col(Positive_Count(Left_Col));
         Put("+");
      end if;
      Set_Col(Positive_Count(Right_Col));
      Put("+");
      Set_Col(Positive_Count(Width));
      Put_Line("|");
      Left_Col  := Left_Col + 1;
      Right_Col := Right_Col - 1;
   end loop;

   Blank_Row;
   Horizontal_Border;
end V_columns;
The output of the program is:
Enter the number of cross rows (minimum is 3): 7
------------------
| |
| + + |
| + + |
| + + |
| + + |
| + + |
| + + |
| + |
| |
------------------
You could also choose an approach in which the definition of the pattern (here: a flag) and the output mechanism are almost completely decoupled. This approach also allows you to parallelize the flag rendering in case you need to render really, really huge flags ;-):
main.adb
with Ada.Text_IO; use Ada.Text_IO;
procedure Main is
   --  Render a bordered flag with a V of '+' characters by asking a pure
   --  function which character belongs at each screen position.
   N      : constant := 2;
   Width  : constant := 3 + 2 * N;
   Height : constant := 3 + 1 * N;
   type Screen_X is new Natural range 0 .. Width - 1;
   type Screen_Y is new Natural range 0 .. Height - 1;

   -------------
   -- Pattern --
   -------------
   --  Return the character to draw at column X, row Y.
   function Pattern (X : Screen_X; Y : Screen_Y) return Character is
      --  Tip of the V: the middle column, one row above the bottom border.
      Apex_X : constant := (Screen_X'Last + Screen_X'First) / 2;
      Apex_Y : constant := Screen_Y'Last - 1;

      On_Side_Edge : constant Boolean :=
        X in Screen_X'First | Screen_X'Last;
      On_Flat_Edge : constant Boolean :=
        Y in Screen_Y'First | Screen_Y'Last;
      --  Points of the V satisfy  abs (X - Apex_X) + (Y - Apex_Y) = 0.
      On_V : constant Boolean :=
        Integer (abs (X - Apex_X)) + Integer (Y - Apex_Y) = 0;
   begin
      if On_Side_Edge then
         --  Corners take precedence over the plain side bar.
         return (if On_Flat_Edge then '+' else '!');
      elsif On_Flat_Edge then
         return '-';
      end if;
      return (if On_V then '+' else ' ');
   end Pattern;
begin
   --  The Render loop.
   for Row in Screen_Y loop
      for Col in Screen_X loop
         Put (Pattern (Col, Row));
      end loop;
      New_Line;
   end loop;
end Main;
output (N = 1)
$ ./main
+---+
!+ +!
! + !
+---+
output (N = 2)
$ ./main
+-----+
!+ +!
! + + !
! + !
+-----+
I have a function that should return the count of Islands found.
I name this function Count_Islands that takes in a parameter of
Map_Array of type Map, of which Map is an array of Islands.
Islands is an enumerator type with set of Land, Water.
I have the function specification in the .ads and the body in the
.adb
The problem I face now is how to prove that my function
Count_Islands'Result will be less than (X * Y)
I have tried: with post => Count_Islands'Result < X * Y
-- Whenever I ran prove all I got: medium: postcondition might
fail cannot prove Count_Islands'Result < X * Y
Function in .ads:
-- Declaration of Count_Islands: requires a non-empty Map_Array and
-- promises a result strictly below X * Y.
-- NOTE(review): X and Y are declared outside this excerpt; presumably
-- they are the map dimensions - confirm.
function Count_Islands(Map_Array : Map)
return Integer with Pre => Map_Array'Length /= 0,
Post => Count_Islands'Result < X * Y;
Function in .adb:
-- Scan every (i, j) of X_Range x Y_Range. Each Land cell not yet marked
-- in Visited_Array increments Count and replaces Visited_Array with the
-- result of Visit_Islands. Returns Count.
-- NOTE(review): X_Range, Y_Range, Visited and Visit_Islands are declared
-- outside this excerpt; presumably Visit_Islands marks the whole island
-- containing (i, j) - confirm.
function Count_Islands(Map_Array : Map) return Integer
is
Visited_Array : Visited := (others => (others=> False));
Count : Integer := 0;
begin
-- An empty map has no islands.
if (Map_Array'Length = 0)then
return 0;
end if;
for i in X_Range loop
for j in Y_Range loop
if (Map_Array(i, j) = Land and then not Visited_Array(i,j)) then
Visited_Array := Visit_Islands(Map_Array, i, j,Visited_Array);
Count := Count + 1;
end if;
end loop;
end loop;
return Count;
end Count_Islands;
In a matrix of 4 * 5 for instance,i.e my X = 4 And Y = 5:
I expect the output result of an Islands(Lands) found to be 1 which is less than 4 * 5. But GNATprove cannot prove my initial code to analyze that,using Post => Count_Islands'Result < X * Y;
Is there any better way to prove this arithmetic? Thanks for your help.
As the example is not complete, I took the liberty to change it a little bit. You can prove the post condition by adding loop invariants. The program below proves in GNAT CE 2019:
main.adb
-- Self-contained SPARK example: Count_Islands with a postcondition that
-- bounds the result by the number of map cells, and the loop invariants
-- that support it.
procedure Main with SPARK_Mode is
-- Limit the range of the array indices in order to prevent
-- problems with overflow, i.e.:
--
-- Pos'Last * Pos'Last <= Natural'Last
--
-- Hence, as Natural'Last = 2**31 - 1,
--
-- Pos'Last <= Sqrt (2**31 - 1) =approx. 46340
--
-- If Pos'Last >= 46341, then overflow problems might occur.
subtype Pos is Positive range 1 .. 46340;
type Map_Item is (Water, Land);
type Map is
array (Pos range <>, Pos range <>) of Map_Item;
type Visited is
array (Pos range <>, Pos range <>) of Boolean;
-- Count the Land cells of Map_Array that are not yet marked visited.
-- The result never exceeds the number of cells in the map.
function Count_Islands (Map_Array : Map) return Natural with
Post => Count_Islands'Result <= Map_Array'Length (1) * Map_Array'Length (2);
-------------------
-- Count_Islands --
-------------------
function Count_Islands (Map_Array : Map) return Natural is
Visited_Array : Visited (Map_Array'Range (1), Map_Array'Range (2)) :=
(others => (others => False));
Count : Natural := 0;
begin
for I in Map_Array'Range (1) loop
-- Before row I, Count is at most the number of cells in the rows
-- already processed.
pragma Loop_Invariant
(Count <= (I - Map_Array'First (1)) * Map_Array'Length (2));
for J in Map_Array'Range (2) loop
-- Within a row, Count has grown by at most the number of
-- columns already processed.
pragma Loop_Invariant
(Count - Count'Loop_Entry <= J - Map_Array'First (2));
if Map_Array(I, J) = Land and then not Visited_Array(I, J) then
Visited_Array (I, J) := True; -- Simplified
Count := Count + 1;
end if;
end loop;
end loop;
return Count;
end Count_Islands;
begin
null;
end Main;
I am trying to find the greatest common factor for some numbers that I have put into a table. So far I have the function that is supposed to calculate the gcf
-- Greatest common divisor of x and y, computed with the iterative
-- Euclidean algorithm.
--
-- The result is never negative; gcd(n, 0) = gcd(0, n) = ABS(n) and
-- gcd(0, 0) = 0. If either argument is NULL the loop is not entered and
-- ABS(x) is returned. The earlier recursive version never terminated for
-- a zero argument (MOD(x, 0) returns x), for mixed-sign arguments, or
-- for NULL.
CREATE OR REPLACE FUNCTION gcd (x INTEGER, y INTEGER) RETURN INTEGER AS
  a INTEGER := ABS(x);
  b INTEGER := ABS(y);
  r INTEGER;
BEGIN
  WHILE b <> 0 LOOP
    r := MOD(a, b);
    a := b;
    b := r;
  END LOOP;
  RETURN a;
END;
and here I create and randomly populate my table
-- Recreate the NUMBERS table and fill it with 10 random values.
-- The DROP now names the same table that is created below, and the
-- column is called COL_VAL because NUMBER is a reserved word and cannot
-- be used as a column name.
DROP TABLE numbers
/
CREATE TABLE numbers (col_val NUMBER(3) NOT NULL)
/
set serveroutput on
DECLARE
  lv_number NUMBER(3);
BEGIN
  FOR i IN 1 .. 10 LOOP
    -- dbms_random.value returns a fractional value; assigning it to a
    -- NUMBER(3) variable rounds it to a whole number.
    lv_number := dbms_random.value(20,100);
    insert into numbers values(lv_number);
  end loop;
  commit;
END;
/
How can I integrate the gcf into my code? I want to display the numbers followed by their gcf.
I am not sure about your gcd function. It does not seem to work for me. There are many on the web. This is one of them:
-- Greatest common divisor of two positive integers.
--
-- Uses the remainder form of the Euclidean algorithm, so the number of
-- iterations grows with the number of digits of the arguments rather
-- than with their magnitude (repeated subtraction needs about two
-- billion iterations for find_gcd(2147483647, 1)).
--
-- If either argument is NULL, p_n1 is returned unchanged, as before.
CREATE OR REPLACE FUNCTION find_gcd (
  p_n1 IN POSITIVE
, p_n2 IN POSITIVE
)
RETURN POSITIVE
IS
  -- PLS_INTEGER rather than POSITIVE: the remainder becomes 0 on the
  -- final step, which a POSITIVE variable cannot hold.
  l_a PLS_INTEGER := p_n1;
  l_b PLS_INTEGER := p_n2;
  l_r PLS_INTEGER;
BEGIN
  IF p_n1 IS NULL OR p_n2 IS NULL THEN
    RETURN p_n1;
  END IF;
  WHILE l_b <> 0
  LOOP
    l_r := MOD(l_a, l_b);
    l_a := l_b;
    l_b := l_r;
  END LOOP;
  RETURN (l_a);
END find_gcd;
/
You can simply amend your PL/SQL block to call the gcd function and print out the results (I here assumed you want to find the gcd for each number and the following number in your table, so I used LEAD function):
-- Insert 10 random values into NUMBERS, then print the GCD of each value
-- and the value that follows it (in rowid order).
DECLARE
lv_number NUMBER(3);
lv_gcd INTEGER;
BEGIN
FOR i IN 1 .. 10 LOOP
lv_number:=dbms_random.value(20,100);
insert into numbers values(lv_number);
end loop;
commit;
FOR i in (select COL_VAL, lead(COL_VAL) over (order by rowid) nxt_val from numbers)
LOOP
-- LEAD yields NULL for the last row, which has no successor; skip it
-- instead of printing a line with an empty second operand.
IF i.nxt_val IS NOT NULL THEN
lv_gcd := find_gcd(i.COL_VAL, i.nxt_val);
DBMS_OUTPUT.PUT_LINE('GCD for '||TO_CHAR(i.COL_VAL)||' and '|| TO_CHAR(i.nxt_val) ||' is '||TO_CHAR(lv_gcd));
END IF;
END LOOP;
END;
/
I am trying to compute a Log Likelihood score for occurrence of pairs of words in text and am getting the same anomalous results in my Delphi implementation which I've derived from Java and Python sources found online. Ted Dunning who published on this source in 1993 gives these results for one particular pair:
K11 (AB, ie joint frequency) = 110,
K12 (word A without B nearby) = 2442,
K21 (B without A nearby) = 111
K22 (number of words other than A or B in the text) = 29114
and gives the desired result as 270.72
Dunning also gives an implementation in R at
http://tdunning.blogspot.co.uk/2008/03/surprise-and-coincidence.html
Computing the log-likelihood ratio score (also known as G2) is very
simple, LLR = 2 sum(k) (H(k) - H(rowSums(k)) - H(colSums(k)))
where H is Shannon's entropy, computed as the sum of (k_ij / sum(k)) log (k_ij / sum(k)) . In R, this function is defined as
H = function(k) {N = sum(k) ; return (sum(k/N * log(k/N + (k==0))))}
but I do not know R and am unsure how to translate that to Pascal.
My translation attempts include these functions
// Returns Ln(x) for positive x and 0 for x <= 0 (avoids Ln of a
// non-positive value).
// NOTE(review): the corrected LnOK shown later in this file returns
// x * Ln(x); this version is the one that yields the anomalous score.
function LnOK(x : integer): extended;
begin
if x<=0 then Result :=0
else Result := Ln(x);
end;
// LnOK of the sum of two counts, minus LnOK of each count.
function Entropy2(a, b: Integer): extended;
var
  whole, partA, partB: extended;
begin
  whole := LnOK(a + b);
  partA := LnOK(a);
  partB := LnOK(b);
  Result := whole - partA - partB;
end;
// LnOK of the sum of four counts, minus LnOK of each count.
function Entropy4(a, b, c, d: Integer): extended;
var
  whole: extended;
begin
  whole := LnOK(a + b + c + d);
  Result := whole - LnOK(a) - LnOK(b) - LnOK(c) - LnOK(d);
end;
// Log-likelihood score for a word pair.
//   f1, f2       - frequencies of the two words
//   joint        - frequency of the two words together
//   total_tokens - number of tokens in the text
// Builds the 2x2 contingency counts, then returns
// 2 * (row entropy + column entropy - matrix entropy), or 0 when that
// difference is negative.
function Log_likelihood_from_Java(f1, f2, joint, total_tokens: Integer):
single;
var
  k11, k12, k21, k22: Integer;
  rowTop, rowBottom, colLeft, colRight: Integer;
  marginals, cells: extended;
begin
  // Contingency table cells.
  k11 := joint;
  k12 := f2 - joint;
  k21 := f1 - joint;
  k22 := total_tokens - f1 - f2 + joint;

  // Row and column totals.
  rowTop := k11 + k12;
  rowBottom := k21 + k22;
  colLeft := k11 + k21;
  colRight := k12 + k22;

  marginals := Entropy2(rowTop, rowBottom) + Entropy2(colLeft, colRight);
  cells := Entropy4(k11, k12, k21, k22);

  if marginals < cells then
    Result := 0.0 // round off error
  else
    Result := 2.0 * (marginals - cells);
end;
The above returns 7.9419 instead of the desired 270.72 when it's called like this:
Log_likelihood_from_Java(2552, 221, 110, 31777);
Grateful for help!
I've found the issue in the translation of the LnOk function which should be as follows:
// Returns x * Ln(x), with the value at x = 0 defined as 0.
function LnOK(x: Integer): Extended;
begin
  Result := 0;
  if x <> 0 then
    Result := x * Ln(x);
end;
Off topic
As a side note if I'm allowed, just to improve the coding style, you might prefer to overload the Entropy functions instead of calling them with different names:
// Two-count overload: LnOK of the sum minus LnOK of each count.
function Entropy(a, b: Integer): Extended; overload;
var
  whole: Extended;
begin
  whole := LnOK(a + b);
  Result := whole - LnOK(a) - LnOK(b);
end;
// Four-count overload: LnOK of the sum minus LnOK of each count.
function Entropy(a, b, c, d: Integer): Extended; overload;
var
  whole: Extended;
begin
  whole := LnOK(a + b + c + d);
  Result := whole - LnOK(a) - LnOK(b) - LnOK(c) - LnOK(d);
end;
I can't make any sense of the code that you wrote which bears no obvious relationship to the R code to which you linked. I did not make any attempt to reconcile these differences.
Here's a literal translation of the R code. The algorithm is much simpler written this way as I am sure you will agree.
{$APPTYPE CONSOLE}
uses
SysUtils, Math;
type
TVector2 = array [1..2] of Double;
TMatrix2 = array [1..2] of TVector2;
// Sum of each row of a 2x2 matrix.
function rowSums(const M: TMatrix2): TVector2;
var
  r: Integer;
begin
  for r := 1 to 2 do
    Result[r] := M[r,1] + M[r,2];
end;
// Sum of each column of a 2x2 matrix.
function colSums(const M: TMatrix2): TVector2;
var
  c: Integer;
begin
  for c := 1 to 2 do
    Result[c] := M[1,c] + M[2,c];
end;
// Sum over the entries of k of p * Ln(p), where p = k[i] / Sum(k).
// Entries whose p is not greater than zero contribute nothing.
function H(const k: array of Double): Double;
var
  idx: Integer;
  total, p: Double;
begin
  total := Sum(k);
  Result := 0.0;
  for idx := Low(k) to High(k) do
  begin
    p := k[idx] / total;
    if not (p > 0.0) then
      Continue;
    Result := Result + p * Ln(p);
  end;
end;
// Log-likelihood ratio of a 2x2 contingency table:
// 2 * Sum(cells) * (H(cells) - H(row sums) - H(column sums)).
function LLR(const M: TMatrix2): Double;
var
  cells: array [1..4] of Double;
  rowTotals, colTotals: TVector2;
begin
  // Flatten the matrix row by row so it can be passed to Sum and H.
  cells[1] := M[1,1];
  cells[2] := M[1,2];
  cells[3] := M[2,1];
  cells[4] := M[2,2];
  rowTotals := rowSums(M);
  colTotals := colSums(M);
  Result := 2.0*Sum(cells)*(H(cells) - H(rowTotals) - H(colTotals));
end;
// Program entry point: fill the 2x2 table with the four counts quoted in
// the question and print the resulting score.
var
M: TMatrix2;
begin
M[1,1] := 110;
M[1,2] := 2442;
M[2,1] := 111;
M[2,2] := 29114;
Writeln(LLR(M));
end.
Output
2.70721876936232E+0002