unicode.pas
上传用户:raido2005
上传日期:2022-06-22
资源大小:5044k
文件大小:7k
- //*******************************************************//
- // //
- // DelphiFlash.com //
- // Copyright (c) 2004 FeatherySoft, Inc. //
- // info@delphiflash.com //
- // //
- //*******************************************************//
- // Description: This unit is a copy of part of the System.pas source
- // from Delphi 7 and is needed only for
- // compatibility with Delphi 5.
- Unit Unicode;
- interface
- type
- UTF8String = type string; // distinct string type for text holding UTF-8 encoded bytes
- PUTF8String = ^UTF8String;
- { PChar/PWideChar Unicode <-> UTF8 conversion (null-terminated source) }
- // UnicodeToUtf8(3) / Utf8ToUnicode(3):
- // Source must be null-terminated: its length is found by scanning for the terminator.
- // That scan is not limited by MaxBytes/MaxChars; the limit applies to Dest only.
- // Dest must have MaxBytes bytes (resp. MaxChars WideChars) available.
- // MaxBytes/MaxChars includes the null terminator (the output is null-terminated within that limit).
- // Function result includes the null terminator.
- function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: Integer): Integer; overload;
- function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: Integer): Integer; overload;
- // UnicodeToUtf8(4) / Utf8ToUnicode(4):
- // MaxDestBytes/MaxDestChars includes the null terminator (the output is null-terminated within that limit).
- // Function result includes the null terminator; with Dest = nil nothing is written and only the size is computed.
- // Nulls in the source data are not considered terminators - SourceChars/SourceBytes must be accurate.
- // A nil Source yields 0; Utf8ToUnicode yields Cardinal(-1) for malformed or incomplete multibyte input.
- function UnicodeToUtf8(Dest: PChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal; overload;
- function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: Cardinal; Source: PChar; SourceBytes: Cardinal): Cardinal; overload;
- { WideString <-> UTF8 conversion; both return '' for empty input or when the conversion fails }
- function UTF8Encode(const WS: WideString): UTF8String;
- function UTF8Decode(const S: UTF8String): WideString;
- { Ansi <-> UTF8 conversion; thin wrappers that go through WideString via UTF8Encode/UTF8Decode }
- function AnsiToUtf8(const S: string): UTF8String;
- function Utf8ToAnsi(const S: UTF8String): string;
- implementation
// UnicodeToUtf8(3):
//   Convenience overload for a null-terminated UTF-16 source.
//   Measures Source by walking it up to (not including) the first #0 and then
//   forwards to the four-argument overload, passing MaxBytes as the capacity
//   of Dest. A nil Source is forwarded with a length of zero.
//   The result is whatever the four-argument overload returns.
function UnicodeToUtf8(Dest: PChar; Source: PWideChar; MaxBytes: Integer): Integer;
var
  SourceLen: Cardinal;
begin
  SourceLen := 0;
  if Assigned(Source) then
  begin
    // Count WideChars in front of the terminator.
    while Source[SourceLen] <> #0 do
      Inc(SourceLen);
  end;
  Result := UnicodeToUtf8(Dest, MaxBytes, Source, SourceLen);
end;
// UnicodeToUtf8(4):
//   Encodes SourceChars UTF-16 code units from Source as UTF-8.
//
//   Dest          - output buffer, or nil to only compute the required size.
//   MaxDestBytes  - capacity of Dest in bytes, INCLUDING the null terminator.
//   Source        - UTF-16 input; nulls inside it are data, not terminators.
//   SourceChars   - number of WideChars to read; must be accurate.
//
//   Result: number of bytes produced including the null terminator.
//   Returns 0 when Source is nil, or when Dest is non-nil but MaxDestBytes
//   is 0 (there is no room even for the terminator, so nothing is written).
//
//   When the output does not fit, encoding stops before the first character
//   that cannot be stored completely together with the terminator, so the
//   buffer never ends in a partial multibyte sequence.
//
//   Each WideChar is encoded on its own as a 1-, 2- or 3-byte sequence;
//   surrogate pairs are not combined.
function UnicodeToUtf8(Dest: PChar; MaxDestBytes: Cardinal; Source: PWideChar; SourceChars: Cardinal): Cardinal;
var
  i, count, need: Cardinal;
  c: Cardinal;
begin
  Result := 0;
  if Source = nil then Exit;
  // Guard against MaxDestBytes-1 wrapping around and writing far outside Dest.
  if (Dest <> nil) and (MaxDestBytes = 0) then Exit;

  count := 0;
  i := 0;
  while i < SourceChars do
  begin
    c := Cardinal(Source[i]);

    // Number of UTF-8 bytes this code unit needs.
    if c <= $7F then
      need := 1
    else if c <= $7FF then
      need := 2
    else
      need := 3;

    if Dest <> nil then
    begin
      // count < MaxDestBytes always holds here, so the subtraction is safe.
      // ">=" (not ">") keeps one byte free for the terminator.
      if need >= MaxDestBytes - count then
        Break;
      case need of
        1:
          Dest[count] := Char(c);
        2:
          begin
            Dest[count] := Char($C0 or (c shr 6));
            Dest[count + 1] := Char($80 or (c and $3F));
          end;
      else
        begin
          Dest[count] := Char($E0 or (c shr 12));
          Dest[count + 1] := Char($80 or ((c shr 6) and $3F));
          Dest[count + 2] := Char($80 or (c and $3F));
        end;
      end;
    end;

    Inc(count, need);
    Inc(i);
  end;

  if Dest <> nil then
    Dest[count] := #0;
  Result := count + 1; // payload bytes plus the null terminator
end;
// Utf8ToUnicode(3):
//   Convenience overload for a null-terminated UTF-8 source.
//   Measures Source by walking it up to (not including) the first #0 byte and
//   then forwards to the four-argument overload, passing MaxChars as the
//   capacity of Dest. A nil Source is forwarded with a length of zero.
//   The result is whatever the four-argument overload returns.
function Utf8ToUnicode(Dest: PWideChar; Source: PChar; MaxChars: Integer): Integer;
var
  SourceLen: Cardinal;
begin
  SourceLen := 0;
  if Assigned(Source) then
  begin
    // Count bytes in front of the terminator.
    while Source[SourceLen] <> #0 do
      Inc(SourceLen);
  end;
  Result := Utf8ToUnicode(Dest, MaxChars, Source, SourceLen);
end;
// Private helper: decodes the single UTF-8 sequence that starts at
// Source[Index], stores its value in CodePoint and advances Index past it.
// Returns False for a stray continuation byte, an invalid lead byte, a
// sequence cut off by SourceBytes, a bad trail byte, or a value above
// $10FFFF. Overlong forms and 3-byte encoded surrogates are not rejected.
// The caller must guarantee Index < SourceBytes on entry.
function DecodeUtf8Sequence(Source: PChar; SourceBytes: Cardinal;
  var Index: Cardinal; var CodePoint: Cardinal): Boolean;
var
  Lead, Trail: Byte;
  TrailCount: Cardinal;
begin
  Result := False;
  Lead := Byte(Source[Index]);
  Inc(Index);

  if (Lead and $80) = 0 then
  begin
    CodePoint := Lead;            // 0xxxxxxx
    TrailCount := 0;
  end
  else if (Lead and $E0) = $C0 then
  begin
    CodePoint := Lead and $1F;    // 110xxxxx
    TrailCount := 1;
  end
  else if (Lead and $F0) = $E0 then
  begin
    CodePoint := Lead and $0F;    // 1110xxxx
    TrailCount := 2;
  end
  else if (Lead and $F8) = $F0 then
  begin
    CodePoint := Lead and $07;    // 11110xxx
    TrailCount := 3;
  end
  else
    Exit;                         // 10xxxxxx or 11111xxx cannot start a sequence

  // Index <= SourceBytes here, so the subtraction cannot wrap.
  if TrailCount > SourceBytes - Index then Exit; // incomplete multibyte char

  while TrailCount > 0 do
  begin
    Trail := Byte(Source[Index]);
    if (Trail and $C0) <> $80 then Exit;         // malformed trail byte
    Inc(Index);
    CodePoint := (CodePoint shl 6) or (Trail and $3F);
    Dec(TrailCount);
  end;

  if CodePoint > $10FFFF then Exit;              // outside the Unicode range
  Result := True;
end;

// Utf8ToUnicode(4):
//   Decodes SourceBytes bytes of UTF-8 from Source into UTF-16.
//
//   Dest          - output buffer, or nil to only validate and compute the size.
//   MaxDestChars  - capacity of Dest in WideChars, INCLUDING the null terminator.
//   Source        - UTF-8 input; nulls inside it are data, not terminators.
//   SourceBytes   - number of bytes to read; must be accurate.
//
//   Result: number of WideChars produced including the null terminator.
//   Returns 0 when Source is nil, or when Dest is non-nil but MaxDestChars is
//   0 (no room even for the terminator, nothing is written).
//   Returns Cardinal(-1) when a malformed or incomplete sequence is met; in
//   that case Dest may hold partial output without a terminator.
//
//   Values above $FFFF (4-byte sequences) are stored as a surrogate pair.
//   When the output does not fit, decoding stops before the first character
//   that cannot be stored completely together with the terminator.
function Utf8ToUnicode(Dest: PWideChar; MaxDestChars: Cardinal; Source: PChar; SourceBytes: Cardinal): Cardinal;
var
  i, count, units: Cardinal;
  cp: Cardinal;
begin
  Result := 0;
  if Source = nil then Exit;
  // Guard against MaxDestChars-1 wrapping around and writing far outside Dest.
  if (Dest <> nil) and (MaxDestChars = 0) then Exit;

  Result := High(Cardinal); // same value as Cardinal(-1): the error result
  count := 0;
  i := 0;
  while i < SourceBytes do
  begin
    if not DecodeUtf8Sequence(Source, SourceBytes, i, cp) then Exit;

    if cp > $FFFF then
      units := 2   // needs a surrogate pair
    else
      units := 1;

    if Dest <> nil then
    begin
      // count < MaxDestChars always holds here, so the subtraction is safe.
      // ">=" (not ">") keeps one WideChar free for the terminator.
      if units >= MaxDestChars - count then
        Break;
      if units = 2 then
      begin
        Dec(cp, $10000);
        Dest[count] := WideChar($D800 or (cp shr 10));
        Dest[count + 1] := WideChar($DC00 or (cp and $3FF));
      end
      else
        Dest[count] := WideChar(cp);
    end;

    Inc(count, units);
  end;

  if Dest <> nil then
    Dest[count] := #0;
  Result := count + 1; // WideChars produced plus the null terminator
end;
// Converts a WideString to a UTF8String.
// Returns '' for an empty input or when UnicodeToUtf8 reports no output.
function Utf8Encode(const WS: WideString): UTF8String;
var
  ByteCount: Integer;
  Buffer: UTF8String;
begin
  if WS <> '' then
  begin
    // Three bytes per WideChar is the largest size UnicodeToUtf8 can produce;
    // the string's own terminator slot supplies the "+1" passed below.
    SetLength(Buffer, Length(WS) * 3);
    ByteCount := UnicodeToUtf8(PChar(Buffer), Length(Buffer) + 1, PWideChar(WS), Length(WS));
    // The returned count includes the null terminator, hence the "- 1".
    if ByteCount > 0 then
      SetLength(Buffer, ByteCount - 1)
    else
      Buffer := '';
    Result := Buffer;
  end
  else
    Result := '';
end;
// Converts a UTF8String to a WideString.
// Returns '' for an empty input or when Utf8ToUnicode does not report a
// positive length (its error result becomes negative once stored in an Integer).
function Utf8Decode(const S: UTF8String): WideString;
var
  CharCount: Integer;
  Buffer: WideString;
begin
  if S <> '' then
  begin
    // One WideChar per input byte is always enough room for the decoded text;
    // the string's own terminator slot supplies the "+1" passed below.
    SetLength(Buffer, Length(S));
    CharCount := Utf8ToUnicode(PWideChar(Buffer), Length(Buffer) + 1, PChar(S), Length(S));
    // The returned count includes the null terminator, hence the "- 1".
    if CharCount > 0 then
      SetLength(Buffer, CharCount - 1)
    else
      Buffer := '';
    Result := Buffer;
  end
  else
    Result := '';
end;
// Converts an Ansi string to UTF-8 by way of a WideString.
function AnsiToUtf8(const S: string): UTF8String;
var
  Wide: WideString;
begin
  Wide := S; // implicit string -> WideString conversion, as in the direct call
  Result := Utf8Encode(Wide);
end;
// Converts UTF-8 text to an Ansi string by way of a WideString.
function Utf8ToAnsi(const S: UTF8String): string;
var
  Wide: WideString;
begin
  Wide := Utf8Decode(S);
  Result := Wide; // implicit WideString -> string conversion, as in the direct assignment
end;
- end.