package GNATCOLL.Mmap is
type Mapped_File is private;
-- A file that is to be mapped in memory.
-- This package uses the fastest available mechanism to make the contents
-- of the file accessible. On systems that support it, the file is not
-- really loaded in memory. Instead, a call to the mmap() system call (or
-- CreateFileMapping()) keeps the file on disk, but makes it accessible as
-- if it were in memory.
-- When the system does not support it, the file is actually loaded in
-- memory through calls to read(), and written back with write() when you
-- close it. This is of course much slower.
-- Legacy: each mapped file has a "default" mapped region in it. That
-- region is the one used by the obsolescent subprograms of this package
-- that take a Mapped_File where a Mapped_Region would be expected.
type Mapped_Region is private;
-- A representation of part of a file in memory. Actual reading/writing
-- is done through a mapped region. After being returned by Read, a mapped
-- region must be released with Free once you are done with it. If the
-- original Mapped_File was opened for reading, it can be closed before
-- the mapped region is freed.
Invalid_Mapped_File : constant Mapped_File;
Invalid_Mapped_Region : constant Mapped_Region;
-- Deferred constants of the two private types Mapped_File and
-- Mapped_Region; their values are not visible in this part of the
-- specification.
-- NOTE(review): presumably these denote "no file" and "no region" (for
-- instance a file that was never opened, or a region that has already
-- been freed) -- confirm against the private part and the body.
type File_Size is new Interfaces.C.size_t;
-- Type used by this package for byte counts and byte offsets (length of a
-- file, offset and size of a mapped region). It is derived from the C
-- size_t type.
function Open_Read
(Filename : String;
Use_Mmap_If_Available : Boolean := True) return Mapped_File;
-- Open a file for reading. The same file can be shared by multiple
-- processes, which will see each other's changes as they occur.
-- Any attempt to write the data might result in a segmentation fault,
-- depending on how the file was opened.
-- Name_Error is raised if the file does not exist.
-- Filename should be compatible with the filesystem.
-- NOTE(review): Use_Mmap_If_Available presumably selects between the
-- mmap()-based access and the read()-based fallback described for
-- Mapped_File -- confirm in the body.
function Open_Write
(Filename : String;
Use_Mmap_If_Available : Boolean := True) return Mapped_File;
-- Open a file for writing.
-- You cannot change the length of the file.
-- Name_Error is raised if the file does not exist.
-- Filename should be compatible with the filesystem.
-- NOTE(review): Use_Mmap_If_Available presumably selects between the
-- mmap()-based access and the read()/write()-based fallback described
-- for Mapped_File -- confirm in the body.
procedure Close (File : in out Mapped_File);
-- Close the file, and unmap the memory that is used for the region
-- contained in File. If the system does not support the munmap() system
-- call or equivalent, or these were not available for the file itself,
-- then the file is written back to the disk if it was opened for writing.
procedure Free (Region : in out Mapped_Region);
-- Unmap the memory that is used for this region and deallocate the region.
-- Every region obtained from Read must eventually be passed to Free.
type Use_Advice is
(Use_Normal,
Use_Random,
Use_Sequential);
for Use_Advice'Size use Interfaces.C.int'Size;
for Use_Advice use
(Use_Normal => 1,
Use_Random => 2,
Use_Sequential => 4);
-- This type can be used to provide advice to some operating systems on
-- how a mapped page will be used. Use_Normal is the default value of the
-- Advice parameter of Read.
--
-- If you specify Use_Sequential, you are telling the system that the
-- contents of the page will be read sequentially from lower to higher
-- address, and therefore the system should use prefetching aggressively.
--
-- If you specify Use_Random, the page will be accessed in a
-- non-sequential manner.
--
-- This advice might be ignored by the system (depending on whether the
-- madvise() system call is supported). It will always be ignored for
-- systems that do not support mmap.
--
-- The representation clauses give the type the size of a C int and the
-- internal codes 1, 2 and 4.
-- NOTE(review): presumably this is so that values can be handed to C
-- support code; how the codes are interpreted is not visible here, so do
-- not change them without checking the body.
procedure Read
(File : Mapped_File;
Region : in out Mapped_Region;
Offset : File_Size := 0;
Length : File_Size := 0;
Mutable : Boolean := False;
Advice : Use_Advice := Use_Normal);
-- Read a specific part of File and set Region to the corresponding mapped
-- region, or re-use it if possible.
-- Offset is the number of bytes since the beginning of the file at which
-- we should start reading. Length is the number of bytes that should be
-- read. If set to 0, as much of the file as possible is read (presumably
-- the whole file unless you are reading a _huge_ file).
-- Note that no (un)mapping is done if that part of the file is already
-- available through Region.
-- If the file was opened for writing, any modification you do to the
-- data stored in File will be stored on disk (either immediately when the
-- file is opened through a mmap() system call, or when the file is closed
-- otherwise).
-- Mutable is processed only for files opened for reading. If set to True,
-- the data can be modified, even though the changes will not be carried
-- through to the underlying file, nor are they guaranteed to be carried
-- through remapping.
-- Advice is a hint on how the mapped data will be accessed; see
-- Use_Advice.
-- This procedure takes care of page size alignment issues. The accessors
-- below only expose the region that has been requested by this call, even
-- if more bytes were actually mapped by this procedure.
--
-- TODO??? Enable to have a private copy for readable files
--
-- Operating systems generally limit the number of open file descriptors
-- that an application can have at one time (typically 1024 or 2048).
-- They however often have a much higher limit on the number of mapped
-- regions (65535 for instance). If you are hitting the first limit, you
-- could use the following workflow:
--
-- File := Open_Read ("filename.txt");
-- Region := Read (File);
-- Close (File); -- release the file descriptor
-- ...
-- Free (Region); -- release the mapped file
function Read
(File : Mapped_File;
Offset : File_Size := 0;
Length : File_Size := 0;
Mutable : Boolean := False;
Advice : Use_Advice := Use_Normal) return Mapped_Region;
-- Same as the Read procedure that takes a Region parameter, but return a
-- new mapped region instead of updating an existing one. The result must
-- be released with Free.
procedure Read
(File : Mapped_File;
Offset : File_Size := 0;
Length : File_Size := 0;
Mutable : Boolean := False)
with Obsolescent;
-- Same as the Read procedure that takes a Region parameter, but use the
-- legacy "default" region contained in File. Unlike the other overloads,
-- this one has no Advice parameter.
-- Obsolescent: new code should read into an explicit Mapped_Region.
function Length (File : Mapped_File) return File_Size
with Inline;
-- Size of the file on the disk, in bytes
function Offset (Region : Mapped_Region) return File_Size
with Inline;
-- Return the offset, in the physical file on disk, corresponding to the
-- start of the requested mapped region. The first byte in the file has
-- offset 0.
function Offset (File : Mapped_File) return File_Size
with Inline, Obsolescent;
-- Same as Offset for a Mapped_Region, applied to the legacy region
-- contained in File.
-- Obsolescent: use the Mapped_Region variant instead.
function Data_Address (Region : Mapped_Region) return System.Address
with Inline;
function Data_Address (File : Mapped_File) return System.Address
with Inline, Obsolescent;
-- Return the address of the internal buffer, either for Region or for
-- the legacy region contained in File (obsolescent variant).
-- Do not use these functions directly, but via an instance of the
-- package Data_Getters below.
function Data_Size (Region : Mapped_Region) return File_Size
with Inline;
function Data_Size (File : Mapped_File) return File_Size
with Inline, Obsolescent;
-- Full size of the mapped region, in bytes, either for Region or for the
-- legacy region contained in File (obsolescent variant).
-- Better to use one of the instances of Data_Getters instead (see the
-- Last functions declared there).
generic
type Index_Type is range <>;
-- The type of indexes used when mapping the file to memory.
-- Typical values are 'Positive' when you want to read files less than
-- 2Gb in size, although you might want to use a subtype of
-- System.Storage_Elements.Storage_Offset or Long_Long_Integer on
-- 64-bit systems supporting the mmap system call (which will allow
-- you to manipulate Petabytes files...)
-- The first value of Index_Type must be 1: this is enforced at compile
-- time by the package, so the wider types mentioned above have to be
-- narrowed to a subtype starting at 1 (as done for Long_Index below).
type Base_Unconstrained_String is
array (Index_Type range <>) of Character;
-- How is memory represented.
-- For small strings, it is recommended to use the String type
-- directly for ease of use for the user.
package Data_Getters is
pragma Compile_Time_Error
(Index_Type'First /= 1, "Wrong index type");
-- Reject any instantiation whose index type does not start at 1. The
-- documentation of Last and Data relies on valid indices being
-- 1 .. Last.
subtype Extended_Index_Type is Index_Type'Base
range 0 .. Index_Type'Last;
-- Index_Type extended with the value 0, which is what Last returns
-- when the size of the region is 0.
subtype Unconstrained_String is Base_Unconstrained_String (Index_Type);
-- Despite its name, this subtype is constrained: it spans the whole
-- range of Index_Type. The part that actually holds mapped data is
-- given by Last.
type Str_Access is access all Unconstrained_String;
pragma No_Strict_Aliasing (Str_Access);
-- We do not use a String, which would limit the index to Integer and
-- not allow us to load files larger than 2Gb.
-- We also do not systematically use a
-- System.Storage_Elements.Storage_Array, since it is easier for users
-- if we directly have Character elements rather than Storage_Element.
-- Values of this type are produced by unchecked conversion from an
-- address (see Convert), hence the No_Strict_Aliasing pragma.
function Convert is new Ada.Unchecked_Conversion
(System.Address, Str_Access);
-- Reinterpret a raw address as a Str_Access. No check of any kind is
-- performed on the address.
function To_Str_Access
(Str : GNAT.Strings.String_Access) return Str_Access
is (if Str = null then null else Convert (Str.all'Address));
-- Convert Str. The returned value points to the same memory block,
-- but no longer includes the bounds, which you need to manage yourself.
-- A null Str is converted to a null Str_Access.
function Last (Region : Mapped_Region) return Extended_Index_Type
is (Extended_Index_Type (Data_Size (Region)));
-- Return the number of requested bytes mapped in this region. It is
-- erroneous to access Data for indices outside 1 .. Last (Region).
-- Such accesses may cause Storage_Error to be raised.
--
-- A constraint error is raised if the size of the region is larger
-- than can be represented by Index_Type. So you need to pass a
-- compatible Length parameter in your call to Read.
function Last (File : Mapped_File) return Extended_Index_Type
is (Extended_Index_Type (Data_Size (File)))
with Obsolescent;
-- Return the number of requested bytes mapped in the region contained
-- in File. It is erroneous to access Data for indices outside
-- of 1 .. Last (File); such accesses may cause Storage_Error to
-- be raised.
-- As for the Mapped_Region variant, a constraint error is raised if the
-- size does not fit in Index_Type.
function Data (Region : Mapped_Region) return Str_Access
is (Convert (Data_Address (Region)));
-- The data mapped in Region as requested. The designated string does
-- not carry the bounds of the mapped data, so you cannot use the usual
-- 'First and 'Last attributes on it. Instead, the valid bounds are
-- respectively 1 and Last (Region).
function Data (File : Mapped_File) return Str_Access
is (Convert (Data_Address (File)))
with Obsolescent;
-- Likewise for the region contained in File; the valid bounds are
-- 1 and Last (File).
end Data_Getters;
package Short is new Data_Getters (Positive, String);
-- Instance of Data_Getters indexed by Positive, with String as the
-- representation of memory.
-- This package can be used when mapping files less than 2Gb.
-- A range of the result of Data can be converted to a String, as in:
-- S : constant String := String (Data (Region) (1 .. Last (Region)));
subtype Long_Index is Long_Long_Integer range 1 .. Long_Long_Integer'Last;
-- Index subtype for large mappings. It starts at 1, as required by
-- Data_Getters.
type Large_Unconstrained_String is array (Long_Index range <>) of Character;
-- Character array indexed by Long_Index, used as the representation of
-- memory for large mappings.
package Long is new Data_Getters (Long_Index, Large_Unconstrained_String);
-- This package can be used when mapping files up to a petabyte.
-- The whole data cannot be represented as a single string, so you'll
-- need to iterate on it.
subtype Str_Access is Short.Str_Access;
function "=" (Left, Right : Str_Access) return Boolean
renames Short."=";
function Last (Region : Mapped_Region) return Natural
renames Short.Last;
function Last (File : Mapped_File) return Natural
renames Short.Last;
function Data (Region : Mapped_Region) return Str_Access
renames Short.Data;
function Data (File : Mapped_File) return Str_Access
renames Short.Data;
-- Convenient renamings of the entities of the Short instance, for
-- backward compatibility.
-- These functions only work for files up to 2Gb. For larger sizes,
-- you should use Long.Str_Access, Long.Last and Long.Data instead.
--
-- Last returns 0 when the size of the region is 0, hence the Natural
-- result: a renaming takes its result subtype from the renamed function
-- (Short.Last returns a value in 0 .. Positive'Last), so a narrower
-- subtype written here would not be enforced and would only mislead
-- readers.
--
-- The variants taking a Mapped_File rename operations that are marked
-- Obsolescent in Data_Getters; prefer the Mapped_Region variants.
function Is_Mutable (Region : Mapped_Region) return Boolean;
-- Return whether it is safe to change bytes in Data (Region). This is true
-- for regions from writable files, for regions mapped with the "Mutable"
-- flag set, and for regions that are copied in a buffer. Note that it is
-- not specified whether empty regions are mutable or not, since there is
-- no byte to modify.
function Is_Mmapped (File : Mapped_File) return Boolean
with Inline;
-- Whether regions for this file are opened through an mmap() system call
-- or equivalent. This is in general irrelevant to your application, unless
-- the file can be accessed by multiple concurrent processes or tasks. In
-- such a case, and if the file is indeed mmap-ed, then the various parts
-- of the file can be written simultaneously, and thus you cannot ensure
-- the integrity of the file. If the file is not mmapped, the latest
-- process to Close it overwrites what other processes have done.
function Get_Page_Size return Positive;
-- Return the number of bytes in a page. Once a file is mapped from the
-- disk, its offset and length should be multiples of this page size (which
-- is ensured by this package in any case). Knowing this page size allows
-- you to map as much memory as possible at once, thus potentially reducing
-- the number of system calls to read the file by chunks.
function Read_Whole_File
(Filename : String;
Empty_If_Not_Found : Boolean := False) return GNAT.Strings.String_Access;
function Read_Whole_File
(Filename : String)
return GNATCOLL.Strings.XString;
-- Return the whole contents of the file.
-- These are convenience functions, which are of course slower than the
-- subprograms above since we also need to allocate some memory, actually
-- read the file and copy the bytes.
-- Filename should be compatible with the filesystem.
--
-- For the variant returning a String_Access:
-- The returned string must be freed by the user.
-- If the file does not exist, null is returned. However, if
-- Empty_If_Not_Found is True, then the empty string is returned instead.
--
-- NOTE(review): the variant returning an XString has no
-- Empty_If_Not_Found parameter and cannot return null; what it returns
-- for a missing file is not visible here -- confirm in the body.
--
-- These functions only work for files smaller than 2Gb.
end GNATCOLL.Mmap;