/* Copyright 1997 Acorn Computers Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/* > encoding.h */

/**************************************************************************/
/*                                                                        */
/* Copyright [1997-2003] All rights reserved.                             */
/*                                                                        */
/* This file may be included in profit making or non profit making        */
/* software on any system running any version of RISC OS, provided it was */
/* used along with a licensed binary of Unicode Lib                       */
/* It is supplied "as is" without warranty, express or implied, of        */
/* merchantability for any purpose.                                       */
/* No liability can be claimed for any direct or indirect loss            */
/**************************************************************************/

#ifndef unicode_encoding_h
#define unicode_encoding_h

#include "iso10646.h"
#include <stdlib.h>

/*
 * Handle type for an encoding session. struct EncodingPriv is not defined
 * in this header, so clients only ever hold an Encoding * (as returned by
 * encoding_new) and pass it back to the functions below.
 */
typedef struct EncodingPriv Encoding;

/*
 * Callback type used by encoding_read(). It is called once for each UCS
 * character found in the input, with the caller-supplied handle and the
 * character code c. Return non-zero to indicate that decoding should stop,
 * zero to continue.
 */
typedef int (*encoding_read_callback_fn)(void *handle, UCS4 c);

/*
 * Library start-up and close-down functions.
 *
 * NOTE(review): presumably encoding_initialise() is to be called before any
 * other function in this header, and encoding_tidyup() once the library is
 * no longer needed - confirm against the implementation.
 */
extern void encoding_initialise(void);
extern void encoding_tidyup(void);

/*
 * encoding_read processes the next part of a stream.
 *
 *   e       - the encoding session to read with
 *   ucs_out - callback invoked for each UCS character found, with the
 *             character code and the value passed in handle
 *   s       - pointer to the data
 *   n       - length of the data in bytes
 *   handle  - value passed through unchanged to ucs_out()
 *
 * Multi-byte codes split across two calls are coped with correctly.
 * ucs_out() may return non-zero to indicate that decoding should stop.
 *
 * Returns the number of bytes read - this will equal n unless ucs_out()
 * signals early termination.
 */
extern unsigned encoding_read(Encoding *e,
			      encoding_read_callback_fn ucs_out,
			      const char *s,
			      unsigned int n,
			      void *handle);

/*
 * Determine whether the read codec has run out of input in the middle of
 * a multibyte sequence.
 *
 * Returns true if the codec is waiting for further input that forms part
 * of a multibyte sequence.
 */
extern int encoding_read_in_multibyte_sequence(Encoding *e);

/*
 * Write the UCS4 character c to the specified buffer.
 *
 *   e       - the encoding session to write with
 *   c       - the character to write, or NULL_UCS4 to flush (see below)
 *   buf     - pointer to a pointer to the buffer
 *   bufsize - pointer to a variable containing the space available in
 *             the buffer
 *
 * The buffer space remaining (*bufsize) is updated (including becoming
 * negative) whether there is sufficient room in the buffer or not.
 *
 * The buffer pointer (*buf) is only updated if there was sufficient room
 * in the buffer. If there was insufficient room, the buffer may or may
 * not contain partial output.
 *
 * Returns 1 if there was sufficient room in the buffer, or 0 if the
 * buffer was not long enough. Some calls may produce no output, in which
 * case buf and bufsize will be unaltered, and 1 is returned.
 *
 * NOTE(review): the comment on encoding_WRITE_STRICT mentions returning -1
 * for an unencodable character; presumably that value is returned from
 * this function in that mode - confirm against the implementation.
 *
 * Some encodings are stateful, and require that you call encoding_write
 * with c = NULL_UCS4 at the end of the text to flush pending data,
 * cancel escape sequences, etc. You may also pass in NULL_UCS4 in the
 * middle of the text to force active escape sequences to be cancelled, for
 * instance at the end of each line of an e-mail.
 *
 * If you are writing two totally separate messages with one encoding
 * session, encoding_reset() should be called between them to clear state.
 */
extern int encoding_write(Encoding *e, UCS4 c, char **buf, int *bufsize);

/*
 * To obtain a new encoding session, use encoding_new().
 *
 *   n            - the encoding number. These numbers are Internet MIB
 *                  numbers.
 *   for_encoding - one of the four encoding_READ / encoding_WRITE* values
 *                  below, describing whether the encoding will be used
 *                  with the 'read' or 'write' methods and how writing
 *                  should handle encoding errors.
 *
 * Returns a pointer to the new Encoding session.
 * NOTE(review): the failure return (presumably NULL) is not documented
 * here - confirm against the implementation.
 */

#define encoding_READ		0 /* to UTF 8 */
#define encoding_WRITE		1 /* from UTF8, single alternate for unencodable characters */
#define encoding_WRITE_STRICT	2 /* from UTF8 and return -1 if unencodable character */
#define encoding_WRITE_LOOSE	3 /* from UTF8, alternate list for unencodable characters */

extern Encoding *encoding_new(int n, int for_encoding);

/*
 * When an encoding session has finished, call encoding_delete to free up
 * the Encoding structure and other associated workspace.
 */
extern void encoding_delete(Encoding *e);

/*
 * encoding_reset() resets the encoding session to its default state. Some
 * encodings have state (eg ISO 2022's character set and UCS-2's byte
 * ordering) - this will reset it to allow a stream to be rescanned or a
 * new stream to be decoded.
 *
 * NOTE(review): the meaning of the int return value is not documented in
 * this header - confirm against the implementation.
 */

extern int encoding_reset(Encoding *e);

/*
 * Change an encoding's option flags. How an encoding interprets these
 * options is up to it.
 *
 * The new flags are set to (old_flags &~ clear) ^ eor, so for a flag F:
 *   clear = F, eor = F   sets F
 *   clear = F, eor = 0   clears F
 *   clear = 0, eor = F   toggles F
 *   clear = 0, eor = 0   leaves the flags unchanged (just reads them)
 *
 * Returns the old flags. Typically these flags would be set immediately
 * after calling encoding_new/encoding_reset.
 */

extern unsigned int encoding_set_flags(Encoding *e, unsigned int clear, unsigned int eor);

/* Option flag bits for use with encoding_set_flags() */
#define encoding_FLAG_LITTLE_ENDIAN   1    /* Write/assume little-endian (eg UTF-16/UCS-4) */
#define encoding_FLAG_ENCODE_OPTIONAL 2    /* Encode optionally encoded chars (eg UTF-7)   */
#define encoding_FLAG_NO_HEADER       4    /* eg no UTF-16 byte order mark                 */

/*
 * Look up an encoding number from a MIME charset name.
 *
 *   name - the MIME charset name, terminated by a character not valid in
 *          a MIME charset name (so a direct pointer into the Content-Type
 *          header field will do).
 *
 * Returns the encoding number (the Internet MIB coded value if assigned).
 * This number can be passed to encoding_new(). If the name is unknown or
 * unsupported, 0 is returned.
 */
extern int encoding_number_from_name(const char *name);

/*
 * Set up the memory allocators for the encoding library to use.
 * Defaults to malloc and free.
 *
 *   alloc - function called to allocate size bytes
 *   free  - function called to release a pointer
 *           (presumably one previously obtained from alloc - confirm)
 */

typedef void *(*encoding_alloc_fn)(size_t size);
typedef void (*encoding_free_fn)(void *ptr);

extern void encoding_set_alloc_fns(encoding_alloc_fn alloc, encoding_free_fn free);

/*
 * NOTE(review): undocumented in the original header. Presumably returns the
 * maximum number of bytes a single character can occupy in the encoding
 * numbered enc_num - confirm against the implementation, including what is
 * returned for an unrecognised encoding number.
 */
extern int encoding_max_char_size(int enc_num);

/*
 * Return the default language string (see languages.h) for the encoding
 * numbered enc_num. Generic encodings such as UTF8 return "".
 */

extern const char *encoding_default_language(int enc_num);

/*
 * Return the default MIME type for the encoding numbered enc_num.
 *
 * Returns true if the encoding number was one recognised by the library.
 *
 * NOTE(review): presumably the MIME type string is written into buf, which
 * is buf_size bytes long - confirm against the implementation, including
 * truncation behaviour.
 */

extern int encoding_default_mime_type(int enc_num, char *buf, int buf_size);

/*
 * Scan the list of loaded tables and free any that have zero usage
 * count and are further down the list than 'start_depth'. When a
 * table is used it is pulled to the top of the list, so the least
 * recently used tables are further down.
 *
 *   start_depth == 0  removes all unused tables.
 *   start_depth == 8  is a suggested value to purge tables that haven't
 *                     been used for a bit.
 *
 * It is up to a client application to call this function when it
 * wishes to purge unused tables and free up some memory. Otherwise
 * all tables will be kept for ever.
 */

extern void encoding_table_remove_unused(int start_depth);

/*
 * Get the current alphabet, returned as a RISC OS alphabet number
 * (for example, 111 means UTF-8).
 */
extern int encoding_current_alphabet(void);

/*
 * Return a pointer to a (static) 256-word table giving the UCS value of
 * each character in the specified alphabet. The table is const and must
 * not be modified by the caller.
 *
 * Returns NULL if no table is available for that alphabet.
 */
extern const UCS4 *encoding_alphabet_ucs_table(int alphabet);

#endif
