Commit eaadcf1b authored by Samuel Thibault's avatar Samuel Thibault

New upstream version 3.02b

parent 183e062b
Bin/**
.cproject
.project
*.swp
This diff is collapsed.
/*
* FPMs-TCTS SOFTWARE LIBRARY
*
* File: database.h
* Time-stamp: <2000-04-07 16:48:29 pagel>
* Purpose: diphone database management
* Author: Vincent Pagel
* Email : mbrola@tcts.fpms.ac.be
*
* Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* 29/02/96 : Created. Put here everything concerning the speech database
*
* 02/02/97 : Replacement diphones enabled (T. Dutoit)
*
* 08/09/97 : New format -> phonemes are written as Zstring (old limit in
* size was 2 char/phoneme)
* New policy for silence definition
*
* 20/10/97 : Database information = Zstrings at the end of the dba
*
* 05/11/97 : BACON info for 2.05c
*
* 25/02/98 : Database formats cleaning
* drop old compatibility checks with 2.02 and so on
* Escape sequence in database_info for non printable information
* archiving the database construction parameters
*
* 03/03/98 : Non posix platforms can't realloc on a NULL pointer
*
* 23/06/98 : Polymorphic databases !
* A cloning mechanism for multi-instanciation databases
*
* 28/08/98 : in my way to C/ANSI, dropped the 'const' definition
* (that would require extern variables :-(
* Thus "const FrameType VOICING_MASK=2;"
* becomes "#define VOICING_MASK 2"
*
* 20/03/00 : keep tracks of the longest diphone in the database to
* remove static limits from mbrola engine
*
* Detects when opening directories .... it's a FAQ because error
* message was not clear, adding a Database extension like ".dba"
* should do
*
* Rom databases included (database can be FILE* or int16* depending on
* the database mode)
*
* 25% extra space in the hashtable enhances search
*/
#ifndef _DATABASE_H
#define _DATABASE_H
#include "audio.h"
#include "diphone.h"
#include "hash_tab.h"
#define DIPHONE_RAW 1 /* Coding value: the diphone wave database is raw */
#define ROM_MASK 128 /* Bit of the Coding tag that indicates the database is in ROM */
#define INFO_ESCAPE 0xFF /* Escape code in the database information strings (prevents them from being displayed) */
/*
 * Frame types in the MBR analysed database.
 *
 * A FrameType is built from the two bit masks below: VOICING_MASK
 * (voiced/unvoiced) and TRANSIT_MASK (stationary/transitory).
 * The four possible combinations are named NV_REG, NV_TRA, V_REG and V_TRA.
 */
typedef uint8 FrameType;
#define VOICING_MASK 2 /* Voiced/Unvoiced mask */
#define TRANSIT_MASK 1 /* Stationary/Transitory mask */
#define NV_REG 0 /* unvoiced stable state (no bit set) */
#define NV_TRA TRANSIT_MASK /* unvoiced transient */
#define V_REG VOICING_MASK /* voiced stable state */
#define V_TRA (VOICING_MASK | TRANSIT_MASK) /* voiced transient (both bits set) */
/*
* Main type
*/
/* Forward declaration, needed by the function pointer types below */
typedef struct Database Database;
/* Type of the "virtual" function that loads the wave of a diphone
 * (stored in the getdiphone_Database field of a Database) */
typedef bool (*getdiphone_DatabaseFunction)(Database* dba, DiphoneSynthesis *diph);
/* Type of the "virtual" function that releases the memory
 * (stored in the close_Database field of a Database) */
typedef void (*close_DatabaseFunction)(Database* dba);
/* Type of an initialisation function taking and returning a Database*;
 * init_DatabaseBasic has this signature */
typedef Database* (*init_DatabaseFunction)(Database* dba);
/*
 * Description of a diphone database: header fields, diphone index,
 * pitch marks, information strings and the handle on the wave data.
 * The two function pointers give access to the coding-specific code.
 */
struct Database
{
/* Polymorphic structure, depends on Coding */
void* self;
/* Virtual function for diphone wave loading */
getdiphone_DatabaseFunction getdiphone_Database;
/* Virtual function to release the memory */
close_DatabaseFunction close_Database;
uint8 Coding; /* Type of coding: DIPHONE_RAW or BACON (see also ROM_MASK) */
int16 Freq; /* Sampling frequency of the database */
uint8 MBRPeriod; /* Period of the MBR analysis */
int16 nb_diphone; /* Number of diphones in the database */
int32 SizeMrk; /* Size of the pitchmark part */
FrameType *pmrk; /* The whole pitch marks database */
int32 SizeRaw; /* Size of the wave part */
int32 RawOffset; /* Offset for raw samples in database */
uint8 max_frame; /* Maximum number of frames encountered for a diphone in the dba */
int16 max_samples; /* Size of the diphone buffer; 0 means let me manage it myself */
int32 Magic[2]; /* Magic header of the database */
char Version[6]; /* Version of the database */
PhonemeName sil_phon; /* Silence symbol in the database */
HashTab *diphone_table; /* Diphone index table */
ZStringList* info; /* Information strings */
char *dbaname; /* Name of the diphone file */
void *database; /* Diphone wave file (FILE*) or base pointer to wave data, depending on dba type */
};
/*
 * Convenient accessor macros for the fields of a Database.
 *
 * Each macro takes a pointer to a Database and expands to the matching
 * field, so it can be used both as a value and as an assignment target.
 * The argument and the whole expansion are parenthesized so that any
 * pointer expression (e.g. "dba + 1" or "&local_dba") expands correctly.
 */
#define dbaname(PDatabase) ((PDatabase)->dbaname)
/* Those 2 macros access the same field: raw pointer for ROM databases,
 * FILE* for file based databases */
#define rom_wave_ptr(PDatabase) ((PDatabase)->database)
#define database(PDatabase) ((FILE*)((PDatabase)->database))
#define nb_diphone(PDatabase) ((PDatabase)->nb_diphone)
#define RawOffset(PDatabase) ((PDatabase)->RawOffset)
#define Coding(PDatabase) ((PDatabase)->Coding)
#define Freq(PDatabase) ((PDatabase)->Freq)
#define MBRPeriod(PDatabase) ((PDatabase)->MBRPeriod)
#define SizeMrk(PDatabase) ((PDatabase)->SizeMrk)
#define SizeRaw(PDatabase) ((PDatabase)->SizeRaw)
#define max_frame(PDatabase) ((PDatabase)->max_frame)
#define max_samples(PDatabase) ((PDatabase)->max_samples)
#define Magic(PDatabase) ((PDatabase)->Magic)
#define Version(PDatabase) ((PDatabase)->Version)
#define sil_phon(PDatabase) ((PDatabase)->sil_phon)
#define info(PDatabase) ((PDatabase)->info)
#define pmrk(PDatabase) ((PDatabase)->pmrk)
#define diphone_table(PDatabase) ((PDatabase)->diphone_table)
#ifndef ROMDATABASE_PURE
/*
* Functions relying on FILE based databases
*/
/*
* Three parts of the Database header
*/
bool ReadDatabaseHeader(Database* dba);
/* Read the diphone database header and initialize the variables.
 * NOTE(review): presumably returns False on failure like the readers
 * below -- confirm against the implementation
 */
bool ReadDatabaseIndex(Database* dba);
/*
 * Read the index table of diphones, and put them in the hash table.
 * NOTE(review): presumably returns False on failure -- confirm
 */
bool ReadDatabasePitchMark(Database* dba);
/* Load the pitch markers (Voiced/Unvoiced, Transitory/Stationary).
 * A False result is treated as a failure by init_DatabaseOld
 */
bool ReadDatabaseInfo(Database* dba);
/*
 * Extract textual information from the database, if any.
 * A False result is treated as a failure by init_DatabaseOld
 */
/*
 * Initialisation and loading of Diphones -> depends on the database Coding
 * Returning NULL means fail (check LastError)
 */
Database* init_DatabaseBasic(Database* dba);
/*
 * Basic version, reads raw waves = checks there's no coding
 * Returning NULL means error
 */
void close_DatabaseBasic(Database* dba);
/* Release the memory allocated for the in-house BACON decoder
 * NOTE(review): the name says "Basic" while the comment mentions the BACON
 * decoder -- looks like a copy/paste, confirm what is actually released
 */
Database* init_Database(char* dbaname);
/* Generic initialization, calls the appropriate constructor
 * Returning NULL means fail (check LastError)
 */
Database* init_rename_Database(char* dbaname, ZStringList* rename, ZStringList* clone);
/*
 * A variant of init_Database allowing phoneme renaming on the fly
 * Returning NULL means fail (check LastError)
 *
 * rename and clone can be NULL to indicate there's nothing to change
 *
 * Renaming is a one-time operation (the database is changed
 * at loading) -> it involves a complete reconstruction of the hash table
 * but nothing else at run-time
 */
#endif /* ROMDATABASE_PURE */
#ifdef MULTICHANNEL_MODE
Database* copyconstructor_Database(Database* dba);
/* Creates a copy of a diphone database so that several synthesis engines
 * can use the same database at the same time (duplicates the file handle)
 *
 * Returning NULL means fail (check LastError)
 *
 * Highly recommended with multichannel mbrola, unless you can guarantee
 * mutually exclusive access to the getdiphone function
 */
#endif
/*
* Available to everybody
*/
int getDatabaseInfo(Database* dba, char* msg, int size, int index);
/*
 * Retrieve the info message number "index"; NULL means get the size
 * NOTE(review): presumably NULL refers to msg, and size is the capacity
 * of msg -- confirm against the implementation
 */
bool init_common_Database(Database* dba, DiphoneSynthesis *diph);
/*
 * Common initialization shared among all database types
 */
bool getdiphone_DatabaseBasic(Database* dba, DiphoneSynthesis *ds);
/*
 * Basic loading of the diphone specified by ds. Stores the samples
 * Return False in case of error
 */
#endif
/*
* FPMs-TCTS SOFTWARE LIBRARY
*
* File: database_old.c
* Time-stamp: <00/03/30 01:37:55 pagel>
*
* Purpose: Decode raw formats before 2.05 release, here for compatibility purpose
* Use pretty much RAW functions
*
* Author: Vincent Pagel
*
* Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* History:
*
* 25/06/98 : Created from 2.06 code
* 20/03/00 : Add support for max_frame parsing + new hash table scheme
*/
#include "database.h"
#ifndef ROMDATABASE_PURE
/*********************************************************
* OLD TYPES: Better lock it up and throw away the key *
*********************************************************/
/*
* Structure of the diphone database (as stored in file)
* Used before 2.02a
*/
/* One index cell as read from the file with a single fread() of
 * sizeof(DiphoneEvenOlderFile) bytes; the multi-byte fields are stored in
 * PC byte order and swapped by the reader when BIG_ENDIAN is defined */
typedef struct
{
char left[2],right[2]; /* Name of the diphone (2 chars each, no trailing 0) */
int32 pos_wave; /* position in SPEECH_FILE */
int16 halfseg; /* position of center of diphone */
uint16 pos_pm; /* index in PITCHMARK_FILE */
uint8 nb_frame; /* Number of pitch markers */
char dummy[3]; /* Alignment on multiple of 4 */
} DiphoneEvenOlderFile;
/*
* Structure of the diphone database (as stored in file)
* Used from release 2.02a to release 2.05
*
* Since 2.05 there is no more struct related to the
* index datas, we just read the info sequentially
*/
/* One index cell as read from the file with a single fread() of
 * sizeof(DiphoneOldFile) bytes. Positions are not stored: the reader
 * accumulates them from nb_frame and nb_wframe */
typedef struct
{
char left[2],right[2]; /* Name of the diphone (2 chars each, no trailing 0) */
int16 halfseg; /* position of center of diphone */
uint8 nb_frame; /* Number of pitch markers */
uint8 nb_wframe; /* Number of wave frames: multiplied by MBRPeriod to advance the wave position */
} DiphoneOldFile;
/* Specifies a diphone that is the copy of another one */
/* One replacement cell as read from the file: the diphone left-right is
 * added to the index with the same data as the existing diphone
 * leftr-rightr */
typedef struct
{
char left[2],right[2]; /* Name of the diphone (2 chars each, no trailing 0) */
char leftr[2],rightr[2]; /* Name of the replacement diphone (the existing one that is copied) */
} DiphoneReplace;
/*******************************************************
* OLD FUNCTIONS: don't even ruin your eyesight on it *
*******************************************************/
Database* init_DatabaseOld(Database* dba)
/*
 * Initializes the old ones! (raw database formats older than 2.05)
 *
 * Reads the diphone index from database(dba) into a new hash table,
 * records the silence symbol, adds the replacement diphones, loads the
 * pitch marks and the information strings, then computes max_samples.
 * Once loaded, the database is handled by the Basic (raw) getdiphone and
 * close functions.
 *
 * Version(dba) < "2.02" selects the 16 bytes index cells, otherwise the
 * 8 bytes cells (2.02 up to 2.05) are read.
 *
 * Returns dba on success, NULL on failure.
 *
 * NOTE(review): the results of the fread() calls are not checked, a
 * truncated file is not detected here.
 */
{
    int i;
    int32 indice_pm=0;
    int32 indice_wav=0;
    bool even_older;
    double padded_frames;

    /* Allocate one more byte for the trailing 0 */
    PhonemeName left_cell= (char *) MBR_malloc(3);
    PhonemeName right_cell= (char *) MBR_malloc(3);
    int16 halfseg_cell;
    int32 pos_wave_cell;
    int32 pos_pm_cell;
    uint8 nb_frame_cell;

    /* Start with empty names: left_cell is duplicated as the silence
     * symbol even when the index loop below does not run at all
     */
    left_cell[0]= 0;
    right_cell[0]= 0;

    debug_message2("Old compatibility mode : %i\n",Coding(dba));
    warning_message(WARNING_UPGRADE,"Think of upgrading your database!\n");

    /* Except some oddities for ReadDatabaseIndex and PitchMark, once it's
     * loaded it works like RAW databases
     */
    dba->getdiphone_Database= getdiphone_DatabaseBasic;
    dba->close_Database= close_DatabaseBasic;

    /* Check version < 2.02 or 2.02 <= version < 2.05 */
    if (strcmp("2.02",Version(dba))>0)
    {
        /* In that format the header field holds a size in bytes */
        nb_diphone(dba)= (int16) (nb_diphone(dba) / sizeof(DiphoneEvenOlderFile));
        even_older=True;
    }
    else /* From 2.02 to 2.05 */
    {   /* New format allow more diphone in a database than previously */
        even_older=False;
    }

    /*
     * Read the index table of diphones, and put them in the hash table
     */
    diphone_table(dba)=init_HashTab(nb_diphone(dba)); /* initialize hash table */

    /* Insert diphones one by one in the hash table */
    for(i=0; (indice_pm!=SizeMrk(dba)) && (i<nb_diphone(dba)); i++)
    {
        if (even_older)
        /* Neolithic 16 bytes structures */
        {
            DiphoneEvenOlderFile one_oldfcell; /* Cell on file */

            fread(&one_oldfcell, sizeof(one_oldfcell),1, database(dba));

            /* PC format ! */
#ifdef BIG_ENDIAN
            one_oldfcell.pos_wave= (((one_oldfcell.pos_wave&0xFF)<<24) |
                                    ((one_oldfcell.pos_wave&0xFF00)<<8) |
                                    ((one_oldfcell.pos_wave&0xFF0000)>>8) |
                                    ((one_oldfcell.pos_wave>>24)&0xFF));
            one_oldfcell.pos_pm = ( ((one_oldfcell.pos_pm&0xFF00) >>8) |
                                    ((one_oldfcell.pos_pm&0xFF) <<8));
#endif
            strncpy( left_cell , one_oldfcell.left,2);
            strncpy( right_cell , one_oldfcell.right,2);
            nb_frame_cell= one_oldfcell.nb_frame;
            halfseg_cell= one_oldfcell.halfseg;
            pos_wave_cell= one_oldfcell.pos_wave;
            pos_pm_cell= one_oldfcell.pos_pm;
        }
        else
        {   /* Renaissance 8 bytes structures */
            DiphoneOldFile one_fcell; /* Cell on file */

            fread(&one_fcell,sizeof(one_fcell),1,database(dba));
            strncpy( left_cell, one_fcell.left,2);
            strncpy( right_cell, one_fcell.right,2);
            nb_frame_cell= one_fcell.nb_frame;
            halfseg_cell= one_fcell.halfseg;

            /* Positions are not stored, they are accumulated cell after cell */
            pos_pm_cell= indice_pm;
            indice_pm+=one_fcell.nb_frame;
            pos_wave_cell= indice_wav;
            indice_wav+= (long) one_fcell.nb_wframe* (long) MBRPeriod(dba);
        }

        /* add a trailing 0 to the name */
        left_cell[2]= 0;
        right_cell[2]= 0;

        /* One world, one vision, one diphone database (the PC one) -> swap */
#ifdef BIG_ENDIAN
        halfseg_cell= ( (( halfseg_cell & 0xFF00) >>8) |
                        (( halfseg_cell & 0xFF) <<8));
#endif
        debug_message8("%i Diph %s-%s nbframe=%i poswav=%i pospm=%i halfseg=%i\n",
                       i,
                       left_cell,
                       right_cell,
                       nb_frame_cell,
                       pos_wave_cell,
                       pos_pm_cell,
                       halfseg_cell);

        add_HashTab(diphone_table(dba),
                    left_cell,
                    right_cell,
                    pos_wave_cell,
                    halfseg_cell,
                    pos_pm_cell,
                    nb_frame_cell);

        /* Keep a record of the longest diphone (to allocate oversized
         * temporary buffer). max_frame is a uint8: saturate at 255 instead
         * of converting an out of range value
         */
        padded_frames= nb_frame_cell*1.5;
        if (padded_frames > 255)
        {
            padded_frames= 255;
        }
        if (padded_frames > max_frame(dba))
        {
            max_frame(dba)= (uint8) padded_frames;
        }
    }

    /* The last diphone of the database is _-_ */
    sil_phon(dba)= MBR_strdup(left_cell);
    debug_message2("Init silence with %s\n", left_cell);

    /* dutoit 02/02/97
     * Check if we've reached the end of the diphone table
     * The rest are replacement diphones, they don't correspond
     * to new samples, but duplicate existing diphones
     */
    for( ; i<nb_diphone(dba); i++)
    {
        DiphoneInfo cell;
        DiphoneReplace replace; /* Replacement on file */
        int position; /* Position of the existing diphone that is copied */

        fread(&replace, sizeof(replace), 1, database(dba));

        strncpy(left_cell, replace.leftr, 2);
        strncpy(right_cell, replace.rightr, 2);
        left_cell[2]=0;
        right_cell[2]=0;
        position= search_HashTab(diphone_table(dba), left_cell, right_cell);

        /* Sanity check: the target exist */
        if (position==NONE)
        {
            fatal_message(ERROR_CANTDUPLICATESEGMENT,
                          "Fatal error: Can't duplicate %s-%s segment\n",
                          left_cell,right_cell);
            MBR_free(left_cell);
            MBR_free(right_cell);
            return NULL;
        }
        debug_message4("Copy %s-%s position %i ",
                       left_cell,
                       right_cell,
                       position);

        strncpy(left_cell, replace.left, 2);
        left_cell[2]=0;
        strncpy(right_cell, replace.right, 2);
        right_cell[2]=0;
        debug_message3("into %s-%s \n", left_cell, right_cell);

        /* Sanity check: the new name must not exist yet. The result is not
         * stored in 'position', which still designates the diphone to copy
         */
        if (search_HashTab(diphone_table(dba), left_cell, right_cell)!=NONE)
        {
            fatal_message(ERROR_CANTDUPLICATESEGMENT,
                          "Fatal error: duplicate %s-%s segment allready exist\n",
                          left_cell,
                          right_cell);
            MBR_free(left_cell);
            MBR_free(right_cell);
            return NULL;
        }

        /* Same data as the existing diphone, under the new name */
        cell= *content(diphone_table(dba), position);
        add_HashTab(diphone_table(dba), left_cell, right_cell,
                    pos_wave(cell),
                    halfseg(cell),
                    pos_pm(cell),
                    nb_frame(cell) );
    }

    MBR_free(left_cell);
    MBR_free(right_cell);

#ifdef DEBUG_HASH
    tuning_HashTab(diphone_table(dba));
#endif

    /*
     * Load pitch markers (Voiced/Unvoiced, Transitory/Stationary)
     */
    if (even_older)
    {
        pmrk(dba)= (unsigned char *) MBR_malloc(SizeMrk(dba));
        fread(pmrk(dba), sizeof(char),SizeMrk(dba), database(dba));
        /* The wave part starts right after the pitch marks */
        RawOffset(dba)=ftell(database(dba));
    }
    else
    {
        if (!ReadDatabasePitchMark(dba))
        {
            return NULL;
        }
    }

    if (!ReadDatabaseInfo(dba))
    {
        return NULL;
    }

    /* Size of the buffer that will be allocated in Diphonesynthesis */
    max_samples(dba)= MBRPeriod(dba) * max_frame(dba);

    debug_message1("done Compatibility init\n");
    return(dba);
}
#endif /* ROMDATABASE_PURE */
/*
* FPMs-TCTS SOFTWARE LIBRARY
*
* File: database_old.h
* Time-stamp: <2000-03-21 00:13:35 vincent>
* Purpose: Decode raw formats before 2.05 release, here for compatibility purpose
* Use pretty much RAW functions
* Author: Vincent Pagel
*
* Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* History:
*
* 25/06/98 : Created from 2.06 code chunks
*/
#ifndef _DATABASE_OLD_H
#define _DATABASE_OLD_H
Database* init_DatabaseOld(Database* dba);
/*
 * Initializes the old ones! (raw formats before the 2.05 release)
 * Returns dba, or NULL in case of failure
 */
#endif
/*
* FPMs-TCTS SOFTWARE LIBRARY
*
* File: diphoneinfo.c
* Purpose: diphone descriptor
* Authors: Vincent Pagel & Alain Ruelle
* Email : mbrola@tcts.fpms.ac.be
*
* Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* 09/04/98 : Created. A diphone descriptor intended for a hash_table
*
* 09/09/98 : DiphoneInfo now includes memory allocation and dereference
*
* 03/03/00 : Use PhonemeCode instead of PhonemeName to get a flat structure
* no more memory allocation needed
*/
#include "diphone_info.h"
#include "mbralloc.h"
int32 hash_DiphoneInfo(const char* left_string, const char* right_string)
/*
* Hashing function for the research of the diphone name in diphone_table
*/
{
int32 mult;
int i,shift;
/* nb_empty=433 nb_collision=309
* 504 alone and rest is 742 / 309 = 2.401294 for one hash code
*
* On 1000 phoneme (Levai.pho) average 1.546953 comparison to find the key
* Systematic search for diphones gives average 1.5955 path
*
* Adding extra 25% space in the hash table leads to 1.427 comparisons
* to find the key
*/
shift=0;
mult=0;
for(i=0;left_string[i]!=0;i++)
{
mult+= left_string[i] << shift;
shift=(shift+8)%32;
}
for(i=0;right_string[i]!=0;i++)
{
mult+= right_string[i] << shift ;
shift=(shift+8)%32;
}
return(mult);
}
/*
* FPMs-TCTS SOFTWARE LIBRARY
*
* File: diphone_info.h
* Time-stamp: <00/03/29 23:38:54 pagel>
* Purpose: diphone descriptor
* Authors: Vincent Pagel & Alain Ruelle
* Email : mbrola@tcts.fpms.ac.be
*
* Copyright (c) 1995-2018 Faculte Polytechnique de Mons (TCTS lab)
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*
* 09/04/98 : Created. A diphone descriptor intended for a hash_table
*
* 09/09/98 : DiphoneInfo now includes memory allocation and dereference
*
* 03/03/00 : Use PhonemeCode instead of PhonemeName to get a flat structure
* no more memory allocation needed
*/
#ifndef DIPHONE_INFO_H
#define DIPHONE_INFO_H
#include "common.h"
/*
* Structure of the diphone database (as stored in memory)
*/
/* Descriptor of one diphone: its name as a pair of phoneme codes, and the
 * location of its wave samples and pitch marks. Flat structure: it holds
 * no pointer to allocated memory */
typedef struct
{
/* Name of the diphone */
PhonemeCode left;
PhonemeCode right;
int32 pos_wave; /* position in SPEECH_FILE */
int16 halfseg; /* position of center of diphone */
int32 pos_pm; /* index in PITCHMARK_FILE */
uint8 nb_frame; /* Number of pitch markers */
} DiphoneInfo;
/* Convenience macros */
#define left(diphoneinfo) (diphoneinfo).left
#define right(diphoneinfo) (diphoneinfo).right
#define pos_wave(diphoneinfo) (diphoneinfo).pos_wave
#define halfseg(diphoneinfo) (diphoneinfo).halfseg