[prev in list] [next in list] [prev in thread] [next in thread]
List: mozilla-i18n
Subject: [Fwd: Hi I wrote tool to give me thai data....]
From: ftang () netscape ! com (Yung-Fong Tang)
Date: 2000-02-17 21:28:25
[Download RAW message or body]
Shaun - please send this to mozilla-i18n instead. You may find some people
there to work with you on Thai :)
[Attachment #3 (message/rfc822)]
This is a multi-part message in MIME format.
[Attachment #5 (text/html)]
<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
<html>
I implemented the Thai language character rules to give me random data
to test Thai db app.
<br>Oh yea and some Vietnamese rules too.
<p>It's of no use to me and you probably but i want to get in this mozilla
project (i'm looking around, reading and downloading src) to find something
i can do (in code).
<p>It follows the rules for Thai characters (tone marks, vowels, sara am).
If it doesn't, the text will show up as black boxes in WordPad on a Thai Win9x box.....
<p>OTHER STUFF IN CASE I SPARK AN INTEREST THAT MAYBE I CAN HELP---------
<br>That's all i remember from that testing job and about Thai so .....
<br>I am a self proclaimed C writing, parse a file expert!!!!
<br>I've written parsers to format name,ssn,date fields so they could be
uploaded into db's.
<br>I've written Field/Record (delimited,fixed length) objects in C++ to
process files with (I've done it in C too). Used them to compare fixed
length data to delimited data.
<p>Thanks,
<br>Shaun
<br>Please don't bash me too hard, I didn't mean to waste your time:)
<br> </html>
["makedata.c" (text/plain)]
#include "shaun.h"
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <limits.h>
#include <float.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <string.h>
#include "misc.h"
/*
#include <io.h>
*/
/* Forward declarations for the per-language character helpers defined below. */

/* US text: patches a byte that falls in the 91..96 gap. */
void US_only_good_chars(unsigned char*);
/* Thai: rejects/replaces bytes in the ranges the generator treats as illegal. */
int Thai_only_good_chars(unsigned char*);
/* Thai: picks the next byte so that it may follow LastOne. */
int Thai(unsigned char LastOne, unsigned char* TheOne);
/* Thai: random generators, one per character class. */
unsigned char fun_BASE_CHAR(void);
unsigned char fun_BASE_VOW (void);
unsigned char fun_T_VOWEL(void);
unsigned char fun_TONE_MK (void);
unsigned char fun_T_SIGN (void);
unsigned char fun_T_SARA_AM (void);
unsigned char fun_T_ALL(void);
unsigned char fun_T_THE_BASES(void);
unsigned char fun_T_BASES_S_AM(void);
/* Vietnamese: rejects/replaces bytes the generator treats as illegal. */
int Viet_only_good_chars(unsigned char*);
/* Vietnamese: picks the next byte, given the previous one. */
int Viet(unsigned char LastOne, unsigned char* TheOne);
/* Tests a byte against the letter codes for AaEeIiOoUuYy. */
int ISVOWEL(unsigned char);
/* NOTE(review): the two functions below are declared but no definition is
   visible in this file. */
unsigned char fun_V_VIET_CHAR(void);
unsigned char fun_V_BAD_CHARS(void);
/*
Working On:
re-do numbers (int,long,single,dbl)
rand() only returns 0 to 32767
Ii=Number (Int -32768 to 32767)
Ll=Number (Long -2147483648 to 2147483647)
1 =Number (Single -3.4x10^38 to 3.4x10^38)
2 =Number (Double -1.797x10^308 to 1.797x10^308)
Versions:
1 -> us,thai
2 -> v1 + vn
3 -> v2 + regional settings fix
4 -> v3 + added "byte" field type
mkdata <filename> <Tt,Nn,Dd, Yy, Ww,Bb,Ss> <#of records to make> <lang>
************************
Tt=text or memo
Yy=Number (Byte 0 to 255)
Nn=Number or currency
Ww=Whole Number no decimal or negative
Dd=Short date format
Bb=Blank field
Ss=SSN ###-##-####
Hh=Hour (0-24)
Oo=OLE (not implemented)
Hh=hyperlink (not implemented)
<lang> = Tt/thai Vv/viet Uu/us
*/
/* Positions of the command-line arguments in argv[], as read by main(). */
#define F_NAME 1
#define FIELDS 2
#define NUM_RECS 3
#define LANG 4
/* Suffix appended to the output file name. */
#define EXTENSION ".txt"
/* Bounds passed to rand_num() when a digit byte is wanted
   (48 and 57 are the ASCII codes of '0' and '9'). */
#define HI 57
#define LO 48
/* Byte range passed to rand_num() by the Thai generators. */
#define THAI_LO 161
#define THAI_HI 237
/* Byte range passed to rand_num() for US text (65 is 'A', 122 is 'z'). */
#define US_LO 65
#define US_HI 122
/* Byte range passed to rand_num() for Vietnamese text. */
#define VN_LO US_LO
#define VN_HI 255
/* Thai character-class ids. Their names parallel the fun_* generators;
   NOTE(review): none of them is referenced in the code visible here. */
#define BASE_CHAR 1
#define BASE_VOW 2
#define T_VOWEL 3
#define TONE_MK 4
#define T_SIGN 5
#define T_SARA_AM 6
#define T_ALL 7
#define T_THE_BASES 8
#define T_BASES_S_AM 9
/*
 * mkdata: writes random, delimiter-separated test records to
 * "./<filename>.txt".
 *
 *   argv[F_NAME]   output file name (EXTENSION is appended)
 *   argv[FIELDS]   one letter per field: T/N/W/D/B/S/H (either case)
 *   argv[NUM_RECS] number of records (lines) to write
 *   argv[LANG]     T (Thai), V (Vietnamese) or U (US), either case
 *
 * Vietnamese output uses ';' as field delimiter and ',' as decimal
 * character; the other languages use ',' and '.'.
 *
 * Returns FALSE after printing a message when the arguments are wrong or
 * the file cannot be created, TRUE once all records have been written.
 * NOTE(review): TRUE/FALSE come from a project header not visible here, so
 * the resulting process exit status should be confirmed against it.
 *
 * Fixes relative to the archived text, which was damaged by a
 * quoted-printable decode:
 *   - "PrevData=Data;" restored in the Vietnamese and Thai text branches;
 *   - "DecimalPlace=20;" restored for short numbers (20 can never equal a
 *     digit index, so no decimal character is written);
 *   - the file name is length-checked before it is copied into FileName.
 */
int main(int argc ,char* argv[])
{
    static char FileName[255];
    char FieldTypes;
    unsigned cnt,cnt2,cnt3,reccnt;
    unsigned FieldLength,NumRecords,DecimalPlace;
    unsigned char Data,PrevData;
    int Filehandle;
    int GoodChar;
    unsigned char FieldDelim;
    unsigned char DecimalChar;
    unsigned char TimeSeperator;
    unsigned char DateSeperator;
    unsigned char ANumber[36]; /* only used by the disabled numeric field types below */
    SHAUN_ARRAL;
    GoodChar = FALSE;
    if (argc != 5)
    {
        printf("\nUsed to make Access data for import (Ver 4.0)\nby Shaun Arral blitz420@uswest.net");
        printf("\n mkdata <filename> <field,field...> <#of records> <language>");
        printf("\n\n\t Fields: Tt<text or memo>\n");
        /*
        printf("\t Yy<number (byte 0-255)>\n");
        printf("\t Ii<number or currency>\n");
        printf("\t Ll<number or currency>\n");
        printf("\t 1 <number or currency>\n");
        printf("\t 2 <number or currency>\n");
        */
        printf("\t Nn<number or currency\n");
        printf("\t Ww<whole number no negatives or decimal\n");
        printf("\t Dd<date dd/mm/yy>\t<mm/dd/yy>(Th/Vn)\n");
        printf("\t Bb<blank/empty field>\n");
        printf("\t Hh<Hour <0-24>\n");
        printf("\t Ss<SSN ###-##-####>");
        printf("\n\n\t Languages: Tt <Thai>\n");
        printf("\t Uu <English>\n");
        printf("\t Vv <Vietnamese>\n");
        printf("\n\nNote: This can be used to make SpreadSheet data too...");
        printf("\n \"mkdata xlstest nnnnwwn 10 U\"\n");
        return FALSE;
    }
    if (argv[LANG][0] != 'T' && argv[LANG][0] != 'V' && argv[LANG][0] != 'U' &&
        argv[LANG][0] != 't' && argv[LANG][0] != 'v' && argv[LANG][0] != 'u')
    {
        printf("\nBad Language Code:%c\n", argv[LANG][0]);
        return FALSE;
    }
    FieldDelim=',';
    DecimalChar='.';
    TimeSeperator = ':';
    DateSeperator = '/';
    /* "./" + name + EXTENSION + terminator must fit in FileName. */
    if (strlen("./") + strlen(argv[F_NAME]) + strlen(EXTENSION) >= sizeof FileName)
    {
        printf("\nFile Name Too Long: %s\n", argv[F_NAME]);
        return FALSE;
    }
    strcpy(FileName,"./");
    strcat(FileName,argv[F_NAME]);
    strcat(FileName,EXTENSION);
    if( (Filehandle = open(FileName, O_CREAT | O_WRONLY | O_TRUNC ,0664)) == -1)
    {
        printf("\nCan't Open File: %s\n", FileName);
        return FALSE;
    }
    if (argv[LANG][0] == 'V' || argv[LANG][0] == 'v' )
    {
        FieldDelim=';';
        DecimalChar=',';
    }
    NumRecords=atoi(argv[NUM_RECS]);
    printf("Working");
    for(reccnt=0;reccnt<NumRecords;++reccnt)
    {
        /* The loop test sees the previous field letter, so the body runs one
           extra time with FieldTypes == '\0'; nothing matches on that pass. */
        for (cnt=0,FieldTypes=argv[FIELDS][cnt];FieldTypes;++cnt)
        {
            FieldTypes=argv[FIELDS][cnt];
            if (FieldTypes && cnt > 0)
                write(Filehandle,&FieldDelim,1);
            /* 'b' / 'B' (blank field): nothing is written between delimiters. */
            if(FieldTypes == 's' || FieldTypes == 'S')
            {
                /* SSN ###-##-#### */
                for (cnt3=0;cnt3<3;++cnt3)
                {
                    Data=rand_num(LO,HI);
                    write(Filehandle,&Data,1);
                }
                write(Filehandle,"-",1);
                for (cnt3=0;cnt3<2;++cnt3)
                {
                    Data=rand_num(LO,HI);
                    write(Filehandle,&Data,1);
                }
                write(Filehandle,"-",1);
                for (cnt3=0;cnt3<4;++cnt3)
                {
                    Data=rand_num(LO,HI);
                    write(Filehandle,&Data,1);
                }
            }
            /* Not supported ltoa() ????
            if(FieldTypes == 'y' || FieldTypes == 'Y')
            {
            ltoa(rand_num(0,UCHAR_MAX),ANumber,10);
            write(Filehandle,ANumber,strlen(ANumber));
            }
            */
            /* if(FieldTypes == 'i' || FieldTypes == 'I')
            {
            ltoa(rand_num(SHRT_MIN,SHRT_MAX),ANumber,10);
            write(Filehandle,ANumber,strlen(ANumber));
            //make negative ????
            }
            if(FieldTypes == 'l' || FieldTypes == 'L')
            {
            ltoa(rand_num(LONG_MIN,LONG_MAX),ANumber,10);
            write(Filehandle,ANumber,strlen(ANumber));
            //make negative
            }
            if(FieldTypes == '1')
            {
            //itoa(rand_num(FLT_MIN,FLT_MAX),ANumber,10);
            //write(Filehandle,ANumber,strlen(ANumber));
            }
            if(FieldTypes == '2' )
            {
            //itoa(rand_num(DBL_MIN,DBL_MAX),ANumber,10);
            //write(Filehandle,ANumber,strlen(ANumber));
            }
            */
            if(FieldTypes == 'n' || FieldTypes == 'N')
            {
                /* Optional leading minus sign. */
                if ( (rand_num(1,11)) > 7)
                    write(Filehandle,"-",1);
                FieldLength=rand_num(1,10);
                if (FieldLength > 3)
                    DecimalPlace=rand_num(1,FieldLength-1);
                else
                    DecimalPlace=20; /* never equals cnt2 below: no decimal character */
                for (cnt2=0;cnt2<FieldLength;++cnt2)
                {
                    if (DecimalPlace == cnt2)
                        write(Filehandle,&DecimalChar,1);
                    Data=rand_num(LO,HI);
                    write(Filehandle,&Data,1);
                }
            }
            if(FieldTypes == 'w' || FieldTypes == 'W')
            {
                FieldLength=rand_num(1,10);
                for (cnt2=0;cnt2<FieldLength;++cnt2)
                {
                    Data=rand_num(LO,HI);
                    write(Filehandle,&Data,1);
                }
            }
            if(FieldTypes == 'h' || FieldTypes == 'H')
            {
                /* First hour digit is '0'..'2'; after a '2' the second digit
                   is limited to '0'..'3'. */
                Data=rand_num(LO,50);
                write(Filehandle,&Data,1);
                if (Data == 50)
                    Data=rand_num(LO,51);
                else
                    Data=rand_num(LO,HI);
                write(Filehandle,&Data,1);
                write(Filehandle,&TimeSeperator,1);
                Data=rand_num(LO,LO+5);
                write(Filehandle,&Data,1);
                Data=rand_num(LO+1,HI);
                write(Filehandle,&Data,1);
            }
            if(FieldTypes == 'd' || FieldTypes == 'D')
            {
                if (argv[LANG][0] != 'U' && argv[LANG][0] != 'u')
                {
                    /* DAY*/
                    Data=rand_num(LO,50);
                    write(Filehandle,&Data,1);
                    Data=rand_num(49,HI);
                    write(Filehandle,&Data,1);
                    write(Filehandle,&DateSeperator,1);
                    /* MONTH*/
                    Data=rand_num(LO,49);
                    if (Data == LO)
                    {
                        write(Filehandle,&Data,1);
                        Data=rand_num(49,HI);
                        write(Filehandle,&Data,1);
                    }
                    else
                    {
                        write(Filehandle,&Data,1);
                        Data=rand_num(LO,50);
                        write(Filehandle,&Data,1);
                    }
                    write(Filehandle,&DateSeperator,1);
                }
                else
                {
                    /* MONTH */
                    Data=rand_num(LO,49);
                    if (Data == LO)
                    {
                        write(Filehandle,&Data,1);
                        Data=rand_num(49,HI);
                        write(Filehandle,&Data,1);
                    }
                    else
                    {
                        write(Filehandle,&Data,1);
                        Data=rand_num(LO,50);
                        write(Filehandle,&Data,1);
                    }
                    write(Filehandle,&DateSeperator,1);
                    /* DAY */
                    Data=rand_num(LO,50);
                    write(Filehandle,&Data,1);
                    Data=rand_num(49,HI);
                    write(Filehandle,&Data,1);
                    write(Filehandle,&DateSeperator,1);
                }
                /* YEAR
                Y2K Compliance */
                if (argv[LANG][0] == 'U' || argv[LANG][0] == 'u')
                    write(Filehandle,"19",2);
                Data=rand_num(LO,HI);
                write(Filehandle,&Data,1);
                Data=rand_num(LO,HI);
                write(Filehandle,&Data,1);
            }
            if(FieldTypes == 't' || FieldTypes == 'T')
            {
                Data='\0';
                PrevData='\0';
                /* Add Viet characters */
                FieldLength=rand_num(2,50);
                for (cnt2=0;cnt2<FieldLength;++cnt2)
                {
                    if (argv[LANG][0] == 'U' || argv[LANG][0] == 'u')
                    {
                        Data=rand_num(US_LO,US_HI);
                        US_only_good_chars(&Data);
                    }
                    if (argv[LANG][0] == 'V' || argv[LANG][0] == 'v')
                    {
                        /* The next byte depends on the one just written. */
                        PrevData=Data;
                        Viet(PrevData,&Data);
                    }
                    if (argv[LANG][0] == 'T' || argv[LANG][0] == 't')
                    {
                        if (cnt2==0)
                        {
                            /*first char*/
                            Data='\0';
                            PrevData='\0';
                            Thai(PrevData,&Data);
                            /* #1*/
                            write(Filehandle,&Data,1);
                            GoodChar = FALSE;
                            ++cnt2;
                        }
                        /*keep going */
                        PrevData=Data;
                        Thai(PrevData,&Data);
                    }
                    write(Filehandle,&Data,1);
                    GoodChar = FALSE;
                }
            }
        }/*End Of Record */
        write(Filehandle,"\n", 1);
        printf(".");
    }/*End Of Data*/
    close(Filehandle);
    return TRUE;
}
/*##US*/
/*
 * Patches a US text byte in place: a value in 91..96 (between the two
 * letter ranges) is replaced by rand_num(97,US_HI); any other value is
 * left untouched.
 */
void US_only_good_chars(unsigned char* LookAt)
{
    const unsigned char current = *LookAt;

    if (current <= 90 || current >= 97)
        return;
    *LookAt = rand_num(97,US_HI);
}
/*##TH*/
/* Finds illegal text characters. nothing to do with sequence */
/*
 * Checks a single byte against the ranges this generator treats as illegal
 * for Thai text; character sequence is not considered here.
 *
 * A byte in 91..96 is first swapped for rand_num(97,US_HI) without
 * affecting the result. A byte in 219..223 (empty values and Baht) or in
 * 239..249 (empty value and numbers) is replaced by random picks from
 * THAI_LO..THAI_HI until a pick passes this same check.
 *
 * Returns TRUE when the incoming byte was already acceptable, FALSE when it
 * had to be replaced (the byte left in *LookAt is acceptable either way).
 */
int Thai_only_good_chars(unsigned char* LookAt)
{
    int in_empty_or_baht;
    int in_numbers;

    if (*LookAt > 90 && *LookAt < 97)
        *LookAt = rand_num(97,US_HI);

    in_empty_or_baht = (*LookAt >= 219 && *LookAt <= 223);
    in_numbers = (*LookAt >= 239 && *LookAt <= 249);
    if (!in_empty_or_baht && !in_numbers)
        return(TRUE);

    /* Re-pick, then re-run the check until it reports the byte as good. */
    *LookAt = rand_num(THAI_LO,THAI_HI);
    while (Thai_only_good_chars(LookAt) == FALSE)
        ;
    return(FALSE);
}
/* Get's a LEGAL(sequence) Thai character */
/*
 * Picks a Thai byte that may legally follow LastOne and stores it in
 * *TheOne.
 *
 *   LastOne  the byte written just before ('\0' at the start of a field)
 *   TheOne   in: '\0' together with LastOne == '\0' requests a first
 *            character; out: the chosen byte
 *
 * Returns FALSE (and sets *TheOne to '\0') when LastOne lies outside
 * 161..238, TRUE otherwise.
 * NOTE(review): for LastOne in 219..223 no branch matches, so TRUE is
 * returned with *TheOne unchanged.
 *
 * The archived text had the "==2xx" comparisons damaged by a
 * quoted-printable decode; they are restored here
 * (208, 210, 209, 207 and 211).
 */
int Thai(unsigned char LastOne, unsigned char* TheOne)
{
    /* printf("[%c][%c]\n", LastOne,*TheOne); */
    if (LastOne =='\0' && *TheOne =='\0')
    {
        *TheOne = fun_T_THE_BASES();
        return TRUE;
    }
    if (LastOne < 161 || LastOne > 238)
    {
        *TheOne = '\0';
        return FALSE;
    }
    /* Base Chars*/
    if (LastOne < 207)
    {
        *TheOne = fun_T_ALL();
        return TRUE;
    }
    /* Base Vowels */
    if (LastOne==208||LastOne==210||(LastOne>223&&LastOne<229) )
    {
        if (LastOne == 224)
            *TheOne = fun_BASE_CHAR();
        else
            *TheOne = fun_T_THE_BASES();
        return TRUE;
    }
    /* Vowels */
    if (LastOne==209||(LastOne>211&&LastOne<219) )
    {
        *TheOne = fun_T_THE_BASES();
        return TRUE;
    }
    /* Tone Marks */
    if (LastOne>231&&LastOne<236)
    {
        *TheOne = fun_T_BASES_S_AM();
        return TRUE;
    }
    /* Signs */
    if (LastOne==207||(LastOne>228&&LastOne<232)||(LastOne>235&&LastOne<239))
    {
        *TheOne = fun_T_THE_BASES();
        return TRUE;
    }
    /*Sara Am*/
    if (LastOne==211)
        *TheOne = fun_T_THE_BASES();
    return TRUE;
}
/*
 * Returns a random Thai base character: a pick from THAI_LO..206 that
 * Thai_only_good_chars() accepts unchanged.
 *
 * The archived text read "GoodCharúLSE;" (quoted-printable damage to
 * "GoodChar=FALSE;"); the initialisation is restored by using a do/while.
 */
unsigned char fun_BASE_CHAR(void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(THAI_LO,206);
        GoodChar=Thai_only_good_chars(&TheChar);
    } while (GoodChar == FALSE);
    return TheChar;
}
/*
 * Returns a random Thai base vowel: 208, 210 or 224..228 (the same set
 * Thai() tests under its "Base Vowels" comment).
 *
 * Fixes:
 *   - "GoodChar=FALSE;" and the "==208" / "==210" comparisons were damaged
 *     by a quoted-printable decode in the archived text and are restored;
 *   - the original kept GoodChar TRUE for any pick from 208..228 that
 *     passed Thai_only_good_chars(), so non-base-vowels could be returned.
 *     The result now depends only on class membership, matching the
 *     if/else pattern used by fun_T_THE_BASES().
 */
unsigned char fun_BASE_VOW (void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(208,228);
        /* May swap an illegal byte for a random legal one; the class test
           below decides whether the final byte is kept. */
        Thai_only_good_chars(&TheChar);
        if (TheChar==208||TheChar==210||(TheChar>223&&TheChar<229) )
            GoodChar=TRUE;
        else
            GoodChar=FALSE;
    } while (GoodChar == FALSE);
    return TheChar;
}
/* Returns the fixed byte value 211, the code Thai() handles as "Sara Am". */
unsigned char fun_T_SARA_AM (void)
{
    const unsigned char sara_am = 211;

    return sara_am;
}
/*
 * Returns a random Thai byte of any class: a pick from THAI_LO..THAI_HI
 * that Thai_only_good_chars() accepts unchanged.
 *
 * The archived text read "GoodCharúLSE;" (quoted-printable damage to
 * "GoodChar=FALSE;"); the initialisation is restored by using a do/while.
 */
unsigned char fun_T_ALL(void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(THAI_LO,THAI_HI);
        GoodChar=Thai_only_good_chars(&TheChar);
    } while (GoodChar == FALSE);
    return TheChar;
}
/* Returns a random byte from 232..235, the range Thai() handles as tone marks. */
unsigned char fun_TONE_MK (void)
{
    unsigned char tone_mark;

    tone_mark = rand_num(232,235);
    return tone_mark;
}
/*
 * Returns a random Thai vowel: 209 or 212..218 (the same set Thai() tests
 * under its "Vowels" comment).
 *
 * Fixes:
 *   - "GoodChar=FALSE;" and the "==209" comparison were damaged by a
 *     quoted-printable decode in the archived text and are restored;
 *   - the original kept GoodChar TRUE for any pick from 209..218 that
 *     passed Thai_only_good_chars(), so 210 and 211 could be returned.
 *     The result now depends only on class membership.
 */
unsigned char fun_T_VOWEL(void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(209,218);
        /* May swap an illegal byte for a random legal one; the class test
           below decides whether the final byte is kept. */
        Thai_only_good_chars(&TheChar);
        if (TheChar==209||(TheChar>211&&TheChar<219) )
            GoodChar=TRUE;
        else
            GoodChar=FALSE;
    } while (GoodChar == FALSE);
    return TheChar;
}
/*
 * Returns a random Thai sign: 207, 229..231 or 236..238 (the same set
 * Thai() tests under its "Signs" comment). Picks are drawn from 229..238,
 * so 207 is only returned if Thai_only_good_chars() substitutes it.
 *
 * Fixes:
 *   - "GoodChar=FALSE;" and the "==207" comparison were damaged by a
 *     quoted-printable decode in the archived text and are restored;
 *   - the original kept GoodChar TRUE for any pick that passed
 *     Thai_only_good_chars(), so the tone marks 232..235 could be
 *     returned. The result now depends only on class membership.
 */
unsigned char fun_T_SIGN (void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(229,238);
        /* May swap an illegal byte for a random legal one; the class test
           below decides whether the final byte is kept. */
        Thai_only_good_chars(&TheChar);
        if (TheChar==207||(TheChar>228&&TheChar<232)||(TheChar>235&&TheChar<239))
            GoodChar=TRUE;
        else
            GoodChar=FALSE;
    } while (GoodChar == FALSE);
    return TheChar;
}
/*
 * Returns a random Thai "base": a byte below 207 (base character), 210, or
 * 224..228.
 *
 * The archived text had "GoodChar=FALSE;" (twice) and the "==210"
 * comparison damaged by a quoted-printable decode; they are restored here.
 * NOTE(review): Thai() and fun_T_BASES_S_AM() also count 208 as a base
 * vowel; it is not accepted here because the archived text shows no trace
 * of it - confirm whether that omission was intended.
 */
unsigned char fun_T_THE_BASES(void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(THAI_LO,228);
        /* Called for its side effect of swapping an illegal byte; the
           original overwrote its return value straight away. */
        Thai_only_good_chars(&TheChar);
        if (TheChar<207)
            GoodChar=TRUE;
        else
            GoodChar=FALSE;
        if (TheChar==210||(TheChar>223&&TheChar<229) )
            GoodChar=TRUE;
    } while (GoodChar == FALSE);
    return TheChar;
}
/*
 * Returns a random Thai "base" or Sara Am: a byte below 207 (base
 * character), 208, 210, 211, or 224..228. Thai() uses it for the byte that
 * follows a tone mark.
 *
 * The archived text had "GoodChar=FALSE;" (twice) and the "==211",
 * "==208" and "==210" comparisons damaged by a quoted-printable decode;
 * they are restored here.
 */
unsigned char fun_T_BASES_S_AM(void)
{
    unsigned char TheChar;
    int GoodChar;

    do
    {
        TheChar=rand_num(THAI_LO,228);
        /* Called for its side effect of swapping an illegal byte; the
           original overwrote its return value straight away. */
        Thai_only_good_chars(&TheChar);
        if (TheChar<207)
            GoodChar=TRUE;
        else
            GoodChar=FALSE;
        if (TheChar==211||TheChar==208||TheChar==210||(TheChar>223&&TheChar<229) )
            GoodChar=TRUE;
    } while (GoodChar == FALSE);
    return TheChar;
}
/* ##VN */
/*
 * Reports whether a byte is on the reject list used for Vietnamese text.
 * Non-zero means "rejected". The list is the union of every range and
 * single value the original chain of if-blocks tested.
 */
static int viet_char_is_rejected(unsigned char c)
{
    return c > 253
        || (c > 90 && c < 97)
        || (c > 122 && c < 194)
        || (c > 195 && c < 202)
        || c == 203 || c == 209 || c == 235 || c == 241 || c == 243
        || (c > 204 && c < 208)
        || (c > 213 && c < 221)
        || (c > 222 && c < 226)
        || (c > 227 && c < 234)
        || (c > 236 && c < 240)
        || (c > 245 && c < 253)
        /* FfJjWwZz */
        || c == 70 || c == 74 || c == 87 || c == 90
        || c == 102 || c == 106 || c == 119 || c == 122;
}

/*
 * Checks a single byte against the values this generator treats as illegal
 * for Vietnamese text. A rejected byte is replaced in place by random picks
 * from VN_LO..VN_HI until a pick is acceptable.
 *
 * Returns TRUE when the incoming byte was already acceptable, FALSE when it
 * had to be replaced (the byte left in *LookAt is acceptable either way).
 *
 * Fixes:
 *   - the comparisons "==209", "==235", "==241", "==243", "==74", "==87",
 *     "==90", "==102" and "==106" were damaged by a quoted-printable decode
 *     in the archived text and are restored (the letter list follows the
 *     original "FfJjWwZz" comment);
 *   - the self-recursive retry is replaced by a loop that draws the same
 *     sequence of random picks without growing the call stack.
 */
int Viet_only_good_chars(unsigned char* LookAt)
{
    /* Append "LookAt" to file (vn.log). Analyze for most common and
    move them closer to the top.
    */
    if (!viet_char_is_rejected(*LookAt))
        return(TRUE);

    do
    {
        *LookAt= rand_num(VN_LO,VN_HI);
    } while (viet_char_is_rejected(*LookAt));
    return(FALSE);
}
/* Decide what char to use then check */
/*
 * Picks the next Vietnamese byte, given the previous one, and stores it in
 * *TheOne.
 *
 *   LastOne  the byte written just before
 *   TheOne   out: the chosen byte
 *
 * After a vowel (see ISVOWEL) there is a 5-in-100 chance of one of the
 * fixed tone values 204, 210, 222 or 242; otherwise any byte from
 * VN_LO..VN_HI accepted by Viet_only_good_chars() is used. After a
 * non-vowel the pick is limited to VN_LO..203.
 *
 * Always returns TRUE.
 *
 * The archived text read "*TheOnei;" (quoted-printable damage to
 * "*TheOne=69;"); the statement is restored. Every path below overwrites
 * the value, so it only serves as an initial placeholder.
 */
int Viet(unsigned char LastOne, unsigned char* TheOne)
{
    int TonePct;
    int GoodChar;
    GoodChar = FALSE;
    *TheOne=69;
    TonePct=rand_num(1,100);
    if (ISVOWEL(LastOne))
    {
        if (TonePct < 6) /*Make tone mark*/
        {
            switch(TonePct)
            {
            case 1:
                *TheOne = 204;
                break;
            case 2:
                *TheOne = 210;
                break;
            case 3:
                *TheOne = 222;
                break;
            case 4:
            case 5:
                *TheOne = 242;
                break;
            default:
                *TheOne = 204;
            }
        }
        else
        {
            while (GoodChar == FALSE)
            {
                *TheOne = rand_num(VN_LO,VN_HI);
                GoodChar=Viet_only_good_chars(TheOne);
            }
        }
    }
    else
    {
        while (GoodChar == FALSE)
        {
            *TheOne = rand_num(VN_LO,203);
            GoodChar=Viet_only_good_chars(TheOne);
        }
    }
    /* if last is vowel
    (5%)tone<204,210,222,242>
    (%95)all
    (Viet_LO-87) (89-119)
    (121-193 "y" only legal, skip) (194-VN_HI) "NO X"
    else not vowel
    (VIET_LO-203) (205-209) (211-221) (223-241) (243-VN_HI) "NO TONE"
    */
    return(TRUE);
}
/*
 * Returns TRUE when CheckMe is one of the letter codes for AaEeIiOoUuYy
 * (65, 69, 73, 79, 85, 89, 97, 101, 105, 111, 117, 121), FALSE otherwise.
 *
 * In the archived text ten of the twelve "== NN" comparisons were damaged
 * by a quoted-printable decode; they are restored from the letter list in
 * the original comment.
 */
int ISVOWEL(unsigned char CheckMe)
{
    /*AaEeIiOoUuYy */
    if (CheckMe == 65 || CheckMe == 69 || CheckMe == 73 || CheckMe == 79
        || CheckMe == 85 || CheckMe == 89 || CheckMe == 97 || CheckMe == 101
        || CheckMe == 105 || CheckMe == 111 || CheckMe == 117 || CheckMe == 121)
        return TRUE;
    return FALSE;
}
/* Repeats two declarations already made near the top of the file.
   NOTE(review): no definition of either function is visible in this file. */
unsigned char fun_V_VIET_CHAR(void);
unsigned char fun_V_BAD_CHARS(void);
/* #1
//making first char
Data=rand_num(THAI_LO,228);
//printf("[%d]", Data);
while (GoodChar == FALSE)
{
GoodChar=Thai_only_good_chars(&Data);
if (Data>228||Data==207||Data==209||(Data>210&&Data<224) )
GoodChar=FALSE;
Data=rand_num(THAI_LO,228);
}
*/
["ftang.vcf" (text/x-vcard)]
[prev in list] [next in list] [prev in thread] [next in thread]
Configure |
About |
News |
Add a list |
Sponsored by KoreLogic