diff --git a/README.md b/README.md index 4d8bf15..34280ab 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ This project really added a lot of useful peripheral components, like CI, code s A great gift for me. -## Use in Command Line Utility +## Using the command line ### Build @@ -49,13 +49,14 @@ h3o w3d ```text $ ./i18nglish.out --help -NOTE: Flags are just half stable Usage: i18nglish [--version] [--help] --mode [args] +> Flags are just half stable MODE(for set input source): arguments Use all arguments after it file Read a text file stdin Same 'file' but use stdin + stream-stdin testing mode, use stream parser ``` ### Precautions @@ -74,7 +75,7 @@ The error looked like: `�16�`\ This program just an English joke, so it's should be fine.\ I don't want to adapt to UTF-8 `_(:з」∠)_` -## Move main functions to other project +## Using in other project All the functions/defines you need are in *source/i7h/i7h_processor.c and .h* @@ -84,7 +85,7 @@ The main process function is `i7hProcessor()`, this is its prototype: int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]); ``` -And the structure `I7hDataStruct`, it's like a buffer of processor. +And the structure `I7hDataStruct`, it's like a buffer of processor When the `i7hProcessor()` is called, it'll auto resize the buffer in structure.\ The caller doesn't need to free the buffer in every loop. But **must**:\ @@ -93,29 +94,28 @@ call `i7hFreeStructure()` at the END of the loop to free them See usage details in *source/main.c* -But... it won't delete punctuation with itself. +But... it won't delete punctuation with itself -## Other Idea +Be sure to check out the [#Stream version parser](#stream-version-parser), it's a new and better API -Ah..\ -If just use it in CLI/stdout, you can do it this way(untested): +## Stream version parser -```c -int src_string_length = strlen(src_string); // get length +`i7hParserStream()` is a new API of this project.\ +It gets rid from some crappy frameworks. 
Here are some good things about it: -putc(src_string[0], stdout); // first char -printf("%d", src_string_length - 2); // the numbers between -putc(src_string[src_string_length - 1], stdout); // last char -putc('\n'); -``` +- Cleaner code! +- Perfect punctuation detection +- Recognizes words more accurately +- No need to create the data structure manually +- Processes streams directly, like `stdin/stdout` or a file handle -Maybe I'll put them to *main.c* at later. +Use `--mode stream-stdin` in the CLI to try it ## Todo List > Todo list is for myself, not for showing off. The history todo only needs stored in git history. -- Unit test(only for core processor) +- Migrate all modes to the stream version parser ## Code style diff --git a/source/GNUmakefile b/source/GNUmakefile index 4f0edd5..30de0f5 100644 --- a/source/GNUmakefile +++ b/source/GNUmakefile @@ -19,7 +19,7 @@ releaseBuild := 0 # combining compiler's flags CFlag_clang += -I./ -CFlag_clang += -Os +CFlag_clang += -O3 CFlag_clang += -DAPP_GIT_COMMIT_INFO="\"$(git_commit_info)\"" CFlag_clang += -DAPP_BUILD_HOST_DESCRIPTION="\"$(build_host_description)\"" CFlag_clang += -DAPP_BUILD_RELEASE_MODE="$(releaseBuild)" @@ -27,7 +27,7 @@ CFlag_clang += -DAPP_BUILD_DATE_UTC="\"$(build_date_UTC)\"" default: i18nglish -i18nglish: $(source_files) +i18nglish: $(source_files) GNUmakefile /usr/bin/clang $(source_files) \ --output $(executable_base_name) \ $(CFlag_clang) diff --git a/source/i7h/i7h_processor.c b/source/i7h/i7h_processor.c index 927d76d..8a056c0 100644 --- a/source/i7h/i7h_processor.c +++ b/source/i7h/i7h_processor.c @@ -1,5 +1,6 @@ #include "i7h_processor.h" +#include #include #include #include @@ -19,59 +20,132 @@ static int getIntLength_(int src_int) return length; } -// i7h_D means "i7h data structure" +/* + Resize the i7h buffer.
+ + Result: + - true: ok + - false: error + */ +static bool bufferRealloc_(struct I7hDataStruct data[restrict]) +{ + data->real_buffer_size = data->need_buffer_size + 32; // for redundancy, realSize is (nowSize + 32) + data->buffer = realloc(data->buffer, data->real_buffer_size); + if (data->buffer == NULL) + return false; + + return true; +} + +int i7hInitStructure(struct I7hDataStruct i7h_D[restrict]) +{ + i7h_D->need_buffer_size = 0; + i7h_D->real_buffer_size = 3; // two char + one '\0', is default + + i7h_D->buffer = malloc(i7h_D->real_buffer_size); // default size is 3 + if (i7h_D->buffer == NULL) + return kI7hErrorAllocMemory; + + return 0; +} + +int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict]) +{ + if (i7h_D->buffer != NULL) { + free(i7h_D->buffer); + } else { + return kI7hErrorFreeMemory; + } + + return 0; +} + +/* + For single word parsing. Won't care punctuation. + + i7h_D means "i7h data structure" + */ int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]) { /* get length */ i7h_D->src_string_length = strlen(src_string); /* get buffer size */ // +2 is first and end char, last +1 is \0 - i7h_D->now_buffer_size = getIntLength_(i7h_D->src_string_length - 2) + 2 + 1; + i7h_D->need_buffer_size = getIntLength_(i7h_D->src_string_length - 2) + 2 + 1; + /* update max usage memary size of buffer */ - if (i7h_D->now_buffer_size > i7h_D->real_buffer_size) { - i7h_D->real_buffer_size = i7h_D->now_buffer_size * 1.2; // for redundancy, realSize is (nowSize * 1.2) + if (i7h_D->need_buffer_size > i7h_D->real_buffer_size) { // and expand real buffer size - i7h_D->buffer = realloc(i7h_D->buffer, i7h_D->real_buffer_size); - // error detection of realloc - if (i7h_D->buffer == NULL) - return I7hErrorAllocMemory; + if (bufferRealloc_(i7h_D) != true) + return kI7hErrorAllocMemory; } - /* create buffer and processing */ // if only have 2 chars, just return them if (i7h_D->src_string_length <= 2) { strcpy(i7h_D->buffer, src_string); return 0; 
+ } + + // normal one + if (snprintf(i7h_D->buffer, i7h_D->need_buffer_size, "%c%d%c", src_string[0], i7h_D->src_string_length - 2, + src_string[i7h_D->src_string_length - 1]) >= 0) { + return 0; } else { - // normal one - if (snprintf(i7h_D->buffer, i7h_D->now_buffer_size, "%c%d%c", src_string[0], i7h_D->src_string_length - 2, - src_string[i7h_D->src_string_length - 1]) >= 0) { - return 0; - } else { - return 1; - } + return 1; } } -int i7hInitStructure(struct I7hDataStruct i7h_D[restrict]) +/* + The stream version of i7h processor. + Built-in perfect punctuation detection feature. + + WoW + */ +int i7hParserStream(FILE *stream, FILE *output) { - i7h_D->now_buffer_size = 0; - i7h_D->real_buffer_size = 3; // two char + one '\0', is default - i7h_D->buffer = malloc(i7h_D->real_buffer_size); // default size is 3 - // error detection - if (i7h_D->buffer == NULL) - return I7hErrorAllocMemory; + struct I7hDataStruct data; + i7hInitStructure(&data); - return 0; -} + char ch; + int word_pos = 0; -int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict]) -{ - if (i7h_D->buffer != NULL) { - free(i7h_D->buffer); - } else { - return I7hErrorFreeMemory; + while ((ch = getc(stream)) != EOF) { + // if it's the end of a word + if (ch == ' ' or ch == '\n' or ispunct(ch)) { + // if buffer still blank + if (word_pos == 0) { + fputc(ch, output); + continue; + } + + // output the result string + data.buffer[word_pos] = '\0'; // // now, the word_pos and strlen(buffer) are the same + if (word_pos <= 2) { + fputs(data.buffer, output); + } else { + fputc(data.buffer[0], output); + fprintf(output, "%d", word_pos - 2); + fputc(data.buffer[word_pos - 1], output); + } + fputc(ch, output); // don't forget the char in the current loop + fflush(output); // finish + + // reset + word_pos = 0; + continue; + } + + // if buffer size not enough, expand it + if (word_pos >= data.real_buffer_size) { + data.need_buffer_size += 1; + if (bufferRealloc_(&data) != true) + return 1; + } + + 
data.buffer[word_pos] = ch; + word_pos++; } + i7hFreeStructure(&data); return 0; } diff --git a/source/i7h/i7h_processor.h b/source/i7h/i7h_processor.h index 4db9030..5b87d4e 100644 --- a/source/i7h/i7h_processor.h +++ b/source/i7h/i7h_processor.h @@ -2,28 +2,31 @@ #define I7H_PROCESSOR_H_ #include +#include /* structures */ struct I7hDataStruct { char *buffer; - size_t now_buffer_size; + size_t need_buffer_size; size_t real_buffer_size; - int src_string_length; + int src_string_length; // I want more speed... }; /* enumerates */ enum I7hResultType { - I7hOK = 0, - I7hErrorAllocMemory = 1, - I7hErrorFreeMemory, + kI7hOK = 0, + kI7hErrorAllocMemory = 1, + kI7hErrorFreeMemory, }; /* functions */ -// main i7h processor -int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]); // init structure int i7hInitStructure(struct I7hDataStruct i7h_D[restrict]); // free buffer(if have) int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict]); +// main i7h processor +int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]); +// stream version processor +int i7hParserStream(FILE *stream, FILE *output); #endif diff --git a/source/main.c b/source/main.c index d82bb20..d86e104 100644 --- a/source/main.c +++ b/source/main.c @@ -20,7 +20,7 @@ static int i7hProcessorExitLog_(char source_string[], int proc_result) { printf("Error: Something wrong while processing.\n"); printf("Source string: '%s', Result code: %d\n", source_string, proc_result); - return kMainProcessorError; + return kParserErrorProcessing; } /* @@ -53,7 +53,7 @@ int i7hProcessorArgv(int argc, char *argv[], int argc_begin) { if (argc_begin > argc - 1) { puts("ERROR: Parameter invalid."); - exit(kAppExitGetFlagError); + exit(kAppErrorGetFlag); } // this buffer use to store argv without punctuation @@ -68,7 +68,7 @@ int i7hProcessorArgv(int argc, char *argv[], int argc_begin) for (int i = argc_begin; i < argc; i++) { // delete punct if (deletePunctuations(argv[i], 
strlen(argv[i]) + 1, argv_nopunct, INPUT_BUFFER_SIZE) != 0) { - exit(kAppPreProcessorError); + exit(kAppErrorPreProcessing); } // call the main function i7h_proc_result = i7hProcessor(&i7h_data, argv_nopunct); @@ -91,7 +91,7 @@ int i7hProcessorFile(char *file_path) FILE *file_handle = fopen(file_path, "r"); if (file_handle == NULL) { printf("ERROR: File con't open\n"); - exit(kAppExitStd); + exit(kAppErrorStd); } char next_char_tmp; char next_string[INPUT_BUFFER_SIZE]; @@ -114,7 +114,7 @@ int i7hProcessorFile(char *file_path) fscanf(file_handle, "%s", next_string); // delete punctuations if (deletePunctuations(next_string, INPUT_BUFFER_SIZE, next_string_nopunct, INPUT_BUFFER_SIZE) != 0) { - exit(kAppPreProcessorError); + exit(kAppErrorPreProcessing); } // call the main function if ((i7h_proc_result = i7hProcessor(&i7h_data, next_string_nopunct)) == 0) { @@ -152,7 +152,7 @@ int i7hProcessorStdin(void) fscanf(stdin, "%s", temp_string); // delete punctuations if (deletePunctuations(temp_string, INPUT_BUFFER_SIZE, temp_string_nopunct, INPUT_BUFFER_SIZE) != 0) { - exit(kAppPreProcessorError); + exit(kAppErrorPreProcessing); } if ((i7h_proc_result = i7hProcessor(&i7h_data, temp_string_nopunct)) == 0) { printf("%s ", i7h_data.buffer); @@ -170,21 +170,22 @@ int i7hProcessorStdin(void) int parseCliFlag(struct AppCliFlagConfig *flag_data, int argc, char *argv[]) { if (argc < 2) { - puts("ERROR: Need some arguments, use \"--help\" flag to check more info."); - return kAppExitGetFlagError; + printf("ERROR: Need some arguments, use \"--help\" flag to check more info.\n\n"); + flag_data->main_mode = kAppInputMode_ShowHelp; + return kAppOk; } for (int i = 1; i + 1 <= argc; i++) { // --help if (strcmp(argv[i], "--help") == 0) { flag_data->main_mode = kAppInputMode_ShowHelp; - return kAppExitOk; + return kAppOk; } // --version if (strcmp(argv[i], "--version") == 0) { flag_data->main_mode = kAppInputMode_ShowVersion; - return kAppExitOk; + return kAppOk; } // --mode @@ -193,7 +194,7 
@@ int parseCliFlag(struct AppCliFlagConfig *flag_data, int argc, char *argv[]) // error detect if (not(i + 1 <= argc)) { printf("No value of --mode flag\n"); - return kAppExitGetFlagError; + return kAppErrorGetFlag; } // arguments @@ -204,9 +205,9 @@ int parseCliFlag(struct AppCliFlagConfig *flag_data, int argc, char *argv[]) flag_data->output_argc_begin = i; } else { printf("Invalid/Null value of '--mode argument'\n"); - return kAppExitFlagValueError; + return kAppErrorFlagValue; } - return kAppExitOk; + return kAppOk; } // file if (strcmp(argv[i], "file") == 0) { @@ -216,22 +217,27 @@ int parseCliFlag(struct AppCliFlagConfig *flag_data, int argc, char *argv[]) flag_data->output_file_path = argv[i]; } else { printf("Invalid/Null value of '--mode file'\n"); - return kAppExitFlagValueError; + return kAppErrorFlagValue; } - return kAppExitOk; + return kAppOk; } // stdin if (strcmp(argv[i], "stdin") == 0) { flag_data->main_mode = kAppInputMode_ParseStdin; - return kAppExitOk; + return kAppOk; + } + // stream-stdin + if (strcmp(argv[i], "stream-stdin") == 0) { + flag_data->main_mode = kAppInputMode_ParseStreamStdin; + return kAppOk; } // default printf("Invalid/Null value of '--mode'\n"); - return kAppExitFlagValueError; + return kAppErrorFlagValue; } // default printf("ERROR: Invalid Flag '%s'\n", argv[i]); - return kAppExitGetFlagError; + return kAppErrorGetFlag; } return 0; @@ -245,18 +251,19 @@ int main(int argc, char *argv[]) // parse flags struct AppCliFlagConfig flag_data; int parse_flag_rsult = parseCliFlag(&flag_data, argc, argv); - if (parse_flag_rsult != kAppExitOk) + if (parse_flag_rsult != kAppOk) exit(parse_flag_rsult); switch (flag_data.main_mode) { case kAppInputMode_ShowHelp: - printf("NOTE: Flags are just half stable\n"); printf("Usage: i18nglish [--version] [--help] --mode [args]\n"); + printf("> Flags are just half stable\n"); printf("\nMODE(for set input source):\n"); printf("\targuments\tUse all arguments after it\n"); printf("\tfile \tRead a 
text file\n"); printf("\tstdin\t\tSame 'file' but use stdin\n"); - exit(kAppExitOk); + printf("\tstream-stdin\ttesting mode, use stream parser\n"); + exit(kAppOk); break; case kAppInputMode_ShowVersion: printf("==== Versions ====\n"); @@ -268,23 +275,29 @@ int main(int argc, char *argv[]) printf("==== Author info ====\n"); printf("Developed by 酸柠檬猹/SourLemonJuice 2024\n"); printf("Published under MIT license\n"); - exit(kAppExitOk); + exit(kAppOk); break; case kAppInputMode_ParseArgument: i7hProcessorArgv(argc, argv, flag_data.output_argc_begin); - exit(kAppExitOk); + exit(kAppOk); break; case kAppInputMode_ParseStdin: i7hProcessorStdin(); - exit(kAppExitOk); + exit(kAppOk); + break; + case kAppInputMode_ParseStreamStdin: + // TODO it's a testing mode + fprintf(stderr, "Note: stream-stdin is a testing mode\n"); + i7hParserStream(stdin, stdout); + exit(kAppOk); break; case kAppInputMode_ParseFile: i7hProcessorFile(flag_data.output_file_path); - exit(kAppExitOk); + exit(kAppOk); break; } // at last print some ERROR by default puts("ERROR: Unknow Error at 'main'"); - exit(kAppExitStd); + exit(kAppErrorStd); } diff --git a/source/main.h b/source/main.h index 0e4621a..a4cfa3b 100644 --- a/source/main.h +++ b/source/main.h @@ -44,18 +44,19 @@ #define INPUT_BUFFER_SIZE (1024 * 1) enum AppExitType { - kAppExitOk = EXIT_SUCCESS, - kAppExitStd = EXIT_FAILURE, - kAppExitGetFlagError, - kAppExitFlagValueError, - kMainProcessorError, - kAppPreProcessorError, + kAppOk = EXIT_SUCCESS, + kAppErrorStd = EXIT_FAILURE, + kAppErrorGetFlag, + kAppErrorFlagValue, + kParserErrorProcessing, + kAppErrorPreProcessing, }; enum AppCommandFlagMode { kAppInputMode_ParseArgument, kAppInputMode_ParseFile, kAppInputMode_ParseStdin, + kAppInputMode_ParseStreamStdin, kAppInputMode_ShowHelp, kAppInputMode_ShowVersion, };