Skip to content

Commit

Permalink
添加流版本的解析器,并为其添加了对应的模式用于测试
Browse files Browse the repository at this point in the history
流解析器可以直接从流中获取字符串并在处理后放入另一个流。
这次提交中也修改了很多 main.c 以及解析器中一些标识符的命名,兼容性上会很炸毛的
  • Loading branch information
SourLemonJuice committed Aug 5, 2024
1 parent 2b819be commit 3d31b51
Show file tree
Hide file tree
Showing 6 changed files with 180 additions and 89 deletions.
34 changes: 17 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ This project really added a lot of useful peripheral components, like CI, code s

A great gift for me.

## Use in Command Line Utility
## Using the command line

### Build

Expand Down Expand Up @@ -49,13 +49,14 @@ h3o w3d

```text
$ ./i18nglish.out --help
NOTE: Flags are just half stable
Usage: i18nglish [--version] [--help] --mode <MODE> [args]
> Flags are just half stable
MODE(for set input source):
arguments Use all arguments after it
file <path> Read a text file
stdin Same 'file' but use stdin
stream-stdin testing mode, use stream parser
```

### Precautions
Expand All @@ -74,7 +75,7 @@ The error looked like: `�16�`\
This program is just an English joke, so it should be fine.\
I don't want to adapt to UTF-8 `_(:з」∠)_`

## Move main functions to other project
## Using in other projects

All the functions/defines you need are in *source/i7h/i7h_processor.c and .h*

Expand All @@ -84,7 +85,7 @@ The main process function is `i7hProcessor()`, this is its prototype:
int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]);
```
And the structure `I7hDataStruct`, it's like a buffer of processor.
The structure `I7hDataStruct` acts as the processor's buffer.
When the `i7hProcessor()` is called, it'll auto resize the buffer in structure.\
The caller doesn't need to free the buffer in every loop. But **must**:\
Expand All @@ -93,29 +94,28 @@ call `i7hFreeStructure()` at the END of the loop to free them
See usage details in *source/main.c*
But... it won't delete punctuation with itself.
But... it won't strip punctuation by itself.
## Other Idea
Be sure to check out the [#Stream version parser](#stream-version-parser), it's a new and better API
Ah..\
If just use it in CLI/stdout, you can do it this way(untested):
## Stream version parser
```c
int src_string_length = strlen(src_string); // get length
`i7hParserStream()` is a new API of this project.\
It gets rid of some crappy scaffolding. Here are some good things about it:
putc(src_string[0], stdout); // first char
printf("%d", src_string_length - 2); // the numbers between
putc(src_string[src_string_length - 1], stdout); // last char
putc('\n');
```
- Cleaner code!
- Perfect punctuation detection
- Recognizes words more correctly
- No need to create the data structure manually
- Processes streams directly, like `stdin/stdout` or a file handle
Maybe I'll put them to *main.c* at later.
Use `--mode stream-stdin` in CLI to try it
## Todo List
> The todo list is for myself, not for showing off. Past todo items only need to be stored in the git history.
- Unit test(only for core processor)
- Migrate all modes to the stream version parser
## Code style
Expand Down
4 changes: 2 additions & 2 deletions source/GNUmakefile
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ releaseBuild := 0

# combining compiler's flags
CFlag_clang += -I./
CFlag_clang += -Os
CFlag_clang += -O3
CFlag_clang += -DAPP_GIT_COMMIT_INFO="\"$(git_commit_info)\""
CFlag_clang += -DAPP_BUILD_HOST_DESCRIPTION="\"$(build_host_description)\""
CFlag_clang += -DAPP_BUILD_RELEASE_MODE="$(releaseBuild)"
CFlag_clang += -DAPP_BUILD_DATE_UTC="\"$(build_date_UTC)\""

default: i18nglish

i18nglish: $(source_files)
i18nglish: $(source_files) GNUmakefile
/usr/bin/clang $(source_files) \
--output $(executable_base_name) \
$(CFlag_clang)
Expand Down
136 changes: 105 additions & 31 deletions source/i7h/i7h_processor.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "i7h_processor.h"

#include <ctype.h>
#include <iso646.h>
#include <stdbool.h>
#include <stdio.h>
Expand All @@ -19,59 +20,132 @@ static int getIntLength_(int src_int)
return length;
}

// i7h_D means "i7h data structure"
/*
Resize the i7h buffer to `data->need_buffer_size` plus a fixed amount of slack.

The structure is only modified when the reallocation succeeds: on failure
`data->buffer` still points to the old, valid allocation and
`data->real_buffer_size` still describes it, so the caller can keep using or
free it.

Result:
- true: ok
- false: error (allocation failed or the requested size overflowed)
*/
static bool bufferRealloc_(struct I7hDataStruct data[restrict])
{
    enum { kSlackBytes = 32 }; // redundancy, so small growth does not reallocate every time

    size_t new_size = data->need_buffer_size + kSlackBytes;
    if (new_size < data->need_buffer_size) // unsigned wrap-around
        return false;

    // Keep the old pointer until realloc has succeeded; assigning the result
    // straight to data->buffer would leak the old block on failure.
    char *new_buffer = realloc(data->buffer, new_size);
    if (new_buffer == NULL)
        return false;

    data->buffer = new_buffer;
    data->real_buffer_size = new_size;
    return true;
}

/*
Prepare an i7h data structure for use.

Allocates the default buffer (room for two characters plus '\0') and resets
the bookkeeping fields.

Result:
- 0: ok
- kI7hErrorAllocMemory: the buffer could not be allocated. The structure is
  then left in a consistent "empty" state (NULL buffer, capacity 0).
*/
int i7hInitStructure(struct I7hDataStruct i7h_D[restrict])
{
    enum { kDefaultBufferSize = 3 }; // two char + one '\0'

    i7h_D->need_buffer_size = 0;
    i7h_D->src_string_length = 0;

    i7h_D->buffer = malloc(kDefaultBufferSize);
    if (i7h_D->buffer == NULL) {
        // Do not advertise capacity that does not exist, otherwise a later
        // call could skip the resize step and write through the NULL buffer.
        i7h_D->real_buffer_size = 0;
        return kI7hErrorAllocMemory;
    }

    i7h_D->real_buffer_size = kDefaultBufferSize;
    return 0;
}

/*
Release the buffer owned by an i7h data structure.

After a successful call the structure no longer refers to the freed memory:
the buffer pointer is NULL and both size fields are 0.

Result:
- 0: ok
- kI7hErrorFreeMemory: there was no buffer to free (never initialised
  successfully, or already freed)
*/
int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict])
{
    if (i7h_D->buffer == NULL)
        return kI7hErrorFreeMemory;

    free(i7h_D->buffer);
    // Clear the pointer so a repeated call reports an error instead of
    // freeing the same block twice.
    i7h_D->buffer = NULL;
    i7h_D->need_buffer_size = 0;
    i7h_D->real_buffer_size = 0;

    return 0;
}

/*
For single word parsing. The whole of src_string is treated as one word;
punctuation is not detected or split off.
i7h_D means "i7h data structure"

The result is written, NUL-terminated, to i7h_D->buffer:
- a string of two characters or fewer is copied unchanged
- otherwise it becomes <first char><length - 2 in decimal><last char>

Result:
- 0: ok
- kI7hErrorAllocMemory: the buffer could not be grown
- 1: snprintf reported an error

NOTE(review): this text comes from a rendered diff, so two versions of some
statements appear side by side. The lines using now_buffer_size and
I7hErrorAllocMemory look like the pre-commit version, the lines using
need_buffer_size and kI7hErrorAllocMemory like the post-commit version.
Confirm against the repository before compiling this span.
*/
int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[])
{
/* get length */
i7h_D->src_string_length = strlen(src_string);
/* get buffer size */
// the +2 is for the first and last char, the final +1 is for '\0'
i7h_D->now_buffer_size = getIntLength_(i7h_D->src_string_length - 2) + 2 + 1;
i7h_D->need_buffer_size = getIntLength_(i7h_D->src_string_length - 2) + 2 + 1;

/* grow the buffer when the required size exceeds the allocated size */
if (i7h_D->now_buffer_size > i7h_D->real_buffer_size) {
i7h_D->real_buffer_size = i7h_D->now_buffer_size * 1.2; // for redundancy, realSize is (nowSize * 1.2)
if (i7h_D->need_buffer_size > i7h_D->real_buffer_size) {
// and expand real buffer size
i7h_D->buffer = realloc(i7h_D->buffer, i7h_D->real_buffer_size);
// error detection of realloc
if (i7h_D->buffer == NULL)
return I7hErrorAllocMemory;
// bufferRealloc_() resizes to need_buffer_size plus slack and reports failure as false
if (bufferRealloc_(i7h_D) != true)
return kI7hErrorAllocMemory;
}

/* fill the buffer */
// a source of two chars or fewer is copied as-is
if (i7h_D->src_string_length <= 2) {
strcpy(i7h_D->buffer, src_string);
return 0;
}

// normal case: first char, count of the chars in between, last char
if (snprintf(i7h_D->buffer, i7h_D->need_buffer_size, "%c%d%c", src_string[0], i7h_D->src_string_length - 2,
src_string[i7h_D->src_string_length - 1]) >= 0) {
return 0;
} else {
// normal case: first char, count of the chars in between, last char
if (snprintf(i7h_D->buffer, i7h_D->now_buffer_size, "%c%d%c", src_string[0], i7h_D->src_string_length - 2,
src_string[i7h_D->src_string_length - 1]) >= 0) {
return 0;
} else {
return 1;
}
// snprintf returned a negative value
return 1;
}
}

int i7hInitStructure(struct I7hDataStruct i7h_D[restrict])
/*
The stream version of i7h processor.
Built-in perfect punctuation detection feature.
WoW
*/
int i7hParserStream(FILE *stream, FILE *output)
{
i7h_D->now_buffer_size = 0;
i7h_D->real_buffer_size = 3; // two char + one '\0', is default
i7h_D->buffer = malloc(i7h_D->real_buffer_size); // default size is 3
// error detection
if (i7h_D->buffer == NULL)
return I7hErrorAllocMemory;
struct I7hDataStruct data;
i7hInitStructure(&data);

return 0;
}
char ch;
int word_pos = 0;

int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict])
{
if (i7h_D->buffer != NULL) {
free(i7h_D->buffer);
} else {
return I7hErrorFreeMemory;
while ((ch = getc(stream)) != EOF) {
// if it's the end of a word
if (ch == ' ' or ch == '\n' or ispunct(ch)) {
// if buffer still blank
if (word_pos == 0) {
fputc(ch, output);
continue;
}

// output the result string
data.buffer[word_pos] = '\0'; // // now, the word_pos and strlen(buffer) are the same
if (word_pos <= 2) {
fputs(data.buffer, output);
} else {
fputc(data.buffer[0], output);
fprintf(output, "%d", word_pos - 2);
fputc(data.buffer[word_pos - 1], output);
}
fputc(ch, output); // don't forget the char in the current loop
fflush(output); // finish

// reset
word_pos = 0;
continue;
}

// if buffer size not enough, expand it
if (word_pos >= data.real_buffer_size) {
data.need_buffer_size += 1;
if (bufferRealloc_(&data) != true)
return 1;
}

data.buffer[word_pos] = ch;
word_pos++;
}

i7hFreeStructure(&data);
return 0;
}
17 changes: 10 additions & 7 deletions source/i7h/i7h_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,31 @@
#define I7H_PROCESSOR_H_

#include <stddef.h>
#include <stdio.h>

/* structures */
/*
Working state passed to i7hInitStructure(), i7hProcessor() and i7hFreeStructure().

NOTE(review): this text comes from a rendered diff; now_buffer_size and the
first src_string_length line look like the pre-commit fields that
need_buffer_size and the commented src_string_length line replace. Confirm
against the repository.
*/
struct I7hDataStruct {
// heap buffer allocated by i7hInitStructure(); i7hProcessor() writes its result here
char *buffer;
size_t now_buffer_size;
// bytes the current result needs, including the terminating '\0'
size_t need_buffer_size;
// bytes currently allocated for buffer
size_t real_buffer_size;
int src_string_length;
// strlen() of the string last given to i7hProcessor()
int src_string_length; // I want more speed...
};

/* enumerates */
/*
Result codes returned by the i7h functions.

NOTE(review): this text comes from a rendered diff; the un-prefixed names look
like the pre-commit spelling of the k-prefixed ones. Confirm against the
repository.
*/
enum I7hResultType {
I7hOK = 0,
I7hErrorAllocMemory = 1,
I7hErrorFreeMemory,
// same value as the literal 0 the functions return on success
kI7hOK = 0,
// returned when allocating or growing the buffer fails
kI7hErrorAllocMemory = 1,
// returned by i7hFreeStructure() when the buffer pointer is NULL
kI7hErrorFreeMemory,
};

/* functions */
// single-word processor: writes the i7h form of src_string into i7h_D->buffer
int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]);
// init structure: allocates the default buffer; returns kI7hErrorAllocMemory on failure
int i7hInitStructure(struct I7hDataStruct i7h_D[restrict]);
// free the buffer; returns kI7hErrorFreeMemory when there is no buffer to free
int i7hFreeStructure(struct I7hDataStruct i7h_D[restrict]);
// single-word processor: writes the i7h form of src_string into i7h_D->buffer
int i7hProcessor(struct I7hDataStruct i7h_D[restrict], const char src_string[]);
// stream version processor: reads text from `stream`, writes the processed text to `output`
int i7hParserStream(FILE *stream, FILE *output);

#endif
Loading

0 comments on commit 3d31b51

Please sign in to comment.