Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add French implementation #33

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ licensedir = $(datarootdir)/licenses/$(TRANSFORMED_PACKAGE_NAME)
bin_PROGRAMS = decasify
decasify_SOURCES = src/bin/decasify.rs src/content.rs src/cli.rs src/lib.rs src/types.rs src/traits.rs
decasify_SOURCES += src/lua.rs src/python.rs src/wasm.rs
decasify_SOURCES += src/en.rs src/tr.rs
decasify_SOURCES += src/en.rs src/fr.rs src/tr.rs
EXTRA_decasify_SOURCES = tests/cli.rs tests/lib.rs
EXTRA_DIST = pyproject.toml spec/decasify_spec.lua tests/test_all.py plugin/decasify.lua sile/decasify.lua
dist_doc_DATA = README.md CHANGELOG.md
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ classifiers = [
"Development Status :: 5 - Production/Stable",
"Intended Audience :: Developers",
"Natural Language :: English",
"Natural Language :: French",
"Natural Language :: Turkish",
"Programming Language :: Python :: Implementation :: CPython",
"Programming Language :: Python :: Implementation :: PyPy",
Expand Down
106 changes: 106 additions & 0 deletions src/fr.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// SPDX-FileCopyrightText: © 2023 Caleb Maclennan <[email protected]>
// SPDX-License-Identifier: LGPL-3.0-only

use crate::content::{Chunk, Segment, Word};
use crate::types::StyleGuide;

use regex::Regex;
use unicode_titlecase::StrTitleCase;

pub use crate::en::lowercase;
pub use crate::en::sentencecase;
pub use crate::en::uppercase;

/// Convert `chunk` to title case for French according to `style`.
///
/// # Panics
///
/// Only [`StyleGuide::LanguageDefault`] is implemented. Any other style guide reaches a
/// `todo!` and panics.
pub fn titlecase(chunk: Chunk, style: StyleGuide) -> String {
    // Guard clause: bail out on every style guide we have not implemented yet.
    if !matches!(style, StyleGuide::LanguageDefault) {
        todo!("French implementation doesn't support this style guide.");
    }
    titlecase_fr(chunk)
}

/// Title-case `chunk` using this module's default French rules.
///
/// Hyphenated words are first split into separate word segments, with each `-` kept as a
/// separator segment, so that every part of a compound is cased on its own
/// (`font-ils` becomes `Font-Ils`). After that:
///
/// * the first and the last word are always capitalized;
/// * any word in between is lowercased when [`is_reserved`] matches it, and capitalized
///   (first letter upper, rest lower) otherwise.
fn titlecase_fr(mut chunk: Chunk) -> String {
    // Re-segment on hyphens. Take the segments out of the chunk instead of cloning it; the
    // rebuilt list is stored back below so the casing pass actually sees the split words.
    let original = std::mem::take(&mut chunk.segments);
    let mut segments: Vec<Segment> = Vec::with_capacity(original.len());
    for segment in original {
        match segment {
            Segment::Separator(_) => segments.push(segment),
            Segment::Word(ref word) => {
                for (index, part) in word.word.split('-').enumerate() {
                    if index > 0 {
                        segments.push(Segment::Separator("-".into()));
                    }
                    // Leading, trailing or doubled hyphens yield empty fragments. Skip them
                    // so an empty word can never be mistaken for the first or last word.
                    if !part.is_empty() {
                        segments.push(Segment::Word(Word { word: part.into() }));
                    }
                }
            }
        }
    }
    chunk.segments = segments;

    let mut words = chunk
        .segments
        .iter_mut()
        .filter_map(|segment| match segment {
            Segment::Word(word) => Some(word),
            _ => None,
        })
        .peekable();
    // The first word is always capitalized.
    if let Some(word) = words.next() {
        word.word = word.to_titlecase_lower_rest();
    }
    // `while let` rather than `for`: the body peeks ahead to detect the last word.
    while let Some(word) = words.next() {
        let is_last = words.peek().is_none();
        word.word = if !is_last && is_reserved(word) {
            word.to_lowercase()
        } else {
            word.to_titlecase_lower_rest()
        };
    }
    chunk.into()
}

fn is_reserved(word: &Word) -> bool {
let word = word.to_lowercase();
let word = word.as_str();
// https://github.com/benoitvallon/titlecase-french/blob/83e092e91dccdd39871dfeac0d58dc06d997dabb/config.js#L22
let lower_case_word_list = vec![
"le", "la", "les", // definite articles
"un", "une", "des", // indefinite articles
"du", "de", "des", // partitive articles
"au", "aux", "du", "des", // contracted articles
"ce", "cet", "cette", "ces", // demonstrative adjectives
"quel", "quels", "quelle", "quelles", // exclamative adjectives
"mon", "ton", "son", "notre", "votre", "leur", "ma", "ta", "sa", "mes", "tes", "ses",
"nos", "vos", "leurs", // possessive adjectives
"mais", "ou", "et", "donc", "or", "ni", "car", "voire",
// coordinating conjunctions
"que", "qu", "quand", "comme", "si", "lorsque", "lorsqu", "puisque", "puisqu", "quoique",
"quoiqu", // subordinating conjunctions
"à", "chez", "dans", "entre", "jusque", "jusqu", "hors", "par", "pour", "sans", "vers",
"sur", "pas", "parmi", "avec", "sous", "en", // prepositions
"je", "tu", "il", "elle", "on", "nous", "vous", "ils", "elles", "me", "te", "se", "y",
// personal pronouns
"qui", "que", "quoi", "dont", "où", // relative pronouns
"ne", // others
];
let lower_case_words = lower_case_word_list.join("|");
let lower_case_word = Regex::new(format!("^({lower_case_words})$").as_ref()).unwrap();
lower_case_word.is_match(word)
}

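// NOTE(review): unported `capitalizedSpecials` table (accented capital -> unaccented capital),
// presumably taken from the titlecase-french config linked in `is_reserved` — confirm.
// Nothing in this module uses it yet.
//capitalizedSpecials: [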
// { input: 'À', output: 'A'},
// { input: 'Â', output: 'A'},
// { input: 'Ä', output: 'A'},
// { input: 'É', output: 'E'},
// { input: 'È', output: 'E'},
// { input: 'Ê', output: 'E'},
// { input: 'Ë', output: 'E'},
// { input: 'Ç', output: 'C'},
// { input: 'Î', output: 'I'},
// { input: 'Ï', output: 'I'},
// { input: 'Ô', output: 'O'},
// { input: 'Ö', output: 'O'},
// { input: 'Û', output: 'U'},
// { input: 'Ü', output: 'U'},
// { input: 'Ù', output: 'U'}
//],
5 changes: 5 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ pub mod python;
pub mod wasm;

mod en;
mod fr;
mod tr;

/// Convert a string to a specific case following typesetting conventions for a target locale
Expand Down Expand Up @@ -62,6 +63,7 @@ pub fn titlecase(
let style: StyleGuide = style.into();
match locale {
Locale::EN => en::titlecase(chunk, style),
Locale::FR => fr::titlecase(chunk, style),
Locale::TR => tr::titlecase(chunk, style),
}
}
Expand All @@ -72,6 +74,7 @@ pub fn lowercase(chunk: impl Into<Chunk>, locale: impl Into<Locale>) -> String {
let locale: Locale = locale.into();
match locale {
Locale::EN => en::lowercase(chunk),
Locale::FR => fr::lowercase(chunk),
Locale::TR => tr::lowercase(chunk),
}
}
Expand All @@ -82,6 +85,7 @@ pub fn uppercase(chunk: impl Into<Chunk>, locale: impl Into<Locale>) -> String {
let locale: Locale = locale.into();
match locale {
Locale::EN => en::uppercase(chunk),
Locale::FR => fr::uppercase(chunk),
Locale::TR => tr::uppercase(chunk),
}
}
Expand All @@ -92,6 +96,7 @@ pub fn sentencecase(chunk: impl Into<Chunk>, locale: impl Into<Locale>) -> Strin
let locale: Locale = locale.into();
match locale {
Locale::EN => en::sentencecase(chunk),
Locale::FR => fr::sentencecase(chunk),
Locale::TR => tr::sentencecase(chunk),
}
}
2 changes: 2 additions & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub type Result<T, E = Error> = std::result::Result<T, E>;
pub enum Locale {
#[default]
EN,
FR,
TR,
}

Expand Down Expand Up @@ -85,6 +86,7 @@ impl FromStr for Locale {
fn from_str(s: &str) -> Result<Self> {
match s.to_ascii_lowercase().as_str() {
"en" | "english" | "en_en" => Ok(Locale::EN),
"fr" | "french" | "fr_fr" | "français" => Ok(Locale::FR),
"tr" | "turkish" | "tr_tr" | "türkçe" => Ok(Locale::TR),
input => LocaleSnafu { input }.fail()?,
}
Expand Down
43 changes: 43 additions & 0 deletions tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,38 @@ titlecase!(
" Serbest Serseri\n Boşluk "
);

titlecase!(
french_def,
Locale::FR,
StyleGuide::LanguageDefault,
"le triangle rouge",
"Le Triangle Rouge"
);

titlecase!(
french_def2,
Locale::FR,
StyleGuide::LanguageDefault,
"loki, le détective mythique",
"Loki, le Détective Mythique"
);

titlecase!(
french_coordinating,
Locale::FR,
StyleGuide::LanguageDefault,
"il est studieux mais turbulent",
"Il Est Studieux mais Turbulent"
);

titlecase!(
french_coordinating2,
Locale::FR,
StyleGuide::LanguageDefault,
"mais comment font-ils?",
"Mais Comment Font-Ils?"
);

macro_rules! lowercase {
($name:ident, $locale:expr, $input:expr, $expected:expr) => {
#[test]
Expand All @@ -225,6 +257,8 @@ macro_rules! lowercase {

lowercase!(lower_en, Locale::EN, "foo BAR BaZ BIKE", "foo bar baz bike");

lowercase!(lower_fr, Locale::FR, "foo BAR BaZ BIKE", "foo bar baz bike");

lowercase!(
lower_tr,
Locale::TR,
Expand All @@ -244,6 +278,8 @@ macro_rules! uppercase {

uppercase!(upper_en, Locale::EN, "foo BAR BaZ bike", "FOO BAR BAZ BIKE");

uppercase!(upper_fr, Locale::FR, "foo BAR BaZ bike", "FOO BAR BAZ BIKE");

uppercase!(
upper_tr,
Locale::TR,
Expand All @@ -268,4 +304,11 @@ sentencecase!(
"Insert bike here"
);

sentencecase!(
sentence_fr,
Locale::FR,
"insert BIKE here",
"Insert bike here"
);

sentencecase!(sentence_tr, Locale::TR, "ilk DAVRANSIN", "İlk davransın");
Loading