Skip to content
This repository has been archived by the owner on May 10, 2023. It is now read-only.

Add validator for Lao (lo) #651

Merged
merged 6 commits into from
Dec 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions server/lib/validation/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const ig = require('./languages/ig');
const it = require('./languages/it');
const kab = require( './languages/kab');
const ko = require( './languages/ko');
const lo = require( './languages/lo');
const ne = require('./languages/ne');
const or = require('./languages/or');
const ru = require('./languages/ru');
Expand All @@ -27,6 +28,7 @@ const VALIDATORS = {
it,
kab,
ko,
lo,
ne,
or,
ru,
Expand Down
34 changes: 34 additions & 0 deletions server/lib/validation/languages/lo.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Validation rules for Lao (lo) sentences.
// Modelled on the Thai rules:
// https://github.com/common-voice/sentence-collector/blob/main/server/lib/validation/languages/th.js

// Inclusive bounds on sentence.length (UTF-16 code units).
const MIN_LENGTH = 2;
const MAX_LENGTH = 140;

// Flags a sentence whose length falls outside [MIN_LENGTH, MAX_LENGTH].
const lengthRule = {
  fn: (sentence) => {
    const charCount = sentence.length;
    const tooShort = charCount < MIN_LENGTH;
    const tooLong = charCount > MAX_LENGTH;
    return tooShort || tooLong;
  },
  error: `ຈຳນວນຕົວອັກສອນຕ້ອງຢູ່ລະຫວ່າງ ${MIN_LENGTH} ຫາ ${MAX_LENGTH} (ລວມ)`,
};

// Matches ASCII digits, Lao digits and Thai digits.
const digitRule = {
  regex: /[0-9໑໒໓໔໕໖໗໘໙໐๐-๙]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວເລກ',
};

// Matches Latin letters and anything in the Thai block (U+0E00-U+0E7F).
const foreignScriptRule = {
  regex: /[A-Za-z\u0E00-\u0E7F]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີຕົວອັກສອນລາຕິນ ຫຼືຕົວອັກສອນໄທ',
};

// Matches the symbols < > + * \ # @ ^ [ ] ( ) / as well as
// the Lao ellipsis sign (U+0EAF) and the Lao repetition mark (U+0EC6).
const symbolRule = {
  regex: /[<>+*\\#@^[\]()/\u0EAF\u0EC6]/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີສັນຍາລັກ, ລວມທັງ ຯ ແລະ ໆ',
};

// Emoji / special-symbol ranges taken from https://www.regextester.com/106421 and
// https://stackoverflow.com/questions/10992921/how-to-remove-emoji-code-using-javascript
const emojiRule = {
  regex: /(\u00a9|\u00ae|[\u2000-\u3300]|[\u2580-\u27bf]|\ud83c[\ud000-\udfff]|\ud83d[\ud000-\udfff]|\ud83e[\ud000-\udfff]|[\ue000-\uf8ff])/,
  error: 'ປະໂຫຍກບໍ່ຄວນມີ ອີໂມຈິ ຫຼືສັນຍາລັກຂອງ Unicode ພິເສດອື່ນໆ',
};

// Rule order is kept as: length, digits, foreign scripts, symbols, emoji.
const INVALIDATIONS = [
  lengthRule,
  digitRule,
  foreignScriptRule,
  symbolRule,
  emojiRule,
];

// Export the Lao rule list under the name INVALIDATIONS.
module.exports = {
INVALIDATIONS,
};