From 763d0162fcc2719a814d776c75489071300fe7bf Mon Sep 17 00:00:00 2001 From: dholms Date: Mon, 17 Jul 2023 15:17:56 -0500 Subject: [PATCH 1/4] content reporting on record fields --- .../api/com/atproto/identity/updateHandle.ts | 5 +- .../api/com/atproto/server/createAccount.ts | 5 +- .../src/content-reporter/explicit-slurs.ts | 17 ++++ packages/pds/src/content-reporter/index.ts | 84 +++++++++++++++++++ .../validator.ts | 2 +- packages/pds/src/context.ts | 6 ++ packages/pds/src/handle/index.ts | 4 +- packages/pds/src/handle/moderation/index.ts | 66 --------------- packages/pds/src/index.ts | 21 +++++ packages/pds/src/repo/prepare.ts | 17 ++++ packages/pds/src/services/index.ts | 4 + packages/pds/src/services/repo/index.ts | 6 ++ 12 files changed, 160 insertions(+), 77 deletions(-) create mode 100644 packages/pds/src/content-reporter/explicit-slurs.ts create mode 100644 packages/pds/src/content-reporter/index.ts rename packages/pds/src/{handle/moderation => content-reporter}/validator.ts (98%) delete mode 100644 packages/pds/src/handle/moderation/index.ts diff --git a/packages/pds/src/api/com/atproto/identity/updateHandle.ts b/packages/pds/src/api/com/atproto/identity/updateHandle.ts index 356165e72f7..7d07dc807fa 100644 --- a/packages/pds/src/api/com/atproto/identity/updateHandle.ts +++ b/packages/pds/src/api/com/atproto/identity/updateHandle.ts @@ -7,7 +7,6 @@ import { UserAlreadyExistsError, } from '../../../../services/account' import { httpLogger } from '../../../../logger' -import { backgroundHandleCheckForFlag } from '../../../../handle/moderation' export default function (server: Server, ctx: AppContext) { server.com.atproto.identity.updateHandle({ @@ -55,9 +54,7 @@ export default function (server: Server, ctx: AppContext) { ) } - if (ctx.cfg.unacceptableHandleWordsB64) { - backgroundHandleCheckForFlag({ ctx, handle, did: requester }) - } + ctx.contentReporter?.checkHandle({ handle, did: requester }) }, }) } diff --git a/packages/pds/src/api/com/atproto/server/createAccount.ts b/packages/pds/src/api/com/atproto/server/createAccount.ts index 2b0e5f72224..6965eb596a0 100644 --- a/packages/pds/src/api/com/atproto/server/createAccount.ts +++ b/packages/pds/src/api/com/atproto/server/createAccount.ts @@ -9,7 +9,6 @@ import { UserAlreadyExistsError } from '../../../../services/account' import AppContext from '../../../../context' import Database from '../../../../db' import { AtprotoData } from '@atproto/identity' -import { backgroundHandleCheckForFlag } from '../../../../handle/moderation' export default function (server: Server, ctx: AppContext) { server.com.atproto.server.createAccount(async ({ input, req }) => { @@ -106,9 +105,7 @@ export default function (server: Server, ctx: AppContext) { } }) - if (ctx.cfg.unacceptableHandleWordsB64) { - backgroundHandleCheckForFlag({ ctx, handle, did: result.did }) - } + ctx.contentReporter?.checkHandle({ handle, did: result.did }) return { encoding: 'application/json', diff --git a/packages/pds/src/content-reporter/explicit-slurs.ts b/packages/pds/src/content-reporter/explicit-slurs.ts new file mode 100644 index 00000000000..d3a8b4be2f1 --- /dev/null +++ b/packages/pds/src/content-reporter/explicit-slurs.ts @@ -0,0 +1,17 @@ +// regexes taken from: https://github.com/Blank-Cheque/Slurs +/* eslint-disable no-misleading-character-class */ +const explicitSlurRegexes = [ + /\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][hĤĥȞȟḦḧḢḣḨḩḤḥḪḫH̱ẖĦħⱧⱨꞪɦꞕΗНн][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, + /\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0]{2}[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, + /\b[fḞḟƑƒꞘꞙᵮᶂ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa@4][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{1,2}([ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeiÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ]{1,2}([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, + /\b[kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLlyÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]*\b/, + /\b[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeaÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ]?|n[ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]|[a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa]?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, + /[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?/, + /\b[tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa4]+[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn]{1,2}([iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]|[yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, +] + +export const hasExplicitSlur = (handle: string): boolean => { + return explicitSlurRegexes.some( + (reg) => reg.test(handle) || reg.test(handle.replaceAll('.', '')), + ) +} diff --git a/packages/pds/src/content-reporter/index.ts b/packages/pds/src/content-reporter/index.ts new file mode 100644 index 00000000000..dc93fdf0755 --- /dev/null +++ b/packages/pds/src/content-reporter/index.ts @@ -0,0 +1,84 @@ +import { AtUri } from '@atproto/uri' +import { RepoRecord } from '@atproto/lexicon' +import { CID } from 'multiformats/cid' +import * as ui8 from 'uint8arrays' +import { UnacceptableWordValidator } from './validator' +import { REASONOTHER } from '../lexicon/types/com/atproto/moderation/defs' +import { isRecord as isList } from '../lexicon/types/app/bsky/graph/list' +import { isRecord as isProfile } from '../lexicon/types/app/bsky/actor/profile' +import { isRecord as isFeedGenerator } from '../lexicon/types/app/bsky/feed/generator' +import { BackgroundQueue } from '../event-stream/background-queue' +import { ModerationService } from '../services/moderation' + +export class ContentReporter { + backgroundQueue: BackgroundQueue + moderationService: ModerationService + reporterDid: string + validator: UnacceptableWordValidator + + constructor(opts: { + backgroundQueue: BackgroundQueue + moderationService: ModerationService + reporterDid: string + unacceptableB64: string + falsePositivesB64?: string + }) { + this.backgroundQueue = opts.backgroundQueue + this.moderationService = opts.moderationService + this.reporterDid = opts.reporterDid + this.validator = new UnacceptableWordValidator( + decode(opts.unacceptableB64), + opts.falsePositivesB64 ? decode(opts.falsePositivesB64) : undefined, + ) + } + + checkHandle(opts: { handle: string; did: string }) { + const { handle, did } = opts + return this.checkContent({ + content: handle, + subject: { did }, + }) + } + + checkRecord(opts: { record: RepoRecord; uri: AtUri; cid: CID }) { + const { record, uri, cid } = opts + let content = uri.rkey + if (isProfile(record)) { + content += ' ' + record.displayName + } else if (isList(record)) { + content += ' ' + record.name + } else if (isFeedGenerator(record)) { + content += ' ' + record.displayName + } + + return this.checkContent({ + content, + subject: { uri, cid }, + }) + } + + checkContent(opts: { + content: string + subject: { did: string } | { uri: AtUri; cid?: CID } + }) { + const { content, subject } = opts + const possibleSlurs = this.validator.getMatches(content) + if (possibleSlurs.length < 1) { + return + } + this.backgroundQueue.add(async () => { + await this.moderationService.report({ + reasonType: REASONOTHER, + reason: `Automatically flagged for possible slurs: ${possibleSlurs.join( + ', ', + )}`, + subject, + reportedBy: this.reporterDid, + }) + }) + } +} + +const decode = (encoded: string): string[] => { + return ui8.toString(ui8.fromString(encoded, 'base64'), 'utf8').split(',') +} diff --git a/packages/pds/src/handle/moderation/validator.ts b/packages/pds/src/content-reporter/validator.ts similarity index 98% rename from packages/pds/src/handle/moderation/validator.ts rename to packages/pds/src/content-reporter/validator.ts index d832e828334..9f5b5689e4a 100644 --- a/packages/pds/src/handle/moderation/validator.ts +++ b/packages/pds/src/content-reporter/validator.ts @@ -1,6 +1,6 @@ import { dedupeStrs } from '@atproto/common' -export class UnacceptableHandleValidator { +export class UnacceptableWordValidator { private bannedWords: Set private falsePositives: Set diff --git a/packages/pds/src/context.ts b/packages/pds/src/context.ts index e811cc68c7d..4aa74e45156 100644 --- a/packages/pds/src/context.ts +++ b/packages/pds/src/context.ts @@ -19,6 +19,7 @@ import DidSqlCache from './did-cache' import { MountedAlgos } from './feed-gen/types' import { Crawlers } from './crawlers' import { LabelCache } from './label-cache' +import { ContentReporter } from './content-reporter' export class AppContext { private _appviewAgent: AtpAgent | null @@ -41,6 +42,7 @@ export class AppContext { sequencerLeader: SequencerLeader | null labeler: Labeler labelCache: LabelCache + contentReporter?: ContentReporter backgroundQueue: BackgroundQueue crawlers: Crawlers algos: MountedAlgos @@ -133,6 +135,10 @@ export class AppContext { return this.opts.labelCache } + get contentReporter(): ContentReporter | undefined { + return this.opts.contentReporter + } + get backgroundQueue(): BackgroundQueue { return this.opts.backgroundQueue } diff --git a/packages/pds/src/handle/index.ts b/packages/pds/src/handle/index.ts index edad6c736e5..d847df1cd45 100644 --- a/packages/pds/src/handle/index.ts +++ b/packages/pds/src/handle/index.ts @@ -1,7 +1,7 @@ import * as ident from '@atproto/identifier' import { InvalidRequestError } from '@atproto/xrpc-server' import { reservedSubdomains } from './reserved' -import { hasExplicitSlur } from './moderation' +import { hasExplicitSlur } from '../content-reporter/explicit-slurs' import AppContext from '../context' export const normalizeAndValidateHandle = async (opts: { @@ -20,7 +20,7 @@ export const normalizeAndValidateHandle = async (opts: { 'InvalidHandle', ) } - // slur check etc + // slur check if (hasExplicitSlur(handle)) { throw new InvalidRequestError( 'Inappropriate language in handle', diff --git a/packages/pds/src/handle/moderation/index.ts b/packages/pds/src/handle/moderation/index.ts deleted file mode 100644 index 3bbf013e8c6..00000000000 --- a/packages/pds/src/handle/moderation/index.ts +++ /dev/null @@ -1,66 +0,0 @@ -import * as ui8 from 'uint8arrays' -import AppContext from '../../context' -import { REASONOTHER } from '../../lexicon/types/com/atproto/moderation/defs' -import { UnacceptableHandleValidator } from './validator' - -// regexes taken from: https://github.com/Blank-Cheque/Slurs -/* eslint-disable no-misleading-character-class */ -const explicitSlurRegexes = [ - /\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][hĤĥȞȟḦḧḢḣḨḩḤḥḪḫH̱ẖĦħⱧⱨꞪɦꞕΗНн][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, - /\b[cĆćĈĉČčĊċÇçḈḉȻȼꞒꞓꟄꞔƇƈɕ][ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0]{2}[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, - /[fḞḟƑƒꞘꞙᵮᶂ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa@4][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{1,2}([ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeiÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ]{1,2}([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, - /\b[kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLlyÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ][kḰḱǨǩĶķḲḳḴḵƘƙⱩⱪᶄꝀꝁꝂꝃꝄꝅꞢꞣ][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]([rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe])?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]*\b/, - /\b([sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ][a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][dĎďḊḋḐḑD̦d̦ḌḍḒḓḎḏĐđÐðƉɖƊɗᵭᶁᶑȡ])?[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn][iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLloÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOoІіa4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]{1,2}(l[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]t|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEeaÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ]?|n[ÓóÒòŎŏÔôỐốỒồỖỗỔổǑǒÖöȪȫŐőÕõṌṍṎṏȬȭȮȯO͘o͘ȰȱØøǾǿǪǫǬǭŌōṒṓṐṑỎỏȌȍȎȏƠơỚớỜờỠỡỞởỢợỌọỘộO̩o̩Ò̩ò̩Ó̩ó̩ƟɵꝊꝋꝌꝍⱺOo0][gǴǵĞğĜĝǦǧĠġG̃g̃ĢģḠḡǤǥꞠꞡƓɠᶃꬶGgqꝖꝗꝘꝙɋʠ]|[a4ÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa]?)?[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, - /\b[tŤťṪṫŢţṬṭȚțṰṱṮṯŦŧȾⱦƬƭƮʈT̈ẗᵵƫȶ][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ][aÁáÀàĂăẮắẰằẴẵẲẳÂâẤấẦầẪẫẨẩǍǎÅåǺǻÄäǞǟÃãȦȧǠǡĄąĄ́ą́Ą̃ą̃ĀāĀ̀ā̀ẢảȀȁA̋a̋ȂȃẠạẶặẬậḀḁȺⱥꞺꞻᶏẚAa4]+[nŃńǸǹŇňÑñṄṅŅņṆṇṊṋṈṉN̈n̈ƝɲŊŋꞐꞑꞤꞥᵰᶇɳȵꬻꬼИиПпNn]{1,2}([iÍíi̇́Ììi̇̀ĬĭÎîǏǐÏïḮḯĨĩi̇̃ĮįĮ́į̇́Į̃į̇̃ĪīĪ̀ī̀ỈỉȈȉI̋i̋ȊȋỊịꞼꞽḬḭƗɨᶖİiIıIi1lĺľļḷḹl̃ḽḻłŀƚꝉⱡɫɬꞎꬷꬸꬹᶅɭȴLl][e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe]|[yÝýỲỳŶŷY̊ẙŸÿỸỹẎẏȲȳỶỷỴỵɎɏƳƴỾỿ]|[e3ЄєЕеÉéÈèĔĕÊêẾếỀềỄễỂểÊ̄ê̄Ê̌ê̌ĚěËëẼẽĖėĖ́ė́Ė̃ė̃ȨȩḜḝĘęĘ́ę́Ę̃ę̃ĒēḖḗḔḕẺẻȄȅE̋e̋ȆȇẸẹỆệḘḙḚḛɆɇE̩e̩È̩è̩É̩é̩ᶒⱸꬴꬳEe][rŔŕŘřṘṙŖŗȐȑȒȓṚṛṜṝṞṟR̃r̃ɌɍꞦꞧⱤɽᵲᶉꭉ])[sŚśṤṥŜŝŠšṦṧṠṡŞşṢṣṨṩȘșS̩s̩ꞨꞩⱾȿꟅʂᶊᵴ]?\b/, -] - -export const hasExplicitSlur = (handle: string): boolean => { - return explicitSlurRegexes.some((reg) => reg.test(handle)) -} - -const decode = (encoded: string): string[] => { - return ui8.toString(ui8.fromString(encoded, 'base64'), 'utf8').split(',') -} - -let _validator: UnacceptableHandleValidator | undefined = undefined -const getValidator = ( - unacceptable: string, - falsePositives?: string, -): UnacceptableHandleValidator => { - if (!_validator) { - _validator = new UnacceptableHandleValidator( - decode(unacceptable), - falsePositives ? decode(falsePositives) : undefined, - ) - } - return _validator -} - -export const backgroundHandleCheckForFlag = (opts: { - ctx: AppContext - handle: string - did: string -}) => { - const { ctx, handle, did } = opts - if (!ctx.cfg.unacceptableHandleWordsB64) { - return - } - const validator = getValidator( - ctx.cfg.unacceptableHandleWordsB64, - ctx.cfg.falsePositiveHandleWordsB64, - ) - const possibleSlurs = validator.getMatches(handle) - if (possibleSlurs.length < 1) { - return - } - ctx.backgroundQueue.add(async () => { - await ctx.services.moderation(ctx.db).report({ - reasonType: REASONOTHER, - reason: `Automatically flagged for possible slurs: ${possibleSlurs.join( - ', ', - )}`, - subject: { did }, - reportedBy: ctx.cfg.serverDid, - }) - }) -} diff --git a/packages/pds/src/index.ts b/packages/pds/src/index.ts index a349431268d..fda22c86a8f 100644 --- a/packages/pds/src/index.ts +++ b/packages/pds/src/index.ts @@ -42,6 +42,8 @@ import DidSqlCache from './did-cache' import { MountedAlgos } from './feed-gen/types' import { Crawlers } from './crawlers' import { LabelCache } from './label-cache' +import { ContentReporter } from './content-reporter' +import { ModerationService } from './services/moderation' export type { ServerConfigValues } from './config' export { ServerConfig } from './config' @@ -175,6 +177,23 @@ export class PDS { const labelCache = new LabelCache(db) + let contentReporter: ContentReporter | undefined = undefined + if (config.unacceptableHandleWordsB64) { + contentReporter = new ContentReporter({ + backgroundQueue, + moderationService: new ModerationService( + db, + messageDispatcher, + blobstore, + imgUriBuilder, + imgInvalidator, + ), + reporterDid: config.labelerDid, + unacceptableB64: config.unacceptableHandleWordsB64, + falsePositivesB64: config.falsePositiveHandleWordsB64, + }) + } + const services = createServices({ repoSigningKey, messageDispatcher, @@ -183,6 +202,7 @@ export class PDS { imgInvalidator, labeler, labelCache, + contentReporter, backgroundQueue, crawlers, }) @@ -201,6 +221,7 @@ export class PDS { sequencerLeader, labeler, labelCache, + contentReporter, services, mailer, imgUriBuilder, diff --git a/packages/pds/src/repo/prepare.ts b/packages/pds/src/repo/prepare.ts index 875b83e76bf..375307516bf 100644 --- a/packages/pds/src/repo/prepare.ts +++ b/packages/pds/src/repo/prepare.ts @@ -33,6 +33,7 @@ import { } from '../lexicon/types/app/bsky/feed/post' import { isRecord as isList } from '../lexicon/types/app/bsky/graph/list' import { isRecord as isProfile } from '../lexicon/types/app/bsky/actor/profile' +import { hasExplicitSlur } from '../content-reporter/explicit-slurs' // @TODO do this dynamically off of schemas export const blobsForWrite = (record: unknown): PreparedBlobRef[] => { @@ -154,6 +155,7 @@ export const prepareCreate = async (opts: { assertValidRecord(record) } const rkey = opts.rkey || TID.nextStr() + assertNoExplicitSlurs(rkey, record) return { action: WriteOpAction.Create, uri: AtUri.make(did, collection, rkey), @@ -177,6 +179,7 @@ export const prepareUpdate = async (opts: { if (validate) { assertValidRecord(record) } + assertNoExplicitSlurs(rkey, record) return { action: WriteOpAction.Update, uri: AtUri.make(did, collection, rkey), @@ -256,3 +259,17 @@ async function cidForSafeRecord(record: RepoRecord) { throw badRecordErr } } + +async function assertNoExplicitSlurs(rkey: string, record: RepoRecord) { + let toCheck = rkey + if (isProfile(record)) { + toCheck += ' ' + record.displayName + } else if (isList(record)) { + toCheck += ' ' + record.name + } else if (isFeedGenerator(record)) { + toCheck += ' ' + record.displayName + } + if (hasExplicitSlur(toCheck)) { + throw new InvalidRecordError('Unacceptable slur in record') + } +} diff --git a/packages/pds/src/services/index.ts b/packages/pds/src/services/index.ts index f89fd917082..528c6de5165 100644 --- a/packages/pds/src/services/index.ts +++ b/packages/pds/src/services/index.ts @@ -18,6 +18,7 @@ import { LabelService } from '../app-view/services/label' import { BackgroundQueue } from '../event-stream/background-queue' import { Crawlers } from '../crawlers' import { LabelCache } from '../label-cache' +import { ContentReporter } from '../content-reporter' export function createServices(resources: { repoSigningKey: crypto.Keypair @@ -27,6 +28,7 @@ export function createServices(resources: { imgInvalidator: ImageInvalidator labeler: Labeler labelCache: LabelCache + contentReporter?: ContentReporter backgroundQueue: BackgroundQueue crawlers: Crawlers }): Services { @@ -38,6 +40,7 @@ export function createServices(resources: { imgInvalidator, labeler, labelCache, + contentReporter, backgroundQueue, crawlers, } = resources @@ -52,6 +55,7 @@ export function createServices(resources: { backgroundQueue, crawlers, labeler, + contentReporter, ), moderation: ModerationService.creator( messageDispatcher, diff --git a/packages/pds/src/services/repo/index.ts b/packages/pds/src/services/repo/index.ts index 7dc1dac4252..e04a129e8ee 100644 --- a/packages/pds/src/services/repo/index.ts +++ b/packages/pds/src/services/repo/index.ts @@ -28,6 +28,7 @@ import { wait } from '@atproto/common' import { BackgroundQueue } from '../../event-stream/background-queue' import { countAll } from '../../db/util' import { Crawlers } from '../../crawlers' +import { ContentReporter } from '../../content-reporter' export class RepoService { blobs: RepoBlobs @@ -40,6 +41,7 @@ export class RepoService { public backgroundQueue: BackgroundQueue, public crawlers: Crawlers, public labeler: Labeler, + public contentReporter?: ContentReporter, ) { this.blobs = new RepoBlobs(db, blobstore, backgroundQueue) } @@ -51,6 +53,7 @@ export class RepoService { backgroundQueue: BackgroundQueue, crawlers: Crawlers, labeler: Labeler, + contentReporter?: ContentReporter, ) { return (db: Database) => new RepoService( @@ -61,6 +64,7 @@ export class RepoService { backgroundQueue, crawlers, labeler, + contentReporter, ) } @@ -81,6 +85,7 @@ export class RepoService { this.backgroundQueue, this.crawlers, this.labeler, + this.contentReporter, ) return fn(srvc) }) @@ -243,6 +248,7 @@ export class RepoService { write.action === WriteOpAction.Update ) { this.labeler.processRecord(write.uri, write.record) + this.contentReporter?.checkRecord(write) } }) } From f654cd8abd25d47200afbae406c6759867312001 Mon Sep 17 00:00:00 2001 From: dholms Date: Mon, 17 Jul 2023 15:28:08 -0500 Subject: [PATCH 2/4] fix test --- packages/pds/tests/handle-validation.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/pds/tests/handle-validation.test.ts b/packages/pds/tests/handle-validation.test.ts index c0174fb533a..14b9b2d2646 100644 --- a/packages/pds/tests/handle-validation.test.ts +++ b/packages/pds/tests/handle-validation.test.ts @@ -1,6 +1,6 @@ import { isValidTld } from '@atproto/identifier' import { ensureHandleServiceConstraints } from '../src/handle' -import { UnacceptableHandleValidator } from '../src/handle/moderation/validator' +import { UnacceptableWordValidator } from '../src/content-reporter/validator' describe('handle validation', () => { it('validates service constraints', () => { @@ -27,7 +27,7 @@ describe('handle validation', () => { expect(isValidTld('atproto.internal')).toBe(false) }) - const validator = new UnacceptableHandleValidator( + const validator = new UnacceptableWordValidator( ['evil', 'mean', 'bad'], ['baddie'], ) From 8ad30d05c77dd23657e1e45286a0fbf7c7b04630 Mon Sep 17 00:00:00 2001 From: dholms Date: Mon, 17 Jul 2023 16:21:22 -0500 Subject: [PATCH 3/4] tests --- packages/pds/src/config.ts | 24 ++-- packages/pds/src/content-reporter/index.ts | 6 +- packages/pds/src/index.ts | 6 +- packages/pds/src/services/repo/index.ts | 21 ++- packages/pds/tests/content-reporter.test.ts | 138 ++++++++++++++++++++ 5 files changed, 168 insertions(+), 27 deletions(-) create mode 100644 packages/pds/tests/content-reporter.test.ts diff --git a/packages/pds/src/config.ts b/packages/pds/src/config.ts index 992b6ea8e5b..4c15fa64b83 100644 --- a/packages/pds/src/config.ts +++ b/packages/pds/src/config.ts @@ -50,8 +50,8 @@ export interface ServerConfigValues { hiveApiKey?: string labelerDid: string labelerKeywords: Record - unacceptableHandleWordsB64?: string - falsePositiveHandleWordsB64?: string + unacceptableWordsB64?: string + falsePositiveWordsB64?: string feedGenDid?: string @@ -166,11 +166,11 @@ export class ServerConfig { const labelerDid = process.env.LABELER_DID || 'did:example:labeler' const labelerKeywords = {} - const unacceptableHandleWordsB64 = nonemptyString( - process.env.UNACCEPTABLE_HANDLE_WORDS_B64, + const unacceptableWordsB64 = nonemptyString( + process.env.UNACCEPTABLE_WORDS_B64, ) - const falsePositiveHandleWordsB64 = nonemptyString( - process.env.FALSE_POSITIVE_HANDLE_WORDS_B64, + const falsePositiveWordsB64 = nonemptyString( + process.env.FALSE_POSITIVE_WORDS_B64, ) const feedGenDid = process.env.FEED_GEN_DID @@ -250,8 +250,8 @@ export class ServerConfig { hiveApiKey, labelerDid, labelerKeywords, - unacceptableHandleWordsB64, - falsePositiveHandleWordsB64, + unacceptableWordsB64, + falsePositiveWordsB64, feedGenDid, maxSubscriptionBuffer, repoBackfillLimitMs, @@ -444,12 +444,12 @@ export class ServerConfig { return this.cfg.labelerKeywords } - get unacceptableHandleWordsB64() { - return this.cfg.unacceptableHandleWordsB64 + get unacceptableWordsB64() { + return this.cfg.unacceptableWordsB64 } - get falsePositiveHandleWordsB64() { - return this.cfg.falsePositiveHandleWordsB64 + get falsePositiveWordsB64() { + return this.cfg.falsePositiveWordsB64 } get feedGenDid() { diff --git a/packages/pds/src/content-reporter/index.ts b/packages/pds/src/content-reporter/index.ts index dc93fdf0755..50088c88a11 100644 --- a/packages/pds/src/content-reporter/index.ts +++ b/packages/pds/src/content-reporter/index.ts @@ -79,6 +79,10 @@ export class ContentReporter { } } -const decode = (encoded: string): string[] => { +export const decode = (encoded: string): string[] => { return ui8.toString(ui8.fromString(encoded, 'base64'), 'utf8').split(',') } + +export const encode = (words: string[]): string => { + return ui8.toString(ui8.fromString(words.join(','), 'utf8'), 'base64') +} diff --git a/packages/pds/src/index.ts b/packages/pds/src/index.ts index fda22c86a8f..33a93eb35f5 100644 --- a/packages/pds/src/index.ts +++ b/packages/pds/src/index.ts @@ -178,7 +178,7 @@ export class PDS { const labelCache = new LabelCache(db) let contentReporter: ContentReporter | undefined = undefined - if (config.unacceptableHandleWordsB64) { + if (config.unacceptableWordsB64) { contentReporter = new ContentReporter({ backgroundQueue, moderationService: new ModerationService( @@ -189,8 +189,8 @@ export class PDS { imgInvalidator, ), reporterDid: config.labelerDid, - unacceptableB64: config.unacceptableHandleWordsB64, - falsePositivesB64: config.falsePositiveHandleWordsB64, + unacceptableB64: config.unacceptableWordsB64, + falsePositivesB64: config.falsePositiveWordsB64, }) } diff --git a/packages/pds/src/services/repo/index.ts b/packages/pds/src/services/repo/index.ts index e04a129e8ee..5e9a3116d3b 100644 --- a/packages/pds/src/services/repo/index.ts +++ b/packages/pds/src/services/repo/index.ts @@ -236,21 +236,20 @@ export class RepoService { this.backgroundQueue.add(async () => { await this.crawlers.notifyOfUpdate() }) + writes.forEach((write) => { + if ( + write.action === WriteOpAction.Create || + write.action === WriteOpAction.Update + ) { + // @TODO move to appview + this.labeler.processRecord(write.uri, write.record) + this.contentReporter?.checkRecord(write) + } + }) }) const seqEvt = await sequencer.formatSeqCommit(did, commitData, writes) await sequencer.sequenceEvt(this.db, seqEvt) - - // @TODO move to appview - writes.forEach((write) => { - if ( - write.action === WriteOpAction.Create || - write.action === WriteOpAction.Update - ) { - this.labeler.processRecord(write.uri, write.record) - this.contentReporter?.checkRecord(write) - } - }) } async rebaseRepo(did: string, swapCommit?: CID) { diff --git a/packages/pds/tests/content-reporter.test.ts b/packages/pds/tests/content-reporter.test.ts new file mode 100644 index 00000000000..e4a25d68f05 --- /dev/null +++ b/packages/pds/tests/content-reporter.test.ts @@ -0,0 +1,138 @@ +import { encode } from '../src/content-reporter' +import { TestNetworkNoAppView } from '@atproto/dev-env' +import { SeedClient } from './seeds/client' +import basicSeed from './seeds/basic' +import { AtpAgent } from '@atproto/api' + +describe('content reporter', () => { + let network: TestNetworkNoAppView + let agent: AtpAgent + let sc: SeedClient + + let alice: string + + beforeAll(async () => { + network = await TestNetworkNoAppView.create({ + dbPostgresSchema: 'content_reporter', + pds: { + unacceptableWordsB64: encode(['evil']), + }, + }) + agent = network.pds.getClient() + sc = new SeedClient(agent) + await basicSeed(sc) + await network.processAll() + alice = sc.dids.alice + }) + + afterAll(async () => { + await network.close() + }) + + const getAllReports = () => { + return network.pds.ctx.db.db + .selectFrom('moderation_report') + .selectAll() + .orderBy('id', 'asc') + .execute() + } + + it('doesnt label any of the content in the seed', async () => { + const reports = await getAllReports() + expect(reports.length).toBe(0) + }) + + it('flags a handle with an unacceptable word', async () => { + await sc.updateHandle(alice, 'evil.test') + await network.processAll() + const reports = await getAllReports() + expect(reports.length).toBe(1) + expect(reports.at(-1)?.subjectDid).toEqual(alice) + }) + + it('flags a profile with an unacceptable displayName', async () => { + const res = await agent.api.com.atproto.repo.putRecord( + { + repo: alice, + collection: 'app.bsky.actor.profile', + rkey: 'self', + record: { + displayName: 'evil alice', + }, + }, + { headers: sc.getHeaders(alice), encoding: 'application/json' }, + ) + await network.pds.ctx.backgroundQueue.processAll() + + const reports = await getAllReports() + expect(reports.length).toBe(2) + expect(reports.at(-1)?.subjectUri).toEqual(res.data.uri) + expect(reports.at(-1)?.subjectCid).toEqual(res.data.cid) + }) + + it('flags a list with an unacceptable name', async () => { + const res = await agent.api.com.atproto.repo.createRecord( + { + repo: alice, + collection: 'app.bsky.graph.list', + rkey: 'list', + record: { + name: 'myevillist', + purpose: 'app.bsky.graph.defs#modList', + createdAt: new Date().toISOString(), + }, + }, + { headers: sc.getHeaders(alice), encoding: 'application/json' }, + ) + await network.processAll() + + const reports = await getAllReports() + expect(reports.length).toBe(3) + expect(reports.at(-1)?.subjectUri).toEqual(res.data.uri) + expect(reports.at(-1)?.subjectCid).toEqual(res.data.cid) + }) + + it('flags a feed generator with an unacceptable displayName', async () => { + const res = await agent.api.com.atproto.repo.createRecord( + { + repo: alice, + collection: 'app.bsky.feed.generator', + rkey: 'generator', + record: { + did: alice, + displayName: 'myevilfeed', + createdAt: new Date().toISOString(), + }, + }, + { headers: sc.getHeaders(alice), encoding: 'application/json' }, + ) + await network.processAll() + + const reports = await getAllReports() + expect(reports.length).toBe(4) + expect(reports.at(-1)?.subjectUri).toEqual(res.data.uri) + expect(reports.at(-1)?.subjectCid).toEqual(res.data.cid) + }) + + it('flags a record with an unacceptable rkey', async () => { + const res = await agent.api.com.atproto.repo.createRecord( + { + repo: alice, + collection: 'app.bsky.feed.generator', + rkey: 'evilrkey', + record: { + did: alice, + displayName: 'totally fine feed', + createdAt: new Date().toISOString(), + }, + }, + { headers: sc.getHeaders(alice), encoding: 'application/json' }, + ) + await network.processAll() + + const reports = await getAllReports() + expect(reports.length).toBe(5) + expect(reports.at(-1)?.subjectUri).toEqual(res.data.uri) + expect(reports.at(-1)?.subjectCid).toEqual(res.data.cid) + }) +}) From d0cadcbb87b4009a5473fa21f3ddef2e6cb8f519 Mon Sep 17 00:00:00 2001 From: dholms Date: Mon, 17 Jul 2023 16:22:13 -0500 Subject: [PATCH 4/4] tidy --- packages/pds/src/content-reporter/explicit-slurs.ts | 6 +++++- packages/pds/src/repo/prepare.ts | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/pds/src/content-reporter/explicit-slurs.ts b/packages/pds/src/content-reporter/explicit-slurs.ts index d3a8b4be2f1..534091366f6 100644 --- a/packages/pds/src/content-reporter/explicit-slurs.ts +++ b/packages/pds/src/content-reporter/explicit-slurs.ts @@ -12,6 +12,10 @@ const explicitSlurRegexes = [ export const hasExplicitSlur = (handle: string): boolean => { return explicitSlurRegexes.some( - (reg) => reg.test(handle) || reg.test(handle.replaceAll('.', '')), + (reg) => + reg.test(handle) || + reg.test( + handle.replaceAll('.', '').replaceAll('-', '').replaceAll('_', ''), + ), ) } diff --git a/packages/pds/src/repo/prepare.ts b/packages/pds/src/repo/prepare.ts index 375307516bf..af4dbbc9365 100644 --- a/packages/pds/src/repo/prepare.ts +++ b/packages/pds/src/repo/prepare.ts @@ -260,7 +260,7 @@ async function cidForSafeRecord(record: RepoRecord) { } } -async function assertNoExplicitSlurs(rkey: string, record: RepoRecord) { +function assertNoExplicitSlurs(rkey: string, record: RepoRecord) { let toCheck = rkey if (isProfile(record)) { toCheck += ' ' + record.displayName