11.314
suntingan
dw>Trappist the monk (tweak to support new oclc limit;) |
k (1 revisi diimpor) |
||
| Baris 1: | Baris 1: | ||
local identifiers = {}; | |||
--[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | ||
]] | ]] | ||
local | local is_set, in_array, set_error, select_one, add_maint_cat, substitute, make_wikilink; -- functions in Module:Citation/CS1/Utilities | ||
local z; -- table of tables defined in Module:Citation/CS1/Utilities | local z; -- table of tables defined in Module:Citation/CS1/Utilities | ||
local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | ||
--============================<< H E L P E R F U N C T I O N S >>============================================ | --============================<< H E L P E R F U N C T I O N S >>============================================ | ||
--[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< E X T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Formats a wiki | Formats a wiki style external link | ||
]] | ]] | ||
local function external_link_id (options) | local function external_link_id(options) | ||
local url_string = options.id; | local url_string = options.id; | ||
local ext_link; | local ext_link; | ||
if options.encode == true or options.encode == nil then | if options.encode == true or options.encode == nil then | ||
url_string = mw.uri.encode (url_string | url_string = mw.uri.encode( url_string ); | ||
end | end | ||
ext_link = mw.ustring.format ('[%s%s%s %s]', options.prefix, url_string, options.suffix or "", mw.text.nowiki(options.id)); | |||
if is_set(options.access) then | |||
ext_link = substitute (cfg.presentation['access-signal'], {ext_link, cfg.presentation[options.access]}); -- add the free-to-read / paywall lock | |||
end | end | ||
return table.concat ({ | return table.concat ({ | ||
make_wikilink | make_wikilink (options.link, options.label), | ||
options.separator or ' ', | options.separator or ' ', | ||
ext_link | ext_link | ||
| Baris 108: | Baris 44: | ||
--[[--------------------------< I N T E R N A L _ L I N K _ I D >---------------------------------------------- | --[[--------------------------< I N T E R N A L _ L I N K _ I D >---------------------------------------------- | ||
Formats a wiki | Formats a wiki style internal link | ||
]] | ]] | ||
local function internal_link_id (options) | local function internal_link_id(options) | ||
return table.concat ( | return table.concat ( | ||
{ | { | ||
make_wikilink | make_wikilink (options.link, options.label), | ||
options.separator or ' ', | options.separator or ' ', | ||
make_wikilink ( | make_wikilink ( | ||
table.concat ( | table.concat ( | ||
{ | { | ||
options.prefix, | options.prefix, | ||
id, | options.id, | ||
options.suffix or '' | options.suffix or '' | ||
}), | }), | ||
mw.text.nowiki (options.id) | |||
); | ); | ||
}); | }); | ||
end | end | ||
| Baris 138: | Baris 69: | ||
--[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | --[[--------------------------< I S _ E M B A R G O E D >------------------------------------------------------ | ||
Determines if a PMC identifier's online version is embargoed. Compares the date in | | Determines if a PMC identifier's online version is embargoed. Compares the date in |embargo= against today's date. If embargo date is | ||
today's date. If embargo date is in the future, returns the content of | | in the future, returns the content of |embargo=; otherwise, returns an empty string because the embargo has expired or because | ||
|embargo= was not set in this cite. | |||
]] | ]] | ||
| Baris 147: | Baris 78: | ||
if is_set (embargo) then | if is_set (embargo) then | ||
local lang = mw.getContentLanguage(); | local lang = mw.getContentLanguage(); | ||
local good1, embargo_date, todays_date; | local good1, embargo_date, good2, todays_date; | ||
good1, embargo_date = pcall (lang.formatDate, lang, 'U', embargo); | good1, embargo_date = pcall( lang.formatDate, lang, 'U', embargo ); | ||
todays_date = lang | good2, todays_date = pcall( lang.formatDate, lang, 'U' ); | ||
if good1 then | if good1 and good2 then -- if embargo date and today's date are good dates | ||
if tonumber (embargo_date) >= tonumber (todays_date) then -- is embargo date is in the future? | if tonumber( embargo_date ) >= tonumber( todays_date ) then -- is embargo date is in the future? | ||
return embargo; -- still embargoed | return embargo; -- still embargoed | ||
else | else | ||
add_maint_cat ('embargo') | |||
return ''; -- unset because embargo has expired | return ''; -- unset because embargo has expired | ||
end | end | ||
end | end | ||
end | end | ||
return ''; -- | | return ''; -- |embargo= not set return empty string | ||
end | end | ||
| Baris 217: | Baris 97: | ||
--[[--------------------------< IS _ V A L I D _ I S X N >----------------------------------------------------- | --[[--------------------------< IS _ V A L I D _ I S X N >----------------------------------------------------- | ||
ISBN-10 and ISSN validator code calculates checksum across all | ISBN-10 and ISSN validator code calculates checksum across all isbn/issn digits including the check digit. | ||
ISBN-13 is checked in isbn(). | ISBN-13 is checked in isbn(). | ||
If the number is valid the result will be 0. Before calling this function, | If the number is valid the result will be 0. Before calling this function, isbn/issn must be checked for length | ||
and stripped of dashes, spaces and other non- | and stripped of dashes, spaces and other non-isxn characters. | ||
]] | ]] | ||
| Baris 227: | Baris 107: | ||
local function is_valid_isxn (isxn_str, len) | local function is_valid_isxn (isxn_str, len) | ||
local temp = 0; | local temp = 0; | ||
isxn_str = { isxn_str:byte(1, len) }; | isxn_str = { isxn_str:byte(1, len) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39, 'X' → 0x58 | ||
len = len + 1; | len = len+1; -- adjust to be a loop counter | ||
for i, v in ipairs (isxn_str) do | for i, v in ipairs( isxn_str ) do -- loop through all of the bytes and calculate the checksum | ||
if v == string.byte ("X" ) then | if v == string.byte( "X" ) then -- if checkdigit is X (compares the byte value of 'X' which is 0x58) | ||
temp = temp + 10 * (len - i); | temp = temp + 10*( len - i ); -- it represents 10 decimal | ||
else | else | ||
temp = temp + tonumber (string.char (v) )*(len-i); | temp = temp + tonumber( string.char(v) )*(len-i); | ||
end | end | ||
end | end | ||
return temp % 11 == 0; | return temp % 11 == 0; -- returns true if calculation result is zero | ||
end | end | ||
| Baris 242: | Baris 122: | ||
--[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- | --[[--------------------------< IS _ V A L I D _ I S X N _ 1 3 >----------------------------------------------- | ||
ISBN-13 and ISMN validator code calculates checksum across all 13 | ISBN-13 and ISMN validator code calculates checksum across all 13 isbn/ismn digits including the check digit. | ||
If the number is valid, the result will be 0. Before calling this function, | If the number is valid, the result will be 0. Before calling this function, isbn-13/ismn must be checked for length | ||
and stripped of dashes, spaces and other non- | and stripped of dashes, spaces and other non-isxn-13 characters. | ||
]] | ]] | ||
| Baris 252: | Baris 132: | ||
isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | isxn_str = { isxn_str:byte(1, 13) }; -- make a table of byte values '0' → 0x30 .. '9' → 0x39 | ||
for i, v in ipairs (isxn_str) do | for i, v in ipairs( isxn_str ) do | ||
temp = temp + (3 - 2*(i % 2)) * tonumber (string.char (v) ); | temp = temp + (3 - 2*(i % 2)) * tonumber( string.char(v) ); -- multiply odd index digits by 1, even index digits by 3 and sum; includes check digit | ||
end | end | ||
return temp % 10 == 0; -- sum modulo 10 is zero when | return temp % 10 == 0; -- sum modulo 10 is zero when isbn-13/ismn is correct | ||
end | end | ||
| Baris 261: | Baris 141: | ||
--[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | --[[--------------------------< N O R M A L I Z E _ L C C N >-------------------------------------------------- | ||
lccn normalization (http://www.loc.gov/marc/lccn-namespace.html#normalization) | |||
1. Remove all blanks. | 1. Remove all blanks. | ||
2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | 2. If there is a forward slash (/) in the string, remove it, and remove all characters to the right of the forward slash. | ||
| Baris 270: | Baris 150: | ||
2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. | 2. If the length of the substring is less than 6, left-fill the substring with zeroes until the length is six. | ||
Returns a normalized | Returns a normalized lccn for lccn() to validate. There is no error checking (step 3.b.1) performed in this function. | ||
]] | ]] | ||
local function normalize_lccn (lccn) | local function normalize_lccn (lccn) | ||
lccn = lccn:gsub ("%s", ""); | lccn = lccn:gsub ("%s", ""); -- 1. strip whitespace | ||
if nil ~= string.find (lccn, '/') then | if nil ~= string.find (lccn,'/') then | ||
lccn = lccn:match ("(.-)/"); | lccn = lccn:match ("(.-)/"); -- 2. remove forward slash and all character to the right of it | ||
end | end | ||
local prefix | local prefix | ||
local suffix | local suffix | ||
prefix, suffix = lccn:match ("(.+)%-(.+)"); | prefix, suffix = lccn:match ("(.+)%-(.+)"); -- 3.a remove hyphen by splitting the string into prefix and suffix | ||
if nil ~= suffix then | if nil ~= suffix then -- if there was a hyphen | ||
suffix = string.rep("0", 6-string.len (suffix)) .. suffix; | suffix=string.rep("0", 6-string.len (suffix)) .. suffix; -- 3.b.2 left fill the suffix with 0s if suffix length less than 6 | ||
lccn = prefix..suffix; | lccn=prefix..suffix; -- reassemble the lccn | ||
end | end | ||
return lccn; | return lccn; | ||
end | end | ||
--============================<< I D E N T I F I E R F U N C T I O N S >>==================================== | --============================<< I D E N T I F I E R F U N C T I O N S >>==================================== | ||
| Baris 298: | Baris 176: | ||
--[[--------------------------< A R X I V >-------------------------------------------------------------------- | --[[--------------------------< A R X I V >-------------------------------------------------------------------- | ||
See: | See: http://arxiv.org/help/arxiv_identifier | ||
format and error check arXiv identifier. There are three valid forms of the identifier: | format and error check arXiv identifier. There are three valid forms of the identifier: | ||
the first form, valid only between date codes | the first form, valid only between date codes 9108 and 0703 is: | ||
arXiv:<archive>.<class>/<date code><number><version> | arXiv:<archive>.<class>/<date code><number><version> | ||
where: | where: | ||
<archive> is a string of alpha characters - may be hyphenated; no other punctuation | <archive> is a string of alpha characters - may be hyphenated; no other punctuation | ||
<class> is a string of alpha characters - may be hyphenated; no other punctuation | <class> is a string of alpha characters - may be hyphenated; no other punctuation | ||
<date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 | <date code> is four digits in the form YYMM where YY is the last two digits of the four-digit year and MM is the month number January = 01 | ||
first digit of YY for this form can only be 9 or 0 | first digit of YY for this form can only be 9 or 0 | ||
| Baris 323: | Baris 201: | ||
<date code> and <version> are as defined for 0704-1412 | <date code> and <version> are as defined for 0704-1412 | ||
<number> is a five-digit number | <number> is a five-digit number | ||
]] | ]] | ||
local function arxiv ( | local function arxiv (id, class) | ||
local handler = cfg.id_handlers['ARXIV']; | |||
local handler = | |||
local year, month, version; | local year, month, version; | ||
local | local err_cat = ''; | ||
local text; | local text; | ||
if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the | if id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%d$") or id:match("^%a[%a%.%-]+/[90]%d[01]%d%d%d%dv%d+$") then -- test for the 9108-0703 format w/ & w/o version | ||
year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | year, month = id:match("^%a[%a%.%-]+/([90]%d)([01]%d)%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | if ((not (90 < year or 8 > year)) or (1 > month or 12 < month)) or -- if invalid year or invalid month | ||
((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ((91 == year and 7 > month) or (7 == year and 3 < month)) then -- if years ok, are starting and ending months ok? | ||
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message | |||
end | end | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 w/ & w/o version | |||
elseif id:match("^%d%d[01]%d%.%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%dv%d+$") then -- test for the 0704-1412 | |||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | if ((7 > year) or (14 < year) or (1 > month or 12 < month)) or -- is year invalid or is month invalid? (doesn't test for future years) | ||
((7 == year) and (4 > month)) then -- when year is 07, is month invalid (before April)? | ((7 == year) and (4 > month)) then --or -- when year is 07, is month invalid (before April)? | ||
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message | |||
end | end | ||
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format w/ & w/o version | |||
elseif id:match("^%d%d[01]%d%.%d%d%d%d%d$") or id:match("^%d%d[01]%d%.%d%d%d%d%dv%d+$") then -- test for the 1501- format | |||
year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | year, month = id:match("^(%d%d)([01]%d)%.%d%d%d%d%d[v%d]*$"); | ||
year = tonumber (year); | year = tonumber(year); | ||
month = tonumber (month); | month = tonumber(month); | ||
if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | if ((15 > year) or (1 > month or 12 < month)) then -- is year invalid or is month invalid? (doesn't test for future years) | ||
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- set error message | |||
end | end | ||
else | else | ||
err_cat = ' ' .. set_error( 'bad_arxiv' ); -- arXiv id doesn't match any format | |||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | |||
text = external_link_id ({link = handler.link, label = handler.label | |||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}); | |||
if is_set (class) then | if is_set (class) then | ||
class = ' [[' .. '//arxiv.org/archive/' .. class .. ' ' .. class .. ']]'; -- external link within square brackets, not wikilink | |||
else | |||
class = ''; -- empty string for concatenation | |||
end | end | ||
return text; | return text .. class; | ||
end | end | ||
| Baris 390: | Baris 251: | ||
--[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | --[[--------------------------< B I B C O D E >-------------------------------------------------------------------- | ||
Validates (sort of) and formats a bibcode | Validates (sort of) and formats a bibcode id. | ||
Format for bibcodes is specified here: | Format for bibcodes is specified here: http://adsabs.harvard.edu/abs_doc/help_pages/data.html#bibcodes | ||
But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters | But, this: 2015arXiv151206696F is apparently valid so apparently, the only things that really matter are length, 19 characters | ||
| Baris 400: | Baris 261: | ||
1–4 must be digits and must represent a year in the range of 1000 – next year | 1–4 must be digits and must represent a year in the range of 1000 – next year | ||
5 must be a letter | 5 must be a letter | ||
6 must be letter, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
7–8 must be letter, digit, ampersand, or dot (ampersand cannot directly precede a dot; &. ) | |||
9–18 must be letter, digit, or dot | 9–18 must be letter, digit, or dot | ||
19 must be a letter or dot | 19 must be a letter or dot | ||
| Baris 406: | Baris 268: | ||
]] | ]] | ||
local function bibcode ( | local function bibcode (id, access) | ||
local handler = cfg.id_handlers['BIBCODE']; | |||
local handler = | |||
local err_type; | local err_type; | ||
local year; | local year; | ||
local text = external_link_id ({link = handler.link, label = handler.label | local text = external_link_id({link=handler.link, label=handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access = access}); | access=access}); | ||
if 19 ~= id:len() then | if 19 ~= id:len() then | ||
err_type = | err_type = 'length'; | ||
else | else | ||
year = id:match ("^(%d%d%d%d)[%a][% | year = id:match ("^(%d%d%d%d)[%a][%a&%.][%a&%.%d][%a&%.%d][%a%d%.]+[%a%.]$") -- | ||
if not year then -- if nil then no pattern match | if not year then -- if nil then no pattern match | ||
err_type = | err_type = 'value'; -- so value error | ||
else | else | ||
local next_year = tonumber (os.date ('%Y')) + 1; | local next_year = tonumber(os.date ('%Y'))+1; -- get the current year as a number and add one for next year | ||
year = tonumber (year); -- convert year portion of bibcode to a number | year = tonumber (year); -- convert year portion of bibcode to a number | ||
if (1000 > year) or (year > next_year) then | if (1000 > year) or (year > next_year) then | ||
err_type = | err_type = 'year'; -- year out of bounds | ||
end | end | ||
if id:find('&%.') then | if id:find('&%.') then | ||
err_type = | err_type = 'journal'; -- journal abbreviation must not have '&.' (if it does its missing a letter) | ||
end | end | ||
end | end | ||
end | end | ||
if is_set (err_type) | if is_set (err_type) then -- if there was an error detected | ||
text = text .. ' ' .. set_error( 'bad_bibcode', {err_type}); | |||
end | end | ||
return text; | return text; | ||
end | end | ||
| Baris 451: | Baris 304: | ||
--[[--------------------------< B I O R X I V >----------------------------------------------------------------- | --[[--------------------------< B I O R X I V >----------------------------------------------------------------- | ||
Format bioRxiv | Format bioRxiv id and do simple error checking. bioRxiv ids are exactly 6 digits. | ||
6 digits. | The bioRxiv id is the number following the last slash in the bioRxiv-issued DOI: | ||
https://doi.org/10.1101/078733 -> 078733 | |||
The bioRxiv | |||
]] | ]] | ||
local function biorxiv ( | local function biorxiv(id) | ||
local handler = cfg.id_handlers['BIORXIV']; | |||
local handler = | local err_cat = ''; -- presume that bioRxiv id is valid | ||
local | |||
if nil == id:match("^%d%d%d%d%d%d$") then -- if bioRxiv id has anything but six digits | |||
err_cat = ' ' .. set_error( 'bad_biorxiv'); -- set an error message | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label | return external_link_id({link = handler.link, label = handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, | prefix=handler.prefix,id=id,separator=handler.separator, | ||
encode = handler.encode, access = handler.access}); | encode=handler.encode, access=handler.access}) .. err_cat; | ||
end | end | ||
| Baris 505: | Baris 329: | ||
The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure | The description of the structure of this identifier can be found at Help_talk:Citation_Style_1/Archive_26#CiteSeerX_id_structure | ||
]] | ]] | ||
local function citeseerx ( | local function citeseerx (id) | ||
local handler = cfg.id_handlers['CITESEERX']; | |||
local handler = | |||
local matched; | local matched; | ||
local text = external_link_id ({link = handler.link, label = handler.label | local text = external_link_id({link=handler.link, label=handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode, | ||
access = handler.access}); | access=handler.access}); | ||
matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | matched = id:match ("^10%.1%.1%.[1-9]%d?%d?%d?%.[1-9]%d?%d?%d?$"); | ||
if not matched then | if not matched then | ||
text = text .. ' ' .. set_error( 'bad_citeseerx' ); | |||
end | end | ||
return text; | return text; | ||
end | end | ||
| Baris 535: | Baris 355: | ||
Suffix: character string of any length chosen by the registrant | Suffix: character string of any length chosen by the registrant | ||
This function checks a DOI name for: prefix/suffix. If the | This function checks a DOI name for: prefix/suffix. If the doi name contains spaces or endashes, or, if it ends | ||
with a period or a comma, this function will emit a bad_doi error message. | with a period or a comma, this function will emit a bad_doi error message. | ||
DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, | DOI names are case-insensitive and can incorporate any printable Unicode characters so the test for spaces, endash, | ||
and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | and terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | ||
if ever used in | if ever used in doi names. | ||
]] | ]] | ||
local function doi ( | local function doi(id, inactive, access) | ||
local cat = "" | |||
local handler = cfg.id_handlers['DOI']; | |||
local | |||
local handler = | |||
local text; | local text; | ||
if is_set (inactive) then | if is_set(inactive) then | ||
local inactive_year = inactive:match("%d%d%d%d"); | local inactive_year = inactive:match("%d%d%d%d") or ''; -- try to get the year portion from the inactive date | ||
if is_set(inactive_year) then | |||
table.insert( z.error_categories, "Pages with DOIs inactive since " .. inactive_year ); | |||
if is_set (inactive_year) then | |||
else | else | ||
table.insert( z.error_categories, "Pages with inactive DOIs" ); -- when inactive doesn't contain a recognizable year | |||
end | end | ||
inactive = " (" .. cfg.messages['inactive'] .. | inactive = " (" .. cfg.messages['inactive'] .. " " .. inactive .. ")" | ||
end | end | ||
text = external_link_id({link = handler.link, label = handler.label, | |||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) .. (inactive or '') | |||
if nil == id:match("^10%.[^%s–]-/[^%s–]-[^%.,]$") then -- doi must begin with '10.', must contain a fwd slash, must not contain spaces or endashes, and must not end with period or comma | |||
cat = ' ' .. set_error( 'bad_doi' ); | |||
end | end | ||
return text .. cat | |||
end | end | ||
| Baris 636: | Baris 403: | ||
terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | terminal punctuation may not be technically correct but it appears, that in practice these characters are rarely | ||
if ever used in HDLs. | if ever used in HDLs. | ||
]] | ]] | ||
local function hdl ( | local function hdl(id, access) | ||
local handler = cfg.id_handlers['HDL']; | |||
local handler = | |||
local | local text = external_link_id({link = handler.link, label = handler.label, | ||
prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=access}) | |||
if | if nil == id:match("^[^%s–]-/[^%s–]-[^%.,]$") then -- hdl must contain a fwd slash, must not contain spaces, endashes, and must not end with period or comma | ||
text = text .. ' ' .. set_error( 'bad_hdl' ); | |||
end | end | ||
return text; | return text; | ||
end | end | ||
| Baris 695: | Baris 425: | ||
]] | ]] | ||
local function isbn ( | local function isbn( isbn_str ) | ||
if nil ~= isbn_str:match("[^%s-0-9X]") then | |||
return false, 'invalid character'; -- fail if isbn_str contains anything but digits, hyphens, or the uppercase X | |||
if nil ~= isbn_str:match ( | |||
return | |||
end | end | ||
isbn_str = isbn_str:gsub( "-", "" ):gsub( " ", "" ); -- remove hyphens and spaces | |||
local len = isbn_str:len(); | |||
local len = | |||
if len ~= 10 and len ~= 13 then | if len ~= 10 and len ~= 13 then | ||
return | return false, 'length'; -- fail if incorrect length | ||
end | end | ||
if len == 10 then | if len == 10 then | ||
if | if isbn_str:match( "^%d*X?$" ) == nil then -- fail if isbn_str has 'X' anywhere but last position | ||
return | return false, 'invalid form'; | ||
end | end | ||
return is_valid_isxn(isbn_str, 10), 'checksum'; | |||
else | else | ||
if | if isbn_str:match( "^%d+$" ) == nil then | ||
return | return false, 'invalid character'; -- fail if isbn13 is not all digits | ||
end | end | ||
if | if isbn_str:match( "^97[89]%d*$" ) == nil then | ||
return | return false, 'invalid prefix'; -- fail when isbn13 does not begin with 978 or 979 | ||
end | end | ||
return | return is_valid_isxn_13 (isbn_str), 'checksum'; | ||
end | end | ||
end | end | ||
--[[--------------------------< A | --[[--------------------------< A M A Z O N >------------------------------------------------------------------ | ||
Formats a link to Amazon. Do simple error checking: asin must be mix of 10 numeric or uppercase alpha | |||
characters. If a mix, first character must be uppercase alpha; if all numeric, asins must be 10-digit | |||
isbn. If 10-digit isbn, add a maintenance category so a bot or awb script can replace |asin= with |isbn=. | |||
Error message if not 10 characters, if not isbn10, if mixed and first character is a digit. | |||
This function is positioned here because it calls isbn() | This function is positioned here because it calls isbn() | ||
| Baris 767: | Baris 464: | ||
]] | ]] | ||
local function asin ( | local function asin(id, domain) | ||
local | local err_cat = "" | ||
if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | if not id:match("^[%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u][%d%u]$") then | ||
err_cat = ' ' .. set_error ('bad_asin'); -- asin is not a mix of 10 uppercase alpha and numeric characters | |||
else | else | ||
if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | if id:match("^%d%d%d%d%d%d%d%d%d[%dX]$") then -- if 10-digit numeric (or 9 digits with terminal X) | ||
if | if isbn( id ) then -- see if asin value is isbn10 | ||
add_maint_cat ('ASIN'); | |||
elseif not is_set (err_cat) then | |||
err_cat = ' ' .. set_error ('bad_asin'); -- asin is not isbn10 | |||
elseif not is_set ( | |||
end | end | ||
elseif not id:match("^%u[%d%u]+$") then | elseif not id:match("^%u[%d%u]+$") then | ||
err_cat = ' ' .. set_error ('bad_asin'); -- asin doesn't begin with uppercase alpha | |||
end | end | ||
end | end | ||
if | if not is_set(domain) then | ||
domain = "com"; | domain = "com"; | ||
elseif in_array (domain, {'jp', 'uk'}) then | elseif in_array (domain, {'jp', 'uk'}) then -- Japan, United Kingdom | ||
domain = "co." .. domain; | domain = "co." .. domain; | ||
elseif in_array (domain, {'au', 'br', 'mx'}) then -- Australia, Brazil, Mexico | |||
elseif in_array (domain, {'au', 'br', 'mx | |||
domain = "com." .. domain; | domain = "com." .. domain; | ||
end | end | ||
local handler = | local handler = cfg.id_handlers['ASIN']; | ||
return external_link_id({link=handler.link, | |||
label=handler.label, prefix=handler.prefix .. domain .. "/dp/", | |||
id=id, encode=handler.encode, separator = handler.separator}) .. err_cat; | |||
return external_link_id ({link = handler.link, label = handler.label, | |||
id = id, encode = handler.encode, separator = handler.separator}) | |||
end | end | ||
| Baris 815: | Baris 496: | ||
--[[--------------------------< I S M N >---------------------------------------------------------------------- | --[[--------------------------< I S M N >---------------------------------------------------------------------- | ||
Determines whether an ISMN string is valid. Similar to | Determines whether an ISMN string is valid. Similar to isbn-13, ismn is 13 digits beginning 979-0-... and uses the | ||
same check digit calculations. See | same check digit calculations. See http://www.ismn-international.org/download/Web_ISMN_Users_Manual_2008-6.pdf | ||
section 2, pages 9–12. | section 2, pages 9–12. | ||
]] | ]] | ||
local function ismn ( | local function ismn (id) | ||
local handler = cfg.id_handlers['ISMN']; | |||
local handler = | |||
local text; | local text; | ||
local valid_ismn = true; | local valid_ismn = true; | ||
| Baris 832: | Baris 509: | ||
id_copy = id; -- save a copy because this testing is destructive | id_copy = id; -- save a copy because this testing is destructive | ||
id = id:gsub ( | id=id:gsub( "[%s-–]", "" ); -- strip spaces, hyphens, and endashes from the ismn | ||
if 13 ~= id:len() or id:match ("^9790%d*$" ) == nil then -- | if 13 ~= id:len() or id:match( "^9790%d*$" ) == nil then -- ismn must be 13 digits and begin 9790 | ||
valid_ismn = false; | valid_ismn = false; | ||
else | else | ||
valid_ismn=is_valid_isxn_13 (id); -- validate | valid_ismn=is_valid_isxn_13 (id); -- validate ismn | ||
end | end | ||
-- text = internal_link_id({link = handler.link, label = handler.label, -- use this (or external version) when there is some place to link to | |||
-- prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) | |||
text = table.concat ( | text = table.concat ( | ||
{ | { | ||
make_wikilink | make_wikilink (handler.link, handler.label), | ||
handler.separator, | handler.separator, | ||
id_copy | id_copy | ||
}); | }); -- because no place to link to yet | ||
if false == valid_ismn then | if false == valid_ismn then | ||
text = text .. ' ' .. set_error( 'bad_ismn' ) -- add an error message if the ismn is invalid | |||
end | end | ||
| Baris 861: | Baris 537: | ||
--[[--------------------------< I S S N >----------------------------------------------------------------------

Validate and format an ISSN.  This code fixes the case where an editor has included an ISSN in the citation but
has separated the two groups of four digits with a space.  When that condition occurred, the resulting link looked
like this:

	|issn=0819 4327 gives: [http://www.worldcat.org/issn/0819 4327 0819 4327]  -- can't have spaces in an external link

This code now prevents that by inserting a hyphen at the ISSN midpoint.  It also validates the ISSN for length
and makes sure that the check digit agrees with the calculated value.  Incorrect length (not 8 digits), characters
other than 0-9 and X, or a check digit / calculated value mismatch will all cause a check-ISSN error message.  A
valid ISSN is always displayed with a hyphen, even if it was given as a single group of 8 digits.

]]
-- Validate an ISSN (or an eISSN when <e> is truthy) and render it as an external link.
--	id – identifier text as supplied by the editor
--	e  – truthy selects the EISSN handler and the 'e' variant of the error message
-- Returns the rendered link, followed by an error message when the identifier does not validate.
local function issn(id, e)
	local issn_copy = id;														-- unadulterated input; displayed as-is when validation fails
	local handler;
	local text;
	local valid_issn = true;

	if e then
		handler = cfg.id_handlers['EISSN'];
	else
		handler = cfg.id_handlers['ISSN'];
	end

	-- strip spaces, hyphens, and en dashes; Lua patterns are byte-oriented so the en dash is matched as its individual UTF-8 bytes
	id = id:gsub( "[%s-–]", "" );

	if 8 ~= id:len() or nil == id:match( "^%d*X?$" ) then						-- must be 8 characters: digits with an optional X in the last position
		valid_issn = false;														-- wrong length or improper character
	else
		valid_issn = is_valid_isxn(id, 8);										-- check digit validation
	end

	if true == valid_issn then
		id = string.sub( id, 1, 4 ) .. "-" .. string.sub( id, 5 );				-- valid: display the canonical hyphenated form
	else
		id = issn_copy;															-- invalid: show what the editor wrote alongside the error message
	end

	text = external_link_id({link = handler.link, label = handler.label,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})

	if false == valid_issn then
		text = text .. ' ' .. set_error( 'bad_issn', e and 'e' or '' )			-- add an error message if the issn is invalid
	end

	return text
end
| Baris 919: | Baris 593: | ||
]] | ]] | ||
local function jfm ( | local function jfm (id) | ||
local handler = cfg.id_handlers['JFM']; | |||
local handler = | |||
local id_num; | local id_num; | ||
local err_cat = ''; | |||
id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier | id_num = id:match ('^[Jj][Ff][Mm](.*)$'); -- identifier with jfm prefix; extract identifier | ||
if is_set (id_num) then | if is_set (id_num) then | ||
add_maint_cat ('jfm_format'); | |||
else -- plain number without | else -- plain number without mr prefix | ||
id_num = id; -- if here id does not have prefix | id_num = id; -- if here id does not have prefix | ||
end | end | ||
| Baris 935: | Baris 609: | ||
id = id_num; -- jfm matches pattern | id = id_num; -- jfm matches pattern | ||
else | else | ||
err_cat = ' ' .. set_error( 'bad_jfm' ); -- set an error message | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label | return external_link_id({link = handler.link, label = handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
| Baris 969: | Baris 621: | ||
Format LCCN link and do simple error checking.  LCCN is a character string 8-12 characters long.  The length of
the LCCN dictates the character type of the first 1-3 characters; the rightmost eight are always digits.
http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/

length = 8 then all digits
length = 9 then lccn[1] is lower case alpha
length = 10 then lccn[1] and lccn[2] are both lower case alpha or both digits
length = 11 then lccn[1] is lower case alpha, lccn[2] and lccn[3] are both lower case alpha or both digits
length = 12 then lccn[1] and lccn[2] are both lower case alpha

]]
-- Render an LCCN as an external link and append an error message when it is malformed.
--	lccn – identifier text as supplied by the editor
-- The normalized form (via normalize_lccn) is used only for validation; the link is built from the
-- text exactly as supplied.
local function lccn(lccn)
	local handler = cfg.id_handlers['LCCN'];
	local err_cat = '';															-- presume that LCCN is valid
	local id = normalize_lccn (lccn);											-- canonical form (no whitespace, hyphens, forward slashes)
	local len = id:len();
	local valid;

	if 8 == len then															-- dddddddd
		valid = not id:match("[^%d]");
	elseif 9 == len then														-- adddddddd
		valid = nil ~= id:match("%l%d%d%d%d%d%d%d%d");
	elseif 10 == len then														-- aadddddddd or dddddddddd
		valid = not id:match("[^%d]") or nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d");
	elseif 11 == len then														-- aaadddddddd or adddddddddd
		valid = nil ~= (id:match("^%l%l%l%d%d%d%d%d%d%d%d") or id:match("^%l%d%d%d%d%d%d%d%d%d%d"));
	elseif 12 == len then														-- aadddddddddd
		valid = nil ~= id:match("^%l%l%d%d%d%d%d%d%d%d%d%d");
	else
		valid = false;															-- wrong length
	end

	-- whitespace in the raw value is an error even when the normalized form validates
	if not valid or nil ~= lccn:find ('%s') then
		err_cat = ' ' .. set_error( 'bad_lccn' );
	end

	return external_link_id({link = handler.link, label = handler.label,
		prefix = handler.prefix, id = lccn, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
| Baris 1.079: | Baris 680: | ||
]] | ]] | ||
local function mr ( | local function mr (id) | ||
local handler = cfg.id_handlers['MR']; | |||
local handler = | |||
local id_num; | local id_num; | ||
local id_len; | local id_len; | ||
local err_cat = ''; | |||
id_num = id:match ('^[Mm][Rr](%d+)$'); -- identifier with mr prefix | id_num = id:match ('^[Mm][Rr](%d+)$'); -- identifier with mr prefix | ||
if is_set (id_num) then | if is_set (id_num) then | ||
add_maint_cat ('mr_format'); | |||
else -- plain number without mr prefix | else -- plain number without mr prefix | ||
id_num = id:match ('^%d+$'); -- if here id is all digits | id_num = id:match ('^%d+$'); -- if here id is all digits | ||
| Baris 1.095: | Baris 696: | ||
id_len = id_num and id_num:len() or 0; | id_len = id_num and id_num:len() or 0; | ||
if (7 >= id_len) and (0 ~= id_len) then | if (7 >= id_len) and (0 ~= id_len) then | ||
id = string.rep ('0', 7-id_len) .. id_num; -- zero-fill leading digits | id = string.rep ('0', 7-id_len ) .. id_num; -- zero-fill leading digits | ||
else | else | ||
err_cat = ' ' .. set_error( 'bad_mr' ); -- set an error message | |||
end | end | ||
return external_link_id ({link = handler.link, label = handler.label | return external_link_id({link = handler.link, label = handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode}) .. err_cat; | ||
end | end | ||
| Baris 1.108: | Baris 708: | ||
--[[--------------------------< O C L C >----------------------------------------------------------------------

Validate and format an OCLC id.  https://www.oclc.org/batchload/controlnumber.en.html

]]
local function oclc ( | local function oclc (id) | ||
local handler = cfg.id_handlers['OCLC']; | |||
local handler = | |||
local number; | local number; | ||
local err_msg = ''; -- empty string for concatenation | |||
if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters) | if id:match('^ocm%d%d%d%d%d%d%d%d$') then -- ocm prefix and 8 digits; 001 field (12 characters) | ||
number = id:match('ocm(%d+)'); -- get the number | number = id:match('ocm(%d+)'); -- get the number | ||
| Baris 1.127: | Baris 726: | ||
number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number | number = id:match('%(OCoLC%)([1-9]%d*)'); -- get the number | ||
if 9 < number:len() then | if 9 < number:len() then | ||
number = nil; -- | number = nil; -- contrain to 1 to 9 digits; change this when oclc issues 10-digit numbers | ||
end | end | ||
elseif id:match('^%d+$') then -- no prefix | elseif id:match('^%d+$') then -- no prefix | ||
number = id; -- get the number | number = id; -- get the number | ||
if | if 10 < number:len() then | ||
number = nil; -- | number = nil; -- contrain to 1 to 10 digits; change this when oclc issues 11-digit numbers | ||
end | end | ||
end | end | ||
| Baris 1.139: | Baris 738: | ||
id = number; -- exclude prefix, if any, from external link | id = number; -- exclude prefix, if any, from external link | ||
else | else | ||
err_msg = ' ' .. set_error( 'bad_oclc' ) -- add an error message if the id is malformed | |||
end | end | ||
local text = external_link_id({link=handler.link, label=handler.label, | |||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}); | prefix=handler.prefix, id=id, separator=handler.separator, encode=handler.encode}) .. err_msg; | ||
return text; | |||
end | end | ||
| Baris 1.154: | Baris 754: | ||
]] | ]] | ||
-- Render an Open Library identifier as an external link.
--	id     – identifier text; an optional 'OL' prefix, digits, then a type code 'A', 'M', or 'W'
--	access – access level passed through to external_link_id
-- The type code selects the URL path segment (authors, books, works).  A malformed identifier is
-- linked as supplied and followed by an error message.
local function openlibrary(id, access)
	local handler = cfg.id_handlers['OL'];
	local path_for_code = {A = 'authors/OL', M = 'books/OL', W = 'works/OL'};	-- URL path segment for each type code
	local ident, code = id:gsub('^OL', ''):match("^(%d+([AMW]))$");				-- strip optional OL prefix; capture digits+code and the code alone
	local link_args = {
		link = handler.link,
		label = handler.label,
		separator = handler.separator,
		encode = handler.encode,
		access = access,
		};

	if not is_set (ident) then													-- malformed: link the raw text and flag it
		link_args.prefix = handler.prefix .. 'OL';
		link_args.id = id;
		return external_link_id (link_args) .. ' ' .. set_error( 'bad_ol' );
	end

	link_args.prefix = handler.prefix .. path_for_code[code];					-- code is guaranteed to be A, M, or W by the pattern above
	link_args.id = ident;														-- identifier without the optional OL prefix
	return external_link_id (link_args);
end
| Baris 1.234: | Baris 810: | ||
]] | ]] | ||
-- Validate a PMC identifier and render it.
--	id      – identifier text; digits, optionally preceded by a case-insensitive 'PMC' prefix
--	embargo – when set, the article is still embargoed and no external link is produced
-- A prefixed identifier adds the 'pmc_format' maintenance category.  An identifier that is not
-- numeric, is less than 1, or exceeds test_limit gets an error message appended.
local function pmc(id, embargo)
	local test_limit = 8000000;													-- update this value as PMCs approach
	local handler = cfg.id_handlers['PMC'];
	local err_cat = '';															-- presume that PMC is valid
	local id_num;
	local text;

	id_num = id:match ('^[Pp][Mm][Cc](%d+)$');									-- identifier with pmc prefix
	if is_set (id_num) then
		add_maint_cat ('pmc_format');
	else																		-- plain number without pmc prefix
		id_num = id:match ('^%d+$');											-- nil unless id is all digits
	end

	if is_set (id_num) then
		id_num = tonumber(id_num);												-- numeric form for range testing
		if 1 > id_num or test_limit < id_num then								-- outside test limit boundaries
			err_cat = ' ' .. set_error( 'bad_pmc' );
		else
			id = tostring (id_num);												-- prefix (and any leading zeros) dropped for display and linking
		end
	else																		-- id format incorrect
		err_cat = ' ' .. set_error( 'bad_pmc' );
	end

	if is_set (embargo) then													-- still embargoed so no external link
		text = table.concat (
			{
			make_wikilink (handler.link, handler.label),
			handler.separator,
			id,
			err_cat
			});
	else																		-- no embargo or embargo has expired; ok to link to the article
		text = external_link_id({link = handler.link, label = handler.label,
			prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = handler.access}) .. err_cat;
	end
	return text;
end
| Baris 1.291: | Baris 860: | ||
]] | ]] | ||
-- Validate a PMID and render it as an external link.
--	id – identifier text; must be digits only, in the range 1..test_limit
-- An identifier that fails either check is still linked but is followed by an error message.
local function pmid(id)
	local test_limit = 33000000;												-- update this value as PMIDs approach
	local handler = cfg.id_handlers['PMID'];
	local err_cat = '';															-- presume that PMID is valid

	-- require at least one digit and nothing else; this also rejects an empty string, for which
	-- tonumber() returns nil and the range comparison below would raise a run-time error
	local id_num = id:match ('^%d+$') and tonumber(id);

	if not id_num or 1 > id_num or test_limit < id_num then						-- not numeric or outside test limit boundaries
		err_cat = ' ' .. set_error( 'bad_pmid' );
	end

	return external_link_id({link = handler.link, label = handler.label,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
| Baris 1.426: | Baris 881: | ||
--[[--------------------------< S S R N >----------------------------------------------------------------------

Format an SSRN, do simple error checking.

SSRNs are sequential numbers beginning at 100? and counting up.  This code checks the SSRN to see that it is
only digits and is greater than 99 and less than test_limit; the value in local variable test_limit will need
to be updated periodically as more SSRNs are issued.
| Baris 1.434: | Baris 889: | ||
]] | ]] | ||
local function ssrn ( | local function ssrn (id) | ||
local | local test_limit = 3500000; -- update this value as SSRNs approach | ||
local handler = | local handler = cfg.id_handlers['SSRN']; | ||
local err_cat = ''; -- presume that SSRN is valid | |||
local id_num; | local id_num; | ||
local text; | local text; | ||
| Baris 1.443: | Baris 899: | ||
if is_set (id_num) then -- id_num has a value so test it | if is_set (id_num) then -- id_num has a value so test it | ||
id_num = tonumber (id_num); -- convert id_num to a number for range testing | id_num = tonumber(id_num); -- convert id_num to a number for range testing | ||
if 100 > id_num or | if 100 > id_num or test_limit < id_num then -- if SSRN is outside test limit boundaries | ||
err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message | |||
end | end | ||
else -- when id format incorrect | else -- when id format incorrect | ||
err_cat = ' ' .. set_error( 'bad_ssrn' ); -- set an error message | |||
end | end | ||
text = external_link_id ({link = handler.link, label = handler.label | text = external_link_id({link = handler.link, label = handler.label, | ||
prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode, access = | prefix=handler.prefix,id=id,separator=handler.separator, encode=handler.encode, access=handler.access}) .. err_cat; | ||
return text; | return text; | ||
| Baris 1.467: | Baris 921: | ||
]] | ]] | ||
-- Render a Usenet message id as an external link.
--	id – message id text, expected without enclosing angle brackets
-- An error message is appended when the id has no '@' with text on both sides, or when its first
-- character is '<' or its last character is '>'.
local function usenet_id (id)
	local handler = cfg.id_handlers['USENETID'];

	local text = external_link_id({link = handler.link, label = handler.label,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode})

	if not id:match('^.+@.+$') or not id:match('^[^<].*[^>]$') then			-- missing '@', or begins with '<', or ends with '>'
		text = text .. ' ' .. set_error( 'bad_usenet_id' )						-- add an error message if the message id is invalid
	end

	return text
end
| Baris 1.487: | Baris 939: | ||
A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | A numerical identifier in the form nnnn.nnnnn - leading zeros in the first quartet optional | ||
]] | |||
-- Validate a Zbl identifier (form nnnn.nnnnn; leading zeros of the first group optional) and render
-- it as an external link.
--	id – identifier text, optionally preceded by a case-insensitive 'Zbl' prefix
-- A prefixed identifier adds the 'zbl_format' maintenance category.  A malformed identifier is
-- linked as supplied and followed by an error message.
local function zbl (id)
	local handler = cfg.id_handlers['ZBL'];
	local id_num;
	local err_cat = '';

	id_num = id:match ('^[Zz][Bb][Ll](.*)$');									-- identifier with zbl prefix; extract identifier
	if is_set (id_num) then
		add_maint_cat ('zbl_format');
	else																		-- plain number without zbl prefix
		id_num = id;															-- if here id does not have prefix
	end

	if id_num:match('^%d?%d?%d?%d%.%d%d%d%d%d$') then							-- one to four digits, a dot, exactly five digits
		id = id_num;															-- id matches pattern; link without the prefix
	else
		err_cat = ' ' .. set_error( 'bad_zbl' );								-- set an error message
	end

	return external_link_id({link = handler.link, label = handler.label,
		prefix = handler.prefix, id = id, separator = handler.separator, encode = handler.encode}) .. err_cat;
end
| Baris 1.511: | Baris 967: | ||
--============================<< I N T E R F A C E   F U N C T I O N S >>==========================================

--[[--------------------------< B U I L D _ I D _ L I S T >--------------------------------------------------------

Takes a table of IDs created by extract_ids() and turns it into a table of formatted ID outputs.

inputs:
	id_list – table of identifiers built by extract_ids()
	options – table of various template parameter values used to modify some manually handled identifiers

]]
-- Turn a table of identifiers into a list of rendered identifier strings sorted by label.
--	id_list – table keyed by upper-case identifier name (an index into cfg.id_handlers), value is the identifier text
--	options – table of template parameter values read here: IdAccessLevels, Class, ASINTLD, DoiBroken,
--	          IgnoreISBN, Embargo
-- Returns a sequence of rendered strings.  Raises an error for a handler whose mode is not
-- 'external', 'internal', or 'manual', and for a 'manual' identifier with no formatter below.
local function build_id_list( id_list, options )
	local new_list, handler = {};

	-- metatable factory: fields not set on the per-identifier table are read from the read-only cfg handler
	local function fallback(k) return { __index = function(t,i) return cfg.id_handlers[k][i] end } end;

	for k, v in pairs( id_list ) do												-- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN']
		handler = setmetatable( { ['id'] = v, ['access'] = options.IdAccessLevels[k] }, fallback(k) );

		if handler.mode == 'external' then
			table.insert( new_list, {handler.label, external_link_id( handler ) } );
		elseif handler.mode == 'internal' then
			table.insert( new_list, {handler.label, internal_link_id( handler ) } );
		elseif handler.mode ~= 'manual' then
			error( cfg.messages['unknown_ID_mode'] );
		elseif k == 'ARXIV' then
			table.insert( new_list, {handler.label, arxiv( v, options.Class ) } );
		elseif k == 'ASIN' then
			table.insert( new_list, {handler.label, asin( v, options.ASINTLD ) } );
		elseif k == 'BIBCODE' then
			table.insert( new_list, {handler.label, bibcode( v, handler.access ) } );
		elseif k == 'BIORXIV' then
			table.insert( new_list, {handler.label, biorxiv( v ) } );
		elseif k == 'CITESEERX' then
			table.insert( new_list, {handler.label, citeseerx( v ) } );
		elseif k == 'DOI' then
			table.insert( new_list, {handler.label, doi( v, options.DoiBroken, handler.access ) } );
		elseif k == 'EISSN' then
			table.insert( new_list, {handler.label, issn( v, true ) } );		-- true distinguishes eissn from issn
		elseif k == 'HDL' then
			table.insert( new_list, {handler.label, hdl( v, handler.access ) } );
		elseif k == 'ISBN' then
			local ISBN = internal_link_id( handler );
			local check;
			local err_type = '';
			check, err_type = isbn( v );
			if not check then
				if is_set(options.IgnoreISBN) then								-- ISBN is invalid; if |ignore-isbn-error= set
					add_maint_cat ('ignore_isbn_err');							-- add a maint category instead of an error message
				else
					ISBN = ISBN .. set_error( 'bad_isbn', {err_type}, false, " ", "" );	-- else display an error message
				end
			elseif is_set(options.IgnoreISBN) then								-- ISBN is OK; if |ignore-isbn-error= set
				add_maint_cat ('ignore_isbn_err');								-- because |ignore-isbn-error= unnecessary
			end
			table.insert( new_list, {handler.label, ISBN } );
		elseif k == 'ISMN' then
			table.insert( new_list, {handler.label, ismn( v ) } );
		elseif k == 'ISSN' then
			table.insert( new_list, {handler.label, issn( v ) } );
		elseif k == 'JFM' then
			table.insert( new_list, {handler.label, jfm( v ) } );
		elseif k == 'LCCN' then
			table.insert( new_list, {handler.label, lccn( v ) } );
		elseif k == 'MR' then
			table.insert( new_list, {handler.label, mr( v ) } );
		elseif k == 'OCLC' then
			table.insert( new_list, {handler.label, oclc( v ) } );
		elseif k == 'OL' or k == 'OLA' then
			table.insert( new_list, {handler.label, openlibrary( v, handler.access ) } );
		elseif k == 'PMC' then
			table.insert( new_list, {handler.label, pmc( v, options.Embargo ) } );
		elseif k == 'PMID' then
			table.insert( new_list, {handler.label, pmid( v ) } );
		elseif k == 'SSRN' then
			table.insert( new_list, {handler.label, ssrn( v ) } );
		elseif k == 'USENETID' then
			table.insert( new_list, {handler.label, usenet_id( v ) } );
		elseif k == 'ZBL' then
			table.insert( new_list, {handler.label, zbl( v ) } );
		else
			error( cfg.messages['unknown_manual_ID'] );
		end
	end

	local function comp( a, b )													-- case-insensitive ordering by label; used in following table.sort()
		return a[1]:lower() < b[1]:lower();
	end

	table.sort( new_list, comp );
	for k, v in ipairs( new_list ) do
		new_list[k] = v[2];														-- keep only the rendered text; the label was needed only for sorting
	end

	return new_list;
end
--[[--------------------------< E X T R A C T _ I D S >------------------------------------------------------------

Populates ID table from arguments using configuration settings.  Loops through cfg.id_handlers and searches args for
any of the parameters listed in each cfg.id_handlers['...'].parameters.  If found, adds the parameter and value to
the identifier list.  Emits a redundant-parameters error message if more than one alias exists in args.

]]
-- Collect the identifiers present in the template arguments.
--	args – table of template arguments
-- Returns a table keyed by upper-case identifier name (the cfg.id_handlers key) whose value is the
-- identifier text chosen by select_one() from that handler's parameter aliases.
local function extract_ids( args )
	local id_list = {};															-- list of identifiers found in args
	for k, handler in pairs( cfg.id_handlers ) do								-- k is uc identifier name as index to cfg.id_handlers; e.g. cfg.id_handlers['ISBN']
		local value = select_one( args, handler.parameters, 'redundant_parameters' );	-- handler.parameters is a table of aliases for k; pick one from args if present
		if is_set(value) then													-- if found in args, add identifier to our list
			id_list[k] = value;
		end
	end
	return id_list;
end
--[[--------------------------< E X T R A C T _ I D _ A C C E S S _ L E V E L S >--------------------------------------

Fetches custom id access levels from arguments using configuration settings.
Parameters which have a predefined access level (e.g. arxiv) do not use this
function as they are directly rendered as free without using an additional parameter.

]]
-- Collect the custom access level given for each identifier that supports one.
--	args    – table of template arguments
--	id_list – table of identifiers as built by extract_ids()
-- Returns a table keyed by upper-case identifier name whose value is the lower-cased access keyword.
-- Error messages are appended to z.message_tail when the keyword is not listed in
-- cfg.keywords['id-access'] (no level is then recorded) and when an access parameter is given
-- without its identifier.
local function extract_id_access_levels( args, id_list )
	local id_accesses_list = {};
	for k, v in pairs( cfg.id_handlers ) do
		local access_param = v.custom_access;									-- name of the access-level parameter for this identifier, if it has one
		local k_lower = string.lower(k);
		if is_set(access_param) then
			local access_level = args[access_param];
			if is_set(access_level) then
				if not in_array (access_level:lower(), cfg.keywords['id-access']) then
					table.insert( z.message_tail, { set_error( 'invalid_param_val', {access_param, access_level}, true ) } );
					access_level = nil;											-- unrecognized keyword: record no access level
				end
				if not is_set(id_list[k]) then									-- access level given but the identifier itself is missing
					table.insert( z.message_tail, { set_error( 'param_access_requires_param', {k_lower}, true ) } );
				end
				if is_set(access_level) then
					access_level = access_level:lower();
				end
				id_accesses_list[k] = access_level;
			end
		end
	end
	return id_accesses_list;
end
| Baris 1.695: | Baris 1.126: | ||
cfg = cfg_table_ptr; | cfg = cfg_table_ptr; | ||
is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module | |||
in_array = utilities_page_ptr.in_array; | in_array = utilities_page_ptr.in_array; | ||
set_error = utilities_page_ptr.set_error; | |||
select_one = utilities_page_ptr.select_one; | select_one = utilities_page_ptr.select_one; | ||
add_maint_cat = utilities_page_ptr.add_maint_cat; | |||
substitute = utilities_page_ptr.substitute; | substitute = utilities_page_ptr.substitute; | ||
make_wikilink = utilities_page_ptr.make_wikilink; | make_wikilink = utilities_page_ptr.make_wikilink; | ||
| Baris 1.706: | Baris 1.137: | ||
end | end | ||
-- Functions exported to the other Module:Citation/CS1 modules.
return {
	build_id_list = build_id_list,
	extract_ids = extract_ids,
	extract_id_access_levels = extract_id_access_levels,
	is_embargoed = is_embargoed,
	set_selected_modules = set_selected_modules,
	}