|
|
Line 1: |
Line 1: |
| | | |
− | local p = {}
| |
− |
| |
--[[
Translate an English month name into its position in the year: 1 for January through 12 for December.

Full names and three-letter abbreviations are both recognised.  The lookup is an exact string match, so
spelling and capitalization must be correct.  Any other value (including nil) yields 0.
]]
function get_month_number (month)
	local month_numbers = {
		January = 1, February = 2, March = 3, April = 4, May = 5, June = 6,
		July = 7, August = 8, September = 9, October = 10, November = 11, December = 12,
		Jan = 1, Feb = 2, Mar = 3, Apr = 4, Jun = 6,			-- 'May' is its own abbreviation, listed above
		Jul = 7, Aug = 8, Sep = 9, Oct = 10, Nov = 11, Dec = 12,
	};

	return month_numbers[month] or 0;					-- 0: misspelled, improper case, or not a month name
end
| |
− |
| |
--[[
Translate a season name into its sequence number within a calendar year: Winter is 1, Spring 2, Summer 3,
and both Fall and Autumn are 4.

The lookup is an exact string match, so spelling and capitalization must be correct.  Any other value
yields 0.
]]
function get_season_number (season)
	local season_numbers = {
		Winter = 1,
		Spring = 2,
		Summer = 3,
		Fall = 4,
		Autumn = 4,								-- synonym of Fall
	};

	return season_numbers[season] or 0;					-- 0: misspelled, improper case, or not a season name
end
| |
− |
| |
--[[
Returns true when month_season is a name accepted by get_month_number() or by get_season_number()
(that is, either lookup gives a non-zero number); otherwise returns false.
]]
function is_valid_month_or_season (month_season)
	if 0 ~= get_month_number (month_season) then				-- a month name (long or abbreviated form)
		return true;
	end
	return 0 ~= get_season_number (month_season);				-- not a month; true only if it is a season name
end
| |
− |
| |
− |
| |
--[[
Compare a year taken from a citation parameter with the current year reported by the server.

Returns true when year is numeric and no later than next year; returns false when year is more than one
year in the future or cannot be converted to a number.

year may be a number or a string of digits.
]]
local year_limit;									-- next year; fetched once and cached here rather than in a global

function is_valid_year (year)
	local year_number = tonumber (year);
	if not year_number then
		return false;								-- not numeric: cannot be a valid year
	end

	if not year_limit then
		year_limit = tonumber (os.date ("%Y")) + 1;				-- current server year plus one
	end
	return year_number <= year_limit;					-- false if year is in the future more than one year
end
| |
− |
| |
--[[
Returns true if year, month, and day describe a real calendar date whose year is accepted by
is_valid_year(); else returns false.

A date is real when month is 1 through 12 and day is at least 1 and no greater than the number of days in
that month.  Arguments that cannot be converted to numbers give false.

Assumes Julian calendar prior to year 1582 and Gregorian calendar thereafter.  Accounts for Julian calendar
leap years before 1582 and Gregorian leap years from 1582.  Where the two calendars overlap (1582 to
approximately 1923) dates are assumed to be Gregorian.

Inputs:
	year, month, day - numbers or strings of digits
]]
function is_valid_date (year, month, day)
	local days_in_month = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};

	local year_number = tonumber (year);
	local month_number = tonumber (month);
	local day_number = tonumber (day);

	if not (year_number and month_number and day_number) then
		return false;								-- something is not numeric
	end

	if not is_valid_year (year_number) then					-- year limit is enforced by is_valid_year()
		return false;
	end

	local month_length = days_in_month[month_number];
	if not month_length then
		return false;								-- month is not 1 through 12
	end

	if 2 == month_number then						-- February gets 29 days in leap years
		local is_leap_year;
		if 1582 > year_number then					-- Julian calendar: every fourth year
			is_leap_year = 0 == (year_number % 4);
		else									-- Gregorian calendar: century years only when divisible by 400
			is_leap_year = 0 == (year_number % 4) and (0 ~= (year_number % 100) or 0 == (year_number % 400));
		end
		if is_leap_year then
			month_length = 29;
		end
	end

	return 1 <= day_number and day_number <= month_length;		-- day 0 is not a real date
end
| |
− |
| |
--[[
Decide whether range_start and range_end form an acceptable month pair or season pair.

Pairs read left to right, earliest to latest in time, and the two members may not be the same.  A pair is
either two months or two seasons; mixed pairs are rejected.

Seasons are numbered Winter 1, Spring 2, Summer 3, Fall/Autumn 4.  Because winter can follow fall/autumn at
the end of a calendar year, Fall-Winter and Autumn-Winter (4-1) are accepted as a special case.

Returns true for an acceptable pair; else false.
]]
function is_valid_month_season_range(range_start, range_end)
	local first_month = get_month_number (range_start);

	if 0 ~= first_month then						-- range_start is a month
		-- a range_end that is not a month has number 0 and so can never follow range_start
		return first_month < get_month_number (range_end);
	end

	local first_season = get_season_number (range_start);			-- not a month; is it a season?
	if 0 == first_season then
		return false;								-- range_start is neither a month nor a season
	end

	local last_season = get_season_number (range_end);			-- 0 when range_end is not a season
	if first_season < last_season then
		return true;								-- range_end is a season that follows range_start
	end
	return 4 == first_season and 1 == last_season;				-- special case: Fall-Winter or Autumn-Winter
end
| |
− |
| |
--[[
Check date format to see that it is one of the formats approved by WP:DATESNO or WP:DATERANGE.  Exception:
only allowed range separator is endash.  Additionally, check the date to see that it is a real date: no 31
in 30-day months; no 29 February when not a leap year; no day 0.  Months, both long-form and three character
abbreviations, and seasons must be spelled correctly.  Season names are accepted only in formats that have
no day.  Future years beyond next year are not allowed.

If the date fails the format tests, this function returns false and does not return values for anchor_year
and COinS_date.  When this happens, the date parameter is used in the COinS metadata and the CITEREF
identifier gets its year from the year parameter if present otherwise CITEREF does not get a date value.

Inputs:
	date_string - date string from date-holding parameters (date, year, accessdate, embargo, archivedate, etc)

Returns:
	false if date_string is not a string or is not a real date; else
	true, anchor_year, COinS_date
		anchor_year can be used in CITEREF anchors
		COinS_date is date_string without anchor_year disambiguator if any, and with endashes replaced by hyphens
]]
function check_date (date_string)
	local year;									-- always a string of digits once a format has matched
	local year2 = 0;								-- second year in a year range
	local month = 0;
	local month2 = 0;								-- second month in a month range
	local day = 0;									-- stays 0 (number) for formats without a day; else a string of digits
	local day2 = 0;									-- second day in a day range
	local anchor_year;
	local coins_date;

	if 'string' ~= type (date_string) then
		return false;								-- nothing that can be matched against the formats
	end

	if date_string:match ("^%d%d%d%d%-%d%d%-%d%d$") then			-- year-initial numerical year month day format
		year, month, day = date_string:match ("(%d%d%d%d)%-(%d%d)%-(%d%d)");
		month = tonumber (month);
		if 12 < month or 1 > month or 1 > tonumber (day) or 1583 > tonumber (year) then
			return false;							-- month or day number not valid or not Gregorian calendar
		end
		anchor_year = year;

	elseif date_string:match ("^%a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then	-- month-initial: month day, year
		month, day, anchor_year, year = date_string:match ("(%a+)%s*(%d%d?),%s*((%d%d%d%d)%a?)");
		month = get_month_number (month);
		if 0 == month then return false; end				-- return false if month text isn't one of the twelve months

	elseif date_string:match ("^%a+ +[1-9]%d?–[1-9]%d?, +[1-9]%d%d%d%a?$") then	-- month-initial day range: month day–day, year; days are separated by endash
		month, day, day2, anchor_year, year = date_string:match ("(%a+) +(%d%d?)–(%d%d?), +((%d%d%d%d)%a?)");
		if tonumber (day) >= tonumber (day2) then return false; end	-- date range order is left to right: earlier to later; dates may not be the same;
		month = get_month_number (month);
		if 0 == month then return false; end				-- return false if month text isn't one of the twelve months

	elseif date_string:match ("^[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then	-- day-initial: day month year
		day, month, anchor_year, year = date_string:match ("(%d%d*)%s*(%a+)%s*((%d%d%d%d)%a?)");
		month = get_month_number (month);
		if 0 == month then return false; end				-- return false if month text isn't one of the twelve months

	elseif date_string:match ("^[1-9]%d?–[1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then	-- day-range-initial: day–day month year; days are separated by endash
		day, day2, month, anchor_year, year = date_string:match ("(%d%d?)–(%d%d?) +(%a+) +((%d%d%d%d)%a?)");
		if tonumber (day) >= tonumber (day2) then return false; end	-- date range order is left to right: earlier to later; dates may not be the same;
		month = get_month_number (month);
		if 0 == month then return false; end				-- return false if month text isn't one of the twelve months

	elseif date_string:match ("^[1-9]%d? +%a+ – [1-9]%d? +%a+ +[1-9]%d%d%d%a?$") then	-- day initial month-day-range: day month - day month year; uses spaced endash
		day, month, day2, month2, anchor_year, year = date_string:match ("(%d%d?) +(%a+) – (%d%d?) +(%a+) +((%d%d%d%d)%a?)");
		if (not is_valid_month_season_range (month, month2)) or not is_valid_year (year) then return false; end	-- date range order is left to right: earlier to later;
		month = get_month_number (month);
		month2 = get_month_number (month2);
		if 0 == month or 0 == month2 then return false; end		-- the range test also accepts season pairs; seasons don't have days

	elseif date_string:match ("^%a+ +[1-9]%d? – %a+ +[1-9]%d?, +[1-9]%d%d%d%a?$") then	-- month initial month-day-range: month day – month day, year; uses spaced endash
		month, day, month2, day2, anchor_year, year = date_string:match ("(%a+) +(%d%d?) – (%a+) +(%d%d?), +((%d%d%d%d)%a?)");
		if (not is_valid_month_season_range (month, month2)) or not is_valid_year (year) then return false; end
		month = get_month_number (month);
		month2 = get_month_number (month2);
		if 0 == month or 0 == month2 then return false; end		-- the range test also accepts season pairs; seasons don't have days

	elseif date_string:match ("^Winter +[1-9]%d%d%d–[1-9]%d%d%d%a?$") then	-- special case Winter year-year; year separated with unspaced endash
		year, anchor_year, year2 = date_string:match ("Winter +(%d%d%d%d)–((%d%d%d%d)%a?)");
		anchor_year = year .. '–' .. anchor_year;				-- assemble anchor_year from both years
		if 1 ~= tonumber (year2) - tonumber (year) then return false; end	-- must be sequential years, left to right, earlier to later
		if not is_valid_year (year2) then return false; end		-- no year farther in the future than next year

	elseif date_string:match ("^%a+ +[1-9]%d%d%d – %a+ +[1-9]%d%d%d%a?$") then	-- month/season year - month/season year; separated by spaced endash
		month, year, month2, anchor_year, year2 = date_string:match ("(%a+) +(%d%d%d%d) – (%a+) +((%d%d%d%d)%a?)");
		anchor_year = year .. '–' .. anchor_year;				-- assemble anchor_year from both years
		if tonumber (year) >= tonumber (year2) then return false; end	-- left to right, earlier to later, not the same
		if not is_valid_year (year2) then return false; end		-- no year farther in the future than next year
		if not ((0 ~= get_month_number (month) and 0 ~= get_month_number (month2)) or	-- both must be month year or season year, not mixed
			(0 ~= get_season_number (month) and 0 ~= get_season_number (month2))) then return false; end

	elseif date_string:match ("^%a+–%a+ +[1-9]%d%d%d%a?$") then		-- month/season range year; months separated by endash
		month, month2, anchor_year, year = date_string:match ("(%a+)–(%a+)%s*((%d%d%d%d)%a?)");
		if (not is_valid_month_season_range (month, month2)) or (not is_valid_year (year)) then
			return false;
		end

	elseif date_string:match ("^%a+ +%d%d%d%d%a?$") then			-- month/season year
		month, anchor_year, year = date_string:match ("(%a+)%s*((%d%d%d%d)%a?)");
		if not is_valid_year (year) then return false; end
		if not is_valid_month_or_season (month) then return false; end

	elseif date_string:match ("^[1-9]%d%d%d?–[1-9]%d%d%d?%a?$") then	-- Year range: YYY-YYY or YYY-YYYY or YYYY–YYYY; separated by unspaced endash; 100-9999
		year, anchor_year, year2 = date_string:match ("(%d%d%d%d?)–((%d%d%d%d?)%a?)");
		anchor_year = year .. '–' .. anchor_year;				-- assemble anchor year from both years
		if tonumber (year) >= tonumber (year2) then return false; end	-- left to right, earlier to later, not the same
		if not is_valid_year (year2) then return false; end		-- no year farther in the future than next year

	elseif date_string:match ("^[1-9]%d%d%d–%d%d%a?$") then			-- Year range: YYYY–YY; separated by unspaced endash
		local century;
		year, century, anchor_year, year2 = date_string:match ("((%d%d)%d%d)–((%d%d)%a?)");
		anchor_year = year .. '–' .. anchor_year;				-- assemble anchor year from both years
		if 13 > tonumber (year2) then return false; end			-- don't allow 2003-05 which might be May 2003
		year2 = century .. year2;						-- add the century to year2 for comparisons
		if tonumber (year) >= tonumber (year2) then return false; end	-- left to right, earlier to later, not the same
		if not is_valid_year (year2) then return false; end		-- no year farther in the future than next year

	elseif date_string:match ("^[1-9]%d%d%d?%a?$") then			-- year; here accept either YYY or YYYY
		anchor_year, year = date_string:match ("((%d%d%d%d?)%a?)");
		if not is_valid_year (year) then
			return false;
		end

	else
		return false;								-- date format not one of the MOS:DATE approved formats
	end

	-- Check whole dates for validity.  Only formats that captured a day get here with day other than the
	-- number 0; in all of those formats month (and month2 when used) is a month number by now.
	if 0 ~= day then
		if not is_valid_date (year, month, day) then			-- single date, or first date of a range
			return false;
		end
		if 0 ~= day2 then							-- second date of a range
			local end_month = month;					-- day range within one month ...
			if 0 ~= month2 then
				end_month = month2;					-- ... or a range that runs into a later month of the same year
			end
			if not is_valid_date (year, end_month, day2) then
				return false;
			end
		end
	end

	-- if here, then date_string is valid; get coins_date from date_string (leave CITEREF disambiguator) ...
	coins_date = date_string:match ("^(.+%d)%a?$");				-- last character of valid disambiguatable date is always a digit
	coins_date = mw.ustring.gsub (coins_date, "–", "-");			-- ... and replace any ndash with a hyphen; gsub's match count is discarded

	return true, anchor_year, coins_date;					-- format is good and date string represents a real date
end
| |
− |
| |
--[[
Cycle the date-holding parameters in passed table date_parameters_list through check_date() to check
compliance with MOS:DATE.  For all valid dates, check_date() returns true.  The |date= parameter test is
unique, it is the only date holding parameter from which values for anchor_year (used in CITEREF
identifiers) and COinS_date (used in the COinS metadata) are derived.  The |date= parameter is the only
date-holding parameter that is allowed to hold the no-date keywords "n.d." or "nd" (without quotes); the
keyword, with or without a CITEREF disambiguator, must be the whole value.  The form "c. YYYY" is accepted
only in |date= and |year=.

Unlike most error messages created in this module, only one error message is created by this function.
Because all of the date holding parameters are processed serially, a single error message is created as the
dates are tested.

Inputs:
	date_parameters_list - table of parameter name / parameter value pairs

Returns:
	anchor_year, COinS_date, error_message
		anchor_year and COinS_date are nil unless |date= is set and yields them
		error_message is a comma-separated list of the failed parameters ("|name="); empty string when
		every date is good; order follows pairs() and so is unspecified
]]
function p.dates(date_parameters_list)
	local anchor_year;								-- will return as nil if the date being tested is not |date=
	local COinS_date;								-- will return as nil if the date being tested is not |date=
	local failed_parameters = {};						-- one entry for each parameter that fails

	for k, v in pairs (date_parameters_list) do				-- for each date-holding parameter in the list
		if is_set (v) then							-- if the parameter has a value
			local good_date = false;					-- start afresh so one parameter's verdict never carries to the next

			if v:match ("^c%. [1-9]%d%d%d?%a?$") then			-- special case for c. year with or without CITEREF disambiguator - only |date= and |year=
				local year = v:match ("c%. ([1-9]%d%d%d?)%a?");		-- get the year portion so it can be tested
				if 'date' == k then
					anchor_year, COinS_date = v:match ("((c%. [1-9]%d%d%d?)%a?)");	-- anchor year and COinS_date only from |date= parameter
					good_date = is_valid_year (year);
				elseif 'year' == k then
					good_date = is_valid_year (year);
				end							-- any other parameter: good_date stays false
			elseif 'date' == k then						-- if the parameter is |date=
				if v:match ("^n%.d%.%a?$") then				-- if |date=n.d. with or without a CITEREF disambiguator
					good_date, anchor_year, COinS_date = true, v:match ("((n%.d%.)%a?)");	--"n.d."; no error when date parameter is set to no date
				elseif v:match ("^nd%a?$") then				-- if |date=nd with or without a CITEREF disambiguator
					good_date, anchor_year, COinS_date = true, v:match ("((nd)%a?)");	--"nd"; no error when date parameter is set to no date
				else
					good_date, anchor_year, COinS_date = check_date (v);	-- go test the date
				end
			else								-- any other date-holding parameter
				good_date = check_date (v);				-- go test the date; extra return values are dropped
			end

			if not good_date then
				failed_parameters[#failed_parameters + 1] = "|" .. k .. "=";	-- remember the failed parameter
			end
		end
	end

	-- assemble one error message so we don't add the tracking category multiple times
	return anchor_year, COinS_date, table.concat (failed_parameters, ", ");
end
| |
− |
| |
− | return p;
| |