FIX:(#1358) nzbname error when retrieving nzb and assigning filename from WWT tracker, FIX:(#1372)(#1369) Fix for creating series folder when adding a series when series title contains either double quotation marks, or an asterisk, FIX:(#1373) Filechecker would ignore filenames that had the extension capitalized, FIX:(#1366) When Comic Publisher is not provided on CV, would error during add, FIX: Attempted fix for unicode characters when importing (series title, filenames), FIX: Removed str references that would cause an error on weekly pull in some instances, FIX: When checking for watched series, if series title being checked against had only one word, would cause a traceback error, FIX: When attempting to retrieve results/torrents from TPSE and was behind cloudflare, would error out, IMP: file-size check now works for 32p feeds, FIX: When pullist issue was marked as Wanted and issue was populated on series detail page, occasionally would not have the same status of Wanted, FIX: Fixed incorrect placement of Comic Location title in GUI, IMP: Added short description for Search Delay option within GUI, FIX:(#1370) multiple selection from Manage Comics tab (Refresh/Delete/Pause) would only select one item

This commit is contained in:
evilhero 2016-09-06 11:06:07 -04:00
parent a850e386d4
commit e36cb13e6a
68 changed files with 16938 additions and 91 deletions

View File

@ -201,7 +201,7 @@
</div>
</div>
</fieldset>
<fieldset>
<fieldset>
<legend>Interval</legend>
<div class="row">
<label>NZB Search Interval</label>
@ -212,9 +212,11 @@
<div class="row">
<label>Search delay</label>
<input type="text" name="search_delay" value="${config['search_delay']}" size="4" />mins
<small>The amount of time to wait between each search request (minimum is 1 min)</small>
</div>
</div>
</div>
</fieldset>
<fieldset>
<legend>Comic Location</legend>
<div>
<small class="heading"><span style="float: left; margin-right: .3em; margin-top: 4px;" class="ui-icon ui-icon-info"></span>Automatic folder creation happens BENEATH this path</small>

153
lib/cfscrape/__init__.py Normal file
View File

@ -0,0 +1,153 @@
from time import sleep
import logging
import random
import re
import os
from lib.requests.sessions import Session
import lib.js2py as js2py
from lib.js2py import eval_js as eval_js
try:
from urlparse import urlparse
except ImportError:
from urllib.parse import urlparse
DEFAULT_USER_AGENTS = [
"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36",
"Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:41.0) Gecko/20100101 Firefox/41.0"
]
DEFAULT_USER_AGENT = random.choice(DEFAULT_USER_AGENTS)
class CloudflareScraper(Session):
    """A ``Session`` that solves Cloudflare's JavaScript anti-bot challenge.

    Requests are issued normally; when the response looks like a Cloudflare
    challenge page, the challenge is solved and the response of the
    follow-up request is returned instead.
    """

    def __init__(self, *args, **kwargs):
        super(CloudflareScraper, self).__init__(*args, **kwargs)

        if "requests" in self.headers["User-Agent"]:
            # No custom User-Agent has been set: use the browser User-Agent
            # that was picked at random when the module was imported.
            self.headers["User-Agent"] = DEFAULT_USER_AGENT

    @staticmethod
    def _is_cloudflare_challenge(resp):
        """Return True when *resp* looks like a Cloudflare challenge page.

        Requires a 503 status, a ``Server`` header starting with
        ``cloudflare`` and the two challenge form fields in the body, so that
        an ordinary 503 passed through Cloudflare is returned to the caller
        instead of failing inside the challenge parser.
        """
        if resp.status_code != 503:
            return False
        if not resp.headers.get("Server", "").startswith("cloudflare"):
            return False
        body = resp.text
        return "jschl_vc" in body and "jschl_answer" in body

    def request(self, method, url, *args, **kwargs):
        """Perform the request, solving a Cloudflare challenge if one is returned."""
        resp = super(CloudflareScraper, self).request(method, url, *args, **kwargs)

        # Check if Cloudflare anti-bot is on
        if self._is_cloudflare_challenge(resp):
            return self.solve_cf_challenge(resp, **kwargs)

        # Otherwise, no Cloudflare anti-bot detected
        return resp

    def solve_cf_challenge(self, resp, **kwargs):
        """Solve the challenge contained in *resp* and return the follow-up response.

        The ``params`` and ``headers`` entries of *kwargs* are updated with
        the challenge answer and a ``Referer`` before the answer is submitted
        with a GET request.  Any parsing failure is logged and re-raised.
        """
        sleep(5)  # Cloudflare requires a delay before solving the challenge

        body = resp.text
        parsed_url = urlparse(resp.url)
        domain = parsed_url.netloc
        submit_url = "%s://%s/cdn-cgi/l/chk_jschl" % (parsed_url.scheme, domain)

        params = kwargs.setdefault("params", {})
        headers = kwargs.setdefault("headers", {})
        headers["Referer"] = resp.url

        try:
            params["jschl_vc"] = re.search(r'name="jschl_vc" value="(\w+)"', body).group(1)
            params["pass"] = re.search(r'name="pass" value="(.+?)"', body).group(1)

            # Extract the arithmetic operation
            js = self.extract_js(body)

        except Exception:
            # Something is wrong with the page.
            # This may indicate Cloudflare has changed their anti-bot
            # technique. If you see this and are running the latest version,
            # please open a GitHub issue so I can update the code accordingly.
            logging.error("[!] Unable to parse Cloudflare anti-bots page. "
                          "Try upgrading cloudflare-scrape, or submit a bug report "
                          "if you are running the latest version. Please read "
                          "https://github.com/Anorov/cloudflare-scrape#updates "
                          "before submitting a bug report.")
            raise

        # NOTE(security): the snippet evaluated below comes from the remote
        # page; extract_js() reduces it to the arithmetic expression first.
        js = js.replace('return', '')
        params["jschl_answer"] = str(int(eval_js(js)) + len(domain))

        return self.get(submit_url, **kwargs)

    def extract_js(self, body):
        """Return the challenge's arithmetic JavaScript snippet found in *body*."""
        js = re.search(r"setTimeout\(function\(\){\s+(var "
                       "s,t,o,p,b,r,e,a,k,i,n,g,f.+?\r?\n[\s\S]+?a\.value =.+?)\r?\n", body).group(1)
        js = re.sub(r"a\.value = (parseInt\(.+?\)).+", r"\1", js)
        js = re.sub(r"\s{3,}[a-z](?: = |\.).+", "", js)

        # Strip characters that could be used to exit the string context
        # These characters are not currently used in Cloudflare's arithmetic snippet
        js = re.sub(r"[\n\\']", "", js)

        return js.replace("parseInt", "return parseInt")

    @classmethod
    def create_scraper(cls, sess=None, **kwargs):
        """
        Convenience function for creating a ready-to-go requests.Session (subclass) object.

        When *sess* is given, its non-empty session attributes are copied onto
        the new scraper.  Extra keyword arguments are accepted but not used.
        """
        scraper = cls()

        if sess:
            attrs = ["auth", "cert", "cookies", "headers", "hooks", "params", "proxies", "data"]
            for attr in attrs:
                val = getattr(sess, attr, None)
                if val:
                    setattr(scraper, attr, val)

        return scraper

    ## Functions for integrating cloudflare-scrape with other applications and scripts

    @classmethod
    def get_tokens(cls, url, user_agent=None, **kwargs):
        """Fetch *url* and return ``(cookies, user_agent)``.

        ``cookies`` is a dict with the ``__cfduid`` and ``cf_clearance``
        values.  Extra keyword arguments are forwarded to the GET request.

        Raises:
            ValueError: if no matching Cloudflare cookie domain is found.
            Exception: whatever the request or ``raise_for_status`` raised
                (logged first).
        """
        scraper = cls.create_scraper()
        if user_agent:
            scraper.headers["User-Agent"] = user_agent

        try:
            resp = scraper.get(url, **kwargs)
            resp.raise_for_status()
        except Exception:
            logging.error("'%s' returned an error. Could not collect tokens." % url)
            raise

        domain = urlparse(resp.url).netloc
        cookie_domain = None

        for d in scraper.cookies.list_domains():
            if d.startswith(".") and d in ("." + domain):
                cookie_domain = d
                break
        else:
            raise ValueError("Unable to find Cloudflare cookies. Does the site actually have Cloudflare IUAM mode enabled?")

        return ({
                    "__cfduid": scraper.cookies.get("__cfduid", "", domain=cookie_domain),
                    "cf_clearance": scraper.cookies.get("cf_clearance", "", domain=cookie_domain)
                },
                scraper.headers["User-Agent"]
               )

    @classmethod
    def get_cookie_string(cls, url, user_agent=None, **kwargs):
        """
        Convenience function for building a Cookie HTTP header value.

        Returns ``(cookie_header_value, user_agent)``; extra keyword arguments
        are forwarded to ``get_tokens``.
        """
        tokens, user_agent = cls.get_tokens(url, user_agent=user_agent, **kwargs)
        return "; ".join("=".join(pair) for pair in tokens.items()), user_agent
create_scraper = CloudflareScraper.create_scraper
get_tokens = CloudflareScraper.get_tokens
get_cookie_string = CloudflareScraper.get_cookie_string

19
lib/js2py/LICENSE.md Normal file
View File

@ -0,0 +1,19 @@
The MIT License
Copyright © 2014, 2015 Piotr Dabkowski
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the “Software”),
to deal in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
the Software, and to permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or
substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
OR THE USE OR OTHER DEALINGS IN THE SOFTWARE

69
lib/js2py/__init__.py Normal file
View File

@ -0,0 +1,69 @@
# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
""" This module allows you to translate and execute Javascript in pure python.
Basically it is an implementation of ECMAScript 5.1 in pure python.
Use eval_js method to execute javascript code and get resulting python object (builtin if possible).
EXAMPLE:
>>> import js2py
>>> add = js2py.eval_js('function add(a, b) {return a + b}')
>>> add(1, 2) + 3
6
>>> add('1', 2, 3)
u'12'
>>> add.constructor
function Function() { [python code] }
Or use EvalJs to execute many javascript code fragments under same context - you would be able to get any
variable from the context!
>>> js = js2py.EvalJs()
>>> js.execute('var a = 10; function f(x) {return x*x};')
>>> js.f(9)
81
>>> js.a
10
Also you can use its console method to play with interactive javascript console.
Use parse_js to parse (syntax tree is just like in esprima.js) and translate_js to translate JavaScript.
Finally, you can use pyimport statement from inside JS code to import and use python libraries.
>>> js2py.eval_js('pyimport urllib; urllib.urlopen("https://www.google.com")')
NOTE: This module is still not fully finished:
Date and JSON builtin objects are not implemented
Array prototype is not fully finished (will be soon)
Other than that everything should work fine.
"""
__author__ = 'Piotr Dabkowski'
__all__ = ['EvalJs', 'translate_js', 'import_js', 'eval_js', 'parse_js', 'translate_file', 'run_file']
from .evaljs import *
from .translators import parse as parse_js

1937
lib/js2py/base.py Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
__author__ = 'Piotr Dabkowski'

View File

@ -0,0 +1,38 @@
from ..base import *
@Js
def Array():
    """JS ``Array`` constructor called as a function.

    With zero or several arguments, the arguments themselves are returned as
    the element list.  With a single numeric argument, an empty array whose
    ``length`` is set to that number is returned.  Any other single argument
    becomes the only element.
    """
    # NOTE(review): `arguments` is not defined in this file; presumably the
    # @Js wrapper supplies the call's argument object -- confirm in base.py.
    if len(arguments)==0 or len(arguments)>1:
        return arguments.to_list()
    a = arguments[0]
    if isinstance(a, PyJsNumber):
        length = a.to_uint32()
        # The uint32 conversion must not change the value, otherwise the
        # requested length is rejected.
        if length!=a.value:
            raise MakeError('RangeError', 'Invalid array length')
        temp = Js([])
        temp.put('length', a)
        return temp
    return [a]
Array.create = Array
Array.own['length']['value'] = Js(1)
@Js
def isArray(arg):
    """Return whether *arg*'s ``Class`` attribute is ``'Array'``."""
    return arg.Class=='Array'
Array.define_own_property('isArray', {'value': isArray,
'enumerable': False,
'writable': True,
'configurable': True})
Array.define_own_property('prototype', {'value': ArrayPrototype,
'enumerable': False,
'writable': False,
'configurable': False})
ArrayPrototype.define_own_property('constructor', {'value': Array,
'enumerable': False,
'writable': True,
'configurable': True})

View File

@ -0,0 +1,11 @@
from ..base import *
BooleanPrototype.define_own_property('constructor', {'value': Boolean,
'enumerable': False,
'writable': True,
'configurable': True})
Boolean.define_own_property('prototype', {'value': BooleanPrototype,
'enumerable': False,
'writable': False,
'configurable': False})

View File

@ -0,0 +1,362 @@
from ..base import *
from .time_helpers import *
TZ_OFFSET = (time.altzone//3600)
ABS_OFFSET = abs(TZ_OFFSET)
TZ_NAME = time.tzname[1]
ISO_FORMAT = '%s-%s-%sT%s:%s:%s.%sZ'
@Js
def Date(year, month, date, hours, minutes, seconds, ms):
    """JS ``Date`` called as a function.

    The parameters are not used: the result is always the string form of the
    current time.
    """
    return now().to_string()
Date.Class = 'Date'
def now():
    """Return a Date object for the current time (whole milliseconds since the epoch)."""
    current_ms = int(time.time() * 1000)
    return PyJsDate(current_ms, prototype=DatePrototype)
@Js
def UTC(year, month, date, hours, minutes, seconds, ms): # todo complete this
    """``Date.UTC``: build a Date from date/time components without any
    local-time conversion.

    The values are read from ``arguments`` rather than from the named
    parameters.  The first two (year, month) are always read; the remaining
    ones default to day 1 and zero hours/minutes/seconds/milliseconds.
    """
    # NOTE(review): `arguments` is presumably provided by the @Js wrapper --
    # confirm in base.py.
    args = arguments
    y = args[0].to_number()
    m = args[1].to_number()
    l = len(args)
    dt = args[2].to_number() if l>2 else Js(1)
    h = args[3].to_number() if l>3 else Js(0)
    mi = args[4].to_number() if l>4 else Js(0)
    sec = args[5].to_number() if l>5 else Js(0)
    mili = args[6].to_number() if l>6 else Js(0)
    # Years 0..99 are shifted into 1900..1999.
    if not y.is_nan() and 0<=y.value<=99:
        y = y + Js(1900)
    t = TimeClip(MakeDate(MakeDay(y, m, dt), MakeTime(h, mi, sec, mili)))
    return PyJsDate(t, prototype=DatePrototype)
@Js
def parse(string):
    """``Date.parse``: convert *string* to its string value, hand it to
    ``parse_date`` and wrap the clipped result in a Date object."""
    return PyJsDate(TimeClip(parse_date(string.to_string().value)), prototype=DatePrototype)
Date.define_own_property('now', {'value': Js(now),
'enumerable': False,
'writable': True,
'configurable': True})
Date.define_own_property('parse', {'value': parse,
'enumerable': False,
'writable': True,
'configurable': True})
Date.define_own_property('UTC', {'value': UTC,
'enumerable': False,
'writable': True,
'configurable': True})
class PyJsDate(PyJs):
    """Date object: ``value`` holds the time value in milliseconds (or NaN)."""
    Class = 'Date'
    extensible = True

    def __init__(self, value, prototype=None):
        self.value = value
        self.own = {}
        self.prototype = prototype

    # todo fix this problematic datetime part
    def to_local_dt(self):
        """Return the local-time datetime for this date (second resolution)."""
        local_seconds = UTCToLocal(self.value) // 1000
        return datetime.datetime.utcfromtimestamp(local_seconds)

    def to_utc_dt(self):
        """Return the UTC datetime for this date (second resolution)."""
        utc_seconds = self.value // 1000
        return datetime.datetime.utcfromtimestamp(utc_seconds)

    def local_strftime(self, pattern):
        """Format the local time with *pattern*; 'Invalid Date' when the value is NaN."""
        if self.value is NaN:
            return 'Invalid Date'
        try:
            local_dt = self.to_local_dt()
        except:  # deliberately broad: any conversion failure becomes a JS TypeError
            raise MakeError('TypeError', 'unsupported date range. Will fix in future versions')
        try:
            formatted = local_dt.strftime(pattern)
        except:
            raise MakeError('TypeError', 'Could not generate date string from this date (limitations of python.datetime)')
        return formatted

    def utc_strftime(self, pattern):
        """Format the UTC time with *pattern*; 'Invalid Date' when the value is NaN."""
        if self.value is NaN:
            return 'Invalid Date'
        try:
            utc_dt = self.to_utc_dt()
        except:  # deliberately broad: any conversion failure becomes a JS TypeError
            raise MakeError('TypeError', 'unsupported date range. Will fix in future versions')
        try:
            formatted = utc_dt.strftime(pattern)
        except:
            raise MakeError('TypeError', 'Could not generate date string from this date (limitations of python.datetime)')
        return formatted
def parse_date(py_string):
    """Parse a date string into a time value (not implemented).

    Raises:
        NotImplementedError: always.  The exception used to be *returned*,
            which handed an exception instance to the callers as if it were a
            time value.
    """
    raise NotImplementedError('Date string parsing is not implemented')
def date_constructor(*args):
    """Dispatch ``new Date(...)`` on the number of arguments supplied."""
    argc = len(args)
    if argc == 0:
        return date_constructor0()
    if argc == 1:
        return date_constructor1(args[0])
    # two or more arguments: date/time components
    return date_constructor2(*args)
def date_constructor0():
    """``new Date()`` without arguments: the current time."""
    return now()
def date_constructor1(value):
    """``new Date(value)``: a string primitive is parsed, anything else is
    converted to an integer time value."""
    primitive = value.to_primitive()
    if primitive._type() == 'String':
        time_value = parse_date(primitive.value)
    else:
        time_value = primitive.to_int()
    return PyJsDate(TimeClip(time_value), prototype=DatePrototype)
def date_constructor2(*args):
    """``new Date(year, month[, date[, hours[, minutes[, seconds[, ms]]]]])``.

    The components are interpreted as local time and converted to UTC.
    Missing components default to day 1 and zero for the time fields.
    """
    argc = len(args)
    year = args[0].to_number()
    month = args[1].to_number()
    day = args[2].to_number() if argc > 2 else Js(1)
    hour = args[3].to_number() if argc > 3 else Js(0)
    minute = args[4].to_number() if argc > 4 else Js(0)
    second = args[5].to_number() if argc > 5 else Js(0)
    milli = args[6].to_number() if argc > 6 else Js(0)

    # Years 0..99 are shifted into 1900..1999.
    if not year.is_nan() and 0 <= year.value <= 99:
        year = year + Js(1900)

    day_number = MakeDay(year, month, day)
    time_in_day = MakeTime(hour, minute, second, milli)
    local_time = MakeDate(day_number, time_in_day)
    return PyJsDate(TimeClip(LocalToUTC(local_time)), prototype=DatePrototype)
Date.create = date_constructor
DatePrototype = PyJsDate(float('nan'), prototype=ObjectPrototype)
def check_date(obj):
    """Raise a JS TypeError unless *obj* has ``Class == 'Date'``."""
    if obj.Class == 'Date':
        return
    raise MakeError('TypeError', 'this is not a Date object')
class DateProto:
    """Methods installed on the Date prototype.

    NOTE(review): the methods take no ``self``; ``this`` is not defined in
    this file and is presumably bound to the receiver by the machinery behind
    ``fill_prototype`` -- confirm in base.py.
    """

    def toString():
        check_date(this)
        if this.value is NaN:
            return 'Invalid Date'
        # Local offset from UTC in whole hours, rendered as e.g. "+0200".
        offset = (UTCToLocal(this.value) - this.value)//msPerHour
        return this.local_strftime('%a %b %d %Y %H:%M:%S GMT') + '%s00 (%s)' % (pad(offset, 2, True), GetTimeZoneName(this.value))

    def toDateString():
        check_date(this)
        return this.local_strftime('%d %B %Y')

    def toTimeString():
        check_date(this)
        return this.local_strftime('%H:%M:%S')

    def toLocaleString():
        check_date(this)
        return this.local_strftime('%d %B %Y %H:%M:%S')

    def toLocaleDateString():
        check_date(this)
        return this.local_strftime('%d %B %Y')

    def toLocaleTimeString():
        check_date(this)
        return this.local_strftime('%H:%M:%S')

    def valueOf():
        check_date(this)
        return this.value

    def getTime():
        check_date(this)
        return this.value

    def getFullYear():
        check_date(this)
        if this.value is NaN:
            return NaN
        return YearFromTime(UTCToLocal(this.value))

    def getUTCFullYear():
        check_date(this)
        if this.value is NaN:
            return NaN
        return YearFromTime(this.value)

    def getMonth():
        check_date(this)
        if this.value is NaN:
            return NaN
        return MonthFromTime(UTCToLocal(this.value))

    def getDate():
        check_date(this)
        if this.value is NaN:
            return NaN
        return DateFromTime(UTCToLocal(this.value))

    def getUTCMonth():
        check_date(this)
        if this.value is NaN:
            return NaN
        return MonthFromTime(this.value)

    def getUTCDate():
        check_date(this)
        if this.value is NaN:
            return NaN
        return DateFromTime(this.value)

    def getDay():
        check_date(this)
        if this.value is NaN:
            return NaN
        return WeekDay(UTCToLocal(this.value))

    def getUTCDay():
        check_date(this)
        if this.value is NaN:
            return NaN
        return WeekDay(this.value)

    def getHours():
        check_date(this)
        if this.value is NaN:
            return NaN
        return HourFromTime(UTCToLocal(this.value))

    def getUTCHours():
        check_date(this)
        if this.value is NaN:
            return NaN
        return HourFromTime(this.value)

    def getMinutes():
        check_date(this)
        if this.value is NaN:
            return NaN
        return MinFromTime(UTCToLocal(this.value))

    def getUTCMinutes():
        check_date(this)
        if this.value is NaN:
            return NaN
        return MinFromTime(this.value)

    def getSeconds():
        check_date(this)
        if this.value is NaN:
            return NaN
        return SecFromTime(UTCToLocal(this.value))

    def getUTCSeconds():
        check_date(this)
        if this.value is NaN:
            return NaN
        return SecFromTime(this.value)

    def getMilliseconds():
        check_date(this)
        if this.value is NaN:
            return NaN
        return msFromTime(UTCToLocal(this.value))

    def getUTCMilliseconds():
        check_date(this)
        if this.value is NaN:
            return NaN
        return msFromTime(this.value)

    def getTimezoneOffset():
        check_date(this)
        if this.value is NaN:
            return NaN
        # ECMAScript defines the offset as UTC minus local time, in minutes
        # (positive west of Greenwich); the operands used to be swapped.
        return (this.value - UTCToLocal(this.value))//60000

    def setTime(time):
        check_date(this)
        this.value = TimeClip(time.to_number().to_int())
        return this.value

    def setMilliseconds(ms):
        check_date(this)
        t = UTCToLocal(this.value)
        # MakeTime calls is_finite()/to_int() on each argument, so it must be
        # given JS numbers rather than plain Python ints.
        tim = MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms.to_number())
        u = TimeClip(LocalToUTC(MakeDate(Day(t), tim)))
        this.value = u
        return u

    def setUTCMilliseconds(ms):
        check_date(this)
        t = this.value
        # See setMilliseconds: MakeTime needs JS numbers.
        tim = MakeTime(Js(HourFromTime(t)), Js(MinFromTime(t)), Js(SecFromTime(t)), ms.to_number())
        u = TimeClip(MakeDate(Day(t), tim))
        this.value = u
        return u

    # todo Complete all setters!

    def toUTCString():
        check_date(this)
        return this.utc_strftime('%d %B %Y %H:%M:%S')

    def toISOString():
        check_date(this)
        t = this.value
        year = YearFromTime(t)
        # The ISO format has a three-digit milliseconds field.
        month, day, hour, minute, second, milli = pad(MonthFromTime(t)+1), pad(DateFromTime(t)), pad(HourFromTime(t)), pad(MinFromTime(t)), pad(SecFromTime(t)), pad(msFromTime(t), 3)
        return ISO_FORMAT % (unicode(year) if 0<=year<=9999 else pad(year, 6, True), month, day, hour, minute, second, milli)

    def toJSON(key):
        o = this.to_object()
        tv = o.to_primitive('Number')
        if tv.Class=='Number' and not tv.is_finite():
            return this.null
        toISO = o.get('toISOString')
        if not toISO.is_callable():
            raise this.MakeError('TypeError', 'toISOString is not callable')
        return toISO.call(o, ())
def pad(num, n=2, sign=False):
    '''Return abs(num) as text, left-padded with zeros to at least n characters.

    With sign=True the result is prefixed with '+' (num >= 0) or '-'.
    '''
    digits = unicode(abs(num)).rjust(n, '0')
    if not sign:
        return digits
    prefix = '+' if num >= 0 else '-'
    return prefix + digits
fill_prototype(DatePrototype, DateProto, default_attrs)
Date.define_own_property('prototype', {'value': DatePrototype,
'enumerable': False,
'writable': False,
'configurable': False})
DatePrototype.define_own_property('constructor', {'value': Date,
'enumerable': False,
'writable': True,
'configurable': True})

View File

@ -0,0 +1,49 @@
from ..base import *
try:
from ..translators.translator import translate_js
except:
pass
@Js
def Function():
    """JS ``Function`` constructor: build a function from source strings.

    The last argument is the function body, all preceding arguments are
    parameter names.  The source is wrapped in an inline JS function,
    translated to Python and executed in this module's globals.
    """
    # NOTE(review): `arguments` is presumably provided by the @Js wrapper --
    # confirm in base.py.
    # convert arguments to python list of strings
    a = [e.to_string().value for e in arguments.to_list()]
    body = ';'
    args = ()
    if len(a):
        body = '%s;' % a[-1]
        args = a[:-1]
    # translate this function to js inline function
    js_func = '(function (%s) {%s})' % (','.join(args), body)
    # now translate js inline to python function
    py_func = translate_js(js_func, '')
    # add set func scope to global scope
    # a bit messy solution but works :)
    globals()['var'] = PyJs.GlobalObject
    # define py function and return it
    temp = executor(py_func, globals())
    temp.source = '{%s}'%body
    temp.func_name = 'anonymous'
    return temp
def executor(f, glob):
    """Execute the translated source *f* in the namespace *glob* and return
    the function it defines under the name ``PyJs_anonymous_0_``.

    The *glob* argument used to be ignored in favour of this module's
    globals; the only visible caller passes exactly those globals, so
    honouring the parameter does not change its behaviour.
    """
    exec(f, glob)
    return glob['PyJs_anonymous_0_']
#new statement simply calls Function
Function.create = Function
#set constructor property inside FunctionPrototype
fill_in_props(FunctionPrototype, {'constructor':Function}, default_attrs)
#attach prototype to Function constructor
Function.define_own_property('prototype', {'value': FunctionPrototype,
'enumerable': False,
'writable': False,
'configurable': False})
#Fix Function length (its 0 and should be 1)
Function.own['length']['value'] = Js(1)

View File

@ -0,0 +1,151 @@
from ..base import *
import math
import random
Math = PyJsObject(prototype=ObjectPrototype)
Math.Class = 'Math'
CONSTANTS = {'E': 2.7182818284590452354,
'LN10': 2.302585092994046,
'LN2': 0.6931471805599453,
'LOG2E': 1.4426950408889634,
'LOG10E': 0.4342944819032518,
'PI': 3.1415926535897932,
'SQRT1_2': 0.7071067811865476,
'SQRT2': 1.4142135623730951}
for constant, value in CONSTANTS.items():
Math.define_own_property(constant, {'value': Js(value),
'writable': False,
'enumerable': False,
'configurable': False})
class MathFunctions:
    """Functions installed on the JS ``Math`` object.

    Each function converts its arguments with ``to_number()`` and works on
    the resulting ``value``; NaN inputs yield NaN.

    NOTE(review): the methods take no ``self`` and ``min``/``max`` read an
    ``arguments`` name that is not defined in this file; presumably both are
    handled by the machinery behind ``fill_prototype`` -- confirm in base.py.
    """

    def abs(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return abs(a)

    def acos(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        try:
            return math.acos(a)
        except:
            return NaN

    def asin(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        try:
            return math.asin(a)
        except:
            return NaN

    def atan(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.atan(a)

    def atan2(y, x):
        a = x.to_number().value
        b = y.to_number().value
        if a!=a or b!=b: # it must be a nan
            return NaN
        return math.atan2(b, a)

    def ceil(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.ceil(a)

    def floor(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.floor(a)

    def round(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        if math.isinf(a):
            return a
        # JS rounds halves towards +Infinity (2.5 -> 3, -2.5 -> -2); Python's
        # round() rounds halves away from zero (2.x) or to even (3.x).
        return math.floor(a + 0.5)

    def sin(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.sin(a)

    def cos(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.cos(a)

    def tan(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.tan(a)

    def log(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        try:
            return math.log(a)
        except:
            return NaN

    def exp(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        return math.exp(a)

    def pow(x, y):
        a = x.to_number().value
        b = y.to_number().value
        if a!=a or b!=b: # it must be a nan
            return NaN
        try:
            return a**b
        except:
            return NaN

    def sqrt(x):
        a = x.to_number().value
        if a!=a: # it must be a nan
            return NaN
        try:
            # math.sqrt raises for negative input on every Python version,
            # whereas a**0.5 returns a complex number on Python 3.
            return math.sqrt(a)
        except ValueError:
            return NaN

    def min():
        if not len(arguments):
            # Math.min() without arguments is +Infinity.
            return Infinity
        lis = tuple(e.to_number().value for e in arguments.to_list())
        if any(e!=e for e in lis): # we dont want NaNs
            return NaN
        return min(*lis)

    def max():
        if not len(arguments):
            return -Infinity
        lis = tuple(e.to_number().value for e in arguments.to_list())
        if any(e!=e for e in lis): # we dont want NaNs
            return NaN
        return max(*lis)

    def random():
        return random.random()
fill_prototype(Math, MathFunctions, default_attrs)

View File

@ -0,0 +1,18 @@
from ..base import *
CONSTS = {'prototype': NumberPrototype,
'MAX_VALUE':1.7976931348623157e308,
'MIN_VALUE': 5.0e-324,
'NaN': NaN,
'NEGATIVE_INFINITY': float('-inf'),
'POSITIVE_INFINITY': float('inf')}
fill_in_props(Number, CONSTS, {'enumerable': False,
'writable': False,
'configurable': False})
NumberPrototype.define_own_property('constructor', {'value': Number,
'enumerable': False,
'writable': True,
'configurable': True})

View File

@ -0,0 +1,172 @@
from ..base import *
import six
#todo Double check everything is OK
@Js
def Object():
    """JS ``Object`` called as a function.

    A null or undefined first argument yields a fresh empty object; any other
    value is converted with ``to_object()``.
    """
    # NOTE(review): `arguments` is presumably provided by the @Js wrapper --
    # confirm in base.py.
    val = arguments.get('0')
    if val.is_null() or val.is_undefined():
        return PyJsObject(prototype=ObjectPrototype)
    return val.to_object()
@Js
def object_constructor():
    """``new Object(...)``.

    An object argument is returned unchanged, a number/string/boolean is
    converted with ``to_object()``, and in every other case (including no
    argument) a fresh empty object is returned.
    """
    # NOTE(review): `arguments` is presumably provided by the @Js wrapper --
    # confirm in base.py.
    if len(arguments):
        val = arguments.get('0')
        if val.TYPE=='Object':
            #Implementation dependent, but my will simply return :)
            return val
        elif val.TYPE in {'Number', 'String', 'Boolean'}:
            return val.to_object()
    return PyJsObject(prototype=ObjectPrototype)
Object.create = object_constructor
Object.own['length']['value'] = Js(1)
class ObjectMethods:
    """Functions installed on the JS ``Object`` constructor.

    Every function raises a JS TypeError when its first argument is not an
    object (``create`` additionally accepts null).

    NOTE(review): the methods take no ``self`` and ``create`` reads an
    ``arguments`` name that is not defined in this file; presumably both are
    handled by the machinery behind ``fill_prototype`` -- confirm in base.py.
    """

    def getPrototypeOf(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.getPrototypeOf called on non-object')
        return null if obj.prototype is None else obj.prototype

    def getOwnPropertyDescriptor (obj, prop):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.getOwnPropertyDescriptor called on non-object')
        return obj.own.get(prop.to_string().value) # will return undefined if we dont have this prop

    def getOwnPropertyNames(obj):
        if not obj.is_object():
            # The message used to name getOwnPropertyDescriptor (copy-paste slip).
            raise MakeError('TypeError', 'Object.getOwnPropertyNames called on non-object')
        return obj.own.keys()

    def create(obj):
        if not (obj.is_object() or obj.is_null()):
            raise MakeError('TypeError', 'Object prototype may only be an Object or null')
        temp = PyJsObject(prototype=(None if obj.is_null() else obj))
        if len(arguments)>1 and not arguments[1].is_undefined():
            # On Python 2 the class attribute is an unbound method, so the
            # underlying function has to be called directly.
            if six.PY2:
                ObjectMethods.defineProperties.__func__(temp, arguments[1])
            else:
                ObjectMethods.defineProperties(temp, arguments[1])
        return temp

    def defineProperty(obj, prop, attrs):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.defineProperty called on non-object')
        name = prop.to_string().value
        if not obj.define_own_property(name, ToPropertyDescriptor(attrs)):
            raise MakeError('TypeError', 'Cannot redefine property: %s' % name)
        return obj

    def defineProperties(obj, properties):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.defineProperties called on non-object')
        props = properties.to_object()
        for name in props:
            desc = ToPropertyDescriptor(props.get(name.value))
            if not obj.define_own_property(name.value, desc):
                raise MakeError('TypeError', 'Failed to define own property: %s'%name.value)
        return obj

    def seal(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.seal called on non-object')
        for desc in obj.own.values():
            desc['configurable'] = False
        obj.extensible = False
        return obj

    def freeze(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.freeze called on non-object')
        for desc in obj.own.values():
            desc['configurable'] = False
            if is_data_descriptor(desc):
                desc['writable'] = False
        obj.extensible = False
        return obj

    def preventExtensions(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.preventExtensions on non-object')
        obj.extensible = False
        return obj

    def isSealed(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.isSealed called on non-object')
        if obj.extensible:
            return False
        for desc in obj.own.values():
            if desc['configurable']:
                return False
        return True

    def isFrozen(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.isFrozen called on non-object')
        if obj.extensible:
            return False
        for desc in obj.own.values():
            if desc['configurable']:
                return False
            if is_data_descriptor(desc) and desc['writable']:
                return False
        return True

    def isExtensible(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.isExtensible called on non-object')
        return obj.extensible

    def keys(obj):
        if not obj.is_object():
            raise MakeError('TypeError', 'Object.keys called on non-object')
        return [e for e,d in six.iteritems(obj.own) if d.get('enumerable')]
# add methods attached to Object constructor
fill_prototype(Object, ObjectMethods, default_attrs)
# add constructor to prototype
fill_in_props(ObjectPrototype, {'constructor':Object}, default_attrs)
# add prototype property to the constructor.
Object.define_own_property('prototype', {'value': ObjectPrototype,
'enumerable': False,
'writable': False,
'configurable': False})
# some utility functions:
def ToPropertyDescriptor(obj): # page 38 (50 absolute)
    """Convert a JS descriptor object into a Python descriptor dict.

    Only the fields present on *obj* are copied.  The boolean fields are
    coerced with ``to_boolean()``; ``get``/``set`` must be callable or
    undefined; accessor fields may not be mixed with ``value``/``writable``.
    Violations raise a JS TypeError.
    """
    if obj.TYPE != 'Object':
        raise MakeError('TypeError', 'Can\'t convert non-object to property descriptor')

    def read_accessor(key, message):
        # An accessor is accepted only when it is undefined or callable.
        candidate = obj.get(key)
        if candidate.is_undefined() or candidate.is_callable():
            return candidate
        raise MakeError('TypeError', message)

    descriptor = {}
    if obj.has_property('enumerable'):
        descriptor['enumerable'] = obj.get('enumerable').to_boolean().value
    if obj.has_property('configurable'):
        descriptor['configurable'] = obj.get('configurable').to_boolean().value
    if obj.has_property('value'):
        descriptor['value'] = obj.get('value')
    if obj.has_property('writable'):
        descriptor['writable'] = obj.get('writable').to_boolean().value
    if obj.has_property('get'):
        descriptor['get'] = read_accessor('get', 'Invalid getter (it has to be a function or undefined)')
    if obj.has_property('set'):
        descriptor['set'] = read_accessor('set', 'Invalid setter (it has to be a function or undefined)')

    has_accessor = 'get' in descriptor or 'set' in descriptor
    if has_accessor and ('value' in descriptor or 'writable' in descriptor):
        raise MakeError('TypeError', 'Invalid property. A property cannot both have accessors and be writable or have a value.')
    return descriptor

View File

@ -0,0 +1,11 @@
from ..base import *
RegExpPrototype.define_own_property('constructor', {'value': RegExp,
'enumerable': False,
'writable': True,
'configurable': True})
RegExp.define_own_property('prototype', {'value': RegExpPrototype,
'enumerable': False,
'writable': False,
'configurable': False})

View File

@ -0,0 +1,30 @@
from ..base import *
# python 3 support
import six
if six.PY3:
unichr = chr
@Js
def fromCharCode():
    """``String.fromCharCode``: build a string from the uint16 value of
    every argument, in order."""
    # NOTE(review): `arguments` and `this` are not defined in this file;
    # presumably the @Js wrapper provides them -- confirm in base.py.
    args = arguments.to_list()
    res = u''
    for e in args:
        res +=unichr(e.to_uint16())
    return this.Js(res)
fromCharCode.own['length']['value'] = Js(1)
String.define_own_property('fromCharCode', {'value': fromCharCode,
'enumerable': False,
'writable': True,
'configurable': True})
String.define_own_property('prototype', {'value': StringPrototype,
'enumerable': False,
'writable': False,
'configurable': False})
StringPrototype.define_own_property('constructor', {'value': String,
'enumerable': False,
'writable': True,
'configurable': True})

View File

@ -0,0 +1,183 @@
# NOTE: t must be INT!!!
import time
import datetime
import warnings
try:
from tzlocal import get_localzone
LOCAL_ZONE = get_localzone()
except: # except all problems...
warnings.warn('Please install or fix tzlocal library (pip install tzlocal) in order to make Date object work better. Otherwise I will assume DST is in effect all the time', Warning)
class LOCAL_ZONE:
@staticmethod
def dst(*args):
return 1
from lib.js2py.base import MakeError
CUM = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365)
msPerDay = 86400000
msPerYear = int(86400000*365.242)
msPerSecond = 1000
msPerMinute = 60000
msPerHour = 3600000
HoursPerDay = 24
MinutesPerHour = 60
SecondsPerMinute = 60
NaN = float('nan')
LocalTZA = - time.timezone * msPerSecond
def DaylightSavingTA(t):
    """Return the daylight-saving adjustment for time value *t*, in milliseconds.

    The module-level NaN object is passed through unchanged.  When the zone
    lookup fails for any reason a warning is issued and 1 is returned.
    """
    if t is NaN:
        return t
    try:
        moment = datetime.datetime.utcfromtimestamp(t // 1000)
        dst_offset = LOCAL_ZONE.dst(moment)
        return int(dst_offset.seconds) * 1000
    except:  # deliberately broad: any failure falls back to "DST in effect"
        warnings.warn('Invalid datetime date, assumed DST time, may be inaccurate...', Warning)
        return 1
        #raise MakeError('TypeError', 'date not supported by python.datetime. I will solve it in future versions')
def GetTimeZoneName(t):
    """Return the DST zone name when a positive adjustment applies at *t*,
    otherwise the standard zone name."""
    index = 1 if DaylightSavingTA(t) > 0 else 0
    return time.tzname[index]


def LocalToUTC(t):
    """Convert a local time value to UTC."""
    standard = t - LocalTZA
    return standard - DaylightSavingTA(standard)


def UTCToLocal(t):
    """Convert a UTC time value to local time."""
    shifted = t + LocalTZA
    return shifted + DaylightSavingTA(t)
def Day(t):
return t//86400000
def TimeWithinDay(t):
    """Return the milliseconds elapsed since the start of the day of *t*."""
    _, within_day = divmod(t, 86400000)
    return within_day
def DaysInYear(y):
    """Return 366 for Gregorian leap years and 365 for all other years."""
    is_leap = y % 4 == 0 and (y % 100 != 0 or y % 400 == 0)
    return 366 if is_leap else 365
def DayFromYear(y):
    """Return the day number of 1 January of year *y* (1970 maps to 0)."""
    plain_days = 365 * (y - 1970)
    every_fourth = (y - 1969) // 4
    every_hundredth = (y - 1901) // 100
    every_four_hundredth = (y - 1601) // 400
    return plain_days + every_fourth - every_hundredth + every_four_hundredth
def TimeFromYear(y):
    """Return the time value (ms) of the start of year *y*."""
    first_day = DayFromYear(y)
    return 86400000 * first_day
def YearFromTime(t):
    """Return the year that contains time value *t* (ms).

    An estimate is derived from the average year length and then moved
    one year at a time until ``TimeFromYear(year) <= t`` holds for the
    largest possible year.
    """
    # The estimate has to move in the same direction as t. Subtracting the
    # quotient started the search on the wrong side of 1970, so the
    # correction loops ran roughly twice the distance in years. The loops
    # below give the same answer for any starting guess.
    guess = 1970 + t//31556908800  # msPerYear
    gt = TimeFromYear(guess)
    if gt <= t:
        # Guess is not too late: advance until the year start passes t.
        while gt <= t:
            guess += 1
            gt = TimeFromYear(guess)
        return guess - 1
    # Guess is too late: step back until the year start is at or before t.
    while gt > t:
        guess -= 1
        gt = TimeFromYear(guess)
    return guess
def DayWithinYear(t):
    """Return the zero-based day of the year for time value *t*."""
    year_start_day = DayFromYear(YearFromTime(t))
    return Day(t) - year_start_day
def InLeapYear(t):
    """Return 1 when the year of time value *t* is a leap year, else 0."""
    y = YearFromTime(t)
    is_leap = y % 4 == 0 and (y % 100 != 0 or y % 400 == 0)
    return 1 if is_leap else 0
def MonthFromTime(t):
    """Return the zero-based month (0 = January) of time value *t*."""
    day = DayWithinYear(t)
    leap = InLeapYear(t)
    if day < 31:
        return 0
    # From February on, compare against the 365-day year table after
    # removing the leap day.
    day -= leap
    month_ends = (59, 90, 120, 151, 181, 212, 243, 273, 304, 334)
    for month, first_day_of_next in enumerate(month_ends, 1):
        if day < first_day_of_next:
            return month
    return 11
def DateFromTime(t):
    """Return the one-based day of the month for time value *t*."""
    month = MonthFromTime(t)
    day_of_year = DayWithinYear(t)
    # March and later months start one day later in a leap year.
    leap_shift = 1 if InLeapYear(t) and month >= 2 else 0
    return day_of_year - CUM[month] - leap_shift + 1
def WeekDay(t):
    """Return the day of the week of time value *t*, where 0 is Sunday."""
    shifted_day = Day(t) + 4
    return shifted_day % 7
def msFromTime(t):
    """Return the millisecond component (0..999) of time value *t*."""
    _, millis = divmod(t, 1000)
    return millis
def SecFromTime(t):
    """Return the seconds component (0..59) of time value *t*."""
    whole_seconds = t // 1000
    return whole_seconds % 60
def MinFromTime(t):
    """Return the minutes component (0..59) of time value *t*."""
    whole_minutes = t // 60000
    return whole_minutes % 60
def HourFromTime(t):
    """Return the hours component (0..23) of time value *t*."""
    whole_hours = t // 3600000
    return whole_hours % 24
def MakeTime (hour, Min, sec, ms):
    """Combine four PyJs values into milliseconds within a day.

    Returns NaN as soon as one component reports that it is not finite;
    otherwise each component is converted with ``to_int`` and weighted.
    """
    components = (hour, Min, sec, ms)
    for component in components:
        if not component.is_finite():
            return NaN
    h, m, s, milli = [component.to_int() for component in components]
    return h*3600000 + m*60000 + s*1000 + milli
def MakeDay(year, month, date):
    """Combine PyJs year/month/date values into a day number.

    Returns NaN when any component is not finite. Months outside 0..11
    carry over into the year.
    """
    for component in (year, month, date):
        if not component.is_finite():
            return NaN
    y, m, dt = year.to_int(), month.to_int(), date.to_int()
    carried_years, mn = divmod(m, 12)
    y += carried_years
    # Months from March on sit one day later in a leap year.
    leap_shift = 1 if DaysInYear(y) == 366 and mn >= 2 else 0
    return DayFromYear(y) + CUM[mn] + dt - 1 + leap_shift
def MakeDate (day, time):
    """Return the time value (ms) for day number *day* plus *time* ms."""
    day_start = 86400000 * day
    return day_start + time
def TimeClip(t):
    """Clamp *t* to the valid time range and truncate it to an int.

    Returns NaN for NaN, for infinities and for magnitudes above
    8.64e15 ms; otherwise returns ``int(t)``.
    """
    if t != t or abs(t) == float('inf'):
        return NaN
    # The limit is written as an exact integer: the float product
    # 8.64 * 10**15 rounds up to 8640000000000001.0, which let a value one
    # millisecond past the limit through.
    if abs(t) > 8640000000000000:
        return NaN
    return int(t)

250
lib/js2py/evaljs.py Normal file
View File

@ -0,0 +1,250 @@
# coding=utf-8
""" This module is still experimental!
"""
from .translators import translate_js, DEFAULT_HEADER
import sys
import time
import json
import six
import os
import hashlib
import codecs
import pyjs
__all__ = ['EvalJs', 'translate_js', 'import_js', 'eval_js', 'translate_file', 'run_file']
DEBUG = False
def path_as_local(path):
    """Return *path* unchanged when absolute, else joined onto the cwd."""
    if not os.path.isabs(path):
        # relative to cwd
        return os.path.join(os.getcwd(), path)
    return path
def import_js(path, lib_name, globals):
    """Import from a JavaScript source file.

    The file at *path* (relative paths are resolved against the cwd) is
    read as UTF-8 and executed in a fresh EvalJs context. The resulting
    JS scope, converted with ``to_python()``, is stored in *globals*
    under *lib_name*. *globals* is your ``globals()``.
    """
    with codecs.open(path_as_local(path), "r", "utf-8") as f:
        js = f.read()
    e = EvalJs()
    e.execute(js)
    # EvalJs keeps its execution namespace in `_context`. A plain `context`
    # attribute does not exist, so it would go through EvalJs.__getattr__
    # and be looked up as a JavaScript variable instead.
    var = e._context['var']
    globals[lib_name] = var.to_python()
def get_file_contents(path_or_file):
    """Return the text behind *path_or_file*.

    Objects that expose ``read`` are read directly; anything else is
    treated as a path and opened as UTF-8.
    """
    if not hasattr(path_or_file, 'read'):
        with codecs.open(path_as_local(path_or_file), "r", "utf-8") as f:
            return f.read()
    return path_or_file.read()
def write_file_contents(path_or_file, contents):
    """Write *contents* to *path_or_file*.

    Objects that expose ``write`` are written to directly; anything else
    is treated as a path and opened in 'w' mode.
    """
    if not hasattr(path_or_file, 'write'):
        with open(path_as_local(path_or_file), 'w') as f:
            f.write(contents)
        return
    path_or_file.write(contents)
def translate_file(input_path, output_path):
    '''
    Translate the JS file at input_path to Python and save it to output_path.

    Some convenience code is appended at the end so that the JS objects are
    easy to import. For example, given a file 'example.js' containing:
    var a = function(x) {return x}

    translate_file('example.js', 'example.py')

    example.py can now be imported and used:
    >>> from example import example
    >>> example.a(30)
    30
    '''
    py_code = translate_js(get_file_contents(input_path))
    # The library name is the output file name up to its first dot.
    lib_name = os.path.basename(output_path).partition('.')[0]
    head = '__all__ = [%s]\n\n# Don\'t look below, you will not understand this Python code :) I don\'t.\n\n' % repr(lib_name)
    tail = '\n\n# Add lib to the module scope\n%s = var.to_python()' % lib_name
    write_file_contents(output_path, head + py_code + tail)
def run_file(path_or_file, context=None):
    ''' Run the given path or file object as a JS program.

    context must be an EvalJs object; a new one is created when omitted.
    Returns (eval_value, context). Raises TypeError for any other context.
    '''
    if context is None:
        context = EvalJs()
    elif not isinstance(context, EvalJs):
        raise TypeError('context must be the instance of EvalJs')
    source = get_file_contents(path_or_file)
    return context.eval(source), context
def eval_js(js):
    """Behave like JavaScript's eval: translate js to Python, execute it in
    a fresh EvalJs context and return the resulting Python object.

    js is JavaScript source code.

    EXAMPLE:
    >>> import js2py
    >>> add = js2py.eval_js('function add(a, b) {return a + b}')
    >>> add(1, 2) + 3
    6
    >>> add('1', 2, 3)
    u'12'
    >>> add.constructor
    function Function() { [python code] }

    NOTE: Js Number, String, Boolean and other base types are returned as the
    matching Python BUILTIN type. Js functions and objects are returned as a
    Python wrapper that behaves like a normal Python object. Use the to_dict
    method if you really want a Python dict.
    """
    return EvalJs().eval(js)
class EvalJs(object):
    """This class supports continuous execution of javascript under same context.

    >>> js = EvalJs()
    >>> js.execute('var a = 10;function f(x) {return x*x};')
    >>> js.f(9)
    81
    >>> js.a
    10

    context is a python dict or object that contains python variables that should be available to JavaScript
    For example:
    >>> js = EvalJs({'a': 30})
    >>> js.execute('var x = a')
    >>> js.x
    30

    You can run interactive javascript console with console method!"""

    def __init__(self, context=None):
        """Create the execution namespace and seed it with *context*.

        context may be omitted, a dict, or any object with a ``__dict__``.
        Raises TypeError for anything else.
        """
        # Attribute assignment is redirected to the JS scope by __setattr__,
        # so internal state is written straight into __dict__.
        self.__dict__['_context'] = {}
        exec(DEFAULT_HEADER, self._context)
        self.__dict__['_var'] = self._context['var'].to_python()
        if context is None:
            context = {}
        elif not isinstance(context, dict):
            try:
                context = context.__dict__
            except Exception:
                raise TypeError('context has to be either a dict or have __dict__ attr')
        for k, v in six.iteritems(context):
            setattr(self._var, k, v)

    def execute(self, js=None, use_compilation_plan=False):
        """executes javascript js in current context

        During initial execute() the converted js is cached for re-use. That means next time you
        run the same javascript snippet you save many instructions needed to parse and convert the
        js code to python code.

        This cache causes minor overhead (a cache dicts is updated) but the Js=>Py conversion process
        is typically expensive compared to actually running the generated python code.

        Note that the cache is just a dict, it has no expiration or cleanup so when running this
        in automated situations with vast amounts of snippets it might increase memory usage.
        """
        cache = self.__dict__.setdefault('cache', {})
        # Compiled code is keyed by the MD5 digest of the JS source.
        hashkey = hashlib.md5(js.encode('utf-8')).digest()
        try:
            compiled = cache[hashkey]
        except KeyError:
            code = translate_js(js, '', use_compilation_plan=use_compilation_plan)
            compiled = cache[hashkey] = compile(code, '<EvalJS snippet>', 'exec')
        exec(compiled, self._context)

    def eval(self, expression, use_compilation_plan=False):
        """evaluates expression in current context and returns its value"""
        code = 'PyJsEvalResult = eval(%s)'%json.dumps(expression)
        self.execute(code, use_compilation_plan=use_compilation_plan)
        return self['PyJsEvalResult']

    def execute_debug(self, js):
        """executes javascript js in current context

        as opposed to the (faster) self.execute method, you can use your regular debugger
        to set breakpoints and inspect the generated python code. The generated
        code is written to a file in the 'temp' folder for the duration of the run.
        """
        code = translate_js(js, '')
        # md5 needs bytes; text is encoded first so that non-ASCII source
        # (and Python 3 str) can be hashed.
        digest_input = code.encode('utf-8') if isinstance(code, six.text_type) else code
        # make sure you have a temp folder:
        filename = 'temp' + os.sep + '_' + hashlib.md5(digest_input).hexdigest() + '.py'
        try:
            with open(filename, mode='w') as f:
                f.write(code)
            # Compiling with the real file name keeps debugger source
            # lookup working without relying on the Python-2-only execfile.
            exec(compile(code, filename, 'exec'), self._context)
        finally:
            # Best-effort cleanup: a file that was never created must not
            # hide the error that prevented its creation.
            for leftover in (filename, filename + 'c'):
                try:
                    os.remove(leftover)
                except OSError:
                    pass

    def eval_debug(self, expression):
        """evaluates expression in current context and returns its value

        as opposed to the (faster) self.execute method, you can use your regular debugger
        to set breakpoints and inspect the generated python code
        """
        code = 'PyJsEvalResult = eval(%s)'%json.dumps(expression)
        self.execute_debug(code)
        return self['PyJsEvalResult']

    # Attribute and item access are both forwarded to the JS scope wrapper.
    def __getattr__(self, var):
        return getattr(self._var, var)

    def __getitem__(self, var):
        return getattr(self._var, var)

    def __setattr__(self, var, val):
        return setattr(self._var, var, val)

    def __setitem__(self, var, val):
        return setattr(self._var, var, val)

    def console(self):
        """starts to interact (starts interactive console) Something like code.InteractiveConsole"""
        while True:
            if six.PY2:
                code = raw_input('>>> ')
            else:
                code = input('>>>')
            try:
                print(self.eval(code))
            except KeyboardInterrupt:
                break
            except Exception as e:
                import traceback
                if DEBUG:
                    sys.stderr.write(traceback.format_exc())
                else:
                    sys.stderr.write('EXCEPTION: '+str(e)+'\n')
                time.sleep(0.01)
#print x
if __name__=='__main__':
#with open('C:\Users\Piotrek\Desktop\esprima.js', 'rb') as f:
# x = f.read()
e = EvalJs()
e.execute('square(x)')
#e.execute(x)
e.console()

View File

11
lib/js2py/host/console.py Normal file
View File

@ -0,0 +1,11 @@
from lib.js2py.base import *
# Host `console` object: a Js-wrapped function with an empty body that
# serves as the holder for the `log` property set below.
@Js
def console():
pass
# console.log: prints the first call argument only; any further arguments
# are not used.
@Js
def log():
print(arguments[0])
# Register log as the 'log' property of console.
console.put('log', log)

View File

View File

@ -0,0 +1,47 @@
from js2py.base import *
def _get_conts(idl):
def is_valid(c):
try:
exec(c)
return 1
except:
pass
return '\n'.join(filter(is_valid, (' '.join(e.strip(' ;').split()[-3:]) for e in idl.splitlines())))
default_attrs = {'writable':True, 'enumerable':True, 'configurable':True}
def compose_prototype(Class, attrs=default_attrs):
    """Instantiate *Class* and expose its methods as JS properties.

    Every member of *Class* that carries a ``__func__`` attribute is
    wrapped in a PyJsFunction and defined on the new instance under the
    same name. *attrs* supplies the property flags; it is copied for each
    property and never modified.

    Returns the populated instance.
    """
    prototype = Class()
    for name in dir(Class):
        member = getattr(Class, name)
        if hasattr(member, '__func__'):
            # Fresh descriptor per property. dict() copies without the
            # Python-2-only iteritems() and leaves the `attrs` parameter
            # itself untouched between iterations.
            descriptor = dict(attrs)
            descriptor['value'] = PyJsFunction(member.__func__, FunctionPrototype)
            prototype.define_own_property(name, descriptor)
    return prototype
# Error codes
INDEX_SIZE_ERR = 1
DOMSTRING_SIZE_ERR = 2
HIERARCHY_REQUEST_ERR = 3
WRONG_DOCUMENT_ERR = 4
INVALID_CHARACTER_ERR = 5
NO_DATA_ALLOWED_ERR = 6
NO_MODIFICATION_ALLOWED_ERR = 7
NOT_FOUND_ERR = 8
NOT_SUPPORTED_ERR = 9
INUSE_ATTRIBUTE_ERR = 10
INVALID_STATE_ERR = 11
SYNTAX_ERR = 12
INVALID_MODIFICATION_ERR = 13
NAMESPACE_ERR = 14
INVALID_ACCESS_ERR = 15
VALIDATION_ERR = 16
TYPE_MISMATCH_ERR = 17

View File

@ -0,0 +1,73 @@
from StringIO import StringIO
from constants import *
from bs4 import BeautifulSoup
from js2py.base import *
# Choose the BeautifulSoup backend at import time: when lxml can be imported
# parse() asks for the 'lxml' parser, otherwise parse() lets BeautifulSoup
# pick its own default.
try:
import lxml
def parse(source):
return BeautifulSoup(source, 'lxml')
# NOTE(review): the bare except also hides failures other than a missing
# lxml package.
except:
def parse(source):
return BeautifulSoup(source)
x = '''<table>
<tbody>
<tr>
<td>Shady Grove</td>
<td>Aeolian</td>
</tr>
<tr>
<td>Over the River, Charlie</td>
<td>Dorian</td>
</tr>
</tbody>
</table>'''
class DOM(PyJs):
    """Base class of the DOM host objects; starts with no own properties."""
    prototype = ObjectPrototype

    def __init__(self):
        self.own = {}

    def readonly(self, name, val):
        """Define *name* as a non-writable, non-enumerable, non-configurable
        property holding ``Js(val)``."""
        descriptor = {
            'value': Js(val),
            'writable': False,
            'enumerable': False,
            'configurable': False,
        }
        self.define_own_property(name, descriptor)
# DOMStringList
class DOMStringListPrototype(DOM):
    """Methods shared by DOMStringList objects.

    The methods take no ``self``; they read the receiver through ``this``.
    """
    Class = 'DOMStringListPrototype'

    def contains(element):
        """Tell whether the string value of *element* is in the list."""
        needle = element.to_string().value
        return needle in this._string_list

    def item(index):
        """Return the entry at *index*, or ``index.null`` when out of range."""
        if 0 <= index.to_int() < len(this._string_list):
            return this._string_list[index.to_int()]
        return index.null
# DOMStringList host object backed by the Python sequence `_string_list`.
class DOMStringList(DOM):
Class = 'DOMStringList'
# The prototype is built once, at class creation, from the methods of
# DOMStringListPrototype.
prototype = compose_prototype(DOMStringListPrototype)
# _string_list is stored as given; contains() and item() of the prototype
# class read it.
def __init__(self, _string_list):
self.own = {}
self._string_list = _string_list
# NameList
class NameListPrototype(DOM):

50
lib/js2py/host/jseval.py Normal file
View File

@ -0,0 +1,50 @@
from lib.js2py.base import *
import inspect
try:
from lib.js2py.translators.translator import translate_js
except:
pass
@Js
def Eval(code):
    """Implement JavaScript ``eval`` for translated code.

    The string value of *code* is translated to Python and executed with
    the caller's ``var`` scope installed as this module's global ``var``.
    When the last top-level statement of the translation can be rewritten
    as an assignment, its value is returned; otherwise nothing is returned.

    Raises a JS SyntaxError (through MakeError) when translation fails.
    """
    # The scope is taken from the `var` local three frames up the stack;
    # this lookup must stay directly in this function body.
    local_scope = inspect.stack()[3][0].f_locals['var']
    # todo fix scope - we have to behave differently if called through variable other than eval
    # we will use local scope (default)
    globals()['var'] = local_scope
    try:
        py_code = translate_js(code.to_string().value, '')
    except SyntaxError as syn_err:
        raise MakeError('SyntaxError', str(syn_err))
    lines = py_code.split('\n')
    # a simple way to return value from eval. Will not work in complex cases.
    has_return = False
    # Walk backwards to the last meaningful line.
    for pos in reversed(range(len(lines))):
        line = lines[pos]
        if not line.strip():
            continue
        if line.startswith(' '):
            # Last statement is nested inside a block: no value to capture.
            break
        if line.strip() == 'pass':
            continue
        if line.startswith(('return ', 'continue ', 'break', 'raise ')):
            break
        cand = 'EVAL_RESULT = (%s)\n' % line
        try:
            compile(cand, '', 'exec')
        except SyntaxError:
            break
        # Only report a result once the rewritten line is known to compile;
        # otherwise a stale (or missing) EVAL_RESULT would be returned.
        has_return = True
        lines[pos] = cand
        py_code = '\n'.join(lines)
        break
    executor(py_code)
    if has_return:
        return globals()['EVAL_RESULT']
def executor(code):
    """Execute *code* with this module's globals as its namespace."""
    namespace = globals()
    exec(code, namespace)

View File

@ -0,0 +1,85 @@
from lib.js2py.base import *
RADIX_CHARS = {'1': 1, '0': 0, '3': 3, '2': 2, '5': 5, '4': 4, '7': 7, '6': 6, '9': 9, '8': 8, 'a': 10, 'c': 12,
'b': 11, 'e': 14, 'd': 13, 'g': 16, 'f': 15, 'i': 18, 'h': 17, 'k': 20, 'j': 19, 'm': 22, 'l': 21,
'o': 24, 'n': 23, 'q': 26, 'p': 25, 's': 28, 'r': 27, 'u': 30, 't': 29, 'w': 32, 'v': 31, 'y': 34,
'x': 33, 'z': 35, 'A': 10, 'C': 12, 'B': 11, 'E': 14, 'D': 13, 'G': 16, 'F': 15, 'I': 18, 'H': 17,
'K': 20, 'J': 19, 'M': 22, 'L': 21, 'O': 24, 'N': 23, 'Q': 26, 'P': 25, 'S': 28, 'R': 27, 'U': 30,
'T': 29, 'W': 32, 'V': 31, 'Y': 34, 'X': 33, 'Z': 35}
@Js
def parseInt (string , radix):
    """Implement the global ``parseInt(string, radix)``.

    Leading whitespace and one optional sign are skipped. A radix of 0
    means 10; a radix outside 2..36 yields NaN. With radix 0 or 16 a
    leading '0x'/'0X' switches to base 16. Digits are consumed up to the
    first character that is not valid in the radix; NaN is returned when
    no digit was consumed.
    """
    text = string.to_string().value.lstrip()
    sign = 1
    if text and text[0] in {'+', '-'}:
        if text[0] == '-':
            sign = -1
        text = text[1:]
    r = radix.to_int32()
    if r:
        if r < 2 or r > 36:
            return NaN
        # An explicit radix other than 16 disables hex prefix detection.
        strip_prefix = (r == 16)
    else:
        r = 10
        strip_prefix = True
    if strip_prefix and len(text) >= 2 and text[:2] in {'0x', '0X'}:
        text = text[2:]
        r = 16
    digits_used = 0
    value = 0
    for ch in text:
        digit = RADIX_CHARS.get(ch)
        if digit is None or digit >= r:
            break
        value = digit + value*r
        digits_used += 1
    if not digits_used:
        return NaN
    return sign*value
@Js
def parseFloat(string):
    """Implement the global ``parseFloat(string)``.

    After stripping whitespace and one optional sign, ever longer prefixes
    are offered to ``float``. The longest prefix that converts wins; the
    search stops after more than four failures in a row. Returns NaN when
    no prefix converts.
    """
    text = string.to_string().value.strip()
    sign = 1
    if text and text[0] in {'+', '-'}:
        if text[0] == '-':
            sign = -1
        text = text[1:]
    best = None
    misses = 0
    for length in range(1, len(text) + 1):
        try:
            best = float(text[:length])
            misses = 0
        except:
            misses += 1
            if misses > 4:  # cant be a number anymore
                break
    if best is None:
        return NaN
    return sign*best
@Js
def isNaN(number):
    """Implement the global ``isNaN``: true when ToNumber(number) is NaN."""
    return true if number.to_number().is_nan() else false
@Js
def isFinite(number):
    """Implement the global ``isFinite``: false for NaN and the infinities."""
    num = number.to_number()
    finite = not (num.is_nan() or num.is_infinity())
    return true if finite else false
#todo URI handling!

View File

@ -0,0 +1 @@
__author__ = 'Piotrek'

View File

@ -0,0 +1,294 @@
from string import ascii_lowercase, digits
##################################
# Templates of the placeholder identifiers that replace literals;
# %d receives the running constant number.
StringName = u'PyJsConstantString%d_'
NumberName = u'PyJsConstantNumber%d_'
RegExpName = u'PyJsConstantRegExp%d_'
##################################
# Character classes used by the scanner below.
ALPHAS = set(ascii_lowercase+ ascii_lowercase.upper())
NUMS = set(digits)
# Note that this set contains the digits as well as the letters.
IDENTIFIER_START = ALPHAS.union(NUMS)
# Characters that may follow a backslash as a single-character escape.
ESCAPE_CHARS = {'n', '0', 'b', 'f', 'r', 't', 'v', '"', "'", '\\'}
OCTAL = {'0', '1', '2', '3', '4', '5', '6', '7'}
HEX = set('0123456789abcdefABCDEF')
# NOTE(review): this star import runs after the definitions above and may
# rebind any of them; IDENTIFIER_PART, LINE_TERMINATOR, CR, LF and WHITE used
# in this module presumably come from it - confirm against utils.
from utils import *
# A dot counts as part of an identifier in this module.
IDENTIFIER_PART = IDENTIFIER_PART.union({'.'})
def _is_cancelled(source, n):
cancelled = False
k = 0
while True:
k+=1
if source[n-k]!='\\':
break
cancelled = not cancelled
return cancelled
def _ensure_regexp(source, n): #<- this function has to be improved
'''returns True if regexp starts at n else returns False
checks whether it is not a division '''
markers = '(+~"\'=[%:?!*^|&-,;/\\'
k = 0
while True:
k+=1
if n-k<0:
return True
char = source[n-k]
if char in markers:
return True
if char!=' ' and char!='\n':
break
return False
def parse_num(source, start, charset):
    """Return the first index >= start whose character is not in charset.

    Returns len(source) when every remaining character belongs to charset,
    and start itself when start is already past the end."""
    for pos in range(start, len(source)):
        if source[pos] not in charset:
            return pos
    return max(start, len(source))
def parse_exponent(source, start):
    """Return the index just past an optional exponent that begins at start.

    Without an 'e'/'E' at start the position is returned unchanged. Raises
    SyntaxError when the number runs into an identifier character or when
    the exponent has no digits."""
    marker = source[start]
    if marker not in {'e', 'E'}:
        if marker in IDENTIFIER_PART:
            raise SyntaxError('Invalid number literal!')
        return start
    pos = start + 1
    if source[pos] in {'-', '+'}:
        pos += 1
    # we need at least one dig after exponent
    first_digit = pos
    while source[pos] in NUMS:
        pos += 1
    if pos == first_digit or source[pos] in IDENTIFIER_PART:
        raise SyntaxError('Invalid number literal!')
    return pos
def remove_constants(source):
'''Replaces string, regexp and number literals in the source code with
placeholder identifiers and *removes comments*. Identifiers have the format:
PyJsConstantString<N>_ - for strings
PyJsConstantRegExp<N>_ - for regexps
PyJsConstantNumber<N>_ - for numbers
where <N> is a counter shared by all three kinds.
Returns a tuple (new_source, constants); constants is a dict that maps
each identifier to the literal text it replaced.
Single line and multiline comments are removed from the JavaScript source.
Pseudo comments (inside strings) are not treated as comments.
For example in this line:
var x = "/*PSEUDO COMMENT*/ TEXT //ANOTHER PSEUDO COMMENT"
the whole quoted text is kept as one string constant'''
# One space is prepended and a newline appended before scanning; the scanner
# below looks at source[n-1] and source[n+1].
source=' '+source+'\n'
# Collected spans as (start, end, type): 0 string, 1 comment, 2 regexp,
# 3 number. Open spans are lists whose end is filled in on closing.
comments = []
# Scanner state: each inside_* flag is False or the currently open span.
inside_comment, single_comment = False, False
inside_single, inside_double = False, False
inside_regexp = False
regexp_class_count = 0
n = 0
while n < len(source):
char = source[n]
# An unescaped double quote opens or closes a double-quoted string.
if char=='"' and not (inside_comment or inside_single or inside_regexp):
if not _is_cancelled(source, n):
if inside_double:
inside_double[1] = n+1
comments.append(inside_double)
inside_double = False
else:
inside_double = [n, None, 0]
# Same handling for single-quoted strings.
elif char=="'" and not (inside_comment or inside_double or inside_regexp):
if not _is_cancelled(source, n):
if inside_single:
inside_single[1] = n+1
comments.append(inside_single)
inside_single = False
else:
inside_single = [n, None, 0]
# Inside a string only line terminators matter: an escaped one is skipped
# (CR LF counts as one), an unescaped one is an error.
elif (inside_single or inside_double):
if char in LINE_TERMINATOR:
if _is_cancelled(source, n):
if char==CR and source[n+1]==LF:
n+=1
n+=1
continue
else:
raise SyntaxError('Invalid string literal. Line terminators must be escaped!')
else:
if inside_comment:
# A single line comment ends at the line terminator (which is kept);
# a multiline comment ends after the closing */.
if single_comment:
if char in LINE_TERMINATOR:
inside_comment[1] = n
comments.append(inside_comment)
inside_comment = False
single_comment = False
else: # Multiline
if char=='/' and source[n-1]=='*':
inside_comment[1] = n+1
comments.append(inside_comment)
inside_comment = False
elif inside_regexp:
# Regexp body: escaped characters are skipped, [...] classes are counted so
# that a slash inside a class does not end the literal.
if not quiting_regexp:
if char in LINE_TERMINATOR:
raise SyntaxError('Invalid regexp literal. Line terminators cant appear!')
if _is_cancelled(source, n):
n+=1
continue
if char=='[':
regexp_class_count += 1
elif char==']':
regexp_class_count = max(regexp_class_count-1, 0)
elif char=='/' and not regexp_class_count:
quiting_regexp = True
else:
# After the closing slash the literal extends over the following
# IDENTIFIER_START characters (presumably the regexp flags) and ends at
# the first other character.
if char not in IDENTIFIER_START:
inside_regexp[1] = n
comments.append(inside_regexp)
inside_regexp = False
# '//' opens a single line comment, '/*' a multiline comment; both spans
# start at the first slash.
elif char=='/' and source[n-1]=='/':
single_comment = True
inside_comment = [n-1, None, 1]
elif char=='*' and source[n-1]=='/':
inside_comment = [n-1, None, 1]
# Any other slash is either a division or the start of a regexp literal.
elif char=='/' and source[n+1] not in ('/', '*'):
if not _ensure_regexp(source, n): #<- improve this one
n+=1
continue #Probably just a division
quiting_regexp = False
inside_regexp = [n, None, 2]
elif not (inside_comment or inside_regexp):
# Number literal: a digit that does not continue an identifier, or a dot.
if (char in NUMS and source[n-1] not in IDENTIFIER_PART) or char=='.':
if char=='.':
k = parse_num(source,n+1, NUMS)
if k==n+1: # just a stupid dot...
n+=1
continue
k = parse_exponent(source, k)
elif char=='0' and source[n+1] in {'x', 'X'}: #Hex number probably
k = parse_num(source, n+2, HEX)
if k==n+2 or source[k] in IDENTIFIER_PART:
raise SyntaxError('Invalid hex literal!')
else: #int or exp or flot or exp flot
k = parse_num(source, n+1, NUMS)
if source[k]=='.':
k = parse_num(source, k+1, NUMS)
k = parse_exponent(source, k)
# Number spans are complete immediately; scanning resumes right after them.
comments.append((n, k, 3))
n = k
continue
n+=1
# Second pass: rebuild the source, dropping comments and replacing every
# other span with its placeholder identifier.
res = ''
start = 0
count = 0
constants = {}
for end, next_start, typ in comments:
res += source[start:end]
start = next_start
if typ==0: # String
name = StringName
elif typ==1: # comment
continue
elif typ==2: # regexp
name = RegExpName
elif typ==3: # number
name = NumberName
else:
raise RuntimeError()
# The placeholder is padded with spaces on both sides.
res += ' '+name % count+' '
constants[name % count] = source[end: next_start]
count += 1
res+=source[start:]
# remove this stupid white space
for e in WHITE:
res = res.replace(e, ' ')
# Normalise every line terminator to '\n'.
res = res.replace(CR+LF, '\n')
for e in LINE_TERMINATOR:
res = res.replace(e, '\n')
return res.strip(), constants
def recover_constants(py_source, replacements): #now has n^2 complexity. improve to n
    '''Substitute the placeholder identifiers in py_source with Js constructors.

    replacements maps placeholder identifiers to the original JS literal.
    RegExp placeholders become JsRegExp(repr(literal)), string placeholders
    become Js(u<unified literal>) and everything else becomes Js(<literal>).
    Returns the rewritten source.'''
    for identifier, value in replacements.iteritems():
        if identifier.startswith('PyJsConstantRegExp'):
            substitute = 'JsRegExp(%s)'%repr(value)
        elif identifier.startswith('PyJsConstantString'):
            substitute = 'Js(u%s)' % unify_string_literals(value)
        else:
            substitute = 'Js(%s)'%value
        py_source = py_source.replace(identifier, substitute)
    return py_source
def unify_string_literals(js_string):
    """Rewrite a JavaScript string literal so that Python reads it the same way.

    For example the literal '\\d' is read by JavaScript as 'd' (the
    backslash is ignored) while Python would keep the backslash. Every
    backslash sequence is therefore passed through do_escape; all other
    characters are copied unchanged."""
    pieces = []
    pos = 0
    end = len(js_string)
    while pos < end:
        char = js_string[pos]
        if char == '\\':
            escaped, pos = do_escape(js_string, pos)
            pieces.append(escaped)
        else:
            pieces.append(char)
            pos += 1
    return ''.join(pieces)
def unify_regexp_literals(js):
    """Placeholder without an implementation; always returns None."""
    return None
def do_escape(source, n):
    """Translate the JavaScript escape sequence whose backslash is at index n.

    Returns a pair (python_text, next_index): the text to emit into the
    Python literal and the index of the first character after the sequence.
    Raises SyntaxError for an incomplete \\x or \\u escape.

    Its actually quite complicated to cover every case :)
    http://www.javascriptkit.com/jsref/escapesequence.shtml"""
    if not n+1 < len(source):
        # Lone trailing backslash (not possible here but can be possible in
        # the general case): drop it. A pair is returned so that callers
        # that unpack the result keep working.
        return '', n+1
    follower = source[n+1]
    if follower in LINE_TERMINATOR:
        # Escaped line break; CR LF is consumed as one unit.
        if follower==CR and n+2<len(source) and source[n+2]==LF:
            return source[n:n+3], n+3
        return source[n:n+2], n+2
    if follower in ESCAPE_CHARS:
        # Same spelling in both languages.
        return source[n:n+2], n+2
    if follower in {'x', 'u'}:
        length = 4 if follower=='u' else 2
        n += 2
        end = parse_num(source, n, HEX)
        if end-n < length:
            raise SyntaxError('Invalid escape sequence!')
        return source[n-2:n+length], n+length
    if follower in OCTAL:
        n += 1
        end = parse_num(source, n, OCTAL)
        end = min(end, n+3) # cant be longer than 3
        # now the max allowed is 377 ( in octal) and 255 in decimal
        max_num = 255
        num = 0
        len_parsed = 0
        for e in source[n:end]:
            cand = 8*num + int(e)
            if cand > max_num:
                break
            num = cand
            len_parsed += 1
        # we have to return in a different form because python may want to parse more...
        # for example '\777' will be parsed by python as a whole while js will use only \77
        # Python requires exactly two hex digits after \x, so values below
        # 0x10 are zero-padded.
        return '\\x%02x' % num, n + len_parsed
    # Any other character: the backslash is simply dropped.
    return source[n+1], n+2
#####TEST######
if __name__=='__main__':
test = ('''
''')
t, d = remove_constants(test)
print t, d

View File

@ -0,0 +1,79 @@
"""
exp_translate routine:
It takes a single line of JS code and returns a SINGLE line of Python code.
Note var is not present here because it was removed in previous stages. Also remove this useless void keyword
In case of parsing errors it must return the position of the error.
1. Convert all assignment operations to put operations, this may be hard :( DONE, wasn't that bad
2. Convert all gets and calls to get and callprop.
3. Convert unary operators like typeof, new, !, delete, ++, --
Delete can be handled by replacing last get method with delete.
4. Convert remaining operators that are not handled by python:
&&, || <= these should be easy simply replace && by and and || by or
=== and !==
comma operator , in, instanceof and finally :?
NOTES:
Strings and other literals are not present so each = means assignment
"""
from utils import *
from jsparser import *
# Entry point of the expression translation; as written it only runs the
# assignment step and does not return the result.
# NOTE(review): looks unfinished - `ass` is computed and then discarded.
def exps_translator(js):
#Check () {} and [] nums
ass = assignment_translator(js)
# Step 1
def assignment_translator(js):
    """Translate the assignments in a comma separated expression list.

    The text is split on commas; every part that contains '=' is passed
    through bass_translator, the other parts are kept as they are. The
    parts are joined with commas again."""
    return ','.join(bass_translator(part) if '=' in part else part
                    for part in js.split(','))
def bass_translator(s):
    """Translate a (possibly chained) JS assignment into put() calls.

    Bracketed sub-expressions are translated recursively first. The
    remaining text is split on '=' and folded from right to left: plain
    names become var.put(...), property targets become <object>.put(...).
    Text without '=' is returned unchanged (apart from the recursive
    bracket pass). Raises SyntaxError for an invalid assignment target."""
    # I hope that I will not have to fix any bugs here because it will be terrible
    if '(' in s or '[' in s:
        pieces = []
        for chunk in bracket_split(s, ['()','[]'], strip=False):
            opener = chunk[0]
            if opener == '(':
                pieces.append('(' + bass_translator(chunk[1:-1]) + ')')
            elif opener == '[':
                pieces.append('[' + bass_translator(chunk[1:-1]) + ']')
            else:
                pieces.append(chunk)
        s = ''.join(pieces)
    if '=' not in s:
        return s
    parts = s.split('=')
    # The right-most part is the assigned value; targets are applied from
    # the inside out.
    res = parts[-1]
    for target in reversed(parts[:-1]):
        op = ''
        if target[-1] in OP_METHODS: #increment assign like +=
            op = ', "' + target[-1] + '"'
            target = target[:-1]
        cand = target.strip('() ') # (a) = 40 is valid so we need to transform '(a) ' to 'a'
        if not is_property_accessor(cand): # it is not a property assignment
            if not is_lval(cand) or is_internal(cand):
                raise SyntaxError('Invalid left-hand side in assignment')
            res = 'var.put(%s, %s%s)' % (repr(cand), res, op)
        elif cand[-1] == ']': # property assignment via []
            segments = list(bracket_split(cand, ['[]'], strip=False))
            meth = ''.join(segments[:-1]).strip()
            #this does not have to be a string so dont remove
            #() because it can be a call
            prop = segments[-1][1:-1].strip()
            res = '%s.put(%s, %s%s)' % (meth, prop, res, op)
        else: # Prop set via '.'
            dot = cand.rfind('.')
            meth = cand[:dot].strip()
            prop = cand[dot+1:].strip('() ')
            if not is_lval(prop):
                raise SyntaxError('Invalid left-hand side in assignment')
            res = '%s.put(%s, %s%s)' % (meth, repr(prop), res, op)
    return res
if __name__=='__main__':
print bass_translator('3.ddsd = 40')

View File

@ -0,0 +1,456 @@
"""This module translates JS flow into PY flow.
Translates:
IF ELSE
DO WHILE
WHILE
FOR 123
FOR iter
CONTINUE, BREAK, RETURN, LABEL, THROW, TRY, SWITCH
"""
from utils import *
from jsparser import *
from nodevisitor import exp_translator
import random
# Names collected by do_var / do_for while translating var declarations.
TO_REGISTER = []
# Name templates of the exception classes that emulate labelled
# continue / break; %s receives the hex-encoded label.
CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s'
BREAK_LABEL = 'JS_BREAK_LABEL_%s'
# Code templates for catch clauses. NAME, HOLDER and BLOCK are placeholder
# words - presumably substituted by the code that uses the templates; that
# code is not in this part of the file.
# PREPARE saves the current own value of NAME and binds the caught exception.
PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n'''
# RESTORE puts the saved value back, or deletes the binding if none existed.
RESTORE = '''if HOLDER is not None:\n var.own[NAME] = HOLDER\nelse:\n del var.own[NAME]\ndel HOLDER\n'''
# PREPARE, then try: BLOCK, with the indented RESTORE as the finally clause.
TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE))
def get_continue_label(label):
    """Return the continue-exception class name for the JS label *label*."""
    encoded = label.encode('hex')
    return CONTINUE_LABEL % encoded
def get_break_label(label):
    """Return the break-exception class name for the JS label *label*."""
    encoded = label.encode('hex')
    return BREAK_LABEL % encoded
def pass_until(source, start, tokens=(';',)):
    """Return the index just past the first character from *tokens* at or
    after *start*.

    When no such character exists the result is one past the end of
    *source* (or start + 1 if start is already beyond the end)."""
    for pos in range(start, len(source)):
        if source[pos] in tokens:
            return pos + 1
    return max(start, len(source)) + 1
def do_bracket_exp(source, start, throw=True):
    """Translate the parenthesised expression found at *start*.

    Returns (translated_expression, position). The position is the one
    reported by pass_bracket when the translation is non-empty, otherwise
    the original *start*. With throw=True a missing bracket or an empty
    condition raises SyntaxError."""
    bracket, after = pass_bracket(source, start, '()')
    if throw and not bracket:
        raise SyntaxError('Missing bracket expression')
    translated = exp_translator(bracket[1:-1])
    if not translated:
        if throw:
            raise SyntaxError('Empty bracket condition')
        return translated, start
    return translated, after
def do_if(source, start):
    """Translate an if statement (with optional else / else if).

    *start* points at the 'if' keyword. Returns (python_code, position).
    Raises SyntaxError when a branch has no statement."""
    # pass this if
    condition, pos = do_bracket_exp(source, start + 2, throw=True)
    statement, pos = do_statement(source, pos)
    if statement is None:
        raise SyntaxError('Invalid if statement')
    translated = 'if %s:\n' % condition + indent(statement)
    else_pos = except_keyword(source, pos, 'else')
    if not else_pos:
        return translated, pos
    # 'else if' is emitted as elif by prefixing the translated nested if.
    is_elseif = bool(except_keyword(source, else_pos, 'if'))
    else_code, pos = do_statement(source, else_pos)
    if else_code is None:
        raise SyntaxError('Invalid if statement)')
    if is_elseif:
        translated += 'el' + else_code
    else:
        translated += 'else:\n' + indent(else_code)
    return translated, pos
def do_statement(source, start):
    """Translate the statement that begins at or after *start*.

    Leading white space is skipped. Returns (python_code, position), or
    (None, position) at the end of the source and in front of 'case' /
    'default'. The other do_ functions raise instead of returning None."""
    start = pass_white(source, start)
    # start is the fist position after initial start that is not a white space or \n
    if start >= len(source): #if finished parsing return None
        return None, start
    rest = source[start:]
    if any(startswith_keyword(rest, keyword) for keyword in {'case', 'default'}):
        return None, start
    # check for statements that are uniquely defined by their keywords
    for key, meth in KEYWORD_METHODS.iteritems():
        if not rest.startswith(key):
            continue
        # has to startwith this keyword and the next letter after keyword must be either EOF or not in IDENTIFIER_PART
        if len(key) == len(rest) or rest[len(key)] not in IDENTIFIER_PART:
            return meth(source, start)
    if rest[0] == '{': #Block
        return do_block(source, start)
    # Now only label and expression left
    cand = parse_identifier(source, start, False)
    if cand is not None: # it can mean that its a label
        _label, cand_start = cand
        cand_start = pass_white(source, cand_start)
        if source[cand_start] == ':':
            return do_label(source, start)
    return do_expression(source, start)
def do_while(source, start):
    """Translate a while loop; *start* points at the 'while' keyword.

    Returns (python_code, position). Raises SyntaxError when the loop has
    no body statement."""
    # pass while
    condition, pos = do_bracket_exp(source, start + 5, throw=True)
    body, pos = do_statement(source, pos)
    if body is None:
        raise SyntaxError('Missing statement to execute in while loop!')
    return 'while %s:\n' % condition + indent(body), pos
def do_dowhile(source, start):
    """Translate a do-while loop; *start* points at the 'do' keyword.

    The loop is emitted as 'while 1:' with the body followed by a
    conditional break. Returns (python_code, position). Raises SyntaxError
    when the body or the while keyword is missing."""
    # pass do
    body, pos = do_statement(source, start + 2)
    if body is None:
        raise SyntaxError('Missing statement to execute in do while loop!')
    pos = except_keyword(source, pos, 'while')
    if not pos:
        raise SyntaxError('Missing while keyword in do-while loop')
    condition, pos = do_bracket_exp(source, pos, throw=True)
    body += 'if not %s:\n' % condition + indent('break\n')
    return 'while 1:\n' + indent(body), pos
def do_block(source, start):
    """Translate a {...} block into the concatenated code of its statements.

    Returns (python_code, position after the block). Raises SyntaxError when
    the block is missing or part of its content cannot be parsed."""
    block, start = pass_bracket(source, start, '{}')
    if block is None:
        raise SyntaxError('Missing block ( {code} )')
    # Content without the braces, terminated by a semicolon.
    body = block[1:-1] + ';'
    translated = []
    pos = 0
    while pos < len(body):
        statement, pos = do_statement(body, pos)
        if statement is None:
            break
        translated.append(statement)
    pos = pass_white(body, pos)
    if pos < len(body):
        raise SyntaxError('Block has more code that could not be parsed:\n' + body[pos:])
    return ''.join(translated), start
def do_empty(source, start):
    """Translate an empty statement into 'pass' and step over one character."""
    next_pos = start + 1
    return 'pass\n', next_pos
# Translates an expression statement starting at `start`.
# Returns (python_code, position after the statement).
def do_expression(source, start):
start = pass_white(source, start)
# First candidate: everything up to and including the next semicolon.
end = pass_until(source, start, tokens=(';',))
if end==start+1: #empty statement
return 'pass\n', end
# AUTOMATIC SEMICOLON INSERTION FOLLOWS
# Without ASI this function would end with: return exp_translator(source[start:end].rstrip(';'))+'\n', end
# ASI makes things a bit more complicated:
# we will try to parse as much as possible, inserting ; in place of last new line in case of error
# rev holds the reversed first candidate once a translation has failed;
# rpos is the scan position inside rev.
rev = False
rpos = 0
while True:
try:
code = source[start:end].rstrip(';')
cand = exp_translator(code)+'\n', end
# The translation is accepted only if it compiles as Python.
just_to_test = compile(cand[0], '', 'exec')
return cand
except Exception as e:
if not rev:
rev = source[start:end][::-1]
lpos = rpos
# Search the reversed text for the next line terminator, i.e. walk backwards
# through the candidate one line break at a time.
while True:
rpos = pass_until(rev, rpos, LINE_TERMINATOR)
# No line break left to cut at: give up.
# NOTE(review): this bare raise presumably re-raises the translation
# error caught above - confirm, the original nesting is not visible here.
if rpos>=len(rev):
raise
# Stop at the first cut whose trailing segment contains something other than
# SPACE characters.
if filter(lambda x: x not in SPACE, rev[lpos:rpos]):
break
# Shorten the candidate so that it ends at the chosen line break, then retry.
end = start + len(rev) - rpos + 1
def do_var(source, start):
    """Translate a var statement; *start* points at the 'var' keyword.

    Every declared name is appended to TO_REGISTER. Declarations with an
    initializer are translated as expressions; without any initializer the
    result is 'pass'. Returns (python_code, position). Raises SyntaxError
    when a name is followed by something other than '='."""
    #todo auto ; insertion
    begin = start + 3 #pass var
    end = pass_until(source, begin, tokens=(';',))
    # list of defined vars with optional initializer
    declarations = argsplit(source[begin:end-1])
    translated = []
    for declaration in declarations:
        name, name_end = parse_identifier(declaration, 0, True)
        TO_REGISTER.append(name)
        name_end = pass_white(declaration, name_end)
        if name_end >= len(declaration):
            continue
        # we have something more to parse... It has to start with =
        if declaration[name_end] != '=':
            raise SyntaxError('Unexpected initializer in var statement. Expected "=", got "%s"'%declaration[name_end])
        translated.append(exp_translator(declaration) + '\n')
    code = ''.join(translated)
    if not code.strip():
        code = 'pass\n'
    return code, end
def do_label(source, start):
    """Translate a labelled statement.

    The statement is wrapped in try/except blocks that catch per-label
    exception classes, which emulate labelled break and (for loops)
    labelled continue. The class definitions are emitted in front of the
    statement. Returns (python_code, position). Raises SyntaxError when no
    statement follows the label."""
    label, end = parse_identifier(source, start)
    end = pass_white(source, end)
    #now source[end] must be :
    assert source[end]==':'
    end += 1
    inside, end = do_statement(source, end)
    if inside is None:
        raise SyntaxError('Missing statement after label')
    defs = ''
    if inside.startswith(('while ', 'for ', '#for')):
        # we have to add contine label as well...
        # 3 or 1 since #for loop type has more lines before real for.
        header_lines = 3 if inside.startswith('#for') else 1
        cont_label = get_continue_label(label)
        code_lines = inside.split('\n')
        # Wrap the loop body so that a labelled continue ends the current
        # iteration only.
        loop_body = 'try:\n' + '\n'.join(code_lines[header_lines:])
        loop_body += 'except %s:\n pass\n'%cont_label
        inside = '\n'.join(code_lines[:header_lines]) + '\n' + indent(loop_body)
        defs += 'class %s(Exception): pass\n'%cont_label
    break_label = get_break_label(label)
    inside = 'try:\n%sexcept %s:\n pass\n'% (indent(inside), break_label)
    defs += 'class %s(Exception): pass\n'%break_label
    return defs + inside, end
def do_for(source, start):
    """Translate a JS ``for`` statement that begins at ``start``.

    Handles both the classic ``for (init; cond; step)`` form (emitted as a
    '#for JS loop' comment, the init code and a ``while`` loop) and the
    ``for (x in y)`` form (emitted as a Python ``for`` over the translated
    expression, storing each item with var.put).
    Returns (python_code, position_after_statement).
    Raises SyntaxError for a missing '()' group, a missing body, a header
    that does not have exactly three ';'-separated parts, or a malformed
    ``x in y`` header.
    """
    start += 3  # skip the 'for' keyword
    bra, start = pass_bracket(source, start, '()')
    if bra is None:
        # without this guard the None position would be handed to do_statement
        raise SyntaxError('Missing () after for statement')
    inside, start = do_statement(source, start)
    if inside is None:
        raise SyntaxError('Missing statement after for')
    bra = bra[1:-1]
    if ';' in bra:
        init = argsplit(bra, ';')
        if len(init) != 3:
            raise SyntaxError('Invalid for statement')
        args = []
        for i, item in enumerate(init):
            end = pass_white(item, 0)
            if end == len(item):
                # empty part: an empty condition (i == 1) means "loop forever"
                args.append('' if i != 1 else '1')
                continue
            if not i and except_keyword(item, end, 'var') is not None:
                # var statement in the init part
                args.append(do_var(item, end)[0])
                continue
            args.append(do_expression(item, end)[0])
        return '#for JS loop\n%swhile %s:\n%s%s\n' % (args[0], args[1].strip(), indent(inside), indent(args[2])), start
    # iteration form: for ([var] name in expression)
    end = pass_white(bra, 0)
    register = False
    if bra[end:].startswith('var '):
        end += 3
        end = pass_white(bra, end)
        register = True
    name, end = parse_identifier(bra, end)
    if register:
        TO_REGISTER.append(name)
    end = pass_white(bra, end)
    after_in = end + 2
    # 'in' must be a whole word and must be followed by something to iterate
    if bra[end:after_in] != 'in' or after_in >= len(bra) or bra[after_in] in IDENTIFIER_PART:
        raise SyntaxError('Invalid "for x in y" statement')
    exp = exp_translator(bra[after_in:])
    res = 'for temp in %s:\n' % exp
    res += indent('var.put(%s, temp)\n' % name.__repr__()) + indent(inside)
    return res, start
# todo - IMPORTANT
def do_continue(source, start, name='continue'):
    """Translate a ``continue`` statement (or ``break`` when name='break').

    An unlabelled statement becomes the plain Python keyword. A labelled one
    becomes a ``raise`` of the exception named by get_continue_label or
    get_break_label. Returns (python_code, position_after_statement).
    Raises SyntaxError when the label is followed by something other than ';'.
    """
    pos = pass_white(source, start + len(name))  # skip the keyword itself
    if pos < len(source) and source[pos] == ';':
        return '%s\n' % name, pos + 1
    # labeled statement or error
    label, pos = parse_identifier(source, pos)
    pos = pass_white(source, pos)
    if pos < len(source) and source[pos] != ';':
        raise SyntaxError('Missing ; after label name in %s statement'%name)
    if name == 'continue':
        exception_name = get_continue_label(label)
    else:
        exception_name = get_break_label(label)
    return 'raise %s("%s")\n' % (exception_name, name), pos + 1
def do_break(source, start):
    """Translate a ``break`` statement by delegating to do_continue with name='break'."""
    return do_continue(source, start, name='break')
def do_return(source, start):
    """Translate a JS ``return`` statement that begins at ``start``.

    The returned expression runs up to the next ';' or, when there is none,
    to the end of ``source``. An empty expression returns the JS undefined
    value. Returns (python_code, position_after_statement).
    """
    start += 6  # skip the 'return' keyword
    # find() gives -1 when there is no ';', so after +1 "not found" is 0
    end = source.find(';', start) + 1
    if not end:
        end = len(source)
    trans = exp_translator(source[start:end].rstrip(';'))
    return 'return %s\n' % (trans if trans else "var.get('undefined')"), end
# todo later?- Also important
def do_throw(source, start):
    """Translate a JS ``throw`` statement that begins at ``start``.

    The thrown expression runs up to the next ';' or to the end of
    ``source``. Returns (python_code, position_after_statement).
    Raises SyntaxError when there is nothing to throw.
    """
    begin = start + 5  # skip the 'throw' keyword
    semicolon = source.find(';', begin)
    end = len(source) if semicolon == -1 else semicolon + 1
    thrown = exp_translator(source[begin:end].rstrip(';'))
    if not thrown:
        raise SyntaxError('Invalid throw statement: nothing to throw')
    return 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n' % thrown, end
def do_try(source, start):
    """Translate a JS try / catch / finally statement that begins at ``start``.

    The catch body is filled into the TRY_CATCH template together with the
    quoted catch identifier and a generated holder name.
    Returns (python_code, position_after_statement).
    Raises SyntaxError when the catch parentheses hold more than a single
    identifier, or when neither catch nor finally follows the try block.
    """
    body, start = do_block(source, start + 3)  # +3 skips the 'try' keyword
    result = 'try:\n%s' % indent(body)
    catch = except_keyword(source, start, 'catch')
    if catch:
        bra, catch = pass_bracket(source, catch, '()')
        bra = bra[1:-1]
        identifier, bra_end = parse_identifier(bra, 0)
        # holder name: hex of the identifier plus a random number
        holder = 'PyJsHolder_%s_%d'%(identifier.encode('hex'), random.randrange(1e8))
        quoted = identifier.__repr__()
        if pass_white(bra, bra_end) < len(bra):
            raise SyntaxError('Invalid content of catch statement')
        result += 'except PyJsException as PyJsTempException:\n'
        handler, catch = do_block(source, catch)
        # fill in the except (catch) block; the template takes the holder name,
        # the quoted identifier and the indented handler body
        filled = TRY_CATCH.replace('HOLDER', holder).replace('NAME', quoted).replace('BLOCK', indent(handler))
        result += indent(filled)
    start = max(catch, start)
    final = except_keyword(source, start, 'finally')
    if not (final or catch):
        raise SyntaxError('Try statement has to be followed by catch or finally')
    if final:
        # translate finally statement
        cleanup, start = do_block(source, final)
        return result + 'finally:\n%s' % indent(cleanup), start
    return result, start
def do_debugger(source, start):
    """Translate a JS ``debugger`` statement into a Python ``pass``.

    Returns ('pass\\n', end), where end is just past the optional ';'.
    """
    end = pass_white(source, start + 8)  # +8 skips the 'debugger' keyword
    has_semicolon = end < len(source) and source[end] == ';'
    if has_semicolon:
        end += 1
    return 'pass\n', end #ignore errors...
# todo automatic ; insertion. fuck this crappy feature
# Least important
# Translates a JS switch statement starting at `start` into a Python `while 1:` block.
# The generated code keeps a SWITCHED flag (set once a case has matched, so later cases
# fall through) and a CONDITION value compared against each case with PyJsStrictEq.
# Returns (python_code, position_after_statement).
# Raises SyntaxError for a missing/empty () group, a missing {} block or a case without ':'.
def do_switch(source, start):
start += 6 # pass switch
code = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n')
# parse value of check
val, start = pass_bracket(source, start, '()')
if val is None:
raise SyntaxError('Missing () after switch statement')
if not val.strip():
raise SyntaxError('Missing content inside () after switch statement')
code = code % exp_translator(val)
bra, start = pass_bracket(source, start, '{}')
if bra is None:
raise SyntaxError('Missing block {} after switch statement')
bra_pos = 0
# strip the outer braces; a ';' is appended to the remaining body
bra = bra[1:-1] + ';'
while True:
# at each step the body must continue with either `case` or `default`, otherwise we are done
case = except_keyword(bra, bra_pos, 'case')
default = except_keyword(bra, bra_pos, 'default')
assert not (case and default)
if case or default: # this ?: expression makes things much harder....
case_code = None
if case:
case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n'
# we are looking for a first : with count 1. ? gives -1 and : gives +1.
count = 0
for pos, e in enumerate(bra[case:], case):
if e=='?':
count -= 1
elif e==':':
count += 1
if count==1:
break
else:
raise SyntaxError('Missing : token after case in switch statement')
case_condition = exp_translator(bra[case:pos]) # switch {case CONDITION: statements}
case_code = case_code % case_condition
case = pos + 1
if default:
case = except_token(bra, default, ':')
case_code = 'if True:\n'
# now parse case statements (things after ':' )
# do_statement is called repeatedly until it returns a falsy translation
cand, case = do_statement(bra, case)
while cand:
case_code += indent(cand)
cand, case = do_statement(bra, case)
case_code += indent('SWITCHED = True\n')
code += indent(case_code)
bra_pos = case
else:
break
# prevent infinite loop :)
code += indent('break\n')
return code, start
def do_pyimport(source, start):
    """Translate the ``pyimport <module>`` extension statement.

    Emits ``import <module> as PyImport_<module>`` followed by a
    ``var.pyimport`` call that registers the imported module.
    Returns (python_code, position_after_module_name).
    Raises SyntaxError when the generated import line does not compile.
    """
    start += 8  # skip the 'pyimport' keyword
    lib, start = parse_identifier(source, start)
    jlib = 'PyImport_%s' % lib
    code = 'import %s as %s\n' % (lib, jlib)
    # check whether it is a valid lib name by compiling the import line;
    # only compile-time failures are reported as a bad module name, so
    # KeyboardInterrupt / SystemExit are no longer swallowed
    try:
        compile(code, '', 'exec')
    except (SyntaxError, ValueError, TypeError):
        raise SyntaxError('Invalid Python module name (%s) in pyimport statement'%lib)
    # var.pyimport will handle module conversion to PyJs object
    code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib)
    return code, start
def do_with(source, start):
    """Placeholder for the JS ``with`` statement; always raises NotImplementedError."""
    message = 'With statement is not implemented yet :('
    raise NotImplementedError(message)
# Dispatch table: JS statement keyword -> the do_* function that translates that statement.
# The translators visible in this file take (source, start) and return (python_code, end_position).
KEYWORD_METHODS = {'do': do_dowhile,
'while': do_while,
'if': do_if,
'throw': do_throw,
'return': do_return,
'continue': do_continue,
'break': do_break,
'try': do_try,
'for': do_for,
'switch': do_switch,
'var': do_var,
'debugger': do_debugger, # this one does not do anything
'with': do_with,
'pyimport': do_pyimport
}
#Also not specific statements (harder to detect)
# Block {}
# Expression or Empty Statement
# Label
#
# It's easy to recognize a block, but harder to distinguish between a label and an expression statement
def translate_flow(source):
    """Translate flow-level JS source into Python source.

    Source cannot contain array, object, constant or function literals.
    Returns (python_source, variables_to_register). The list of variables is
    the module-level TO_REGISTER, which is reset on every call.
    """
    global TO_REGISTER
    TO_REGISTER = []
    # wrap the source in braces so it can be translated as a single block
    translated, _ = do_block('{%s}'%source, 0)
    return translated, TO_REGISTER
# Ad-hoc manual check, executed only when this module is run as a script.
# NOTE: uses the Python 2 print statement.
if __name__=='__main__':
#print do_dowhile('do {} while(k+f)', 0)[0]
#print 'e: "%s"'%do_expression('++(c?g:h); mj', 0)[0]
print translate_flow('a; yimport test')[0]

View File

@ -0,0 +1,84 @@
"""This module removes JS functions from source code"""
from jsparser import *
from utils import *
# Template for the placeholder that replaces an inline function in the source; %d is filled with INLINE_COUNT.
INLINE_NAME = 'PyJsLvalInline%d_'
# Running counter of inline functions replaced so far (see reset_inline_count / remove_functions).
INLINE_COUNT = 0
# Keywords after which a named `function` is treated as an expression rather than a statement.
PRE_EXP_STARTS = {'return', 'new', 'void', 'throw', 'typeof', 'in', 'instanceof'}
# Characters that may directly precede a function *statement*; any other preceding character
# makes remove_functions treat a named function as an expression.
PRE_ALLOWED = IDENTIFIER_PART.union({';', '{', '}', ']', ')', ':'})
# Two-character endings that are also accepted before a function statement.
INCREMENTS = {'++', '--'}
def reset_inline_count():
    """Reset the module-level inline function counter to zero."""
    global INLINE_COUNT
    INLINE_COUNT = 0
# Scans `source` for the `function` keyword and cuts every function out of it.
# A function statement is stored as hoisted[name] = (block, args) and removed from the source.
# A function expression (unnamed, or named but used as an expression, or any function when
# all_inline is true) is replaced in the source by ' ' + an INLINE_NAME placeholder and stored
# as inline['<placeholder>@<name>'] = (block, args).
# `block` is the function body including its braces, `args` is a tuple of argument names.
# Raises SyntaxError for a missing () or {} group and for duplicate argument names.
def remove_functions(source, all_inline=False):
"""removes functions and returns new source, and 2 dicts.
first dict with removed hoisted(global) functions and second with replaced inline functions"""
global INLINE_COUNT
inline = {}
hoisted = {}
n = 0
limit = len(source) - 9 # 8 is length of 'function'
res = ''
last = 0
while n < limit:
# a keyword cannot start in the middle of an identifier
if n and source[n-1] in IDENTIFIER_PART:
n+=1
continue
if source[n:n+8] == 'function' and source[n+8] not in IDENTIFIER_PART:
if source[:n].rstrip().endswith('.'): # allow function as a property name :)
n+=1
continue
if source[n+8:].lstrip().startswith(':'): # allow functions inside objects...
n+=1
continue
# remember where the function started and flush the source seen so far
entered = n
res += source[last:n]
name = ''
n = pass_white(source, n+8)
if source[n] in IDENTIFIER_START: # hoisted function
name, n= parse_identifier(source, n)
args, n = pass_bracket(source, n, '()')
if not args:
raise SyntaxError('Function misses bracket with argnames ()')
args = args.strip('() \n')
args = tuple(parse_identifier(e, 0)[0] for e in argsplit(args)) if args else ()
if len(args) - len(set(args)):
# I know its legal in JS but python does not allow duplicate argnames
# I will not work around it
raise SyntaxError('Function has duplicate argument names. Its not legal in this implementation. Sorry.')
block, n = pass_bracket(source, n, '{}')
if not block:
raise SyntaxError('Function does not have any code block to execute')
mixed = False # named function expression flag
if name and not all_inline:
# Here I will distinguish between named function expression (mixed) and a function statement
before = source[:entered].rstrip()
if any(endswith_keyword(before, e) for e in PRE_EXP_STARTS):
#print 'Ended ith keyword'
mixed = True
elif before and before[-1] not in PRE_ALLOWED and not before[-2:] in INCREMENTS:
#print 'Ended with'+repr(before[-1]), before[-1]=='}'
mixed = True
else:
#print 'FUNCTION STATEMENT'
#its a function statement.
# todo remove fucking label if present!
hoisted[name] = block, args
if not name or mixed or all_inline: # its a function expression (can be both named and not named)
#print 'FUNCTION EXPRESSION'
INLINE_COUNT += 1
iname = INLINE_NAME%INLINE_COUNT # inline name
res += ' '+ iname
inline['%s@%s' %(iname, name)] = block, args #here added real name at the end because it has to be added to the func scope
# continue copying the source from just after the function body
last = n
else:
n+=1
res += source[last:]
return res, hoisted, inline
# Ad-hoc manual check, executed only when this module is run as a script.
# NOTE: uses the Python 2 print statement.
if __name__=='__main__':
print remove_functions('5+5 function n (functiona ,functionaj) {dsd s, dsdd}')

View File

@ -0,0 +1,307 @@
"""
The process of translating JS will go like that: # TOP = 'imports and scope set'
1. Remove all the comments
2. Replace number, string and regexp literals with markers
4. Remove global Functions and move their translation to the TOP. Also add register code there.
5. Replace inline functions with lvals
6. Remove List and Object literals and replace them with lvals
7. Find and remove var declarations, generate python register code that would go on TOP.
Here we should be left with global code only where 1 line of js code = 1 line of python code.
Routine translating this code should be called glob_translate:
1. Search for outer structures and translate them using glob and inside using exps_translate
exps_translate routine:
1. Remove outer {}
2. Split lines at ;
3. Convert line by line using exp_translate
4. In case of error in 3 try to insert ; according to ECMA rules and repeat 3.
exp_translate routine:
It takes a single line of JS code and returns a SINGLE line of Python code.
Note var is not present here because it was removed in previous stages.
In case of parsing errors it must return the position of the error.
1. Convert all assignment operations to put operations, this may be hard :(
2. Convert all gets and calls to get and callprop.
3. Convert unary operators like typeof, new, !, delete.
Delete can be handled by replacing last get method with delete.
4. Convert remaining operators that are not handled by python eg: === and ,
lval format PyJsLvalNR
marker PyJs(TYPE_NAME)(NR)
TODO
1. Number literal replacement
2. Array literal replacement
3. Object literal replacement
5. Function replacement
4. Literal replacement translators
"""
from utils import *
# Maps a JS binary operator to the name of the Python special method for it.
# NodeVisitor.translate also uses the keys to recognise compound assignments such as `+=` or `<<=`.
OP_METHODS = {'*': '__mul__',
'/': '__div__',
'%': '__mod__',
'+': '__add__',
'-': '__sub__',
'<<': '__lshift__',
'>>': '__rshift__',
'&': '__and__',
'^': '__xor__',
'|': '__or__'}
def dbg(source):
    """Best-effort debugging aid: dump ``source`` to a fixed file on disk.

    Failures (for example the directory not existing) are ignored on purpose
    so that the dump never masks the error being debugged.
    """
    try:
        # raw string: same path as before, but '\U' is no longer read as the
        # start of a unicode escape by Python 3
        with open(r'C:\Users\Piotrek\Desktop\dbg.py', 'w') as f:
            f.write(source)
    except Exception:
        # deliberately ignored; KeyboardInterrupt / SystemExit still propagate
        pass
def indent(lines, ind=4):
    """Indent every line of ``lines`` by ``ind`` spaces.

    Trailing spaces of the result are removed, so text ending in a newline
    does not get a dangling indented empty line.
    """
    pad = ' ' * ind
    shifted = lines.replace('\n', '\n' + pad)
    return pad + shifted.rstrip(' ')
def inject_before_lval(source, lval, code):
    """Insert ``code`` on its own line(s) directly before the line containing ``lval``.

    The inserted code is indented to the same depth as the line that holds
    ``lval``. ``lval`` has to occur exactly once in ``source``; otherwise the
    source is dumped with dbg, the lval is printed and RuntimeError is raised.
    """
    occurrences = source.count(lval)
    if occurrences > 1:
        dbg(source)
        print('')
        print(lval)
        raise RuntimeError('To many lvals (%s)' % lval)
    if not occurrences:
        dbg(source)
        print('')
        print(lval)
        assert lval not in source
        raise RuntimeError('No lval found "%s"' % lval)
    lval_pos = source.index(lval)
    # position of the newline that precedes the line holding the lval (-1 if none)
    line_break = source.rfind('\n', 0, lval_pos)
    # count the leading spaces of that line
    depth = 0
    while source[line_break + depth + 1] == ' ':
        depth += 1
    head, tail = source[:line_break + 1], source[line_break + 1:]
    return head + indent(code, depth) + tail
def bracket_split(source, brackets=('()','{}','[]'), strip=False):
    """Split ``source`` into top-level bracket groups and the text between them.

    Generator. Each item of ``brackets`` is a two-character opener/closer
    pair. Nested brackets of the same kind stay inside their group. With
    strip=True the outer bracket characters are removed from each group.
    DOES NOT RETURN EMPTY STRINGS (can only return empty bracket content if strip=True)
    """
    openers = [pair[0] for pair in brackets]
    depth = 0
    consumed = 0  # index just past the last yielded group
    for pos, char in enumerate(source):
        if depth == 0:
            if char in openers:
                depth = 1
                opened_at = pos
                open_char, close_char = brackets[openers.index(char)]
            continue
        # inside a group: only its own bracket kind changes the depth
        if char == open_char:
            depth += 1
        elif char == close_char:
            depth -= 1
            if depth == 0:
                prefix = source[consumed:opened_at]
                if prefix:
                    yield prefix
                consumed = pos + 1
                yield source[opened_at + strip:pos + 1 - strip]
    tail = source[consumed:]
    if tail:
        yield tail
def pass_bracket(source, start, bracket='()'):
    """Read the bracket group found at ``start``, after optional white space.

    Returns (group_including_brackets, first_position_after_group), or
    (None, None) when ``source[start:]`` does not begin with such a group.
    """
    pieces = bracket_split(source[start:], [bracket], False)
    first = next(pieces, None)
    if first is None:
        return None, None
    if not first.strip():
        # leading white space: the group, if any, is the next piece
        second = next(pieces, None)
        if second is None:
            return None, None
        return second, start + len(first) + len(second)
    if first[-1] == bracket[1]:
        return first, start + len(first)
    return None, None
def startswith_keyword(start, keyword):
    """Return True when ``start`` (ignoring leading white space) begins with
    ``keyword`` as a whole word, i.e. not directly followed by an identifier character."""
    stripped = start.lstrip()
    if not stripped.startswith(keyword):
        return False
    following = stripped[len(keyword):len(keyword) + 1]
    return not (following and following in IDENTIFIER_PART)
def endswith_keyword(ending, keyword):
    """Return True when ``ending`` (ignoring trailing white space) ends with
    ``keyword`` as a whole word, i.e. not directly preceded by an identifier character."""
    stripped = ending.rstrip()
    if not stripped.endswith(keyword):
        return False
    if len(keyword) < len(stripped):
        preceding = stripped[len(stripped) - len(keyword) - 1]
        if preceding in IDENTIFIER_PART:
            return False
    return True
def pass_white(source, start):
    """Return the index of the first character at or after ``start`` that is
    not in SPACE (or len(source) when only such characters remain)."""
    pos = start
    limit = len(source)
    while pos < limit and source[pos] in SPACE:
        pos += 1
    return pos
def except_token(source, start, token, throw=True):
    """Expect the single-character ``token`` at ``start`` (after white space).

    Returns the position after the token when found. Otherwise raises
    SyntaxError if ``throw`` is true, else returns None.
    """
    pos = pass_white(source, start)
    if pos < len(source) and source[pos] == token:
        return pos + 1
    if not throw:
        return None
    raise SyntaxError('Missing token. Expected %s'%token)
def except_keyword(source, start, keyword):
    """Expect ``keyword`` as a whole word at ``start`` (after white space).

    Returns the position just after the keyword when found, else None.
    """
    begin = pass_white(source, start)
    stop = begin + len(keyword)
    if stop > len(source) or source[begin:stop] != keyword:
        return None
    # the keyword must not be a prefix of a longer identifier
    if stop < len(source) and source[stop] in IDENTIFIER_PART:
        return None
    return stop
def parse_identifier(source, start, throw=True):
    """Skip white space from ``start`` and read the identifier found there.

    Returns (identifier, position_after_identifier). When no valid identifier
    is found, raises SyntaxError if ``throw`` is true, otherwise returns None.
    """
    def _fail(message):
        # shared failure path: raise or give up quietly, depending on `throw`
        if throw:
            raise SyntaxError(message)
        return None

    start = pass_white(source, start)
    if not start < len(source):
        return _fail('Missing identifier!')
    if source[start] not in IDENTIFIER_START:
        return _fail('Invalid identifier start: "%s"'%source[start])
    end = start + 1
    while end < len(source) and source[end] in IDENTIFIER_PART:
        end += 1
    identifier = source[start:end]
    if not is_valid_lval(identifier):
        return _fail('Invalid identifier name: "%s"'%identifier)
    return identifier, end
def argsplit(args, sep=','):
    """Split ``args`` at every ``sep`` that is not inside (), [] or {}.

    Pass args *without* the surrounding brackets. Also used to split array
    and object elements. Always returns a list with at least one element.
    """
    pieces = []
    offset = 0  # index in args where the current chunk starts
    piece_start = 0
    for chunk in bracket_split(args, brackets=['()', '[]', '{}']):
        if chunk[0] not in ('(', '[', '{'):
            # plain text between bracket groups: separators here are real ones
            for rel, char in enumerate(chunk):
                if char == sep:
                    cut = offset + rel
                    pieces.append(args[piece_start:cut])
                    piece_start = cut + 1
        offset += len(chunk)
    pieces.append(args[piece_start:])
    return pieces
def split_add_ops(text):
    """Split ``text`` at binary + and - operators (generator).

    Operands are *not* translated. Example result: ['op1', '+', 'op2', '-', 'op3'].
    A + or - counts as binary only when an operand was seen since the last
    operator; ++ and -- are never split.
    """
    # mask ++ and -- so they are not mistaken for + or -
    masked = text.replace('++', '##').replace('--', '@@')

    def unmask(chunk):
        return chunk.replace('##', '++').replace('@@', '--')

    operand_seen = False  # true once anything other than an operator or space was seen
    chunk_start = 0
    for pos, char in enumerate(masked):
        if char in ('+', '-'):
            if operand_seen:
                yield unmask(masked[chunk_start:pos])
                yield char
                chunk_start = pos + 1
                operand_seen = False
        elif char in ('/', '*', '%'):
            # a sign directly after one of these operators is unary
            operand_seen = False
        elif char != ' ':
            operand_seen = True
    yield unmask(masked[chunk_start:])
def split_at_any(text, lis, translate=False, not_before=(), not_after=(), validitate=None):
    """Split ``text`` at any of the tokens in ``lis`` (generator).

    Yields text fragments and matched tokens alternately; longer tokens are
    tried first. ``lis`` may be any iterable of strings and is not modified.

    translate   -- optional callable applied to every yielded text fragment.
    not_before  -- no token is looked for at a position where the text so far
                   ends with one of these strings.
    not_after   -- the position is skipped when the text following a candidate
                   token starts with one of these strings.
    validitate  -- optional callable(token, text_before, text_after); a falsy
                   result rejects that token at that position.
    """
    # sort a copy so the caller's list is left untouched (longest tokens first)
    tokens = sorted(lis, key=len, reverse=True)
    last = 0
    n = 0
    text_len = len(text)
    while n < text_len:
        if any(text[:n].endswith(e) for e in not_before):
            n += 1
            continue
        for token in tokens:
            size = len(token)
            if size + n > text_len:
                continue
            if validitate and not validitate(token, text[:n], text[n + size:]):
                continue
            if any(text[n + size:].startswith(e) for e in not_after):
                n += 1
                break
            if token == text[n:n + size]:
                yield translate(text[last:n]) if translate else text[last:n]
                yield token
                n += size
                last = n
                break
        else:
            # no token matched at this position
            n += 1
    yield translate(text[last:n]) if translate else text[last:n]
def split_at_single(text, sep, not_before=[], not_after=[]):
    """Work like text.split(sep), but skip a separator when the fragment
    before it ends with one of ``not_before`` or the text after it starts
    with one of ``not_after`` (generator)."""
    text_len, sep_len = len(text), len(sep)
    piece_start = 0
    pos = 0
    while pos < text_len:
        if pos + sep_len <= text_len and text[pos:pos + sep_len] == sep:
            left = text[piece_start:pos]
            right = text[pos + sep_len:]
            blocked = (any(left.endswith(e) for e in not_before)
                       or any(right.startswith(e) for e in not_after))
            if not blocked:
                yield left
                piece_start = pos + sep_len
                pos += sep_len - 1
        pos += 1
    yield text[piece_start:]

View File

@ -0,0 +1,500 @@
from jsparser import *
from utils import *
import re
from utils import *
#Note all white space sent to this module must be ' ' so no '\n'
# Maps the placeholder names created by bracket_replace to the bracketed text they replaced.
# It is reset at the start of every exp_translator call.
REPL = {}
#PROBLEMS
# <<=, >>=, >>>=
# they are unusual so I will not fix that now. a++ +b works fine and a+++++b (a++ + ++b) does not work even in V8
# Regex for a lone '=': not preceded by '=', '!', '<' or '>' and not followed by '='.
ASSIGNMENT_MATCH = '(?<!=|!|<|>)=(?!=)'
def unary_validitator(keyword, before, after):
    """Decide whether ``keyword`` at this position can be a unary operator.

    Only operators ending in an identifier character are checked: they must
    be followed by a non-identifier character and must not be glued to an
    identifier character before them.
    """
    if keyword[-1] not in IDENTIFIER_PART:
        return True
    if not after or after[0] in IDENTIFIER_PART:
        return False
    if before and before[-1] in IDENTIFIER_PART: # I am not sure here...
        return False
    return True
def comb_validitator(keyword, before, after):
    """Decide whether ``keyword`` at this position can be a binary operator.

    Only 'instanceof' and 'in' are checked: they are rejected when glued to
    an identifier character on either side.
    """
    if keyword not in ('instanceof', 'in'):
        return True
    if before and before[-1] in IDENTIFIER_PART:
        return False
    if after and after[0] in IDENTIFIER_PART:
        return False
    return True
def bracket_replace(code):
    """Replace every top-level () and [] group in ``code`` with a placeholder.

    A [] group becomes '#PYJSREPL<n>{' and a () group '@PYJSREPL<n>}', where
    <n> is the current size of REPL; the original text is stored in REPL
    under the placeholder name.
    """
    markers = {'[': ('#PYJSREPL', '{'),
               '(': ('@PYJSREPL', '}')}  # a () group can be a function call
    pieces = []
    for chunk in bracket_split(code, ['()','[]'], False):
        wrap = markers.get(chunk[0])
        if wrap is None:
            pieces.append(chunk)
            continue
        name = wrap[0] + str(len(REPL)) + wrap[1]
        REPL[name] = chunk
        pieces.append(name)
    return ''.join(pieces)
# Translates one JS expression (given as a string) into a Python expression string.
# translate() handles the outermost operation and calls trans() on the operands.
# Operators are tried in this order: comma, ?:, assignment, binary operators (in the
# sequence of `order` below), unary operators, and finally property gets and calls.
class NodeVisitor:
def __init__(self, code):
self.code = code
def rl(self, lis, op):
"""performs this operation on a list from *right to left*
op must take 2 args
a,b,c => op(a, op(b, c))"""
it = reversed(lis)
res = trans(it.next())
for e in it:
e = trans(e)
res = op(e, res)
return res
def lr(self, lis, op):
"""performs this operation on a list from *left to right*
op must take 2 args
a,b,c => op(op(a, b), c)"""
it = iter(lis)
res = trans(it.next())
for e in it:
e = trans(e)
res = op(res, e)
return res
def translate(self):
"""Translates outer operation and calls translate on inner operation.
Returns fully translated code."""
if not self.code:
return ''
# hide bracket contents behind placeholders so only top-level operators are visible
new = bracket_replace(self.code)
#Check comma operator:
cand = new.split(',') #every comma in new must be an operator
if len(cand)>1: #LR
return self.lr(cand, js_comma)
#Check = operator:
# dont split at != or !== or == or === or <= or >=
#note <<=, >>= or this >>> will NOT be supported
# maybe I will change my mind later
# Find this crappy ?:
if '?' in new:
cond_ind = new.find('?')
# tenary_start ends up just after the last assignment '=' that precedes the first '?'
tenary_start = 0
for ass in re.finditer(ASSIGNMENT_MATCH, new):
cand = ass.span()[1]
if cand < cond_ind:
tenary_start = cand
else:
break
# NOTE(review): actual_tenary is assigned but not used below
actual_tenary = new[tenary_start:]
spl = ''.join(split_at_any(new, [':', '?'], translate=trans))
tenary_translation = transform_crap(spl)
# translate the assignment part with a stand-in name, then substitute the ternary translation
assignment = new[:tenary_start] + ' PyJsConstantTENARY'
return trans(assignment).replace('PyJsConstantTENARY', tenary_translation)
cand = list(split_at_single(new, '=', ['!', '=','<','>'], ['=']))
if len(cand)>1: # RL
it = reversed(cand)
res = trans(it.next())
for e in it:
e = e.strip()
if not e:
raise SyntaxError('Missing left-hand in assignment!')
# a trailing operator on the left-hand side means a compound assignment (e.g. +=)
op = ''
if e[-2:] in OP_METHODS:
op = ','+e[-2:].__repr__()
e = e[:-2]
elif e[-1:] in OP_METHODS:
op = ','+e[-1].__repr__()
e = e[:-1]
e = trans(e)
#Now replace last get method with put and change args
c = list(bracket_split(e, ['()']))
beg, arglist = ''.join(c[:-1]).strip(), c[-1].strip() #strips just to make sure... I will remove it later
if beg[-4:]!='.get':
raise SyntaxError('Invalid left-hand side in assignment')
beg = beg[0:-3]+'put'
arglist = arglist[0:-1]+', '+res+op+')'
res = beg+arglist
return res
#Now check remaining 2 arg operators that are not handled by python
#They all have Left to Right (LR) associativity
order = [OR, AND, BOR, BXOR, BAND, EQS, COMPS, BSHIFTS, ADDS, MULTS]
# actually we dont need OR and AND because they can be handled easier. But just for fun
dangerous = ['<', '>']
for typ in order:
#we have to use special method for ADDS since they can be also unary operation +/++ or -/-- FUCK
if '+' in typ:
cand = list(split_add_ops(new))
else:
#dont translate. cant start or end on dangerous op.
cand = list(split_at_any(new, typ.keys(), False, dangerous, dangerous,validitate=comb_validitator))
if not len(cand)>1:
continue
# cand alternates operand, operator, operand, ...; fold it from left to right
n = 1
res = trans(cand[0])
if not res:
raise SyntaxError("Missing operand!")
while n<len(cand):
e = cand[n]
if not e:
raise SyntaxError("Missing operand!")
if n%2:
op = typ[e]
else:
res = op(res, trans(e))
n+=1
return res
#Now replace unary operators - only they are left
cand = list(split_at_any(new, UNARY.keys(), False, validitate=unary_validitator))
if len(cand)>1: #contains unary operators
if '++' in cand or '--' in cand: #it cant contain both ++ and --
if '--' in cand:
op = '--'
meths = js_post_dec, js_pre_dec
else:
op = '++'
meths = js_post_inc, js_pre_inc
pos = cand.index(op)
if cand[pos-1].strip(): # post increment
a = cand[pos-1]
meth = meths[0]
elif cand[pos+1].strip(): #pre increment
a = cand[pos+1]
meth = meths[1]
else:
raise SyntaxError('Invalid use of ++ operator')
if cand[pos+2:]:
raise SyntaxError('Too many operands')
operand = meth(trans(a))
cand = cand[:pos-1]
# now last cand should be operand and every other odd element should be empty
else:
operand = trans(cand[-1])
del cand[-1]
# apply the remaining unary operators from the innermost (rightmost) outwards
for i, e in enumerate(reversed(cand)):
if i%2:
if e.strip():
raise SyntaxError('Too many operands')
else:
operand = UNARY[e](operand)
return operand
#Replace brackets
if new[0]=='@' or new[0]=='#':
if len(list(bracket_split(new, ('#{','@}')))) ==1: # we have only one bracket, otherwise pseudobracket like @@....
assert new in REPL
if new[0]=='#':
raise SyntaxError('[] cant be used as brackets! Use () instead.')
return '('+trans(REPL[new][1:-1])+')'
#Replace function calls and prop getters
# 'now' must be a reference like: a or b.c.d but it can have also calls or getters ( for example a["b"](3))
#From here @@ means a function call and ## means get operation (note they dont have to present)
it = bracket_split(new, ('#{','@}'))
res = []
for e in it:
if e[0]!='#' and e[0]!='@':
res += [x.strip() for x in e.split('.')]
else:
res += [e.strip()]
# res[0] can be inside @@ (name)...
res = filter(lambda x: x, res)
# res[0] is the base of the reference chain; the rest are property gets and calls
if is_internal(res[0]):
out = res[0]
elif res[0][0] in {'#', '@'}:
out = '('+trans(REPL[res[0]][1:-1])+')'
elif is_valid_lval(res[0]) or res[0] in {'this', 'false', 'true', 'null'}:
out = 'var.get('+res[0].__repr__()+')'
else:
if is_reserved(res[0]):
raise SyntaxError('Unexpected reserved word: "%s"'%res[0])
raise SyntaxError('Invalid identifier: "%s"'%res[0])
if len(res)==1:
return out
n = 1
while n<len(res): #now every func call is a prop call
e = res[n]
if e[0]=='@': # direct call
out += trans_args(REPL[e])
n += 1
continue
args = False #assume not prop call
if n+1<len(res) and res[n+1][0]=='@': #prop call
args = trans_args(REPL[res[n+1]])[1:]
if args!=')':
args = ','+args
if e[0]=='#':
prop = trans(REPL[e][1:-1])
else:
if not is_lval(e):
raise SyntaxError('Invalid identifier: "%s"'%e)
prop = e.__repr__()
if args: # prop call
# the call arguments were consumed together with the property, so skip them
n+=1
out += '.callprop('+prop+args
else: #prop get
out += '.get('+prop+')'
n+=1
return out
def js_comma(a, b):
    """Emit the translation of the JS comma operator ``a, b``."""
    return ''.join(('PyJsComma(', a, ',', b, ')'))
def js_or(a, b):
    """Emit the translation of JS ``a || b``."""
    return ''.join(('(', a, ' or ', b, ')'))
def js_bor(a, b):
    """Emit the translation of JS ``a | b``."""
    return ''.join(('(', a, '|', b, ')'))
def js_bxor(a, b):
    """Emit the translation of JS ``a ^ b``."""
    return ''.join(('(', a, '^', b, ')'))
def js_band(a, b):
    """Emit the translation of JS ``a & b``."""
    return ''.join(('(', a, '&', b, ')'))
def js_and(a, b):
    """Emit the translation of JS ``a && b``."""
    return ''.join(('(', a, ' and ', b, ')'))
def js_strict_eq(a, b):
    """Emit the translation of JS ``a === b``."""
    return ''.join(('PyJsStrictEq(', a, ',', b, ')'))
def js_strict_neq(a, b):
    """Emit the translation of JS ``a !== b``."""
    return ''.join(('PyJsStrictNeq(', a, ',', b, ')'))
#Not handled by python in the same way like JS. For example 2==2==True returns false.
# In JS above would return true so we need brackets.
def js_abstract_eq(a, b):
    """Emit the translation of JS ``a == b``, bracketed so that Python does
    not chain it with a neighbouring comparison."""
    return ''.join(('(', a, '==', b, ')'))
#just like ==
def js_abstract_neq(a, b):
    """Emit the translation of JS ``a != b``, bracketed like js_abstract_eq."""
    return ''.join(('(', a, '!=', b, ')'))
def js_lt(a, b):
    """Emit the translation of JS ``a < b``."""
    return ''.join(('(', a, '<', b, ')'))
def js_le(a, b):
    """Emit the translation of JS ``a <= b``."""
    return ''.join(('(', a, '<=', b, ')'))
def js_ge(a, b):
    """Emit the translation of JS ``a >= b``."""
    return ''.join(('(', a, '>=', b, ')'))
def js_gt(a, b):
    """Emit the translation of JS ``a > b``."""
    return ''.join(('(', a, '>', b, ')'))
def js_in(a, b):
    """Emit the translation of JS ``a in b`` as a ``contains`` call on b."""
    return ''.join((b, '.contains(', a, ')'))
def js_instanceof(a, b):
    """Emit the translation of JS ``a instanceof b`` as an ``instanceof`` call on a."""
    return ''.join((a, '.instanceof(', b, ')'))
def js_lshift(a, b):
    """Emit the translation of JS ``a << b``."""
    return ''.join(('(', a, '<<', b, ')'))
def js_rshift(a, b):
    """Emit the translation of JS ``a >> b``."""
    return ''.join(('(', a, '>>', b, ')'))
def js_shit(a, b):
    """Emit the translation of JS ``a >>> b`` as a PyJsBshift call."""
    return ''.join(('PyJsBshift(', a, ',', b, ')'))
def js_add(a, b):
    """Emit the translation of JS ``a + b``."""
    # kept as a plain bracketed sum to simplify the later handling of unary + and ++
    template = '(%s+%s)'
    return template % (a, b)
def js_sub(a, b):
    """Emit the translation of JS ``a - b``."""
    template = '(%s-%s)'
    return template % (a, b)
def js_mul(a, b):
    """Emit the translation of JS ``a * b``."""
    return ''.join(('(', a, '*', b, ')'))
def js_div(a, b):
    """Emit the translation of JS ``a / b``."""
    return ''.join(('(', a, '/', b, ')'))
def js_mod(a, b):
    """Emit the translation of JS ``a % b``."""
    return ''.join(('(', a, '%', b, ')'))
def js_typeof(a):
    """Emit the translation of JS ``typeof a``.

    A bare ``var.get(...)`` lookup gets ``throw=False`` added to its call
    arguments before ``.typeof()`` is appended.
    """
    parts = list(bracket_split(a, ('()',)))
    is_plain_lookup = len(parts) == 2 and parts[0] == 'var.get'
    if not is_plain_lookup:
        return a + '.typeof()'
    getter, call_args = parts
    return getter + call_args[:-1] + ',throw=False).typeof()'
def js_void(a):
    """Emit the translation of JS ``void a`` (the operand in brackets)."""
    return ''.join(('(', a, ')'))
def js_new(a):
    """Emit the translation of JS ``new a``.

    Finds the first call in the already translated operand and turns it into
    a ``.create(...)`` call; without a call ``.create()`` is appended.
    """
    cands = list(bracket_split(a, ('()',)))
    if len(cands) < 2:
        return a + '.create()'
    last_index = len(cands) - 1
    for n, c in enumerate(cands):
        if c[0] != '(':
            continue
        previous = cands[n-1]
        if previous.endswith('.get') and n >= last_index: # last get operation.
            return a + '.create()'
        if previous[0] == '(':
            # a call applied directly to a bracketed value
            return ''.join(cands[:n]) + '.create' + c + ''.join(cands[n+1:])
        if previous == '.callprop':
            # property call: the first argument is the property, the rest are the call args
            beg = ''.join(cands[:n-1])
            args = argsplit(c[1:-1], ',')
            create = '.get(%s).create(%s)' % (args[0], ','.join(args[1:]))
            return beg + create + ''.join(cands[n+1:])
    return a + '.create()'
def js_delete(a):
    """Emit the translation of JS ``delete a`` by turning the last ``.get``
    of the translated operand into ``.delete``.

    Raises SyntaxError when the operand does not end in a get operation.
    """
    pieces = list(bracket_split(a, ['()']))
    target = ''.join(pieces[:-1]).strip()
    arglist = pieces[-1].strip()
    if target[-4:] != '.get':
        raise SyntaxError('Invalid delete operation')
    return target[:-3] + 'delete' + arglist
def js_neg(a):
    """Emit the translation of JS unary ``-a``."""
    return ''.join(('(-', a, ')'))
def js_pos(a):
    """Emit the translation of JS unary ``+a``."""
    return ''.join(('(+', a, ')'))
def js_inv(a):
    """Emit the translation of JS ``~a``."""
    return ''.join(('(~', a, ')'))
def js_not(a):
    """Emit the translation of JS ``!a`` as a ``neg`` call on the operand."""
    return ''.join((a, '.neg()'))
def postfix(a, inc, post):
    """Emit the translation of ++ / -- applied to the translated operand ``a``.

    inc  -- True for ++, False for --.
    post -- True for the postfix form; the emitted put expression is then
            wrapped so that 1 is subtracted (++) or added (--) again.
    Raises SyntaxError when the operand does not end in a get operation.
    """
    bra = list(bracket_split(a, ('()',)))
    getter = bra[-2]
    if not getter.endswith('get'):
        raise SyntaxError('Invalid ++ or -- operation.')
    # turn the final get(...) into put(..., <old value> +/- Js(1))
    step = '+' if inc else '-'
    undo = '-' if inc else '+'
    bra[-2] = getter[:-3] + 'put'
    bra[-1] = '(%s,%s%sJs(1))' % (bra[-1][1:-1], a, step)
    res = ''.join(bra)
    if post:
        return '(%s%sJs(1))' % (res, undo)
    return res
def js_pre_inc(a):
    """Emit the translation of JS ``++a``."""
    return postfix(a, inc=True, post=False)
def js_post_inc(a):
    """Emit the translation of JS ``a++``."""
    return postfix(a, inc=True, post=True)
def js_pre_dec(a):
    """Emit the translation of JS ``--a``."""
    return postfix(a, inc=False, post=False)
def js_post_dec(a):
    """Emit the translation of JS ``a--``."""
    return postfix(a, inc=False, post=True)
# Operator tables: each maps a JS operator token to the js_* function emitting its translation.
# NodeVisitor.translate tries the binary tables in the order
# OR, AND, BOR, BXOR, BAND, EQS, COMPS, BSHIFTS, ADDS, MULTS.
OR = {'||': js_or}
AND = {'&&': js_and}
BOR = {'|': js_bor}
BXOR = {'^': js_bxor}
BAND = {'&': js_band}
EQS = {'===': js_strict_eq,
'!==': js_strict_neq,
'==': js_abstract_eq, # we need == and != too. Read a note above method
'!=': js_abstract_neq}
#Since JS does not have chained comparisons we need to implement all cmp methods.
COMPS = {'<': js_lt,
'<=': js_le,
'>=': js_ge,
'>': js_gt,
'instanceof': js_instanceof, #todo change to validitate
'in': js_in}
BSHIFTS = {'<<': js_lshift,
'>>': js_rshift,
'>>>': js_shit}
ADDS = {'+': js_add,
'-': js_sub}
MULTS = {'*': js_mul,
'/': js_div,
'%': js_mod}
#Note they dont contain ++ and -- methods because they both have 2 different methods
# correct method will be found automatically in translate function
# '++' and '--' are still listed (with None) so that they are recognised as tokens when splitting.
UNARY = {'typeof': js_typeof,
'void': js_void,
'new': js_new,
'delete': js_delete,
'!': js_not,
'-': js_neg,
'+': js_pos,
'~': js_inv,
'++': None,
'--': None
}
def transform_crap(code): #needs some more tests
    """Transform JS ``cond ? a : b`` syntax into Python ``(a if cond else b)``.

    ``code`` is text in which only '?' and ':' are left as ternary markers.
    The last '?' is rewritten first and the process repeats until none is
    left, so nested conditionals end up as nested bracketed expressions.
    Raises SyntaxError when a '?' has no matching ':'.
    """
    while True:
        ind = code.rfind('?')
        if ind == -1:
            return code
        sep = code.find(':', ind)
        if sep == -1:
            raise SyntaxError('Invalid ?: syntax (probably missing ":" )')
        # the condition starts right after the closest '?' or ':' to the left;
        # both searches must go from the right, otherwise a doubly nested
        # ternary picks up an outer '?' and swallows too much as the condition
        beg = max(code.rfind(':', 0, ind), code.rfind('?', 0, ind)) + 1
        # the else-branch ends at the next ':' (which belongs to an outer ternary)
        end = code.find(':', sep + 1)
        if end == -1:
            end = len(code)
        formula = '(' + code[ind+1:sep] + ' if ' + code[beg:ind] + ' else ' + code[sep+1:end] + ')'
        code = code[:beg] + formula + code[end:]
from code import InteractiveConsole
#e = InteractiveConsole(globals()).interact()
import traceback
def trans(code):
    """Translate one JS expression string and return the stripped Python expression."""
    visitor = NodeVisitor(code.strip())
    return visitor.translate().strip()
#todo finish this trans args
def trans_args(code):
    """Translate a bracketed JS argument list into a bracketed Python one.

    The outer brackets are dropped, nested brackets are hidden behind
    placeholders, and every comma-separated argument is translated with trans.
    """
    inner = code.strip()[1:-1]
    translated = [trans(arg) for arg in bracket_replace(inner).split(',')]
    return '(%s)' % ','.join(translated)
EXP = 0  # number of exp_translator calls made so far


def exp_translator(code):
    """Translate a single JS expression into a Python expression string.

    Resets the REPL placeholder table and increments the EXP call counter.
    Newlines are replaced by spaces first. The code must not contain '@',
    ';' or '#'; this is enforced with assert statements.
    """
    global REPL, EXP
    EXP += 1
    REPL = {}
    #print EXP, code
    code = code.replace('\n', ' ')
    for forbidden in ('@', ';', '#'):
        assert forbidden not in code
    #if not code.strip(): #?
    #    return 'var.get("undefined")'
    return trans(code)
# Ad-hoc manual check, executed only when this module is run as a script.
# NOTE: uses Python 2 constructs (print statement, xrange).
if __name__=='__main__':
#print 'Here', trans('(eee ) . ii [ PyJsMarker ] [ jkj ] ( j , j ) .
# jiji (h , ji , i)(non )( )()()()')
for e in xrange(3):
print exp_translator('jk = kk.ik++')
#First line translated with PyJs: PyJsStrictEq(PyJsAdd((Js(100)*Js(50)),Js(30)), Js("5030")), yay!
print exp_translator('delete a.f')

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,287 @@
""" This module removes all objects/arrays from JS source code and replace them with LVALS.
It also has a function translating a removed object/array to Python code.
Use this module just after removing constants. Later move on to removing functions"""
# Templates for the placeholder names that replace object / array literals; %d is the running count.
OBJECT_LVAL = 'PyJsLvalObject%d_'
ARRAY_LVAL = 'PyJsLvalArray%d_'
from utils import *
from jsparser import *
from nodevisitor import exp_translator
import functions
from flow import KEYWORD_METHODS
def FUNC_TRANSLATOR(*a):# stupid import system in python
    """Placeholder used until set_func_translator installs the real translator; always raises RuntimeError."""
    message = 'Remember to set func translator. Thank you.'
    raise RuntimeError(message)
def set_func_translator(ftrans):
    """Install ``ftrans`` as the module-level FUNC_TRANSLATOR."""
    # stupid stupid Python or Peter
    global FUNC_TRANSLATOR
    FUNC_TRANSLATOR = ftrans
def is_empty_object(n, last):
    """Tell whether an empty ``{}`` is an empty object rather than an empty code block.

    n    -- the inside of the braces (may be the inside of a block or object).
    last -- the code before the braces.
    Returns True only when ``n`` is blank and ``last`` is non-blank and does
    not end with ')' or ';'.
    """
    if n.strip():
        return False
    # seems to be empty, but it can still be an empty code block
    preceding = last.strip()
    return bool(preceding) and preceding[-1] not in (')', ';')
# todo refine this function
def is_object(n, last):
    """Guess whether ``{n}`` is an object literal rather than a code block.

    n    -- the inside of the braces (may be the inside of a block or object).
    last -- the code before the braces.
    """
    if is_empty_object(n, last):
        return True
    if not n.strip():
        return False
    # it contains several ';'-separated lines of code, so it cannot be an object
    if len(argsplit(n, ';')) > 1:
        return False
    members = argsplit(n, ',')
    if not members[-1].strip():
        return True # {xxxx,} empty after last , it must be an object
    for member in members:
        member = member.strip()
        # split the member at ':'; everything after the first ':' is the value
        # (the value itself may contain ':' because of the ?: expression)
        parts = argsplit(member, ':')
        if len(parts) < 2:
            # no key/value pair: only getter / setter members are accepted
            if not member.startswith(('set ', 'get ')):
                return False
            continue
        key = parts[0]
        value = ':'.join(parts[1:]).strip()
        # key value pair, check whether it is not a label or ?:
        if not is_lval(key.strip()):
            return False
        if value.startswith('function'):
            continue
        #will fail on label... {xxx: while {}}
        if value[0] == '{': # value cant be a code block
            return False
        for keyword in KEYWORD_METHODS:
            # a value that starts with a statement keyword means this is code
            if value.startswith(keyword) and len(keyword) < len(value) and value[len(keyword)] not in IDENTIFIER_PART:
                return False
    return True
def is_array(last):
    """Tell whether a '[' following the code in last opens an array literal.

    The alternative is a property getter such as a[0].
    """
    preceding = last.strip()
    for keyword in ('return', 'new', 'void', 'throw', 'typeof', 'in', 'instanceof'):
        if endswith_keyword(preceding, keyword):
            return True
    if not preceding:
        return True
    tail = preceding[-1]
    # After ')' , ']' or an identifier character the bracket is a property access.
    return tail not in (')', ']') and tail not in IDENTIFIER_PART
def remove_objects(code, count=1):
""" This function replaces objects with OBJECTS_LVALS, returns new code, replacement dict and count.
count arg is the number that should be added to the LVAL of the first replaced object
"""
# NOTE(review): this copy of the file has lost its indentation. The nesting of
# the statements below - in particular whether the trailing `last = e` runs for
# every element or only in the final else branch - cannot be told from here;
# restore it from the upstream source instead of guessing.
replacements = {} #replacement dict
# presumably yields the top-level {...} / [...] groups and the text between them
br = bracket_split(code, ['{}', '[]'])
res = ''
last = ''
for e in br:
#test whether e is an object
if e[0]=='{':
# process the inside first; temp_rep/cand_count are only kept if e is a block
n, temp_rep, cand_count = remove_objects(e[1:-1], count)
# if e was not an object then n should not contain any :
if is_object(n, last):
#e was an object
res += ' '+OBJECT_LVAL % count
# the whole original literal (braces included) is stored for later translation
replacements[OBJECT_LVAL % count] = e
count += 1
else:
# e was just a code block but could contain objects inside
res += '{%s}' % n
count = cand_count
replacements.update(temp_rep)
elif e[0]=='[':
if is_array(last):
res += e # will be translated later
else: # prop get
n, rep, count = remove_objects(e[1:-1], count)
res += '[%s]' % n
replacements.update(rep)
else: # e does not contain any objects
res += e
last = e #needed to test for this stupid empty object
return res, replacements, count
def remove_arrays(code, count=1):
"""removes arrays and replaces them with ARRAY_LVALS
returns new code, replacement dict and the next free count
*NOTE* has to be called AFTER remove objects"""
# NOTE(review): this copy of the file has lost its indentation. Whether the
# trailing `last = e` belongs to the loop body or to the final else branch
# cannot be told from here; restore it from the upstream source.
res = ''
last = ''
replacements = {}
for e in bracket_split(code, ['[]']):
if e[0]=='[':
if is_array(last):
# true array literal: swap it for a fresh lval and remember the source
name = ARRAY_LVAL % count
res += ' ' + name
replacements[name] = e
count += 1
else: # pseudo array. But pseudo array can contain true array. for example a[['d'][3]] has 2 pseudo and 1 true array
cand, new_replacements, count = remove_arrays(e[1:-1], count)
res += '[%s]' % cand
replacements.update(new_replacements)
else:
res += e
last = e
return res, replacements, count
def translate_object(obj, lval, obj_count=1, arr_count=1):
    """Translate a JS object literal into Python source that defines lval.

    obj is the literal including its braces and lval the name it is assigned
    to. Returns (python_code, obj_count, arr_count). Definitions of nested
    functions, objects and arrays are placed before the assignment itself;
    getter/setter registrations come after it.
    """
    body = obj[1:-1]  # remove {} from both ends
    body, nested_objects, obj_count = remove_objects(body, obj_count)
    body, nested_arrays, arr_count = remove_arrays(body, arr_count)
    # Functions can be defined inside objects and exp_translator cannot
    # translate them, so they are removed here and handled by FUNC_TRANSLATOR.
    body, hoisted, inline = functions.remove_functions(body, all_inline=True)
    assert not hoisted
    accessors = ''
    members = argsplit(body)
    total = len(members)
    pairs = []
    for position, member in enumerate(members, 1):
        member = member.strip()
        if member.startswith('set '):
            accessors += translate_setter(lval, member)
            continue
        if member.startswith('get '):
            accessors += translate_getter(lval, member)
            continue
        if ':' not in member:
            # An elision can happen legally only in the last element {3:2,}
            if position < total:
                raise SyntaxError('Unexpected "," in Object literal')
            break
        # Plain key: value member.
        parts = argsplit(member, ':')
        if len(parts) < 2:
            raise SyntaxError('Invalid Object literal: '+member)
        if len(parts) == 2:
            key, value = parts
        else:
            # More than one top-level ':' - everything after the first is the value.
            print('Unusual case ' + repr(member))
            key = parts[0]
            value = ':'.join(parts[1:])
        key = key.strip()
        key = '%s.to_string().value' % key if is_internal(key) else repr(key)
        value = exp_translator(value)
        if not value:
            raise SyntaxError('Missing value in Object literal')
        pairs.append('%s:%s' % (key, value))
    code = '%s = Js({%s})\n' % (lval, ','.join(pairs)) + accessors
    # Prepend definitions of everything that was removed from the literal.
    for func_name, func_info in inline.iteritems():  # functions
        func_block, func_args = func_info
        code = FUNC_TRANSLATOR(func_name, func_block, func_args) + code
    for nested_lval, nested_obj in nested_objects.iteritems():  # objects
        nested_def, obj_count, arr_count = translate_object(nested_obj, nested_lval, obj_count, arr_count)
        code = nested_def + code
    for nested_lval, nested_arr in nested_arrays.iteritems():  # arrays
        nested_def, obj_count, arr_count = translate_array(nested_arr, nested_lval, obj_count, arr_count)
        code = nested_def + code
    return code, obj_count, arr_count
def translate_setter(lval, setter):
    """Translate a 'set name(arg){...}' member into Python source.

    The emitted code defines a function called setter and registers it on
    lval through define_own_property. Raises SyntaxError when the member
    cannot be parsed or does not take exactly one argument.
    """
    # Turn "set name(arg){...}" into "function name(arg){...}" for parsing.
    source = 'function' + setter[3:]
    try:
        _, found, _ = functions.remove_functions(source)
        if len(found) != 1:
            raise Exception()
    except:
        raise SyntaxError('Could not parse setter: '+setter)
    prop = next(iter(found))
    body, args = found[prop]
    if len(args) != 1:  # setter must have exactly 1 argument
        raise SyntaxError('Invalid setter. It must take exactly 1 argument.')
    definition = FUNC_TRANSLATOR('setter', body, args)
    registration = "%s.define_own_property(%s, {'set': setter})\n" % (lval, repr(prop))
    return definition + registration
def translate_getter(lval, getter):
    """Translate a 'get name(){...}' member into Python source.

    The emitted code defines a function called getter and registers it on
    lval through define_own_property. Raises SyntaxError when the member
    cannot be parsed or takes any argument.
    """
    # Turn "get name(){...}" into "function name(){...}" for parsing.
    func = 'function' + getter[3:]
    try:
        _, data, _ = functions.remove_functions(func)
        if not data or len(data) > 1:
            raise Exception()
    except Exception:
        raise SyntaxError('Could not parse getter: '+getter)
    prop = next(iter(data))
    body, args = data[prop]
    if len(args) != 0:  # getter must take no arguments
        raise SyntaxError('Invalid getter. It must take exactly 0 argument.')
    # The translated function is named 'getter', so that is the name the
    # property descriptor has to reference (it used to reference 'setter').
    res = FUNC_TRANSLATOR('getter', body, args)
    res += "%s.define_own_property(%s, {'get': getter})\n" % (lval, repr(prop))
    return res
def translate_array(array, lval, obj_count=1, arr_count=1):
    """Translate a JS array literal into Python source that defines lval.

    array has to be any js array, for example [1,2,3], and lval the name of
    this array. Returns (python_code, obj_count, arr_count); the code should
    be put before the first use of lval.
    """
    inner = array[1:-1]
    inner, nested_objects, obj_count = remove_objects(inner, obj_count)
    inner, nested_arrays, arr_count = remove_arrays(inner, arr_count)
    # Functions can also be defined in arrays, but Python cannot define a
    # function inside a literal, so they are removed and translated apart.
    # Hoisted functions are treated like inline ones here.
    inner, hoisted, inline = functions.remove_functions(inner, all_inline=True)
    assert not hoisted
    # Translate each element; an empty translation becomes None.
    # PyJsLvalInline names are left untouched by exp_translator.
    elements = [exp_translator(piece.replace('\n', '')) or 'None'
                for piece in argsplit(inner, ',')]
    code = '%s = Js([%s])\n' % (lval, ','.join(elements))
    # Prepend definitions of the functions/objects/arrays found inside.
    for func_name, func_info in inline.iteritems():
        func_block, func_args = func_info
        code = FUNC_TRANSLATOR(func_name, func_block, func_args) + code
    for nested_lval, nested_obj in nested_objects.iteritems():
        nested_def, obj_count, arr_count = translate_object(nested_obj, nested_lval, obj_count, arr_count)
        code = nested_def + code
    for nested_lval, nested_arr in nested_arrays.iteritems():
        nested_def, obj_count, arr_count = translate_array(nested_arr, nested_lval, obj_count, arr_count)
        code = nested_def + code
    return code, obj_count, arr_count
if __name__=='__main__':
    # Ad-hoc manual checks, executed only when this module is run directly.
    test = 'a = {404:{494:19}}; b = 303; if () {f={:}; { }}'
    #print remove_objects(test)
    #print list(bracket_split(' {}'))
    print
    print remove_arrays('typeof a&&!db.test(a)&&!ib[(bb.exec(a)||["",""], [][[5][5]])[1].toLowerCase()])')
    print is_object('', ')')

View File

@ -0,0 +1,4 @@
from jsparser import *
from utils import *
# maybe I will try rewriting my parser in the future... Tokenizer makes things much easier and faster, unfortunately I
# did not know anything about parsers when I was starting this project so I invented my own.

View File

@ -0,0 +1,143 @@
from flow import translate_flow
from constants import remove_constants, recover_constants
from objects import remove_objects, remove_arrays, translate_object, translate_array, set_func_translator
from functions import remove_functions, reset_inline_count
from jsparser import inject_before_lval, indent, dbg
TOP_GLOBAL = '''from js2py.pyjs import *\nvar = Scope( JS_BUILTINS )\nset_global_object(var)\n'''
def translate_js(js, top=TOP_GLOBAL):
    """Translate javascript source code into equivalent python code.

    js has to be a javascript source code. top is the python prologue the
    generated code starts with.
    """
    # Strip literals in stages: constants, then objects, then arrays.
    stripped, constants = remove_constants(js)
    stripped, objects, obj_count = remove_objects(stripped)
    stripped, arrays, arr_count = remove_arrays(stripped)
    # Remove the function definitions from what is left.
    reset_inline_count()
    stripped, hoisted, inline = remove_functions(stripped)
    # Translate flow and expressions.
    py_seed, to_register = translate_flow(stripped)
    # Register variables and hoisted functions.
    header = top + 'var.registers(%s)\n' % str(to_register + hoisted.keys())
    # Hoisted functions are defined at the very top of the generated code.
    hoisted_defs = ''
    for func_name, (func_block, func_args) in hoisted.iteritems():
        func_code = translate_func('PyJsLvalTempHoisted', func_block, func_args)
        func_code += 'PyJsLvalTempHoisted.func_name = %s\n' % repr(func_name)
        hoisted_defs += func_code + '\nvar.put(%s, PyJsLvalTempHoisted)\n' % repr(func_name)
    # Inline functions are injected just before the line using their lval.
    for func_name, (func_block, func_args) in inline.iteritems():
        func_code = translate_func(func_name, func_block, func_args)
        py_seed = inject_before_lval(py_seed, func_name.split('@')[0], func_code)
    # Hoisted definitions contain literals that still have to be recovered,
    # so they are added before the recovery steps below.
    py_seed = hoisted_defs + py_seed
    # Recover arrays
    for arr_lval, arr_code in arrays.iteritems():
        translation, obj_count, arr_count = translate_array(arr_code, arr_lval, obj_count, arr_count)
        py_seed = inject_before_lval(py_seed, arr_lval, translation)
    # Recover objects
    for obj_lval, obj_code in objects.iteritems():
        translation, obj_count, arr_count = translate_object(obj_code, obj_lval, obj_count, arr_count)
        py_seed = inject_before_lval(py_seed, obj_lval, translation)
    # Recover constants
    return header + recover_constants(py_seed, constants)
def translate_func(name, block, args):
    """Translates functions and all nested functions to Python code.

    name - name of that function (global functions will be available under
           var while inline will be available directly under this name)
    block - code of the function (*with* brackets {} )
    args - arguments that this function takes
    """
    real_name = ''
    if name.startswith('PyJsLvalInline'):
        # inline names have this format: LVAL_NAME@REAL_NAME
        name, real_name = name.split('@')
    arglist = ', '.join(args) + ', ' if args else ''
    code = '@Js\ndef %s(%sthis, arguments, var=var):\n' % (name, arglist)
    # Entries of the plain dictionary that seeds the function's local scope.
    entries = ["'this':this, 'arguments':arguments"]
    entries.extend('%s:%s' % (repr(arg), arg) for arg in args)
    if real_name:
        entries.append('%s:%s' % (repr(real_name), name))
    code += indent('var = Scope({%s}, var)\n' % ', '.join(entries))
    block, nested_hoisted, nested_inline = remove_functions(block)
    py_code, to_register = translate_flow(block)
    # Register variables declared with var and names of hoisted functions.
    to_register += nested_hoisted.keys()
    if to_register:
        code += indent('var.registers(%s)\n' % str(to_register))
    for hoisted_name, (hoisted_block, hoisted_args) in nested_hoisted.iteritems():
        hoisted_code = translate_func('PyJsLvalTempHoisted', hoisted_block, hoisted_args)
        # Definitions of hoisted functions go on the top of the body.
        code += indent(hoisted_code)
        code += indent('PyJsLvalTempHoisted.func_name = %s\n' % repr(hoisted_name))
        code += indent('var.put(%s, PyJsLvalTempHoisted)\n' % repr(hoisted_name))
    for inline_name, (inline_block, inline_args) in nested_inline.iteritems():
        inline_code = translate_func(inline_name, inline_block, inline_args)
        # Inject definitions of inline functions just before usage.
        py_code = inject_before_lval(py_code, inline_name.split('@')[0], inline_code)
    if py_code.strip():
        code += indent(py_code)
    return code
# objects.py cannot import translate_func itself, so hand it over here.
set_func_translator(translate_func)
#print inject_before_lval(' chuj\n moj\n lval\nelse\n', 'lval', 'siema\njestem piter\n')
import time
#print time.time()
#print translate_js('if (1) console.log("Hello, World!"); else if (5) console.log("Hello world?");')
#print time.time()
# Sample javascript program translated when this module is run directly.
t = """
var x = [1,2,3,4,5,6];
for (var e in x) {console.log(e); delete x[3];}
console.log(5 in [1,2,3,4,5]);
"""
# Python template the translated code is substituted into (at %s) before being passed to dbg.
SANDBOX ='''
import traceback
try:
%s
except:
print traceback.format_exc()
print
raw_input('Press Enter to quit')
'''
if __name__=='__main__':
    # test with jq if works then it really works :)
    #with open('jq.js', 'r') as f:
    #jq = f.read()
    #res = translate_js(jq)
    res = translate_js(t)
    dbg(SANDBOX% indent(res))
    print 'Done'

View File

@ -0,0 +1,80 @@
import sys
import unicodedata
from collections import defaultdict
def is_lval(t):
    """Check whether t has the shape of a JS identifier.

    Does not check whether t is restricted or internal.
    """
    if not t:
        return False
    if t[0] not in IDENTIFIER_START:
        return False
    return all(ch in IDENTIFIER_PART for ch in t[1:])
def is_valid_lval(t):
    """Checks whether t is valid JS identifier name (no keyword like var, function, if etc)
    Also returns false on internal"""
    if is_internal(t):
        return False
    if not is_lval(t):
        return False
    return t not in RESERVED_NAMES
def is_plval(t):
    """Tell whether t starts with the 'PyJsLval' prefix."""
    return t[:8] == 'PyJsLval'
def is_marker(t):
    """Tell whether t starts with the 'PyJsMarker' or 'PyJsConstant' prefix."""
    return t.startswith(('PyJsMarker', 'PyJsConstant'))
def is_internal(t):
    """Tell whether t is a translator-internal name.

    That is the scope variable 'var', a PyJsLval name or a marker/constant.
    """
    if t == 'var':  # var is a scope var
        return True
    return is_plval(t) or is_marker(t)
def is_property_accessor(t):
    """Tell whether t contains a '[' or a '.' character."""
    return any(symbol in t for symbol in ('[', '.'))
def is_reserved(t):
    """Tell whether t is one of RESERVED_NAMES."""
    reserved = t in RESERVED_NAMES
    return reserved
#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
# Individual code points used below to build the whitespace and line terminator sets.
BOM = u'\uFEFF'
ZWJ = u'\u200D'
ZWNJ = u'\u200C'
TAB = u'\u0009'
VT = u'\u000B'
FF = u'\u000C'
SP = u'\u0020'
NBSP = u'\u00A0'
LF = u'\u000A'
CR = u'\u000D'
LS = u'\u2028'
PS = u'\u2029'
U_CATEGORIES = defaultdict(list) # Thank you Martijn Pieters!
# Group every code point up to sys.maxunicode by its unicodedata category.
for c in map(unichr, range(sys.maxunicode + 1)):
    U_CATEGORIES[unicodedata.category(c)].append(c)
UNICODE_LETTER = set(U_CATEGORIES['Lu']+U_CATEGORIES['Ll']+
                     U_CATEGORIES['Lt']+U_CATEGORIES['Lm']+
                     U_CATEGORIES['Lo']+U_CATEGORIES['Nl'])
UNICODE_COMBINING_MARK = set(U_CATEGORIES['Mn']+U_CATEGORIES['Mc'])
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])
IDENTIFIER_START = UNICODE_LETTER.union({'$','_'}) # unicode escape sequences are not included here
IDENTIFIER_PART = IDENTIFIER_START.union(UNICODE_COMBINING_MARK).union(UNICODE_DIGIT).union(UNICODE_CONNECTOR_PUNCTUATION).union({ZWJ, ZWNJ})
USP = U_CATEGORIES['Zs']
KEYWORD = {'break', 'do', 'instanceof', 'typeof', 'case', 'else', 'new',
           'var', 'catch', 'finally', 'return', 'void', 'continue', 'for',
           'switch', 'while', 'debugger', 'function', 'this', 'with', 'default',
           'if', 'throw', 'delete', 'in', 'try'}
FUTURE_RESERVED_WORD = {'class', 'enum', 'extends', 'super', 'const', 'export', 'import'}
RESERVED_NAMES = KEYWORD.union(FUTURE_RESERVED_WORD).union({'null', 'false', 'true'})
WHITE = {TAB, VT, FF, SP, NBSP, BOM}.union(USP)
LINE_TERMINATOR = {LF, CR, LS, PS}
LLINE_TERMINATOR = list(LINE_TERMINATOR)
# NOTE(review): x is not referenced anywhere in the visible part of this file.
x = ''.join(WHITE)+''.join(LINE_TERMINATOR)
SPACE = WHITE.union(LINE_TERMINATOR)
LINE_TERMINATOR_SEQUENCE = LINE_TERMINATOR.union({CR+LF})

View File

@ -0,0 +1 @@
__author__ = 'Piotr Dabkowski'

View File

@ -0,0 +1,458 @@
import six
if six.PY3:
xrange = range
import functools
def to_arr(this):
    """Returns Python array from Js array"""
    collected = []
    for index in xrange(len(this)):
        collected.append(this.get(str(index)))
    return collected
# Arrays whose join() is currently running; join() renders an element that is
# in this set as '' so that an array containing itself does not recurse.
ARR_STACK = set({})
class ArrayPrototype:
    # Python implementations of the Array.prototype methods.
    # The methods take no self: they use the names `this` and `arguments`,
    # which are not defined in this file (presumably supplied by the js2py
    # runtime when a method is called - TODO confirm).

    def toString():
        """Call the object's own join(), or return '[object <Class>]' without one."""
        # this function is wrong but I will leave it here for debugging purposes.
        func = this.get('join')
        if not func.is_callable():
            @this.Js
            def func():
                return '[object %s]'%this.Class
        return func.call(this, ())

    def toLocaleString():
        """Join the toLocaleString() of every element with ','."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        # separator is simply a comma ','
        if not arr_len:
            return ''
        res = []
        for i in xrange(arr_len):
            element = array[str(i)]
            if element.is_undefined() or element.is_null():
                res.append('')
            else:
                cand = element.to_object()
                str_func = element.get('toLocaleString')
                if not str_func.is_callable():
                    raise this.MakeError('TypeError', 'toLocaleString method of item at index %d is not callable'%i)
                res.append(element.callprop('toLocaleString').value)
        return ','.join(res)

    def concat():
        """Return a new array made of this array followed by the arguments.

        Arguments whose Class is 'Array' are spread element by element.
        """
        array = this.to_object()
        A = this.Js([])
        items = [array]
        items.extend(to_arr(arguments))
        n = 0
        for E in items:
            if E.Class=='Array':
                k = 0
                e_len = len(E)
                while k<e_len:
                    if E.has_property(str(k)):
                        A.put(str(n), E.get(str(k)))
                    n+=1
                    k+=1
            else:
                A.put(str(n), E)
                n+=1
        return A

    def join(separator):
        """Join the string forms of the elements with separator (default ',')."""
        ARR_STACK.add(this)
        try:
            array = this.to_object()
            arr_len = array.get('length').to_uint32()
            separator = ',' if separator.is_undefined() else separator.to_string().value
            elems = []
            for e in xrange(arr_len):
                elem = array.get(str(e))
                if elem in ARR_STACK:
                    # the element is an array that is being joined right now
                    s = ''
                else:
                    s = elem.to_string().value
                elems.append(s if not (elem.is_undefined() or elem.is_null()) else '')
            return separator.join(elems)
        finally:
            # Always unregister, even when converting an element raised;
            # otherwise this array would be rendered as '' by later joins.
            ARR_STACK.discard(this)

    def pop(): #todo check
        """Remove and return the last element (None when the array is empty)."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not arr_len:
            array.put('length', this.Js(arr_len))
            return None
        ind = str(arr_len-1)
        element = array.get(ind)
        array.delete(ind)
        array.put('length', this.Js(arr_len-1))
        return element

    def push(item): # todo check
        """Append all arguments and return the new length."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        to_put = arguments.to_list()
        i = arr_len
        for i, e in enumerate(to_put, arr_len):
            array.put(str(i), e)
        if to_put:
            # i is the index of the last stored item, the length is one more
            i+=1
        array.put('length', this.Js(i))
        return i

    def reverse():
        """Reverse the array in place, keeping holes as holes, and return it."""
        array = this.to_object() # my own algorithm
        vals = to_arr(array)
        has_props = [array.has_property(str(e)) for e in xrange(len(array))]
        vals.reverse()
        has_props.reverse()
        for i, val in enumerate(vals):
            if has_props[i]:
                array.put(str(i), val)
            else:
                array.delete(str(i))
        return array

    def shift(): #todo check
        """Remove and return the first element (None when the array is empty)."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not arr_len:
            array.put('length', this.Js(0))
            return None
        first = array.get('0')
        for k in xrange(1, arr_len):
            from_s, to_s = str(k), str(k-1)
            if array.has_property(from_s):
                array.put(to_s, array.get(from_s))
            else:
                # hole in the source slot: the target slot becomes a hole too
                array.delete(to_s)
        array.delete(str(arr_len-1))
        # the length is a number, as in pop()
        array.put('length', this.Js(arr_len-1))
        return first

    def slice(start, end): # todo check
        """Return the elements from start up to (not including) end as a list.

        Negative indexes count from the end of the array.
        """
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        relative_start = start.to_int()
        k = max((arr_len + relative_start), 0) if relative_start<0 else min(relative_start, arr_len)
        relative_end = arr_len if end.is_undefined() else end.to_int()
        final = max((arr_len + relative_end), 0) if relative_end<0 else min(relative_end, arr_len)
        res = []
        n = 0
        while k<final:
            pk = str(k)
            if array.has_property(pk):
                res.append(array.get(pk))
            k += 1
            n += 1
        return res

    def sort(cmpfn):
        """Sort in place with cmpfn (or by string value) and return this."""
        if this.Class not in {'Array', 'Arguments'}:
            return this.to_object() # do nothing
        arr = []
        for i in xrange(len(this)):
            arr.append(this.get(six.text_type(i)))
        if not arr:
            return this
        if not cmpfn.is_callable():
            cmpfn = None
        cmp = lambda a,b: sort_compare(a, b, cmpfn)
        if six.PY3:
            # list.sort lost its cmp argument in Python 3
            key = functools.cmp_to_key(cmp)
            arr.sort(key=key)
        else:
            arr.sort(cmp=cmp)
        for i in xrange(len(arr)):
            this.put(six.text_type(i), arr[i])
        return this

    def splice(start, deleteCount):
        """Remove deleteCount elements at start, insert the extra arguments there.

        Returns an array with the removed elements. The numbered comments
        appear to follow the steps of the specification algorithm.
        """
        # 1-8
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        relative_start = start.to_int()
        actual_start = max((arr_len + relative_start),0) if relative_start<0 else min(relative_start, arr_len)
        actual_delete_count = min(max(deleteCount.to_int(),0 ), arr_len - actual_start)
        k = 0
        A = this.Js([])
        # 9
        while k<actual_delete_count:
            if array.has_property(str(actual_start+k)):
                A.put(str(k), array.get(str(actual_start+k)))
            k += 1
        # 10-11
        items = to_arr(arguments)[2:]
        items_len = len(items)
        # 12
        if items_len<actual_delete_count:
            # fewer items inserted than removed: move the tail towards the start
            k = actual_start
            while k < (arr_len-actual_delete_count):
                fr = str(k+actual_delete_count)
                to = str(k+items_len)
                if array.has_property(fr):
                    array.put(to, array.get(fr))
                else:
                    array.delete(to)
                k += 1
            k = arr_len
            while k > (arr_len - actual_delete_count + items_len):
                array.delete(str(k-1))
                k -= 1
        # 13
        elif items_len>actual_delete_count:
            # more items inserted than removed: move the tail towards the end
            k = arr_len - actual_delete_count
            while k>actual_start:
                fr = str(k + actual_delete_count - 1)
                to = str(k + items_len - 1)
                if array.has_property(fr):
                    array.put(to, array.get(fr))
                else:
                    array.delete(to)
                k -= 1
        # 14-17
        k = actual_start
        while items:
            E = items.pop(0)
            array.put(str(k), E)
            k += 1
        array.put('length', this.Js(arr_len - actual_delete_count + items_len))
        return A

    def unshift():
        """Insert all arguments at the front and return the new length."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        argCount = len(arguments)
        k = arr_len
        while k > 0:
            fr = str(k - 1)
            to = str(k + argCount - 1)
            if array.has_property(fr):
                array.put(to, array.get(fr))
            else:
                array.delete(to)
            k -= 1
        j = 0
        items = to_arr(arguments)
        while items:
            E = items.pop(0)
            array.put(str(j), E)
            j += 1
        array.put('length', this.Js(arr_len + argCount))
        return arr_len + argCount

    def indexOf(searchElement):
        """Return the first index strictly equal to searchElement, or -1.

        An optional second argument gives the index to start from.
        """
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if arr_len == 0:
            return -1
        if len(arguments)>1:
            n = arguments[1].to_int()
        else:
            n = 0
        if n >= arr_len:
            return -1
        if n >= 0:
            k = n
        else:
            k = arr_len - abs(n)
            if k < 0:
                k = 0
        while k < arr_len:
            if array.has_property(str(k)):
                elementK = array.get(str(k))
                if searchElement.strict_equality_comparison(elementK):
                    return k
            k += 1
        return -1

    def lastIndexOf(searchElement):
        """Return the last index strictly equal to searchElement, or -1.

        An optional second argument gives the index to search backwards from.
        """
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if arr_len == 0:
            return -1
        if len(arguments)>1:
            n = arguments[1].to_int()
        else:
            n = arr_len - 1
        if n >= 0:
            k = min(n, arr_len-1)
        else:
            k = arr_len - abs(n)
        while k >= 0:
            if array.has_property(str(k)):
                elementK = array.get(str(k))
                if searchElement.strict_equality_comparison(elementK):
                    return k
            k -= 1
        return -1

    def every(callbackfn):
        """Return True when callbackfn is truthy for every present element."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        T = arguments[1]  # the this value handed to callbackfn
        k = 0
        while k<arr_len:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                if not callbackfn.call(T, (kValue, this.Js(k), array)).to_boolean().value:
                    return False
            k += 1
        return True

    def some(callbackfn):
        """Return True when callbackfn is truthy for at least one present element."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        T = arguments[1]  # the this value handed to callbackfn
        k = 0
        while k<arr_len:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                if callbackfn.call(T, (kValue, this.Js(k), array)).to_boolean().value:
                    return True
            k += 1
        return False

    def forEach(callbackfn):
        """Call callbackfn(value, index, array) for every present element."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        T = arguments[1]  # the this value handed to callbackfn
        k = 0
        while k<arr_len:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                callbackfn.call(T, (kValue, this.Js(k), array))
            k+=1

    def map(callbackfn):
        """Return a new array holding callbackfn's result for every present element."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        T = arguments[1]  # the this value handed to callbackfn
        A = this.Js([])
        k = 0
        while k<arr_len:
            Pk = str(k)
            if array.has_property(Pk):
                kValue = array.get(Pk)
                mappedValue = callbackfn.call(T, (kValue, this.Js(k), array))
                A.define_own_property(Pk, {'value': mappedValue, 'writable': True,
                                           'enumerable': True, 'configurable': True})
            k += 1
        return A

    def filter(callbackfn):
        """Return the present elements for which callbackfn is truthy."""
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        T = arguments[1]  # the this value handed to callbackfn
        res = []
        k = 0
        while k<arr_len:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                if callbackfn.call(T, (kValue, this.Js(k), array)).to_boolean().value:
                    res.append(kValue)
            k += 1
        return res # converted to js array automatically

    def reduce(callbackfn):
        """Fold the array from the left with callbackfn.

        The optional second argument is the initial value; without it the
        first present element is used.
        """
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        if not arr_len and len(arguments)<2:
            raise this.MakeError('TypeError', 'Reduce of empty array with no initial value')
        k = 0
        if len(arguments)>1: # initial value present
            accumulator = arguments[1]
        else:
            # take the first element that is actually present as the start value
            kPresent = False
            while not kPresent and k<arr_len:
                kPresent = array.has_property(str(k))
                if kPresent:
                    accumulator = array.get(str(k))
                k += 1
            if not kPresent:
                raise this.MakeError('TypeError', 'Reduce of empty array with no initial value')
        while k<arr_len:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                accumulator = callbackfn.call(this.undefined, (accumulator, kValue, this.Js(k), array))
            k += 1
        return accumulator

    def reduceRight(callbackfn):
        """Fold the array from the right with callbackfn.

        The optional second argument is the initial value; without it the
        last present element is used.
        """
        array = this.to_object()
        arr_len = array.get('length').to_uint32()
        if not callbackfn.is_callable():
            raise this.MakeError('TypeError', 'callbackfn must be a function')
        if not arr_len and len(arguments)<2:
            raise this.MakeError('TypeError', 'Reduce of empty array with no initial value')
        k = arr_len - 1
        if len(arguments)>1: # initial value present
            accumulator = arguments[1]
        else:
            # take the last element that is actually present as the start value
            kPresent = False
            while not kPresent and k>=0:
                kPresent = array.has_property(str(k))
                if kPresent:
                    accumulator = array.get(str(k))
                k -= 1
            if not kPresent:
                raise this.MakeError('TypeError', 'Reduce of empty array with no initial value')
        while k>=0:
            if array.has_property(str(k)):
                kValue = array.get(str(k))
                accumulator = callbackfn.call(this.undefined, (accumulator, kValue, this.Js(k), array))
            k -= 1
        return accumulator
def sort_compare(a, b, comp):
    """Three-way comparison used by ArrayPrototype.sort.

    None sorts after everything else, then undefined values; the rest is
    ordered by comp (when it is not None) or by string value.
    Returns a negative number, zero or a positive number.
    """
    a_missing, b_missing = a is None, b is None
    if a_missing or b_missing:
        if a_missing and b_missing:
            return 0
        return 1 if a_missing else -1
    if a.is_undefined():
        return 0 if b.is_undefined() else 1
    if b.is_undefined():
        return -1
    if comp is not None:
        return comp.call(a.undefined, (a, b)).to_int()
    left, right = a.to_string(), b.to_string()
    if left < right:
        return -1
    if left > right:
        return 1
    return 0

View File

@ -0,0 +1,16 @@
class BooleanPrototype:
    # Python implementations of the Boolean.prototype methods; they use the
    # name `this`, which is not defined in this file.

    def toString():
        """Return 'true' or 'false'; raises a TypeError on a non-Boolean."""
        if this.Class!='Boolean':
            # MakeError is how the other prototypes build a TypeError;
            # this.Js(TypeError) wrapped the Python builtin instead.
            raise this.MakeError('TypeError', 'this must be a boolean')
        return 'true' if this.value else 'false'

    def valueOf():
        """Return the wrapped value; raises a TypeError on a non-Boolean."""
        if this.Class!='Boolean':
            raise this.MakeError('TypeError', 'this must be a boolean')
        return this.value

View File

@ -0,0 +1,10 @@
class ErrorPrototype:
    # Python implementation of Error.prototype; it uses the name `this`,
    # which is not defined in this file.

    def toString():
        """Return '<name>: <message>', leaving out whichever part is empty.

        A missing name defaults to 'Error' and a missing message to ''.
        """
        if this.TYPE!='Object':
            raise this.MakeError('TypeError', 'Error.prototype.toString called on non-object')
        raw_name = this.get('name')
        if raw_name.is_undefined():
            name = 'Error'
        else:
            name = raw_name.to_string().value
        raw_msg = this.get('message')
        if raw_msg.is_undefined():
            msg = ''
        else:
            msg = raw_msg.to_string().value
        # the separator is only wanted when both parts are non-empty
        separator = ': ' if (name and msg) else ''
        return name + separator + msg

View File

@ -0,0 +1,53 @@
# python 3 support
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
# todo fix apply and bind
class FunctionPrototype:
    # Python implementations of the Function.prototype methods; they use the
    # names `this` and `arguments`, which are not defined in this file.

    def toString():
        """Return 'function <name>(<args>) ' followed by the stored source."""
        if not this.is_callable():
            raise TypeError('toString is not generic!')
        args = ', '.join(this.code.__code__.co_varnames[:this.argcount])
        return 'function %s(%s) '%(this.func_name, args)+this.source

    def call():
        """Call this function: first argument is the this value, the rest are passed on."""
        arguments_ = arguments
        if not len(arguments):
            obj = this.Js(None)
        else:
            obj = arguments[0]
        if len(arguments)<=1:
            args = ()
        else:
            args = tuple([arguments_[e] for e in xrange(1, len(arguments_))])
        return this.call(obj, args)

    def apply():
        """Call this function: first argument is the this value, second the argument list."""
        if not len(arguments):
            obj = this.Js(None)
        else:
            obj = arguments[0]
        if len(arguments)<=1:
            args = ()
        else:
            appl = arguments[1]
            args = tuple([appl[e] for e in xrange(len(appl))])
        return this.call(obj, args)

    def bind(thisArg):
        """Return a bound function with thisArg and any extra arguments fixed."""
        target = this
        if not target.is_callable():
            # MakeError takes the error name first; it was missing here.
            raise this.MakeError('TypeError', 'Object must be callable in order to be used with bind method')
        if len(arguments) <= 1:
            args = ()
        else:
            args = tuple([arguments[e] for e in xrange(1, len(arguments))])
        return this.PyJsBoundFunction(target, thisArg, args)

View File

@ -0,0 +1,210 @@
import json
from lib.js2py.base import Js
indent = ''
# python 3 support
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
def parse(text):
    """JSON.parse: turn JSON text into a JS value.

    The optional second argument is a reviver; when it is callable the
    parsed value is passed through walk() and the revived value is returned.
    Raises a SyntaxError (built with this.MakeError) on invalid JSON.
    """
    reviver = arguments[1]
    s = text.to_string().value
    try:
        unfiltered = json.loads(s)
    except Exception:
        raise this.MakeError('SyntaxError', 'Could not parse JSON string - Invalid syntax')
    unfiltered = to_js(this, unfiltered)
    if reviver.is_callable():
        root = this.Js({'': unfiltered})
        # The result of walk is the result of parse; it used to be dropped,
        # so parse returned None whenever a reviver was given.
        return walk(root, '', reviver)
    return unfiltered
def stringify(value, replacer, space):
    """JSON.stringify: serialise value to JSON text.

    replacer may be a function (called for every key/value) or an array
    whose string/number entries list the property names to keep. space is
    a number of spaces (capped at 10) or a string (first 10 characters)
    used as the indentation unit.
    """
    global indent
    indent = ''
    stack = set([])
    undefined = this.undefined
    replacer_function = undefined
    property_list = undefined
    if replacer.is_object():
        if replacer.is_callable():
            replacer_function = replacer
        elif replacer.Class == 'Array':
            property_list = []
            for index in replacer:
                entry = replacer[index]
                kind = entry._type()
                if kind == 'Number':
                    item = entry.to_string()
                elif kind == 'String':
                    item = entry
                elif entry.is_object() and entry.Class in {'String', 'Number'}:
                    item = entry.to_string()
                else:
                    item = undefined
                # keep each usable name once, in order of first appearance
                if not item.is_undefined() and item.value not in property_list:
                    property_list.append(item.value)
    # Unwrap Number/String objects to primitives first.
    if space.is_object():
        if space.Class == 'Number':
            space = space.to_number()
        elif space.Class == 'String':
            space = space.to_string()
    space_type = space._type()
    if space_type == 'Number':
        space = this.Js(min(10, space.to_int()))
        gap = max(int(space.value), 0) * ' '
    elif space_type == 'String':
        gap = space.value[:10]
    else:
        gap = ''
    root = this.Js({'': value})
    return this.Js(Str('', root, replacer_function, property_list, gap, stack, space))
def Str(key, holder, replacer_function, property_list, gap, stack, space):
    """Serialise holder[key] to JSON text.

    Returns the text, or None when the value has no JSON representation
    (neither null, a boolean, a string, a number nor a non-callable object).
    """
    value = holder[key]
    # Give the value's own toJSON method the first chance to transform it.
    if value.is_object():
        to_json = value.get('toJSON')
        if to_json.is_callable():
            value = to_json.call(value, (key,))
    if not replacer_function.is_undefined():
        value = replacer_function.call(holder, (key, value))
    # Unwrap String/Number/Boolean objects to primitives.
    if value.is_object():
        if value.Class=='String':
            value = value.to_string()
        elif value.Class=='Number':
            value = value.to_number()
        elif value.Class=='Boolean':
            value = value.to_boolean()
    if value.is_null():
        return 'null'
    elif value.Class=='Boolean':
        return 'true' if value.value else 'false'
    elif value._type()=='String':
        return Quote(value)
    elif value._type()=='Number':
        # Only finite numbers have a JSON form: NaN and the infinities
        # become null (NaN used to be emitted as 'NaN').
        if value.is_nan() or value.is_infinity():
            return 'null'
        return value.to_string()
    if value.is_object() and not value.is_callable():
        if value.Class=='Array':
            return ja(value, stack, gap, property_list, replacer_function, space)
        else:
            return jo(value, stack, gap, property_list, replacer_function, space)
    return None # undefined
def jo(value, stack, gap, property_list, replacer_function, space):
    """Serialise a JS object to JSON text.

    stack holds the objects being serialised (cycle detection), gap is the
    indentation unit. property_list is either an undefined value, meaning
    "all properties of value", or the list of names built by stringify.
    Raises a TypeError (value.MakeError) on circular structures.
    """
    global indent
    if value in stack:
        raise value.MakeError('TypeError', 'Converting circular structure to JSON')
    stack.add(value)
    stepback = indent
    indent += gap
    # stringify passes a plain Python list when the replacer was an array;
    # a list has no is_undefined(), so probe for the method before calling it.
    is_undefined = getattr(property_list, 'is_undefined', None)
    if is_undefined is not None and is_undefined():
        k = [e.value for e in value]
    else:
        k = property_list
    partial = []
    for p in k:
        str_p = value.Js(Str(p, value, replacer_function, property_list, gap, stack, space))
        if not str_p.is_undefined():
            member = json.dumps(p) + ':' + (' ' if gap else '') + str_p.value # todo not sure here - what space character?
            partial.append(member)
    if not partial:
        final = '{}'
    else:
        if not gap:
            final = '{%s}' % ','.join(partial)
        else:
            sep = ',\n'+indent
            properties = sep.join(partial)
            final = '{\n'+indent+properties+'\n'+stepback+'}'
    stack.remove(value)
    indent = stepback
    return final
def ja(value, stack, gap, property_list, replacer_function, space):
    """Serialise a JS array to JSON text.

    stack holds the objects being serialised (cycle detection) and gap is
    the indentation unit. Elements without a JSON form are written as null.
    Raises a TypeError (value.MakeError) on circular structures.
    """
    global indent
    if value in stack:
        raise value.MakeError('TypeError', 'Converting circular structure to JSON')
    stack.add(value)
    outer_indent = indent
    indent += gap
    items = []
    for position in xrange(len(value)):
        rendered = value.Js(Str(str(position), value, replacer_function, property_list, gap, stack, space))
        items.append('null' if rendered.is_undefined() else rendered.value)
    if not items:
        text = '[]'
    elif not gap:
        text = '[%s]' % ','.join(items)
    else:
        # one element per line, indented one level deeper than the brackets
        text = '[\n' + indent + (',\n' + indent).join(items) + '\n' + outer_indent + ']'
    stack.remove(value)
    indent = outer_indent
    return text
def Quote(string):
    """Return a JS string holding the JSON-quoted form of ``string.value``."""
    quoted = json.dumps(string.value)
    return string.Js(quoted)
def to_js(this, d):
    """Convert the Python value ``d`` with ``this.Js``.

    For a dict every value is converted individually first and the
    resulting dict is then converted as a whole; any other value is passed
    to ``this.Js`` directly.
    """
    if not isinstance(d, dict):
        return this.Js(d)
    converted = {}
    for k, v in six.iteritems(d):
        converted[k] = this.Js(v)
    return this.Js(converted)
def walk(holder, name, reviver):
    """Apply ``reviver`` bottom-up to ``holder[name]`` (JSON.parse reviver walk).

    Every element/property of the value found at ``holder.get(name)`` is
    walked recursively first.  A child for which the reviver returns
    undefined is deleted from its parent; otherwise the child is replaced
    by the revived value.  Finally the reviver is called on ``holder`` with
    ``(name, val)`` and its result is returned.
    """
    val = holder.get(name)
    if val.Class=='Array':
        for i in xrange(len(val)):
            i = unicode(i)
            new_element = walk(val, i, reviver)
            if new_element.is_undefined():
                val.delete(i)
            else:
                # store the revived element back into the array being walked
                # (previously it was written into new_element itself, so the
                # reviver's result was silently dropped for array elements)
                val.put(i, new_element)
    elif val.is_object():
        # Snapshot the keys first: properties may be deleted from ``val``
        # inside the loop.  A comprehension is used instead of list() so
        # that no length lookup is performed on the JS object.
        keys = [key for key in val]
        for key in keys:
            new_element = walk(val, key, reviver)
            if new_element.is_undefined():
                val.delete(key)
            else:
                val.put(key, new_element)
    return reviver.call(holder, (name, val))
# The JSON object exposed to scripts: an empty JS object that receives the
# two functions `parse` and `stringify` as non-enumerable, writable and
# configurable own properties.
JSON = Js({})
JSON.define_own_property('parse', {'value': Js(parse),
'enumerable': False,
'writable': True,
'configurable': True})
JSON.define_own_property('stringify', {'value': Js(stringify),
'enumerable': False,
'writable': True,
'configurable': True})

View File

@ -0,0 +1,100 @@
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
# Maps a digit value (0..35) to the character used for it when a number is
# written in a radix between 2 and 36.
RADIX_SYMBOLS = dict(enumerate('0123456789abcdefghijklmnopqrstuvwxyz'))
def to_str_rep(num):
    """Return the decimal string form of the JS number ``num`` as a JS value.

    NaN gives 'NaN', infinities give 'Infinity' / '-Infinity', integral
    values are printed without a trailing '.0', and everything else uses
    Python's own text conversion of the underlying value.
    """
    if num.is_nan():
        return num.Js('NaN')
    if num.is_infinity():
        prefix = '-' if num.value<0 else ''
        return num.Js(prefix+'Infinity')
    if isinstance(num.value, (long, int)) or num.value.is_integer():
        # integral value: drop the fractional part so no '.0' is printed
        whole = int(num.value)
        return num.Js(unicode(whole))
    return num.Js(unicode(num.value))
class NumberPrototype:
    # Methods of Number.prototype.
    # NOTE(review): the methods take no `self`; `this` is not defined in this
    # file and is presumably supplied by the runtime when the methods are
    # registered -- confirm against base.py.

    def toString(radix):
        # Text form of the number in the given radix (default 10).  For a
        # radix other than 10 only the integer part (this.to_int()) is
        # converted.
        if this.Class!='Number':
            # message used to name valueOf by mistake
            raise this.MakeError('TypeError', 'Number.prototype.toString is not generic')
        if radix.is_undefined():
            return to_str_rep(this)
        r = radix.to_int()
        if r==10:
            return to_str_rep(this)
        if r not in xrange(2, 37):
            raise this.MakeError('RangeError', 'Number.prototype.toString() radix argument must be between 2 and 36')
        num = this.to_int()
        if num < 0:
            num = -num
            sign = '-'
        else:
            sign = ''
        res = ''
        while num:
            s = RADIX_SYMBOLS[num % r]
            num = num // r
            res = s + res
        return sign + (res if res else '0')

    def valueOf():
        # Primitive value of a Number object.
        if this.Class!='Number':
            raise this.MakeError('TypeError', 'Number.prototype.valueOf is not generic')
        return this.value

    def toLocaleString():
        # No locale handling: same as the plain string conversion.
        return this.to_string()

    def toFixed(fractionDigits):
        # Fixed-point notation with `fractionDigits` digits after the point.
        if this.Class!='Number':
            raise this.MakeError('TypeError', 'Number.prototype.toFixed called on incompatible receiver')
        digs = fractionDigits.to_int()
        if digs<0 or digs>20:
            raise this.MakeError('RangeError', 'toFixed() digits argument must be between 0 and 20')
        elif this.is_infinity():
            return 'Infinity' if this.value>0 else '-Infinity'
        elif this.is_nan():
            return 'NaN'
        return format(this.value, '-.%df'%digs)

    def toExponential(fractionDigits):
        # Exponential notation with `fractionDigits` digits after the point.
        if this.Class!='Number':
            raise this.MakeError('TypeError', 'Number.prototype.toExponential called on incompatible receiver')
        digs = fractionDigits.to_int()
        if digs<0 or digs>20:
            # message used to name toFixed() by mistake
            raise this.MakeError('RangeError', 'toExponential() digits argument must be between 0 and 20')
        elif this.is_infinity():
            return 'Infinity' if this.value>0 else '-Infinity'
        elif this.is_nan():
            return 'NaN'
        return format(this.value, '-.%de'%digs)

    def toPrecision(precision):
        # Text form with `precision` significant digits.
        if this.Class!='Number':
            raise this.MakeError('TypeError', 'Number.prototype.toPrecision called on incompatible receiver')
        if precision.is_undefined():
            # was this.to_String(), a misspelling of the to_string() method
            # used everywhere else in this class
            return this.to_string()
        prec = precision.to_int()
        if this.is_nan():
            return 'NaN'
        elif this.is_infinity():
            return 'Infinity' if this.value>0 else '-Infinity'
        # digits left for the fraction once the integer part is printed
        digs = prec - len(str(int(this.value)))
        if digs>=0:
            return format(this.value, '-.%df'%digs)
        else:
            # NOTE(review): when the integer part alone has more digits than
            # `prec`, JS switches to exponential notation; this branch still
            # prints fixed notation -- behaviour kept unchanged here.
            return format(this.value, '-.%df'%(prec-1))

View File

@ -0,0 +1,36 @@
class ObjectPrototype:
    # Methods of Object.prototype.
    # NOTE(review): `this` is not defined in this file; presumably supplied
    # by the runtime when these functions are registered -- confirm.

    def toString():
        return '[object %s]'%this.Class

    def valueOf():
        return this.to_object()

    def toLocaleString():
        return this.callprop('toString')

    def hasOwnProperty(prop):
        name = prop.to_string().value
        return this.get_own_property(name) is not None

    def isPrototypeOf(obj):
        # Per the specification a non-object argument is never "below" any
        # prototype, e.g. Object.prototype.isPrototypeOf.call((5).__proto__, 5)
        # gives false.
        if not obj.is_object():
            return False
        # climb the prototype chain of obj until `this` or the end is found
        while True:
            obj = obj.prototype
            if obj is None or obj.is_null():
                return False
            if obj is this:
                return True

    def propertyIsEnumerable(prop):
        name = prop.to_string().value
        cand = this.own.get(name)
        if cand is None:
            return False
        return cand.get('enumerable')

View File

@ -0,0 +1,43 @@
class RegExpPrototype:
    # Methods of RegExp.prototype.
    # NOTE(review): `this` is not defined in this file; presumably supplied
    # by the runtime when these functions are registered -- confirm.

    def toString():
        # '/source/flags'; an empty source is shown as the empty group (?:)
        v = this.value or '(?:)'
        flags = u''
        if this.glob:
            flags = flags + u'g'
        if this.ignore_case:
            flags = flags + u'i'
        if this.multiline:
            flags = flags + u'm'
        return u'/%s/'%v + flags

    def test(string):
        # True when exec finds a match (anything but the null object)
        outcome = Exec(this, string)
        return outcome is not this.null

    def exec2(string): # will be changed to exec in base.py. cant name it exec here
        return Exec(this, string)
def Exec(this, string):
    """Run the RegExp ``this`` against ``string`` (RegExp.prototype.exec).

    The search starts at ``lastIndex`` for global expressions and at 0
    otherwise, and tries successive start positions until ``this.match``
    succeeds.  When the start position leaves ``[0, len(string)]``,
    ``lastIndex`` is reset to 0 and the null object is returned.  On success
    a JS array of the full match followed by its groups is returned, with
    ``index`` and ``input`` properties set; for global expressions
    ``lastIndex`` is moved to the end of the match.

    Raises a JS TypeError when ``this`` is not a RegExp.
    """
    if this.Class!='RegExp':
        raise this.MakeError('TypeError', 'RegExp.prototype.exec is not generic!')
    string = string.to_string()
    length = len(string)
    position = this.get('lastIndex').to_int() if this.glob else 0
    while True:
        if position < 0 or position > length:
            this.put('lastIndex', this.Js(0))
            return this.null
        found = this.match(string.value, position)
        if found:
            break
        position += 1
    start, end = found.span()
    if this.glob:
        this.put('lastIndex', this.Js(end))
    captures = [found.group()]
    captures.extend(found.groups())
    arr = this.Js([this.Js(e) for e in captures])
    arr.put('index', this.Js(start))
    arr.put('input', string)
    return arr

View File

@ -0,0 +1,307 @@
# -*- coding: utf-8 -*-
from .jsregexp import Exec
import re
# The ten ASCII decimal digit characters (used when scanning $n / $nn in
# replacement templates).
DIGS = set('0123456789')
# Characters removed from both ends of a string by StringPrototype.trim.
WHITE = u"\u0009\u000A\u000B\u000C\u000D\u0020\u00A0\u1680\u180E\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF"
def replacement_template(rep, source, span, npar):
    """Fill a JS replacement template for a single match.

    rep    -- the replacement template (may contain $-patterns)
    source -- the whole string that was searched
    span   -- (start, end) of the match inside ``source``
    npar   -- sequence of captured groups (``None`` for an unmatched group)

    Supported patterns:
      $$        a literal '$'
      $&        the matched substring
      $`        the part of ``source`` before the match
      $'        the part of ``source`` after the match
      $n / $nn  the n-th capture; left untouched when n is 0 or greater
                than the number of captures

    Any other '$' (including a trailing one) is copied literally.
    """
    n = 0
    res = ''
    # the last character can never start a two-character pattern, so the
    # loop stops one short and the tail is appended afterwards
    while n < len(rep)-1:
        char = rep[n]
        if char=='$':
            follower = rep[n+1]
            if follower=='$':
                res += '$'
                n += 2
                continue
            elif follower=='&':
                # matched substring (this pattern used to be copied literally)
                res += source[span[0]:span[1]]
                n += 2
                continue
            elif follower=='`':
                # replace with string that is BEFORE match
                res += source[:span[0]]
                n += 2
                continue
            elif follower=='\'':
                # replace with string that is AFTER match
                res += source[span[1]:]
                n += 2
                continue
            elif '0' <= follower <= '9':
                dig = follower
                if n+2<len(rep) and '0' <= rep[n+2] <= '9':
                    dig += rep[n+2]
                num = int(dig)
                # we will not do any replacements if we dont have this npar or dig is 0
                if not num or num>len(npar):
                    res += '$'+dig
                else:
                    # None - undefined has to be replaced with ''
                    res += npar[num-1] if npar[num-1] else ''
                n += 1 + len(dig)
                continue
        res += char
        n += 1
    if n<len(rep):
        res += rep[-1]
    return res
###################################################
class StringPrototype:
    # Methods of String.prototype.
    # NOTE(review): the methods take no `self`; `this` and `arguments` are
    # not defined in this file and are presumably supplied by the runtime
    # when the methods are registered -- confirm against base.py.

    def toString():
        if this.Class!='String':
            raise this.MakeError('TypeError', 'String.prototype.toString is not generic')
        return this.value

    def valueOf():
        if this.Class!='String':
            raise this.MakeError('TypeError', 'String.prototype.valueOf is not generic')
        return this.value

    def charAt(pos):
        # Character at index pos, or the empty string when out of range.
        this.cok()
        pos = pos.to_int()
        s = this.to_string()
        if 0<= pos < len(s.value):
            char = s.value[pos]
            if char not in s.CHAR_BANK:
                s.Js(char) # add char to char bank
            return s.CHAR_BANK[char]
        return s.CHAR_BANK['']

    def charCodeAt(pos):
        # Code of the character at index pos, or NaN when out of range.
        this.cok()
        pos = pos.to_int()
        s = this.to_string()
        if 0<= pos < len(s.value):
            return s.Js(ord(s.value[pos]))
        return s.NaN

    def concat():
        # This string followed by the string form of every argument.
        this.cok()
        s = this.to_string()
        res = s.value
        for e in arguments.to_list():
            res += e.to_string().value
        return res

    def indexOf(searchString, position):
        this.cok()
        s = this.to_string().value
        search = searchString.to_string().value
        pos = position.to_int()
        return this.Js(s.find(search, min(max(pos, 0), len(s))) )

    def lastIndexOf(searchString, position):
        # Index of the last occurrence that STARTS at or before `position`.
        this.cok()
        s = this.to_string().value
        search = searchString.to_string().value
        pos = position.to_number()
        pos = 10**15 if pos.is_nan() else pos.to_int()
        # rfind's upper bound limits where a match may END, so a match that
        # starts at `pos` needs room for the whole search string.  (The old
        # bound pos+1 missed multi-character matches starting at pos.)
        end = min(max(pos, 0) + len(search), len(s))
        return s.rfind(search, 0, end)

    def localeCompare(that):
        this.cok()
        s = this.to_string()
        that = that.to_string()
        if s<that:
            return this.Js(-1)
        elif s>that:
            return this.Js(1)
        return this.Js(0)

    def match(regexp):
        # Non-global: same as exec.  Global: list of all matched substrings,
        # or null when there is none.
        this.cok()
        s = this.to_string()
        r = this.RegExp(regexp) if regexp.Class!='RegExp' else regexp
        if not r.glob:
            return Exec(r, s)
        r.put('lastIndex', this.Js(0))
        found = []
        previous_last_index = 0
        last_match = True
        while last_match:
            result = Exec(r, s)
            if result.is_null():
                last_match=False
            else:
                this_index = r.get('lastIndex').value
                if this_index==previous_last_index:
                    # empty match: step forward by hand so the loop advances
                    r.put('lastIndex', this.Js(this_index+1))
                    previous_last_index += 1
                else:
                    previous_last_index = this_index
                matchStr = result.get('0')
                found.append(matchStr)
        if not found:
            return this.null
        return found

    def replace(searchValue, replaceValue):
        # Replace the first match (string or non-global RegExp) or every
        # match (global RegExp).  replaceValue is either a template string
        # (see replacement_template) or a function called with
        # (match, group1, ..., offset, wholeString).
        this.cok()
        string = this.to_string()
        s = string.value
        res = ''
        if not replaceValue.is_callable():
            replaceValue = replaceValue.to_string().value
            func = False
        else:
            func = True
        # Replace all ( global )
        if searchValue.Class == 'RegExp' and searchValue.glob:
            last = 0
            for e in re.finditer(searchValue.pat, s):
                res += s[last:e.span()[0]]
                if func:
                    # prepare arguments for custom func (replaceValue); the
                    # offset is where the match STARTS (the end of the match
                    # used to be passed here)
                    args = (e.group(),) + e.groups() + (e.span()[0], string)
                    # convert all types to JS
                    args = tuple(map(this.Js, args))
                    res += replaceValue(*args).to_string().value
                else:
                    res += replacement_template(replaceValue, s, e.span(), e.groups())
                last = e.span()[1]
            res += s[last:]
            return this.Js(res)
        elif searchValue.Class=='RegExp':
            e = re.search(searchValue.pat, s)
            if e is None:
                return string
            span = e.span()
            pars = e.groups()
            match = e.group()
        else:
            match = searchValue.to_string().value
            ind = s.find(match)
            if ind==-1:
                return string
            span = ind, ind + len(match)
            pars = ()
        res = s[:span[0]]
        if func:
            # offset argument is the start of the match (was the end)
            args = (match,) + pars + (span[0], string)
            # convert all types to JS
            this_ = this
            args = tuple([this_.Js(x) for x in args])
            res += replaceValue(*args).to_string().value
        else:
            res += replacement_template(replaceValue, s, span, pars)
        res += s[span[1]:]
        return res

    def search(regexp):
        # Index of the first match, or -1.
        this.cok()
        string = this.to_string()
        if regexp.Class=='RegExp':
            rx = regexp
        else:
            rx = this.RegExp(regexp)
        res = re.search(rx.pat, string.value)
        if res is not None:
            return this.Js(res.span()[0])
        return -1

    def slice(start, end):
        this.cok()
        s = this.to_string()
        start = start.to_int()
        length = len(s.value)
        end = length if end.is_undefined() else end.to_int()
        # Python slicing is used directly for negative / out-of-range bounds
        return s.value[start:end]

    def split(separator, limit):
        # its a bit different that re.split!
        this.cok()
        S = this.to_string()
        s = S.value
        lim = 2**32-1 if limit.is_undefined() else limit.to_uint32()
        if not lim:
            return []
        if separator.is_undefined():
            return [s]
        len_s = len(s)
        res = []
        R = separator if separator.Class=='RegExp' else separator.to_string()
        if not len_s:
            # SplitMatch always returns an (end, captures) pair, so the end
            # index has to be inspected (the pair itself is never None):
            # an empty string that the separator does not match gives [''].
            if SplitMatch(s, 0, R)[0] is None:
                return [S]
            return []
        p = q = 0
        while q!=len_s:
            e, cap = SplitMatch(s, q, R)
            if e is None or e==p:
                # no separator here (or an empty match at the piece start)
                q += 1
                continue
            res.append(s[p:q])
            p = q = e
            if len(res)==lim:
                return res
            # captured groups of the separator are part of the result
            for element in cap:
                res.append(this.Js(element))
                if len(res)==lim:
                    return res
        res.append(s[p:])
        return res

    def substring(start, end):
        this.cok()
        s = this.to_string().value
        start = start.to_int()
        length = len(s)
        end = length if end.is_undefined() else end.to_int()
        fstart = min(max(start, 0), length)
        fend = min(max(end, 0), length)
        # bounds are clamped and swapped when given in the wrong order
        return this.Js(s[min(fstart, fend):max(fstart, fend)])

    def substr(start, length):
        r1 = this.to_string().value
        r2 = start.to_int()
        r3 = 10**20 if length.is_undefined() else length.to_int()
        r4 = len(r1)
        # a negative start counts from the end of the string
        r5 = r2 if r2>=0 else max(0, r2+r4)
        r6 = min(max(r3 ,0), r4 - r5)
        if r6<=0:
            return ''
        return r1[r5:r5+r6]

    def toLowerCase():
        this.cok()
        return this.Js(this.to_string().value.lower())

    def toLocaleLowerCase():
        this.cok()
        return this.Js(this.to_string().value.lower())

    def toUpperCase():
        this.cok()
        return this.Js(this.to_string().value.upper())

    def toLocaleUpperCase():
        this.cok()
        return this.Js(this.to_string().value.upper())

    def trim():
        this.cok()
        return this.Js(this.to_string().value.strip(WHITE))
def SplitMatch(s, q, R):
    """Try to match the separator ``R`` in ``s`` at position ``q``.

    ``s`` is a Python string, ``q`` a Python int and ``R`` either a JS
    RegExp or a JS string.  Returns ``(end, captures)`` where ``end`` is the
    index just past the match and ``captures`` the matched groups (always
    empty for a string separator), or ``(None, ())`` when there is no match.
    """
    if R.Class=='RegExp':
        found = R.match(s, q)
        if found is None:
            return None, ()
        return found.span()[1], found.groups()
    # R is just a string
    separator = R.value
    if not s[q:].startswith(separator):
        return None, ()
    return q+len(separator), ()

51
lib/js2py/pyjs.py Normal file
View File

@ -0,0 +1,51 @@
from .base import *
from .constructors.jsmath import Math
from .constructors.jsdate import Date
from .constructors.jsobject import Object
from .constructors.jsfunction import Function
from .constructors.jsstring import String
from .constructors.jsnumber import Number
from .constructors.jsboolean import Boolean
from .constructors.jsregexp import RegExp
from .constructors.jsarray import Array
from .prototypes.jsjson import JSON
from .host.console import console
from .host.jseval import Eval
from .host.jsfunctions import parseFloat, parseInt, isFinite, isNaN
# Everything needed to build the global environment of a script has been
# imported above.
# Names exported by `from ... import *`.
# NOTE(review): 'PyJsException', 'PyExceptionToJs' and 'JsToPyException' are
# listed twice; harmless, but the duplicates could be dropped.
__all__ = ['Js', 'PyJsComma', 'PyJsStrictEq', 'PyJsStrictNeq',
'PyJsException', 'PyJsBshift', 'Scope', 'PyExceptionToJs',
'JsToPyException', 'JS_BUILTINS', 'appengine', 'set_global_object',
'JsRegExp', 'PyJsException', 'PyExceptionToJs', 'JsToPyException', 'PyJsSwitchException']
# Names of module globals that are copied into the script scope below.
# these were defined in base.py
builtins = ('true','false','null','undefined','Infinity',
'NaN', 'console', 'String', 'Number', 'Boolean', 'RegExp',
'Math', 'Date', 'Object', 'Function', 'Array',
'parseFloat', 'parseInt', 'isFinite', 'isNaN')
# The error constructors, eval and JSON are added to the scope separately
# further down.
#Array, Function, JSON, Error is done later :)
# also some built in functions like eval...
def set_global_object(obj):
    """Make ``obj`` the global scope and expose it to scripts as ``this``.

    A ``This`` wrapper sharing ``obj``'s own-property table and prototype is
    created, stored as ``PyJs.GlobalObject`` and registered in ``obj`` under
    the name 'this'.
    """
    obj.IS_CHILD_SCOPE = False
    global_this = This({})
    global_this.own = obj.own
    global_this.prototype = obj.prototype
    PyJs.GlobalObject = global_this
    # make this available
    obj.register('this')
    obj.put('this', global_this)
# Collect every built-in by name from this module's globals ...
scope = {}
for e in builtins:
    scope[e] = globals()[e]
# Now add errors:
for name, error in ERRORS.items():
    scope[name] = error
#add eval
scope['eval'] = Eval
scope['JSON'] = JSON
# ... and publish an independent copy of the finished table.
JS_BUILTINS = dict(scope)

18
lib/js2py/todo Normal file
View File

@ -0,0 +1,18 @@
# TODO
Check Object Constructor
Complete list prototype
Fix function bind...
Fix regexp compile ???
Check prototypes:
String: replace, split
fix recursion error in special case in to_dict and to_list
Array constructor
escape, URL... etc
Check primitive.to_object()
var obj = new Number(0); var x = new Array(obj); x
Smarter import...

View File

@ -0,0 +1,38 @@
# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
# Public names of this package.
# NOTE(review): 'translate' is exported here, but the import below binds the
# name `trasnlate` and no `translate` is defined in the visible part of this
# module -- confirm which spelling translator.py actually provides.
__all__ = ['PyJsParser', 'Node', 'WrappingNode', 'node_to_dict', 'parse', 'translate_js', 'translate', 'syntax_tree_translate',
'DEFAULT_HEADER']
__author__ = 'Piotr Dabkowski'
__version__ = '2.2.0'
from .pyjsparser import PyJsParser, Node, WrappingNode, node_to_dict
from .translator import translate_js, trasnlate, syntax_tree_translate, DEFAULT_HEADER
def parse(javascript_code):
    """Return the syntax tree of ``javascript_code``.

    Convenience wrapper: equivalent to ``PyJsParser().parse(javascript_code)``.
    The tree has the same structure as the one produced by esprima.js.
    """
    return PyJsParser().parse(javascript_code)

View File

@ -0,0 +1,327 @@
import binascii
from .pyjsparser import PyJsParser
import six
if six.PY3:
basestring = str
long = int
xrange = range
unicode = str
# Module-wide PyJsParser instance.  Within the visible part of this file it
# is only referenced by the commented-out _unescape_string call in
# compose_regex.
REGEXP_CONVERTER = PyJsParser()
def to_hex(s):
    """Return the hexadecimal text of the UTF-8 encoding of ``s``."""
    # hexlify yields bytes, so the result is decoded back to text to behave
    # the same on Python 2 and Python 3
    encoded = s.encode('utf8')
    return binascii.hexlify(encoded).decode('utf8')
def indent(lines, ind=4):
    """Prefix every line of ``lines`` with ``ind`` spaces.

    Trailing spaces of the text (including the padding added after a final
    newline) are removed; the padding of the first line is always kept.
    """
    pad = ind*' '
    shifted = lines.replace('\n', '\n'+pad)
    return pad + shifted.rstrip(' ')
def inject_before_lval(source, lval, code):
    """Insert ``code`` as new line(s) directly above the line containing ``lval``.

    ``lval`` must occur exactly once in ``source``.  The inserted code is
    indented by the number of leading spaces of the line that holds
    ``lval``.

    Raises RuntimeError (after printing ``lval``) when ``lval`` occurs more
    than once or not at all.
    """
    occurrences = source.count(lval)
    if occurrences>1:
        print()
        print(lval)
        raise RuntimeError('To many lvals (%s)' % lval)
    if not occurrences:
        print()
        print(lval)
        assert lval not in source
        raise RuntimeError('No lval found "%s"' % lval)
    end = source.index(lval)
    # position of the newline that precedes the target line (-1 if none)
    line_break = source.rfind('\n', 0, end)
    # count the leading spaces of the target line
    width = 0
    while source[line_break+1+width]==' ':
        width += 1
    head = source[:line_break+1]
    tail = source[line_break+1:]
    return head + indent(code, width) + tail
def get_continue_label(label):
    """Return the CONTINUE_LABEL template filled with the hex form of ``label``."""
    encoded = to_hex(label)
    return CONTINUE_LABEL%encoded


def get_break_label(label):
    """Return the BREAK_LABEL template filled with the hex form of ``label``."""
    encoded = to_hex(label)
    return BREAK_LABEL%encoded
def is_valid_py_name(name):
    """Return True when ``name`` can be used as an assignment target in Python.

    The check compiles the statement ``<name> = 11``.  Note that this also
    accepts non-identifier targets such as ``a.b``; reserved words and
    malformed names are rejected.
    """
    try:
        compile(name+' = 11', 'a','exec')
    except (SyntaxError, TypeError, ValueError):
        # SyntaxError: not a valid target; TypeError/ValueError: not a
        # string or contains characters compile() refuses (e.g. NUL).
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return False
    return True
# NOTE(review): this repeats the identical `indent` defined earlier in this
# file; being the later definition, it is the one in effect after import.
def indent(lines, ind=4):
    """Indent every line of ``lines`` by ``ind`` spaces.

    Trailing spaces of the text are stripped; the padding of the first line
    is always kept.
    """
    padding = ' '*ind
    body = lines.replace('\n', '\n'+padding).rstrip(' ')
    return padding + body
def compose_regex(val):
    """Return the literal ``/body/flags`` for the pair ``val = (body, flags)``."""
    body, modifiers = val
    #body = REGEXP_CONVERTER._unescape_string(body)
    return u'/%s/%s' % (body, modifiers)
def float_repr(f):
    """Return ``repr`` of ``f``, written as an integer when ``f`` is integral."""
    whole = int(f)
    return repr(whole) if whole==f else repr(f)
def argsplit(args, sep=','):
    """Split ``args`` on ``sep``, ignoring separators nested in brackets.

    Used for JS argument lists, array elements, object members and the
    like.  Pass ``args`` *without* the surrounding brackets.  Separators
    inside (), [] or {} are not split on.  Always returns at least one
    piece.
    """
    openers = ('(', '[', '{')
    pieces = []
    consumed = 0      # characters of args covered by earlier chunks
    piece_start = 0   # index in args where the current piece begins
    for chunk in bracket_split(args, brackets=['()', '[]', '{}']):
        if chunk[0] not in openers:
            # only text outside brackets is searched for the separator
            for offset, char in enumerate(chunk):
                if char==sep:
                    cut = consumed + offset
                    pieces.append(args[piece_start:cut])
                    piece_start = cut + 1
        consumed += len(chunk)
    pieces.append(args[piece_start:])
    return pieces
def bracket_split(source, brackets=('()','{}','[]'), strip=False):
    """Yield ``source`` cut into top-level bracketed and unbracketed chunks.

    Each entry of ``brackets`` is an opener/closer pair.  A bracketed chunk
    runs from an opener to its matching closer (nesting of the same pair is
    tracked); with ``strip=True`` the outer pair is removed from it.  Empty
    unbracketed chunks are never yielded (an empty bracket content can only
    be yielded when ``strip=True``).  Text after the last complete bracket,
    including an unclosed bracket, is yielded as one final chunk.
    """
    # opener -> pair; the first pair wins when an opener is listed twice
    pairs = {}
    for pair in brackets:
        pairs.setdefault(pair[0], pair)
    depth = 0
    last = 0
    for n, ch in enumerate(source):
        if depth:
            if ch==b_start:
                depth += 1
            elif ch==b_end:
                depth -= 1
                if not depth:
                    before = source[last:start]
                    if before:
                        yield before
                    last = n+1
                    yield source[start+strip:n+1-strip]
        elif ch in pairs:
            depth = 1
            start = n
            b_start, b_end = pairs[ch]
    rest = source[last:]
    if rest:
        yield rest
# Translators for the comma, logical, bitwise and strict-equality operators.
# Each takes the already translated operands as text and returns the text of
# the combined Python expression.

def js_comma(a, b):
    return ''.join(('PyJsComma(', a, ',', b, ')'))


def js_or(a, b):
    return ''.join(('(', a, ' or ', b, ')'))


def js_bor(a, b):
    return ''.join(('(', a, '|', b, ')'))


def js_bxor(a, b):
    return ''.join(('(', a, '^', b, ')'))


def js_band(a, b):
    return ''.join(('(', a, '&', b, ')'))


def js_and(a, b):
    return ''.join(('(', a, ' and ', b, ')'))


def js_strict_eq(a, b):
    return ''.join(('PyJsStrictEq(', a, ',', b, ')'))


def js_strict_neq(a, b):
    return ''.join(('PyJsStrictNeq(', a, ',', b, ')'))
# Translators for the equality, relational, `in` and `instanceof` operators.
# Python chains comparisons (2==2==True is False) while JS evaluates them
# left to right (true), so every comparison is wrapped in brackets.

def js_abstract_eq(a, b):
    return ''.join(('(', a, '==', b, ')'))


def js_abstract_neq(a, b):
    return ''.join(('(', a, '!=', b, ')'))


def js_lt(a, b):
    return ''.join(('(', a, '<', b, ')'))


def js_le(a, b):
    return ''.join(('(', a, '<=', b, ')'))


def js_ge(a, b):
    return ''.join(('(', a, '>=', b, ')'))


def js_gt(a, b):
    return ''.join(('(', a, '>', b, ')'))


def js_in(a, b):
    # `a in b` becomes a method call on the right operand
    return ''.join((b, '.contains(', a, ')'))


def js_instanceof(a, b):
    return ''.join((a, '.instanceof(', b, ')'))
# Translators for the shift and arithmetic operators.  Each takes the
# already translated operands as text and returns the combined expression.

def js_lshift(a, b):
    return ''.join(('(', a, '<<', b, ')'))


def js_rshift(a, b):
    return ''.join(('(', a, '>>', b, ')'))


def js_shit(a, b):
    # used for the >>> operator (see the BSHIFTS table)
    return ''.join(('PyJsBshift(', a, ',', b, ')'))


def js_add(a, b): # To simplify later process of converting unary operators + and ++
    operands = (a, b)
    return '(%s+%s)'%operands


def js_sub(a, b): # To simplify
    operands = (a, b)
    return '(%s-%s)'%operands


def js_mul(a, b):
    return ''.join(('(', a, '*', b, ')'))


def js_div(a, b):
    return ''.join(('(', a, '/', b, ')'))


def js_mod(a, b):
    return ''.join(('(', a, '%', b, ')'))
def js_typeof(a):
    """Translate ``typeof a``.

    When ``a`` is exactly one ``var.get(...)`` call, ``throw=False`` is
    added to that call's arguments before ``.typeof()`` is applied;
    otherwise ``.typeof()`` is simply appended.
    """
    parts = list(bracket_split(a, ('()',)))
    is_plain_lookup = len(parts)==2 and parts[0]=='var.get'
    if not is_plain_lookup:
        return a+'.typeof()'
    callee, call_args = parts
    # drop the closing bracket, then re-close after the extra argument
    return callee+call_args[:-1]+',throw=False).typeof()'
def js_void(a):
    """Translate ``void a``: evaluate ``a``, then yield ``Js(None)``."""
    template = 'PyJsComma(%s, Js(None))'
    return template % a
def js_new(a):
    """Translate ``new a`` by turning the first suitable call into ``.create``.

    ``a`` is cut into top-level ()-chunks.  For the first bracket chunk
    that qualifies:
      * preceded by text ending in '.get' and being the last chunk:
        ``.create()`` is appended to the whole expression;
      * preceded by another bracket chunk: that call becomes ``.create(...)``;
      * preceded by '.callprop': the first argument becomes a ``.get(...)``
        and the remaining arguments go to ``.create(...)``.
    When nothing qualifies, ``.create()`` is appended.
    """
    cands = list(bracket_split(a, ('()',)))
    if len(cands) < 2:
        return a + '.create()'
    for n, c in enumerate(cands):
        if c[0]!='(':
            continue
        before = cands[n-1]
        rest = ''.join(cands[n+1:])
        if before.endswith('.get') and n+1>=len(cands): # last get operation.
            return a + '.create()'
        elif before[0]=='(':
            return ''.join(cands[:n])+'.create' + c + rest
        elif before=='.callprop':
            beg = ''.join(cands[:n-1])
            args = argsplit(c[1:-1],',')
            prop = args[0]
            new_args = ','.join(args[1:])
            create = '.get(%s).create(%s)' % (prop, new_args)
            return beg + create + rest
    return a + '.create()'
def js_delete(a):
    """Translate ``delete a`` by replacing the final ``.get(...)`` with ``.delete(...)``.

    Prints ``a`` and raises SyntaxError when the expression does not end in
    a ``.get`` call.
    """
    #replace last get with delete.
    chunks = list(bracket_split(a, ['()']))
    beg = ''.join(chunks[:-1]).strip()
    arglist = chunks[-1].strip() #strips just to make sure... I will remove it later
    if not beg.endswith('.get'):
        print(a)
        raise SyntaxError('Invalid delete operation')
    return beg[:-3]+'delete'+arglist
# Translators for the unary -, +, ~ and ! operators.

def js_neg(a):
    return ''.join(('(-', a, ')'))


def js_pos(a):
    return ''.join(('(+', a, ')'))


def js_inv(a):
    return ''.join(('(~', a, ')'))


def js_not(a):
    # logical not is a method call on the operand
    return ''.join((a, '.neg()'))
def js_postfix(a, inc, post):
    """Translate ``++`` / ``--`` applied to the lookup expression ``a``.

    inc  -- True for ++, False for --
    post -- True for the postfix form (the old value is the result)

    The final ``...get(args)`` of ``a`` becomes ``...put(args, new_value)``.
    For the postfix form the step is undone again on the result of the
    put.  Raises SyntaxError when ``a`` does not end in a ``get`` call.
    """
    bra = list(bracket_split(a, ('()',)))
    meth = bra[-2]
    if not meth.endswith('get'):
        raise SyntaxError('Invalid ++ or -- operation.')
    step = '+' if inc else '-'
    bra[-2] = meth[:-3] + 'put'
    bra[-1] = '(%s,Js(%s.to_number())%sJs(1))' % (bra[-1][1:-1], a, step)
    res = ''.join(bra)
    if not post:
        return res
    undo = '-' if inc else '+'
    return '(%s%sJs(1))' % (res, undo)
# The four increment/decrement forms, expressed through js_postfix.

def js_pre_inc(a):
    return js_postfix(a, inc=True, post=False)


def js_post_inc(a):
    return js_postfix(a, inc=True, post=True)


def js_pre_dec(a):
    return js_postfix(a, inc=False, post=False)


def js_post_dec(a):
    return js_postfix(a, inc=False, post=True)
# Name templates for labelled continue / break; filled with the hex-encoded
# label by get_continue_label / get_break_label.
CONTINUE_LABEL = 'JS_CONTINUE_LABEL_%s'
BREAK_LABEL = 'JS_BREAK_LABEL_%s'
# Source-text templates of generated Python code.
# NOTE(review): NAME, HOLDER and BLOCK look like placeholders substituted by
# the translator -- confirm against the code that uses TRY_CATCH.
PREPARE = '''HOLDER = var.own.get(NAME)\nvar.force_own_put(NAME, PyExceptionToJs(PyJsTempException))\n'''
RESTORE = '''if HOLDER is not None:\n var.own[NAME] = HOLDER\nelse:\n del var.own[NAME]\ndel HOLDER\n'''
# PREPARE, then a try block whose finally clause is the indented RESTORE code.
TRY_CATCH = '''%stry:\nBLOCKfinally:\n%s''' % (PREPARE, indent(RESTORE))
# Operator tables: JS operator text -> translator function defined above.
# One table per group of operators; BINARY below merges all binary groups.
OR = {'||': js_or}
AND = {'&&': js_and}
BOR = {'|': js_bor}
BXOR = {'^': js_bxor}
BAND = {'&': js_band}
EQS = {'===': js_strict_eq,
'!==': js_strict_neq,
'==': js_abstract_eq, # we need == and != too. Read a note above method
'!=': js_abstract_neq}
#Since JS does not have chained comparisons we need to implement all cmp methods.
COMPS = {'<': js_lt,
'<=': js_le,
'>=': js_ge,
'>': js_gt,
'instanceof': js_instanceof, #todo change to validitate
'in': js_in}
BSHIFTS = {'<<': js_lshift,
'>>': js_rshift,
'>>>': js_shit}
ADDS = {'+': js_add,
'-': js_sub}
MULTS = {'*': js_mul,
'/': js_div,
'%': js_mod}
# All binary operators in a single lookup table.
BINARY = {}
BINARY.update(ADDS)
BINARY.update(MULTS)
BINARY.update(BSHIFTS)
BINARY.update(COMPS)
BINARY.update(EQS)
BINARY.update(BAND)
BINARY.update(BXOR)
BINARY.update(BOR)
BINARY.update(AND)
BINARY.update(OR)
# Unary operators.  '++' and '--' map to None because each has a prefix and
# a postfix translator (js_pre_* / js_post_*).
#Note they dont contain ++ and -- methods because they both have 2 different methods
# correct method will be found automatically in translate function
UNARY = {'typeof': js_typeof,
'void': js_void,
'new': js_new,
'delete': js_delete,
'!': js_not,
'-': js_neg,
'+': js_pos,
'~': js_inv,
'++': None,
'--': None
}

View File

@ -0,0 +1,219 @@
from pyjsparserdata import *
# Character tables for the regular-expression parser below.
# NOTE(review): REGEXP_SPECIAL_SINGLE is not referenced in the visible part
# of this file.
REGEXP_SPECIAL_SINGLE = {'\\', '^', '$', '*', '+', '?', '.'}
# Characters that cannot stand for themselves in a pattern (see
# isPatternCharacter).
NOT_PATTERN_CHARS = {'^', '$', '\\', '.', '*', '+', '?', '(', ')', '[', ']', '|'} # what about '{', '}', ???
# Letters that form a character-class escape after a backslash (\d, \S, ...).
CHAR_CLASS_ESCAPE = {'d', 'D', 's', 'S', 'w', 'W'}
# Letters that form a control escape after a backslash (\n, \t, ...).
CONTROL_ESCAPE_CHARS = {'f', 'n', 'r', 't', 'v'}
# ASCII letters (presumably the letters allowed after \c -- confirm).
CONTROL_LETTERS = {'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't',
'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'}
def SpecialChar(char):
    """Return the syntax-tree node for the special character ``char``."""
    node = {}
    node['type'] = 'SpecialChar'
    node['content'] = char
    return node
def isPatternCharacter(char):
    """Return True when ``char`` stands for itself in a pattern."""
    is_special = char in NOT_PATTERN_CHARS
    return not is_special
class JsRegExpParser:
    """Recursive-descent parser turning JS regular-expression source into a
    tree of plain dicts ('Pattern' > 'Disjunction' > 'Alternative' > 'Term'
    or 'Assertion' > 'Atom').

    The parser is unfinished.
    NOTE(review): parseCharacterClass is called from parseAtom but is not
    defined in the visible source, so a pattern containing '[' still fails
    with AttributeError.
    """

    def __init__(self, source, flags):
        self.source = source
        self.flags = flags
        self.index = 0              # position of the next unread character
        self.length = len(source)
        self.lineNumber = 0
        self.lineStart = 0

    def throwUnexpected(self, expected=None):
        """Raise SyntaxError describing the input at the current position.

        This method was called but never defined, so every syntax error
        surfaced as AttributeError.  ``expected`` optionally names the
        character the caller was looking for.
        """
        if self.isEOF():
            found = 'end of pattern'
        else:
            found = repr(self.source[self.index])
        message = 'Invalid regular expression /%s/: unexpected %s at index %d' % (
            self.source, found, self.index)
        if expected is not None:
            message += ' (expected %r)' % expected
        raise SyntaxError(message)

    def parsePattern(self):
        '''Parse the whole source and return the 'Pattern' node.'''
        pattern = {'type': 'Pattern',
                   'contents': self.parseDisjunction()}
        if not self.isEOF():
            # a disjunction only stops early on an unbalanced ')'
            self.throwUnexpected()
        return pattern

    def parseDisjunction(self):
        """Parse alternatives separated by '|' up to EOF or a closing ')'."""
        alternatives = [self.parseAlternative()]
        while self.follows('|'):
            self.index += 1
            alternatives.append(self.parseAlternative())
        return {'type': 'Disjunction',
                'contents': alternatives}

    def isEOF(self):
        return self.index>=self.length

    def expect_character(self, character):
        """Consume ``character`` or raise SyntaxError."""
        # the EOF test avoids an IndexError on truncated patterns
        if self.isEOF() or self.source[self.index]!=character:
            self.throwUnexpected(character)
        self.index += 1

    def parseAlternative(self):
        """Parse terms until '|', ')' or EOF."""
        contents = []
        # ')' must end the alternative as well: it belongs to the enclosing
        # group.  Without this check the parser never advanced past ')' and
        # looped forever on any group or lookahead.
        while not self.isEOF() and self.source[self.index] not in ('|', ')'):
            contents.append(self.parseTerm())
        return {'type': 'Alternative',
                'contents': contents}

    def follows(self, chars):
        """Return True when the unread input starts with ``chars``."""
        for i, c in enumerate(chars):
            if self.index+i>=self.length or self.source[self.index+i] != c:
                return False
        return True

    def parseTerm(self):
        assertion = self.parseAssertion()
        if assertion:
            return assertion
        else:
            return {'type': 'Term',
                    'contents': self.parseAtom()} # quantifier will go inside atom!

    def parseAssertion(self):
        """Parse ^, $, \\b, \\B or a lookahead; return None if none follows."""
        if self.follows('$'):
            content = SpecialChar('$')
            self.index += 1
        elif self.follows('^'):
            content = SpecialChar('^')
            self.index += 1
        elif self.follows('\\b'):
            content = SpecialChar('\\b')
            self.index += 2
        elif self.follows('\\B'):
            content = SpecialChar('\\B')
            self.index += 2
        elif self.follows('(?='):
            self.index += 3
            dis = self.parseDisjunction()
            self.expect_character(')')
            content = {'type': 'Lookached',
                       'contents': dis,
                       'negated': False}
        elif self.follows('(?!'):
            self.index += 3
            dis = self.parseDisjunction()
            self.expect_character(')')
            content = {'type': 'Lookached',
                       'contents': dis,
                       'negated': True}
        else:
            return None
        return {'type': 'Assertion',
                'content': content}

    def parseAtom(self):
        """Parse one atom together with its optional quantifier."""
        if self.follows('.'):
            content = SpecialChar('.')
            self.index += 1
        elif self.follows('\\'):
            self.index += 1
            content = self.parseAtomEscape()
        elif self.follows('['):
            content = self.parseCharacterClass()
        elif self.follows('(?:'):
            self.index += 3
            dis = self.parseDisjunction()
            self.expect_character(')')
            # keep the parsed group (it used to be replaced by a placeholder)
            content = {'type': 'Group',
                       'contents': dis,
                       'capturing': False}
        elif self.follows('('):
            self.index += 1
            dis = self.parseDisjunction()
            self.expect_character(')')
            content = {'type': 'Group',
                       'contents': dis,
                       'capturing': True}
        elif isPatternCharacter(self.source[self.index]):
            content = self.source[self.index]
            self.index += 1
        else:
            # e.g. a quantifier with nothing to repeat; returning without
            # consuming input here made the caller loop forever
            self.throwUnexpected()
        quantifier = self.parseQuantifier()
        return {'type': 'Atom',
                'content': content,
                'quantifier': quantifier}

    def parseQuantifier(self):
        """Parse an optional quantifier with its optional lazy '?' suffix."""
        prefix = self.parseQuantifierPrefix()
        if not prefix:
            return None
        greedy = True
        if self.follows('?'):
            self.index += 1
            greedy = False
        return {'type': 'Quantifier',
                'contents': prefix,
                'greedy': greedy}

    def parseQuantifierPrefix(self):
        """Return '+', '?', '*', a ``(min, max_or_None)`` pair, or None."""
        if self.isEOF():
            return None
        if self.follows('+'):
            content = '+'
            self.index += 1
        elif self.follows('?'):
            content = '?'
            self.index += 1
        elif self.follows('*'):
            content = '*'
            self.index += 1
        elif self.follows('{'): # try matching otherwise return None and restore the state
            i = self.index
            self.index += 1
            digs1 = self.scanDecimalDigs()
            # if no minimal number of digs provided then return no quantifier
            if not digs1:
                self.index = i
                return None
            # scan char limit if provided
            if self.follows(','):
                self.index += 1
                digs2 = self.scanDecimalDigs()
            else:
                digs2 = ''
            # must be valid!
            if not self.follows('}'):
                self.index = i
                return None
            else:
                self.expect_character('}')
                content = int(digs1), int(digs2) if digs2 else None
        else:
            return None
        return content

    def parseAtomEscape(self):
        """Parse what follows a backslash (the backslash is already consumed)."""
        if self.isEOF():
            # pattern ends with a lone backslash
            self.throwUnexpected()
        ch = self.source[self.index]
        # ch is a string, so it has to be compared with '0' (the old test
        # against the int 0 was always true)
        if isDecimalDigit(ch) and ch!='0':
            digs = self.scanDecimalDigs()
            # back reference, kept in source form like the other escapes
            return SpecialChar('\\' + digs)
        elif ch in CHAR_CLASS_ESCAPE:
            self.index += 1
            return SpecialChar('\\' + ch)
        else:
            return self.parseCharacterEscape()

    def parseCharacterEscape(self):
        """Parse a control escape, \\cX, or an identity escape."""
        ch = self.source[self.index]
        if ch in CONTROL_ESCAPE_CHARS:
            # consume the letter, otherwise it is parsed again as a literal
            self.index += 1
            return SpecialChar('\\' + ch)
        if ch=='c' and self.index+1<self.length and self.source[self.index+1] in CONTROL_LETTERS:
            letter = self.source[self.index+1]
            self.index += 2
            return SpecialChar('\\c' + letter)
        # anything else: the escaped character itself, kept in source form
        self.index += 1
        return SpecialChar('\\' + ch)

    def scanDecimalDigs(self):
        """Consume a run of decimal digits and return it (possibly empty)."""
        s = self.index
        while not self.isEOF() and isDecimalDigit(self.source[self.index]):
            self.index += 1
        return self.source[s:self.index]
if __name__ == '__main__':
    # Manual smoke test.  It used to run unconditionally, i.e. on every
    # import of this module.
    a = JsRegExpParser('a(?=x)', '')
    print(a.parsePattern())

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,297 @@
# The MIT License
#
# Copyright 2014, 2015 Piotr Dabkowski
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the 'Software'),
# to deal in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject
# to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or
# substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
# OR THE USE OR OTHER DEALINGS IN THE SOFTWARE
from __future__ import unicode_literals
import sys
import unicodedata
import six
from collections import defaultdict
if six.PY3:
unichr = chr
xrange = range
unicode = str
# Token kinds produced by the tokenizer, name -> numeric id.
token = {
'BooleanLiteral': 1,
'EOF': 2,
'Identifier': 3,
'Keyword': 4,
'NullLiteral': 5,
'NumericLiteral': 6,
'Punctuator': 7,
'StringLiteral': 8,
'RegularExpression': 9,
'Template': 10
}
# Reverse lookup: numeric id -> token kind name.
TokenName = {v:k for k,v in token.items()}
# Keywords and punctuators listed by category (brackets/keywords,
# assignment operators, binary/unary operators).
# NOTE(review): presumably the tokens after which a following function
# keyword starts a function expression -- confirm against the parser.
FnExprTokens = ['(', '{', '[', 'in', 'typeof', 'instanceof', 'new',
'return', 'case', 'delete', 'throw', 'void',
# assignment operators
'=', '+=', '-=', '*=', '/=', '%=', '<<=', '>>=', '>>>=',
'&=', '|=', '^=', ',',
# binary/unary operators
'+', '-', '*', '/', '%', '++', '--', '<<', '>>', '>>>', '&',
'|', '^', '!', '~', '&&', '||', '?', ':', '===', '==', '>=',
'<=', '<', '>', '!=', '!==']
# Names of all syntax-tree node types.  Each name is copied onto the Syntax
# class as an attribute holding its own name (see the setattr loop below).
syntax= {'AssignmentExpression',
'AssignmentPattern',
'ArrayExpression',
'ArrayPattern',
'ArrowFunctionExpression',
'BlockStatement',
'BinaryExpression',
'BreakStatement',
'CallExpression',
'CatchClause',
'ClassBody',
'ClassDeclaration',
'ClassExpression',
'ConditionalExpression',
'ContinueStatement',
'DoWhileStatement',
'DebuggerStatement',
'EmptyStatement',
'ExportAllDeclaration',
'ExportDefaultDeclaration',
'ExportNamedDeclaration',
'ExportSpecifier',
'ExpressionStatement',
'ForStatement',
'ForInStatement',
'FunctionDeclaration',
'FunctionExpression',
'Identifier',
'IfStatement',
'ImportDeclaration',
'ImportDefaultSpecifier',
'ImportNamespaceSpecifier',
'ImportSpecifier',
'Literal',
'LabeledStatement',
'LogicalExpression',
'MemberExpression',
'MethodDefinition',
'NewExpression',
'ObjectExpression',
'ObjectPattern',
'Program',
'Property',
'RestElement',
'ReturnStatement',
'SequenceExpression',
'SpreadElement',
'Super',
'SwitchCase',
'SwitchStatement',
'TaggedTemplateExpression',
'TemplateElement',
'TemplateLiteral',
'ThisExpression',
'ThrowStatement',
'TryStatement',
'UnaryExpression',
'UpdateExpression',
'VariableDeclaration',
'VariableDeclarator',
'WhileStatement',
'WithStatement'}
# Parser error messages, message id -> text.  Some texts contain %s
# placeholders.  Every entry is also copied onto the Messages class as an
# attribute (see the setattr loop below).
# Error messages should be identical to V8.
messages = {
'UnexpectedToken': 'Unexpected token %s',
'UnexpectedNumber': 'Unexpected number',
'UnexpectedString': 'Unexpected string',
'UnexpectedIdentifier': 'Unexpected identifier',
'UnexpectedReserved': 'Unexpected reserved word',
'UnexpectedTemplate': 'Unexpected quasi %s',
'UnexpectedEOS': 'Unexpected end of input',
'NewlineAfterThrow': 'Illegal newline after throw',
'InvalidRegExp': 'Invalid regular expression',
'UnterminatedRegExp': 'Invalid regular expression: missing /',
'InvalidLHSInAssignment': 'Invalid left-hand side in assignment',
'InvalidLHSInForIn': 'Invalid left-hand side in for-in',
'MultipleDefaultsInSwitch': 'More than one default clause in switch statement',
'NoCatchOrFinally': 'Missing catch or finally after try',
'UnknownLabel': 'Undefined label \'%s\'',
'Redeclaration': '%s \'%s\' has already been declared',
'IllegalContinue': 'Illegal continue statement',
'IllegalBreak': 'Illegal break statement',
'IllegalReturn': 'Illegal return statement',
'StrictModeWith': 'Strict mode code may not include a with statement',
'StrictCatchVariable': 'Catch variable may not be eval or arguments in strict mode',
'StrictVarName': 'Variable name may not be eval or arguments in strict mode',
'StrictParamName': 'Parameter name eval or arguments is not allowed in strict mode',
'StrictParamDupe': 'Strict mode function may not have duplicate parameter names',
'StrictFunctionName': 'Function name may not be eval or arguments in strict mode',
'StrictOctalLiteral': 'Octal literals are not allowed in strict mode.',
'StrictDelete': 'Delete of an unqualified identifier in strict mode.',
'StrictLHSAssignment': 'Assignment to eval or arguments is not allowed in strict mode',
'StrictLHSPostfix': 'Postfix increment/decrement may not have eval or arguments operand in strict mode',
'StrictLHSPrefix': 'Prefix increment/decrement may not have eval or arguments operand in strict mode',
'StrictReservedWord': 'Use of future reserved word in strict mode',
'TemplateOctalLiteral': 'Octal literals are not allowed in template strings.',
'ParameterAfterRestParameter': 'Rest parameter must be last formal parameter',
'DefaultRestParameter': 'Unexpected token =',
'ObjectPatternAsRestParameter': 'Unexpected token {',
'DuplicateProtoProperty': 'Duplicate __proto__ fields are not allowed in object literals',
'ConstructorSpecialMethod': 'Class constructor may not be an accessor',
'DuplicateConstructor': 'A class may only have one constructor',
'StaticPrototype': 'Classes may not have static property named prototype',
'MissingFromClause': 'Unexpected token',
'NoAsAfterImportNamespace': 'Unexpected token',
'InvalidModuleSpecifier': 'Unexpected token',
'IllegalImportDeclaration': 'Unexpected token',
'IllegalExportDeclaration': 'Unexpected token'}
# Binary operator -> precedence level; a larger number binds tighter
# (multiplicative operators have the highest level, '||' the lowest).
PRECEDENCE = {'||':1,
'&&':2,
'|':3,
'^':4,
'&':5,
'==':6,
'!=':6,
'===':6,
'!==':6,
'<':7,
'>':7,
'<=':7,
'>=':7,
'instanceof':7,
'in':7,
'<<':8,
'>>':8,
'>>>':8,
'+':9,
'-':9,
'*':11,
'/':11,
'%':11}
# Attribute-style namespaces: each class below is an empty shell that gets one
# class attribute per entry of the corresponding module-level table.


class Token:
    # filled from the ``token`` mapping (name -> value)
    pass


for k, v in token.items():
    setattr(Token, k, v)


class Syntax:
    # filled from ``syntax``; every attribute equals its own name
    pass


for e in syntax:
    setattr(Syntax, e, e)


class Messages:
    # filled from the ``messages`` mapping (name -> message text)
    pass


class PlaceHolders:
    ArrowParameterPlaceHolder = 'ArrowParameterPlaceHolder'


for k, v in messages.items():
    setattr(Messages, k, v)
#http://stackoverflow.com/questions/14245893/efficiently-list-all-characters-in-a-given-unicode-category
# Single characters with special meaning to the tokenizer.
BOM = u'\ufeff'   # byte order mark
ZWJ = u'\u200d'   # zero width joiner
ZWNJ = u'\u200c'  # zero width non-joiner
TAB = u'\t'
VT = u'\x0b'      # vertical tab
FF = u'\x0c'      # form feed
SP = u' '
NBSP = u'\xa0'    # no-break space
LF = u'\n'
CR = u'\r'
LS = u'\u2028'    # line separator
PS = u'\u2029'    # paragraph separator
# Bucket every code point by its Unicode general category ('Lu', 'Nd', ...).
U_CATEGORIES = defaultdict(list)
for c in map(unichr, range(sys.maxunicode + 1)):
    U_CATEGORIES[unicodedata.category(c)].append(c)

# Character classes used to recognise identifiers.
UNICODE_LETTER = set().union(
    *(U_CATEGORIES[cat] for cat in ('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl')))
UNICODE_COMBINING_MARK = set().union(
    *(U_CATEGORIES[cat] for cat in ('Mn', 'Mc')))
UNICODE_DIGIT = set(U_CATEGORIES['Nd'])
UNICODE_CONNECTOR_PUNCTUATION = set(U_CATEGORIES['Pc'])

# The backslash is accepted as a start character as well; per the original
# note this is meant to cover identifiers that begin with a unicode escape
# sequence.
IDENTIFIER_START = UNICODE_LETTER | {'$', '_', '\\'}
IDENTIFIER_PART = (IDENTIFIER_START
                   | UNICODE_COMBINING_MARK
                   | UNICODE_DIGIT
                   | UNICODE_CONNECTOR_PUNCTUATION
                   | {ZWJ, ZWNJ})
# Code points (as integers) treated as white space; U+2000..U+200A form a
# contiguous run and are generated with range().
WHITE_SPACE = {
    0x09, 0x0B, 0x0C, 0x20, 0xA0,
    0x1680, 0x180E,
    0x202F, 0x205F, 0x3000, 0xFEFF,
} | set(range(0x2000, 0x200B))

# Code points (as integers) that terminate a line: LF, CR, LS, PS.
LINE_TERMINATORS = {0x0A, 0x0D, 0x2028, 0x2029}
def isIdentifierStart(ch):
    """Return True if *ch* (a character or an integer code point) is in IDENTIFIER_START."""
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_START
def isIdentifierPart(ch):
    """Return True if *ch* (a character or an integer code point) is in IDENTIFIER_PART."""
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in IDENTIFIER_PART
def isWhiteSpace(ch):
    """Return True if *ch* (a character or an integer code point) is in WHITE_SPACE."""
    # WHITE_SPACE stores integers, so characters are converted with ord().
    code_point = ord(ch) if isinstance(ch, unicode) else ch
    return code_point in WHITE_SPACE
def isLineTerminator(ch):
    """Return True if *ch* (a character or an integer code point) is in LINE_TERMINATORS."""
    # LINE_TERMINATORS stores integers, so characters are converted with ord().
    code_point = ord(ch) if isinstance(ch, unicode) else ch
    return code_point in LINE_TERMINATORS
# Digit alphabets for the three numeric bases.
OCTAL = set('01234567')
DEC = set('0123456789')
HEX = set('0123456789abcdefABCDEF')

# Hex digit -> numeric value; lower-case digits first, then the upper-case
# letters A-F mapped to 10..15.
HEX_CONV = dict(zip('0123456789abcdef', xrange(16)))
for i, e in enumerate('ABCDEF', 10):
    HEX_CONV[e] = i
def isDecimalDigit(ch):
    """Return True if *ch* (a character or an integer code point) is one of 0-9."""
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in DEC
def isHexDigit(ch):
    """Return True if *ch* (a character or an integer code point) is a hexadecimal digit."""
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in HEX
def isOctalDigit(ch):
    """Return True if *ch* (a character or an integer code point) is one of 0-7."""
    if not isinstance(ch, unicode):
        ch = unichr(ch)
    return ch in OCTAL
def isFutureReservedWord(w):
    """Return True if *w* is one of the future reserved words."""
    return w in {'super', 'import', 'export', 'enum'}
def isStrictModeReservedWord(w):
    """Return True if *w* is a word reserved only in strict mode."""
    return w in {
        'implements', 'interface', 'let', 'package', 'private',
        'protected', 'public', 'static', 'yield',
    }
def isRestrictedWord(w):
    """Return True if *w* is ``eval`` or ``arguments``."""
    return w in {'arguments', 'eval'}
def isKeyword(w):
    """Return True if *w* is treated as a keyword by this parser.

    Notes carried over from the original implementation:
    'const' is specialized as Keyword in V8; 'yield' and 'let' are there for
    compatibility with SpiderMonkey and ES.next; some others come from the
    future reserved words. 'pyimport' is part of the set as well.
    """
    return w in {
        'break', 'case', 'catch', 'class', 'const', 'continue', 'debugger',
        'default', 'delete', 'do', 'else', 'enum', 'export', 'extends',
        'finally', 'for', 'function', 'if', 'import', 'in', 'instanceof',
        'let', 'new', 'pyimport', 'return', 'super', 'switch', 'this',
        'throw', 'try', 'typeof', 'var', 'void', 'while', 'with', 'yield',
    }
class JsSyntaxError(Exception):
    """Exception type for JavaScript syntax errors."""
if __name__ == '__main__':
    # Tiny smoke test: predicates must accept both characters and code points.
    assert isLineTerminator('\n') and isLineTerminator(0x0A)
    assert isIdentifierStart('$') and isIdentifierStart(100)
    assert isWhiteSpace(' ')

View File

@ -0,0 +1,531 @@
from .pyjsparserdata import *
import six
class BaseNode:
    """Mutable syntax-tree node that is filled in by one of its ``finish*`` methods.

    Every ``finish*`` method stores the node ``type`` and its fields as
    instance attributes, calls :meth:`finish` and returns ``self``.
    Attributes can also be read and written with item syntax (``node['type']``).
    """

    def finish(self):
        """Hook called once the fields of the node are set; does nothing here."""
        pass

    def _finish_as(self, node_type, **fields):
        """Store *node_type* and *fields* on the node, call finish(), return self."""
        self.type = node_type
        for attr, val in fields.items():
            setattr(self, attr, val)
        self.finish()
        return self

    def finishArrayExpression(self, elements):
        return self._finish_as(Syntax.ArrayExpression, elements=elements)

    def finishArrayPattern(self, elements):
        return self._finish_as(Syntax.ArrayPattern, elements=elements)

    def finishArrowFunctionExpression(self, params, defaults, body, expression):
        return self._finish_as(Syntax.ArrowFunctionExpression, id=None, params=params,
                               defaults=defaults, body=body, generator=False,
                               expression=expression)

    def finishAssignmentExpression(self, operator, left, right):
        return self._finish_as(Syntax.AssignmentExpression, operator=operator,
                               left=left, right=right)

    def finishAssignmentPattern(self, left, right):
        return self._finish_as(Syntax.AssignmentPattern, left=left, right=right)

    def finishBinaryExpression(self, operator, left, right):
        # '||' and '&&' produce a LogicalExpression, everything else a BinaryExpression
        if operator == '||' or operator == '&&':
            node_type = Syntax.LogicalExpression
        else:
            node_type = Syntax.BinaryExpression
        return self._finish_as(node_type, operator=operator, left=left, right=right)

    def finishBlockStatement(self, body):
        return self._finish_as(Syntax.BlockStatement, body=body)

    def finishBreakStatement(self, label):
        return self._finish_as(Syntax.BreakStatement, label=label)

    def finishCallExpression(self, callee, args):
        return self._finish_as(Syntax.CallExpression, callee=callee, arguments=args)

    def finishCatchClause(self, param, body):
        return self._finish_as(Syntax.CatchClause, param=param, body=body)

    def finishClassBody(self, body):
        return self._finish_as(Syntax.ClassBody, body=body)

    def finishClassDeclaration(self, id, superClass, body):
        return self._finish_as(Syntax.ClassDeclaration, id=id,
                               superClass=superClass, body=body)

    def finishClassExpression(self, id, superClass, body):
        return self._finish_as(Syntax.ClassExpression, id=id,
                               superClass=superClass, body=body)

    def finishConditionalExpression(self, test, consequent, alternate):
        return self._finish_as(Syntax.ConditionalExpression, test=test,
                               consequent=consequent, alternate=alternate)

    def finishContinueStatement(self, label):
        return self._finish_as(Syntax.ContinueStatement, label=label)

    def finishDebuggerStatement(self, ):
        return self._finish_as(Syntax.DebuggerStatement)

    def finishDoWhileStatement(self, body, test):
        return self._finish_as(Syntax.DoWhileStatement, body=body, test=test)

    def finishEmptyStatement(self, ):
        return self._finish_as(Syntax.EmptyStatement)

    def finishExpressionStatement(self, expression):
        return self._finish_as(Syntax.ExpressionStatement, expression=expression)

    def finishForStatement(self, init, test, update, body):
        return self._finish_as(Syntax.ForStatement, init=init, test=test,
                               update=update, body=body)

    def finishForInStatement(self, left, right, body):
        return self._finish_as(Syntax.ForInStatement, left=left, right=right,
                               body=body, each=False)

    def finishFunctionDeclaration(self, id, params, defaults, body):
        return self._finish_as(Syntax.FunctionDeclaration, id=id, params=params,
                               defaults=defaults, body=body, generator=False,
                               expression=False)

    def finishFunctionExpression(self, id, params, defaults, body):
        return self._finish_as(Syntax.FunctionExpression, id=id, params=params,
                               defaults=defaults, body=body, generator=False,
                               expression=False)

    def finishIdentifier(self, name):
        return self._finish_as(Syntax.Identifier, name=name)

    def finishIfStatement(self, test, consequent, alternate):
        return self._finish_as(Syntax.IfStatement, test=test,
                               consequent=consequent, alternate=alternate)

    def finishLabeledStatement(self, label, body):
        return self._finish_as(Syntax.LabeledStatement, label=label, body=body)

    def finishLiteral(self, token):
        # written out by hand because ``regex`` is only set when the token has one
        self.type = Syntax.Literal
        self.value = token['value']
        self.raw = None  # todo fix it?
        if token.get('regex'):
            self.regex = token['regex']
        self.finish()
        return self

    def finishMemberExpression(self, accessor, object, property):
        # ``computed`` is True for the obj[prop] form
        return self._finish_as(Syntax.MemberExpression, computed=(accessor == '['),
                               object=object, property=property)

    def finishNewExpression(self, callee, args):
        return self._finish_as(Syntax.NewExpression, callee=callee, arguments=args)

    def finishObjectExpression(self, properties):
        return self._finish_as(Syntax.ObjectExpression, properties=properties)

    def finishObjectPattern(self, properties):
        return self._finish_as(Syntax.ObjectPattern, properties=properties)

    def finishPostfixExpression(self, operator, argument):
        return self._finish_as(Syntax.UpdateExpression, operator=operator,
                               argument=argument, prefix=False)

    def finishProgram(self, body):
        return self._finish_as(Syntax.Program, body=body)

    def finishPyimport(self, imp):
        return self._finish_as('PyimportStatement', imp=imp)

    def finishProperty(self, kind, key, computed, value, method, shorthand):
        return self._finish_as(Syntax.Property, key=key, computed=computed,
                               value=value, kind=kind, method=method,
                               shorthand=shorthand)

    def finishRestElement(self, argument):
        return self._finish_as(Syntax.RestElement, argument=argument)

    def finishReturnStatement(self, argument):
        return self._finish_as(Syntax.ReturnStatement, argument=argument)

    def finishSequenceExpression(self, expressions):
        return self._finish_as(Syntax.SequenceExpression, expressions=expressions)

    def finishSpreadElement(self, argument):
        return self._finish_as(Syntax.SpreadElement, argument=argument)

    def finishSwitchCase(self, test, consequent):
        return self._finish_as(Syntax.SwitchCase, test=test, consequent=consequent)

    def finishSuper(self, ):
        return self._finish_as(Syntax.Super)

    def finishSwitchStatement(self, discriminant, cases):
        return self._finish_as(Syntax.SwitchStatement, discriminant=discriminant,
                               cases=cases)

    def finishTaggedTemplateExpression(self, tag, quasi):
        return self._finish_as(Syntax.TaggedTemplateExpression, tag=tag, quasi=quasi)

    def finishTemplateElement(self, value, tail):
        return self._finish_as(Syntax.TemplateElement, value=value, tail=tail)

    def finishTemplateLiteral(self, quasis, expressions):
        return self._finish_as(Syntax.TemplateLiteral, quasis=quasis,
                               expressions=expressions)

    def finishThisExpression(self, ):
        return self._finish_as(Syntax.ThisExpression)

    def finishThrowStatement(self, argument):
        return self._finish_as(Syntax.ThrowStatement, argument=argument)

    def finishTryStatement(self, block, handler, finalizer):
        return self._finish_as(Syntax.TryStatement, block=block, guardedHandlers=[],
                               handlers=[handler] if handler else [],
                               handler=handler, finalizer=finalizer)

    def finishUnaryExpression(self, operator, argument):
        # prefix '++' / '--' are UpdateExpressions, everything else is unary
        if operator == '++' or operator == '--':
            node_type = Syntax.UpdateExpression
        else:
            node_type = Syntax.UnaryExpression
        return self._finish_as(node_type, operator=operator, argument=argument,
                               prefix=True)

    def finishVariableDeclaration(self, declarations):
        return self._finish_as(Syntax.VariableDeclaration, declarations=declarations,
                               kind='var')

    def finishLexicalDeclaration(self, declarations, kind):
        return self._finish_as(Syntax.VariableDeclaration, declarations=declarations,
                               kind=kind)

    def finishVariableDeclarator(self, id, init):
        return self._finish_as(Syntax.VariableDeclarator, id=id, init=init)

    def finishWhileStatement(self, test, body):
        return self._finish_as(Syntax.WhileStatement, test=test, body=body)

    def finishWithStatement(self, object, body):
        return self._finish_as(Syntax.WithStatement, object=object, body=body)

    def finishExportSpecifier(self, local, exported):
        # the exported name falls back to the local one
        return self._finish_as(Syntax.ExportSpecifier, exported=exported or local,
                               local=local)

    def finishImportDefaultSpecifier(self, local):
        return self._finish_as(Syntax.ImportDefaultSpecifier, local=local)

    def finishImportNamespaceSpecifier(self, local):
        return self._finish_as(Syntax.ImportNamespaceSpecifier, local=local)

    def finishExportNamedDeclaration(self, declaration, specifiers, src):
        return self._finish_as(Syntax.ExportNamedDeclaration, declaration=declaration,
                               specifiers=specifiers, source=src)

    def finishExportDefaultDeclaration(self, declaration):
        return self._finish_as(Syntax.ExportDefaultDeclaration, declaration=declaration)

    def finishExportAllDeclaration(self, src):
        return self._finish_as(Syntax.ExportAllDeclaration, source=src)

    def finishImportSpecifier(self, local, imported):
        # the local name falls back to the imported one
        return self._finish_as(Syntax.ImportSpecifier, local=local or imported,
                               imported=imported)

    def finishImportDeclaration(self, specifiers, src):
        return self._finish_as(Syntax.ImportDeclaration, specifiers=specifiers,
                               source=src)

    def __getitem__(self, item):
        """``node[item]`` reads the attribute called *item*."""
        return getattr(self, item)

    def __setitem__(self, key, value):
        """``node[key] = value`` writes the attribute called *key*."""
        setattr(self, key, value)
class Node(BaseNode):
    # Plain node: adds nothing on top of BaseNode.
    pass
class WrappingNode(BaseNode):
    # Node variant whose constructor accepts an optional start token.
    def __init__(self, startToken=None):
        # the start token is accepted but not stored
        pass
def node_to_dict(node):  # extremely important for translation speed
    """Recursively convert a tree of BaseNode objects into plain dicts and lists.

    Lists and dicts are rebuilt with their members converted, a BaseNode
    becomes a dict of its instance attributes, anything else is returned as is.
    """
    if isinstance(node, list):
        return list(map(node_to_dict, node))
    if isinstance(node, dict):
        items = six.iteritems(node)
    elif isinstance(node, BaseNode):
        items = six.iteritems(node.__dict__)
    else:
        return node
    return {key: node_to_dict(value) for key, value in items}

View File

@ -0,0 +1,641 @@
from __future__ import unicode_literals
from .pyjsparserdata import *
from .friendly_nodes import *
import random
import six
if six.PY3:
from functools import reduce
xrange = range
unicode = str
# Maximum number of characters a translated expression may have before the
# `limited` decorator moves it into a separate helper definition; the split is
# done in order to avoid a Python parser stack overflow.
# The feature is still experimental: 400 is the suggested value because it
# avoids common errors. Use a smaller value only if you actually run into
# parser stack overflows.
LINE_LEN_LIMIT = 400 # 200 # or any other value - the larger the smaller probability of errors :)
class ForController:
    """Stack of flags telling whether the innermost enclosing construct is a ``for`` loop."""

    def __init__(self):
        # bottom entry: not inside a for loop
        self.inside, self.update = [False], ''

    def enter_for(self, update):
        """Push a for-loop frame and remember its *update* code."""
        self.inside.append(True)
        self.update = update

    def leave_for(self):
        """Pop the frame pushed by enter_for()."""
        self.inside.pop()

    def enter_other(self):
        """Push a frame for a construct that is not a for loop."""
        self.inside.append(False)

    def leave_other(self):
        """Pop the frame pushed by enter_other()."""
        self.inside.pop()

    def is_inside(self):
        """Return the flag of the innermost frame."""
        return self.inside[-1]
class InlineStack:
    """Registry of helper definitions that are injected into the generated code.

    ``names`` keeps the helper names in the order they have to be injected and
    ``reps`` maps every name to the code that defines it.
    """

    # name template: (kind of helper, running index)
    NAME = 'PyJs_%s_%d_'

    def __init__(self):
        self.reps = {}
        self.names = []

    def inject_inlines(self, source):
        """Return *source* with every registered definition injected before its lval."""
        for lval in self.names:  # first in first out! Its important by the way
            source = inject_before_lval(source, lval, self.reps[lval])
        return source

    def require(self, typ):
        """Reserve and return a fresh helper name for a helper of kind *typ*."""
        name = self.NAME % (typ, len(self.names))
        self.names.append(name)
        return name

    def define(self, name, val):
        """Attach the defining code *val* to the helper called *name*."""
        self.reps[name] = val

    def reset(self):
        """Forget all reserved names and their definitions."""
        # The original assigned ``self.rel`` here, which left the stale
        # definitions in ``self.reps`` alive across resets.
        self.reps = {}
        self.names = []
class ContextStack:
    """Collects what has to be hoisted to the top of a scope.

    ``to_register`` is the set of variable names to register and
    ``to_define`` maps function names to the code that defines them.
    """

    def __init__(self):
        self.to_register = set()
        self.to_define = {}

    def reset(self):
        """Drop every registered name and definition."""
        self.to_register = set()
        self.to_define = {}

    def register(self, var):
        """Remember *var* as a name that must be registered."""
        self.to_register.add(var)

    def define(self, name, code):
        """Remember *code* as the definition of *name*; the name gets registered too."""
        self.to_define[name] = code
        self.register(name)

    def get_code(self):
        """Return the ``var.registers`` call followed by all stored definitions."""
        registered = ', '.join(repr(e) for e in self.to_register)
        code = 'var.registers([%s])\n' % registered
        for name, func_code in six.iteritems(self.to_define):
            code = code + func_code
        return code
def clean_stacks():
    """Rebind the module globals ``Context`` and ``inline_stack`` to fresh, empty objects."""
    global Context, inline_stack
    Context, inline_stack = ContextStack(), InlineStack()
def to_key(literal_or_identifier):
    """Return the string representation of an Identifier or Literal node.

    Any other node type yields None.
    """
    node_type = literal_or_identifier['type']
    if node_type == 'Identifier':
        return literal_or_identifier['name']
    if node_type != 'Literal':
        return None
    k = literal_or_identifier['value']
    # the order of the checks matters: floats first, then regex literals,
    # then booleans, null and finally everything else
    if isinstance(k, float):
        return unicode(float_repr(k))
    if 'regex' in literal_or_identifier:
        return compose_regex(k)
    if isinstance(k, bool):
        return 'true' if k else 'false'
    if k is None:
        return 'null'
    return unicode(k)
def trans(ele, standard=False):
    """Translate an esprima syntax-tree node to Python code.

    The translating function is looked up in the module globals under the
    node's ``type`` and called with the node's fields as keyword arguments.
    With ``standard=True`` the function stored under the ``standard``
    attribute of the looked-up function (if there is one) is used instead.

    Raises NotImplementedError when no translator exists for the node type.
    """
    node = globals().get(ele['type'])
    if not node:
        raise NotImplementedError('%s is not supported!' % ele['type'])
    if standard:
        node = node.__dict__.get('standard', node)
    return node(**ele)
def limited(func):
    """Decorator that keeps the translated expression below LINE_LEN_LIMIT characters.

    It exists to avoid a Python parser stack overflow: a result longer than
    LINE_LEN_LIMIT is wrapped into a helper function registered on
    ``inline_stack`` and replaced by a call to that helper.
    USE ONLY ON EXPRESSIONS!!!

    The undecorated function stays reachable as the ``standard`` attribute of
    the returned wrapper.
    """
    def f(standard=False, **args):
        # Remember where the helper name has to go should the line overflow:
        # calling func changes inline_stack, and require() can not be used up
        # front because it is not known yet whether the line overflows.
        insert_pos = len(inline_stack.names)
        res = func(**args)
        if len(res) <= LINE_LEN_LIMIT:
            return res
        name = inline_stack.require('LONG')
        # move the freshly reserved name from the end to the remembered slot
        inline_stack.names.pop()
        inline_stack.names.insert(insert_pos, name)
        inline_stack.define(name, 'def %s(var=var):\n return %s\n' % (name, res))
        return name + '()'
    f.standard = func
    return f
# ==== IDENTIFIERS AND LITERALS =======
inf = float('inf')


def Literal(type, value, raw, regex=None):
    """Translate a literal node (regex, null, string, boolean or number)."""
    if regex:  # regex
        return 'JsRegExp(%s)' % repr(compose_regex(value))
    if value is None:  # null
        return 'var.get(u"null")'
    # Todo template
    # String, Bool, Float; infinity has no usable repr so it is spelled out
    if value != inf:
        return 'Js(%s)' % repr(value)
    return 'Js(float("inf"))'
def Identifier(type, name):
    """Translate an identifier into a ``var.get`` lookup of its name."""
    quoted = repr(name)
    return 'var.get(%s)' % quoted
@limited
def MemberExpression(type, computed, object, property):
    """Translate ``obj.prop`` / ``obj[prop]`` into a ``.get(...)`` call on the object."""
    far_left = trans(object)
    if computed and property['type'] != 'Literal':
        # obj[expr]: worst case, the key has to be evaluated at run time
        prop = trans(property)
    else:
        # obj.prop, or obj[literal]: the key is the same in every case so
        # some time on conversion can be saved
        prop = repr(to_key(property))
    return far_left + '.get(%s)' % prop
def ThisExpression(type):
    """Translate ``this`` into a lookup of the variable called ``this``."""
    return 'var.get(u"this")'
@limited
def CallExpression(type, callee, arguments):
    """Translate a call; method calls become ``.callprop(name, args...)``."""
    arguments = [trans(e) for e in arguments]
    if callee['type'] != 'MemberExpression':
        # standard call
        return trans(callee) + '(%s)' % ', '.join(arguments)
    far_left = trans(callee['object'])
    member = callee['property']
    if callee['computed'] and member['type'] != 'Literal':
        # obj[expr](...): worst case, its not a string literal! so no repr
        prop = trans(member)
    else:
        # obj.prop(...) or obj[literal](...): the key is always the same
        prop = repr(to_key(member))
    arguments.insert(0, prop)
    return far_left + '.callprop(%s)' % ', '.join(arguments)
# ========== ARRAYS ============
def ArrayExpression(type, elements):  # todo fix null inside problem
    """Translate an array literal; empty slots are emitted as ``None``."""
    items = [trans(e) if e else 'None' for e in elements]
    return 'Js([%s])' % ', '.join(items)
# ========== OBJECTS =============
def ObjectExpression(type, properties):
    """Translate an object literal.

    The object is built by an inline definition registered on ``inline_stack``;
    the returned code is just the name of that definition. Getters and setters
    are attached with ``define_own_property`` calls after the object is created.
    """
    name = inline_stack.require('Object')
    elems = []
    accessors = []
    for p in properties:
        kind = p['kind']
        if kind == 'init':
            elems.append('%s:%s' % Property(**p))
        elif kind == 'set':
            # setter is just a lval referring to that function, it will be defined in InlineStack automatically
            k, setter = Property(**p)
            accessors.append('%s.define_own_property(%s, {"set":%s, "configurable":True, "enumerable":True})\n' % (name, k, setter))
        elif kind == 'get':
            k, getter = Property(**p)
            accessors.append('%s.define_own_property(%s, {"get":%s, "configurable":True, "enumerable":True})\n' % (name, k, getter))
        else:
            raise RuntimeError('Unexpected object propery kind')
    obj = '%s = Js({%s})\n' % (name, ','.join(elems))
    inline_stack.define(name, obj + ''.join(accessors))
    return name
def Property(type, kind, key, computed, value, method, shorthand):
    """Return ``(repr of the key, translated value)`` for one object property.

    Raises NotImplementedError for shorthand or computed properties and
    SyntaxError when the key can not be turned into a string.
    """
    if shorthand or computed:
        raise NotImplementedError('Shorthand and Computed properties not implemented!')
    k = to_key(key)
    if k is None:
        raise SyntaxError('Invalid key in dictionary! Or bug in Js2Py')
    translated_value = trans(value)
    return repr(k), translated_value
# ========== EXPRESSIONS ============
@limited
def UnaryExpression(type, operator, argument, prefix):
    """Translate a unary expression, with special handling of delete and typeof."""
    # unary involve some complex operations so we cant use line shorteners here
    a = trans(argument, standard=True)
    if operator == 'typeof':
        return js_typeof(a)
    if operator != 'delete':
        return UNARY[operator](a)
    if argument['type'] in {'Identifier', 'MemberExpression'}:
        # means that operation is valid
        return js_delete(a)
    # otherwise not valid, just perform expression and return true.
    return 'PyJsComma(%s, Js(True))' % a
@limited
def BinaryExpression(type, operator, left, right):
    """Translate both operands, then let the BINARY handler for *operator* combine them."""
    lhs, rhs = trans(left), trans(right)
    # delegate to our friends
    combine = BINARY[operator]
    return combine(lhs, rhs)
@limited
def UpdateExpression(type, operator, argument, prefix):
    """Translate ``++`` / ``--`` in prefix or postfix position."""
    # also complex operation involving parsing of the result so no line length reducing here
    a = trans(argument, standard=True)
    is_increment = operator == '++'
    is_postfix = not prefix
    return js_postfix(a, is_increment, is_postfix)
@limited
def AssignmentExpression(type, operator, left, right):
    """Translate ``target = value`` and the compound forms (``+=``, ``-=`` ...).

    The target must be an identifier or a member expression; anything else
    raises SyntaxError. For compound assignments the operator without the
    trailing ``=`` is passed to ``put`` as a third argument.
    """
    operator = operator[:-1]  # strip '='; empty string for a plain assignment
    if left['type'] == 'Identifier':
        target = 'var'
        prop = repr(to_key(left))
    elif left['type'] == 'MemberExpression':
        target = trans(left['object'])
        member = left['property']
        if left['computed'] and member['type'] != 'Literal':
            # obj[expr]: worst case, its not a string literal! so no repr
            prop = trans(member)
        else:
            # obj.prop or obj[literal]: the key is the same in every case
            prop = repr(to_key(member))
    else:
        raise SyntaxError('Invalid left hand side in assignment!')
    # the value is translated after the target, exactly as before
    value = trans(right)
    if operator:
        return target + '.put(%s, %s, %s)' % (prop, value, repr(operator))
    return target + '.put(%s, %s)' % (prop, value)
@limited
def SequenceExpression(type, expressions):
    """Translate a comma expression by folding the translated parts with js_comma."""
    translated = (trans(expr) for expr in expressions)
    return reduce(js_comma, translated)
@limited
def NewExpression(type, callee, arguments):
    """Translate ``new callee(args)`` into ``callee.create(args)``."""
    constructor = trans(callee)
    call_args = ', '.join(trans(e) for e in arguments)
    return constructor + '.create(%s)' % call_args
@limited
def ConditionalExpression(type, test, consequent, alternate):  # caused plenty of problems in my home-made translator :)
    """Translate ``test ? consequent : alternate`` into a Python conditional expression."""
    # translation order (consequent, test, alternate) follows the output order
    when_true = trans(consequent)
    condition = trans(test)
    when_false = trans(alternate)
    return '(%s if %s else %s)' % (when_true, condition, when_false)
# =========== STATEMENTS =============
def BlockStatement(type, body):
    """Translate a block by translating its statement list."""
    # never returns empty string! In the worst case returns pass\n
    code = StatementList(body)
    return code
def ExpressionStatement(type, expression):
    """Translate an expression used as a statement."""
    code = trans(expression)
    return code + '\n'  # end expression space with new line
def BreakStatement(type, label):
    """Translate ``break``; a labelled break raises the label's break exception."""
    if not label:
        return 'break\n'
    return 'raise %s("Breaked")\n' % (get_break_label(label['name']))
def ContinueStatement(type, label):
    """Translate ``continue``; a labelled continue raises the label's continue exception."""
    if not label:
        return 'continue\n'
    return 'raise %s("Continued")\n' % (get_continue_label(label['name']))
def ReturnStatement(type, argument):
    """Translate ``return``; without an argument the undefined value is returned."""
    if argument:
        value = trans(argument)
    else:
        value = "var.get('undefined')"
    return 'return %s\n' % value
def EmptyStatement(type):
    """Translate the empty statement into ``pass``."""
    return 'pass\n'
def DebuggerStatement(type):
    """Translate ``debugger`` into ``pass``."""
    return 'pass\n'
def DoWhileStatement(type, body, test):
    """Translate ``do ... while`` into ``while 1`` with a conditional break at the end."""
    loop_body = trans(body)
    exit_check = 'if not %s:\n' % trans(test) + indent('break\n')
    return 'while 1:\n' + indent(loop_body + exit_check)
def ForStatement(type, init, test, update, body):
    """Translate a C-style ``for`` loop into an init statement plus a ``while`` loop.

    When an update expression exists the loop body is wrapped in
    ``try/finally`` with the update code placed in the ``finally`` part.
    """
    # translation order: update, init, test, body
    update_code = indent(trans(update)) if update else ''
    init_code = trans(init) if init else ''
    if not init_code.endswith('\n'):
        init_code += '\n'
    test_code = trans(test) if test else '1'
    header = '#for JS loop\n%swhile %s:\n' % (init_code, test_code)
    body_code = indent(trans(body))
    if not update_code:
        return header + '%s%s\n' % (body_code, update_code)
    guarded = 'try:\n%sfinally:\n %s\n' % (body_code, update_code)
    return header + indent(guarded)
def ForInStatement(type, left, right, body, each):
    """Translate ``for (left in right)`` into a Python ``for`` loop over PyJsTemp.

    *left* must be an identifier or a variable declaration; anything else
    raises RuntimeError('Unusual ForIn loop').
    """
    res = 'for PyJsTemp in %s:\n' % trans(right)
    if left['type'] == "VariableDeclaration":
        addon = trans(left)  # make sure variable is registered
        if addon != 'pass\n':
            res = addon + res  # we have to execute this expression :(
        # now extract the name
        try:
            name = left['declarations'][0]['id']['name']
        except (LookupError, TypeError, AttributeError):
            # Only a malformed declaration is reported as unusual; the former
            # bare ``except:`` also swallowed KeyboardInterrupt and SystemExit.
            raise RuntimeError('Unusual ForIn loop')
    elif left['type'] == 'Identifier':
        name = left['name']
    else:
        raise RuntimeError('Unusual ForIn loop')
    # every iteration first stores the current key in the loop variable
    res += indent('var.put(%s, PyJsTemp)\n' % repr(name) + trans(body))
    return res
def IfStatement(type, test, consequent, alternate):
    """Translate ``if`` / ``else``."""
    # NOTE we cannot do elif because function definition inside elif statement would not be possible!
    code = 'if %s:\n' % trans(test)
    code += indent(trans(consequent))
    if alternate:
        code += 'else:\n' + indent(trans(alternate))
    return code
def LabeledStatement(type, label, body):
    """Translate a labelled statement.

    Labelled break / continue are implemented with exception classes that are
    defined right before the statement: the whole statement is wrapped in a
    ``try`` catching the break exception and, for loops, the loop body is
    wrapped in a ``try`` catching the continue exception.
    """
    # todo consider using smarter approach!
    inside = trans(body)
    defs = ''
    if inside.startswith(('while ', 'for ', '#for')):
        # we have to add contine label as well...
        # 3 or 1 since #for loop type has more lines before real for.
        sep = 3 if inside.startswith('#for') else 1
        cont_label = get_continue_label(label['name'])
        lines = inside.split('\n')
        head, tail = lines[:sep], lines[sep:]
        injected = 'try:\n' + '\n'.join(tail)
        injected += 'except %s:\n pass\n' % cont_label
        inside = '\n'.join(head) + '\n' + indent(injected)
        defs += 'class %s(Exception): pass\n' % cont_label
    break_label = get_break_label(label['name'])
    inside = 'try:\n%sexcept %s:\n pass\n' % (indent(inside), break_label)
    defs += 'class %s(Exception): pass\n' % break_label
    return defs + inside
def StatementList(lis):
    """Translate a list of statements; never returns an empty string."""
    # ensure we don't return empty string because it may ruin indentation!
    if not lis:
        return 'pass\n'
    code = ''.join(trans(e) for e in lis)
    return code or 'pass\n'
def PyimportStatement(type, imp):
    """Translate ``pyimport lib`` into a Python import plus a ``var.pyimport`` call.

    Raises SyntaxError when the resulting import statement does not compile,
    i.e. when the name is not usable as a Python module name.
    """
    lib = imp['name']
    jlib = 'PyImport_%s' % lib
    code = 'import %s as %s\n' % (lib, jlib)
    # check whether valid lib name...
    try:
        compile(code, '', 'exec')  # compiled only, never executed here
    except Exception:
        # ``Exception`` instead of a bare ``except:`` so that
        # KeyboardInterrupt / SystemExit are not turned into a SyntaxError.
        raise SyntaxError('Invalid Python module name (%s) in pyimport statement' % lib)
    # var.pyimport will handle module conversion to PyJs object
    code += 'var.pyimport(%s, %s)\n' % (repr(lib), jlib)
    return code
def SwitchStatement(type, discriminant, cases):
    """Translate ``switch`` into a ``while 1`` loop holding one ``if`` per case.

    SWITCHED becomes True once a case matched so that the following cases
    fall through; the loop is always left by the ``break`` appended at the end.
    """
    #TODO there will be a problem with continue in a switch statement.... FIX IT
    template = 'while 1:\n' + indent('SWITCHED = False\nCONDITION = (%s)\n')
    code = template % trans(discriminant)
    for case in cases:
        if case['test']:  # case (x):
            case_code = 'if SWITCHED or PyJsStrictEq(CONDITION, %s):\n' % (trans(case['test']))
        else:  # default:
            case_code = 'if True:\n'
        case_code += indent('SWITCHED = True\n')
        case_code += indent(StatementList(case['consequent']))
        # one more indent for whole
        code += indent(case_code)
    # prevent infinite loop and sort out nested switch...
    return code + indent('SWITCHED = True\nbreak\n')
def ThrowStatement(type, argument):
    """Translate ``throw``: wrap the value with JsToPyException and raise it."""
    thrown = trans(argument)
    return 'PyJsTempException = JsToPyException(%s)\nraise PyJsTempException\n' % thrown
def TryStatement(type, block, handler, handlers, guardedHandlers, finalizer):
    """Translate ``try`` / ``catch`` / ``finally``.

    The catch clause is produced from the TRY_CATCH template; a holder name
    containing a random number is generated for every catch clause and
    substituted for HOLDER in that template.
    """
    result = 'try:\n%s' % indent(trans(block))
    # complicated catch statement...
    if handler:
        identifier = handler['param']['name']
        # randrange needs an integer bound: the former float ``1e8`` is
        # rejected with a TypeError by Python 3.12 and newer.
        holder = 'PyJsHolder_%s_%d' % (to_hex(identifier), random.randrange(10 ** 8))
        identifier = repr(identifier)
        result += 'except PyJsException as PyJsTempException:\n'
        # fill in except ( catch ) block and remember to recover holder variable to its previous state
        catch_code = TRY_CATCH.replace('HOLDER', holder)
        catch_code = catch_code.replace('NAME', identifier)
        catch_code = catch_code.replace('BLOCK', indent(trans(handler['body'])))
        result += indent(catch_code)
    # translate finally statement if present
    if finalizer:
        result += 'finally:\n%s' % indent(trans(finalizer))
    return result
def LexicalDeclaration(type, declarations, kind):
    """``let`` / ``const`` declarations are not supported: always raises NotImplementedError."""
    message = 'let and const not implemented yet but they will be soon! Check github for updates.'
    raise NotImplementedError(message)
def VariableDeclarator(type, id, init):
    """Register the declared name; emit an assignment only if it has an initialiser."""
    name = id['name']
    # register the name if not already registered
    Context.register(name)
    if not init:
        return ''
    return 'var.put(%s, %s)\n' % (repr(name), trans(init))
def VariableDeclaration(type, declarations, kind):
    """Translate a ``var`` statement; yields ``pass`` when no declarator emits code."""
    pieces = [trans(d) for d in declarations]
    return ''.join(pieces) or 'pass\n'
def WhileStatement(type, test, body):
    """Translate a ``while`` loop."""
    header = 'while %s:\n' % trans(test)
    return header + indent(trans(body))
def WithStatement(type, object, body):
    """``with`` statements are not supported: always raises NotImplementedError."""
    message = 'With statement not implemented!'
    raise NotImplementedError(message)
def Program(type, body):
    """Translate a whole program.

    The body is translated first, then the hoisted code of the current
    Context is put in front of it and finally the inline definitions are
    injected.
    """
    inline_stack.reset()
    translated = ''.join(trans(e) for e in body)
    # here add hoisted elements (register variables and define functions)
    hoisted = Context.get_code()
    # replace all inline variables
    return inline_stack.inject_inlines(hoisted + translated)
# ======== FUNCTIONS ============
def FunctionDeclaration(type, id, params, defaults, body, generator, expression):
    """Translate a function declaration.

    The generated definition is stored in the enclosing Context (so that it
    is hoisted) and ``'pass\\n'`` is returned in place of the declaration.
    A declaration without a name is delegated to FunctionExpression.

    Raises NotImplementedError for generators and default parameter values.
    """
    global Context
    if generator:
        raise NotImplementedError('Generators not supported')
    if defaults:
        raise NotImplementedError('Defaults not supported')
    if not id:
        return FunctionExpression(type, id, params, defaults, body, generator, expression)
    JsName = id['name']
    PyName = 'PyJsHoisted_%s_' % JsName
    PyName = PyName if is_valid_py_name(PyName) else 'PyJsHoistedNonPyName'
    # this is quite complicated
    previous_context = Context
    # change context to the context of this function
    Context = ContextStack()
    try:
        # translate body within current context
        code = trans(body)
        # get arg names
        arg_names = [v['name'] for v in params]
        # args are automaticaly registered variables
        Context.to_register.update(arg_names)
        # add all hoisted elements inside function
        code = Context.get_code() + code
    finally:
        # Restore the enclosing context even when the body can not be
        # translated; previously an exception left the global Context
        # pointing at this function's context.
        Context = previous_context
    # check whether args are valid python names; an invalid arg in python,
    # for example $, is replaced with an alternative arg
    used_vars = [v if is_valid_py_name(v) else 'PyJsArg_%s_' % to_hex(v)
                 for v in arg_names]
    header = '@Js\n'
    header += 'def %s(%sthis, arguments, var=var):\n' % (PyName, ', '.join(used_vars) + (', ' if arg_names else ''))
    # transfer names from Py scope to Js scope
    arg_map = dict(zip(arg_names, used_vars))
    arg_map.update({'this': 'this', 'arguments': 'arguments'})
    arg_conv = 'var = Scope({%s}, var)\n' % ', '.join(repr(k) + ':' + v for k, v in six.iteritems(arg_map))
    # and finally set the name of the function to its real name:
    footer = '%s.func_name = %s\n' % (PyName, repr(JsName))
    footer += 'var.put(%s, %s)\n' % (repr(JsName), PyName)
    whole_code = header + indent(arg_conv + code) + footer
    # define in upper context
    Context.define(JsName, whole_code)
    return 'pass\n'
def FunctionExpression(type, id, params, defaults, body, generator, expression):
    """Translate a function expression.

    The generated definition is registered on ``inline_stack`` and the unique
    name of that definition is returned as the code of the expression.

    Raises NotImplementedError for generators and default parameter values.
    """
    global Context
    if generator:
        raise NotImplementedError('Generators not supported')
    if defaults:
        raise NotImplementedError('Defaults not supported')
    JsName = id['name'] if id else 'anonymous'
    ScriptName = JsName if is_valid_py_name(JsName) else 'InlineNonPyName'
    PyName = inline_stack.require(ScriptName)  # this is unique
    # again quite complicated
    previous_context = Context
    # change context to the context of this function
    Context = ContextStack()
    try:
        # translate body within current context
        code = trans(body)
        # get arg names
        arg_names = [v['name'] for v in params]
        # args are automaticaly registered variables
        Context.to_register.update(arg_names)
        # add all hoisted elements inside function
        code = Context.get_code() + code
    finally:
        # Restore the enclosing context even when the body can not be
        # translated; previously an exception left the global Context
        # pointing at this function's context.
        Context = previous_context
    # check whether args are valid python names; an invalid arg in python,
    # for example $, is replaced with an alternative arg
    used_vars = [v if is_valid_py_name(v) else 'PyJsArg_%s_' % to_hex(v)
                 for v in arg_names]
    header = '@Js\n'
    header += 'def %s(%sthis, arguments, var=var):\n' % (PyName, ', '.join(used_vars) + (', ' if arg_names else ''))
    # transfer names from Py scope to Js scope
    arg_map = dict(zip(arg_names, used_vars))
    arg_map.update({'this': 'this', 'arguments': 'arguments'})
    if id and id['name'] not in arg_map:
        # make self available from inside...
        arg_map[id['name']] = PyName
    arg_conv = 'var = Scope({%s}, var)\n' % ', '.join(repr(k) + ':' + v for k, v in six.iteritems(arg_map))
    # and finally set the name of the function to its real name:
    footer = '%s._set_name(%s)\n' % (PyName, repr(JsName))
    whole_code = header + indent(arg_conv + code) + footer
    # define in upper context
    inline_stack.define(PyName, whole_code)
    return PyName
# Logical expressions are translated by the very same function as binary ones.
LogicalExpression = BinaryExpression
# Postfix expressions are translated by the UpdateExpression translator.
PostfixExpression = UpdateExpression
# Create the initial module-level ``Context`` and ``inline_stack`` objects.
clean_stacks()
if __name__ == '__main__':
    # Manual benchmark: translate esp.js, report the timing, write res.py.
    import codecs
    import time
    import pyjsparser

    with codecs.open("esp.js", "r", "utf-8") as f:
        c = f.read()

    print('Started')
    t = time.time()
    res = trans(pyjsparser.PyJsParser().parse(c))
    # the tiny constant keeps the division below away from zero
    dt = time.time() - t + 0.000000001

    print('Translated everyting in', round(dt, 5), 'seconds.')
    print('Thats %d characters per second' % int(len(c) / dt))
    with open('res.py', 'w') as f:
        f.write(res)

View File

@ -0,0 +1,174 @@
from . import pyjsparser
#from pyesprima import esprima
from . import translating_nodes
import hashlib
import re
# The regex below recognises numeric constants.
# It finds any simple numeric that is not preceded by an alphanumeric
# character, an underscore or a quote. The numeric can be a float (a dot is
# accepted), but notation such as 123e5, 0xFF, Infinity or NaN is not
# recognised.
CP_NUMERIC_RE = re.compile(r'(?<![a-zA-Z0-9_"\'])([0-9\.]+)')
CP_NUMERIC_PLACEHOLDER = '__PyJsNUM_%i_PyJsNUM__'
# Raw string: '\.' in a normal string literal is an invalid escape sequence.
CP_NUMERIC_PLACEHOLDER_REVERSE_RE = re.compile(
    CP_NUMERIC_PLACEHOLDER.replace('%i', r'([0-9\.]+)')
)
# The regex below recognises string constants.
# It matches a ' or " delimited literal whose content is either ordinary
# characters or one of the listed backslash escapes.
#CP_STRING_1 = re.compile(r'(["\'])(.*?)\1') # earlier, simpler variant
CP_STRING = '"([^\\\\"]+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*"|\'([^\\\\\']+|\\\\([bfnrtv\'"\\\\]|[0-3]?[0-7]{1,2}|x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}))*\''
CP_STRING_RE = re.compile(CP_STRING)  # compiled form of CP_STRING
CP_STRING_PLACEHOLDER = '__PyJsSTR_%i_PyJsSTR__'
CP_STRING_PLACEHOLDER_REVERSE_RE = re.compile(
    CP_STRING_PLACEHOLDER.replace('%i', r'([0-9\.]+)')
)
# Maps the md5 digest of a compilation plan to its translated proto code.
cache = {}
# The module reset below is still needed, but it was removed for speed reasons. A better approach has to be found.
# import js2py.pyjs, sys
# # Redefine builtin objects... Do you have a better idea?
# for m in list(sys.modules):
# if m.startswith('js2py'):
# del sys.modules[m]
# del js2py.pyjs
# del js2py
# Default prologue that translate_js / translate_js_with_compilation_plan put
# in front of the translated code (their HEADER parameter defaults to it).
DEFAULT_HEADER = u'''from js2py.pyjs import *
# setting scope
var = Scope( JS_BUILTINS )
set_global_object(var)
# Code follows:
'''
def dbg(x):
    """Legacy placeholder: ignores ``x`` and always returns an empty string."""
    return ''
def translate_js(js, HEADER=DEFAULT_HEADER, use_compilation_plan=False):
    """Translate JavaScript source code ``js`` into equivalent Python code.

    ``HEADER`` is prepended to the translated code. When
    ``use_compilation_plan`` is true and ``js`` contains neither ``//`` nor
    ``/*``, the work is delegated to translate_js_with_compilation_plan.
    """
    if use_compilation_plan and '//' not in js and '/*' not in js:
        return translate_js_with_compilation_plan(js, HEADER=HEADER)
    # js source -> esprima-style syntax tree
    # (the auto esprima translation would also work but is much slower and
    # causes import problems: esprima.parse(js).to_dict())
    syntax_tree = pyjsparser.PyJsParser().parse(js)
    translating_nodes.clean_stacks()
    # syntax tree -> python code
    return HEADER + translating_nodes.trans(syntax_tree)
class match_unumerator(object):
    """Replacement callable for ``re.sub`` that numbers every match.

    Each match is replaced by ``placeholder_mask % n`` (n counts up from 0)
    and the matched text is remembered in ``matches`` so that wrap_up can put
    it back later.
    """
    matchcount = -1

    def __init__(self, placeholder_mask):
        self.placeholder_mask = placeholder_mask
        self.matches = []

    def __call__(self, match):
        # record the matched text and hand back its numbered placeholder
        self.matchcount += 1
        self.matches.append(match.group(0))
        return self.placeholder_mask % self.matchcount

    def __repr__(self):
        lines = []
        for index, text in enumerate(self.matches):
            lines.append(self.placeholder_mask % index + '=' + text)
        return '\n'.join(lines)

    def wrap_up(self, output):
        """Replace the first u'<placeholder>' of every match with its text."""
        for index, text in enumerate(self.matches):
            quoted_placeholder = "u'" + self.placeholder_mask % index + "'"
            output = output.replace(quoted_placeholder, text, 1)
        return output
def get_compilation_plan(js):
    """Replace string and numeric constants in ``js`` by quoted placeholders.

    Returns ``(string_matches, number_matches, plan)``: the two
    match_unumerator objects holding the original constants and the
    resulting compilation plan.
    """
    string_matches = match_unumerator(CP_STRING_PLACEHOLDER)
    plan = re.sub(CP_STRING, string_matches, js)

    number_matches = match_unumerator(CP_NUMERIC_PLACEHOLDER)
    plan = CP_NUMERIC_RE.sub(number_matches, plan)

    # now put quotes around the placeholders; plain string replaces are
    # somewhat faster than using another re
    for opening, closing in (('__PyJsNUM_', '_PyJsNUM__'),
                             ('__PyJsSTR_', '_PyJsSTR__')):
        plan = plan.replace(opening, '"' + opening)
        plan = plan.replace(closing, closing + '"')
    return string_matches, number_matches, plan
def translate_js_with_compilation_plan(js, HEADER=DEFAULT_HEADER):
    """Translate JavaScript source ``js`` to Python, caching by compilation plan.

    The constants of ``js`` are replaced by placeholders (see
    get_compilation_plan); the resulting plan is translated once, cached under
    its md5 digest, and the constants are substituted back into the cached
    code on every call. ``HEADER`` is prepended to the result.

    compile plans only work with the following restrictions:
    - only enabled for oneliner expressions
    - when there are comments in the js code string substitution is disabled
    - when there nested escaped quotes string substitution is disabled, so

    cacheable:
        Q1 == 1 && name == 'harry'

    not cacheable:
        Q1 == 1 && name == 'harry' // some comment

    not cacheable:
        Q1 == 1 && name == 'o\'Reilly'

    not cacheable:
        Q1 == 1 && name /* some comment */ == 'o\'Reilly'
    """
    match_increaser_str, match_increaser_num, compilation_plan = get_compilation_plan(js)

    cp_hash = hashlib.md5(compilation_plan.encode('utf-8')).digest()
    try:
        python_code = cache[cp_hash]['proto_python_code']
    except KeyError:
        # Cache miss only: anything else (e.g. KeyboardInterrupt) must not be
        # mistaken for a missing entry.
        parser = pyjsparser.PyJsParser()
        parsed = parser.parse(compilation_plan)  # js to esprima syntax tree
        # Another way of doing that would be with the auto esprima translation
        # but it is much slower and causes import problems:
        # parsed = esprima.parse(js).to_dict()
        translating_nodes.clean_stacks()
        python_code = translating_nodes.trans(parsed)  # syntax tree to python code
        cache[cp_hash] = {
            'compilation_plan': compilation_plan,
            'proto_python_code': python_code,
        }

    # put the original constants back in place of their placeholders
    python_code = match_increaser_str.wrap_up(python_code)
    python_code = match_increaser_num.wrap_up(python_code)

    return HEADER + python_code
def trasnlate(js, HEADER=DEFAULT_HEADER):
    """Alias of translate_js (the misspelled name is kept for existing callers).

    ``js`` has to be JavaScript source code; the equivalent Python code,
    prefixed with ``HEADER``, is returned.
    """
    return translate_js(js, HEADER=HEADER)
# Direct access to the syntax-tree -> python translation step.
syntax_tree_translate = translating_nodes.trans

if __name__=='__main__':
    # Manual check: translate esprima.js, save the result to res.py and run it.
    PROFILE = False
    import js2py
    import codecs

    def main():
        with codecs.open("esprima.js", "r", "utf-8") as f:
            d = f.read()
        r = js2py.translate_js(d)
        # The translated code is text, so it is written through an encoding
        # writer; a file opened with 'wb' rejects str on Python 3.
        with codecs.open('res.py', 'w', 'utf-8') as f2:
            f2.write(r)
        exec(r, {})

    if PROFILE:
        import cProfile
        cProfile.run('main()', sort='tottime')
    else:
        main()

View File

View File

@ -0,0 +1,80 @@
from pyparsing import *
# An identifier starts with '$', '_' or a letter ...
IdentifierStart = oneOf(['$', '_']+list(alphas))
# ... optionally followed by a run of letters, digits, '$' or '_'.
Identifier = Combine(IdentifierStart + Optional(Word(alphas+nums+'$_')))
# Words treated as keywords by this grammar.
_keywords = ['break', 'do', 'instanceof', 'typeof', 'case', 'else', 'new', 'var', 'catch', 'finally',
'return', 'void', 'continue', 'for', 'switch', 'while', 'debugger', 'function', 'this',
'with', 'default', 'if', 'throw', 'delete', 'in', 'try']
Keyword = oneOf(_keywords)
#Literals
#Bool
BooleanLiteral = oneOf(('true', 'false'))
#Null
NullLiteral = Literal('null')
#Undefined
UndefinedLiteral = Literal('undefined')
#NaN
NaNLiteral = Literal('NaN')
#Number
NonZeroDigit = oneOf(['1','2','3','4','5','6','7','8','9'])
DecimalDigit = oneOf(['0', '1','2','3','4','5','6','7','8','9'])
HexDigit = oneOf(list('0123456789abcdefABCDEF'))
DecimalDigits = Word(nums)
# Either a digit run without a leading zero, or a lone '0'.
DecimalIntegerLiteral = Combine(NonZeroDigit+Optional(DecimalDigits)) | '0'
# Digits with an optional leading '-' or '+'.
SignedInteger = Combine('-'+DecimalDigits) | Combine('+'+DecimalDigits) | DecimalDigits
# NOTE(review): oneOf is called with two positional strings here; the second
# positional parameter of pyparsing's oneOf is presumably `caseless`, so this
# may only work by accident -- confirm whether oneOf(['e', 'E']) was intended.
ExponentPart = Combine(oneOf('e', 'E')+SignedInteger)
# Three alternatives, tagged with the result names 'int', 'float' and 'exp':
#   int '.' [fraction] [exponent]  |  '.' fraction [exponent]  |  int [exponent]
_DecimalLiteral = (Combine(DecimalIntegerLiteral('int')+'.'+Optional(DecimalDigits('float'))+Optional(ExponentPart('exp'))) |
Combine('.'+DecimalDigits('float')+Optional(ExponentPart('exp'))) |
DecimalIntegerLiteral('int')+Optional(ExponentPart('exp')))
# A decimal literal must not be directly followed by an identifier start.
DecimalLiteral = Combine(_DecimalLiteral+NotAny(IdentifierStart))
# '0x' / '0X' followed by hex digits; the digits get the result name 'hex'.
HexIntegerLiteral = Combine(oneOf(('0x','0X'))+Word('0123456789abcdefABCDEF')('hex'))
# Result is grouped under 'decimal' or 'hex'; js_num reads these names.
NumericLiteral = Group(DecimalLiteral)('decimal') ^ Group(HexIntegerLiteral)('hex')
def js_num(num):
    """Parse the JS numeric literal ``num`` with NumericLiteral and return its value.

    Decimal literals are built from their 'int', 'float' and 'exp' parts,
    hex literals are converted with base 16. Nothing is returned (None) when
    the parse result carries neither a decimal nor a hex group.
    """
    parsed = NumericLiteral.parseString(num)
    if parsed.decimal:
        decimal = parsed.decimal
        whole_part = int(decimal.int if decimal.int else 0)
        fraction_part = float('0.' + decimal.float if decimal.float else 0)
        value = whole_part + fraction_part
        if decimal.exp:
            # NOTE(review): 'exp' presumably still contains the leading e/E
            # matched by ExponentPart -- confirm int() accepts it.
            value *= 10 ** int(decimal.exp)
        return value
    elif parsed.hex:
        return int(parsed.hex.hex, 16)
#String
# A line terminator is a '\n' or a '\r' matched through White(...).
# NOTE(review): the (1,1,1) arguments are presumably min/max/exact, i.e. a
# single character -- confirm against the pyparsing White signature.
LineTerminator = White('\n', 1,1,1) | White('\r', 1,1,1)
# '\r\n' is tried first so that it is consumed as one terminator.
LineTerminatorSequence = Combine(White('\r', 1,1,1)+White('\n', 1,1,1)) | White('\n', 1,1,1) | White('\r', 1,1,1)
# A backslash directly followed by a line terminator sequence.
LineContinuation = Combine('\\'+LineTerminatorSequence)
# 'u' + four hex digits, and 'x' + two hex digits (the backslash is added below).
UnicodeEscapeSequence = Combine('u'+HexDigit+HexDigit+HexDigit+HexDigit)
HexEscapeSequence = Combine('x'+HexDigit+HexDigit)
SingleEscapeCharacter = oneOf(["'", '"', '\\', 'b', 'f', 'n', 'r', 't', 'v'])
EscapeCharacter = SingleEscapeCharacter | '0' | 'x' | 'u' # Changed DecimalDigit to 0 since it would match for example "\3" To verify..
# NOTE(review): CharsNotIn is given a list wrapping a parser expression here
# (and in the two *StringCharacter rules below) rather than a string of
# characters -- confirm that pyparsing treats this as intended.
NonEscapeCharacter = CharsNotIn([EscapeCharacter |LineTerminator])
CharacterEscapeSequence = SingleEscapeCharacter | NonEscapeCharacter
# Everything that may follow a backslash inside a string literal.
EscapeSequence = CharacterEscapeSequence | Combine('0'+NotAny(DecimalDigit)) | HexEscapeSequence | UnicodeEscapeSequence
# One element of a string body: ordinary characters, a backslash escape, or a
# line continuation.
SingleStringCharacter = CharsNotIn([LineTerminator | '\\' | "'"]) | Combine('\\'+EscapeSequence) | LineContinuation
DoubleStringCharacter = CharsNotIn([LineTerminator | '\\' | '"']) | Combine('\\'+EscapeSequence) | LineContinuation
# A double-quoted or a single-quoted literal, quotes included.
StringLiteral = Combine('"'+ZeroOrMore(DoubleStringCharacter)+'"') ^ Combine("'"+ZeroOrMore(SingleStringCharacter)+"'")
#Array
#Dict

184
lib/js2py/utils/injector.py Normal file
View File

@ -0,0 +1,184 @@
__all__ = ['fix_js_args']
import types
import opcode
import six
if six.PY3:
    # On Python 3 the rewritten bytecode is assembled from ints (see the
    # bytes(...) call in append_arguments), so chr is turned into the
    # identity function, and xrange is simply range.
    xrange = range

    def chr(x):
        return x

# Opcode constants used for comparison and replacement
LOAD_FAST = opcode.opmap['LOAD_FAST']
LOAD_GLOBAL = opcode.opmap['LOAD_GLOBAL']
STORE_FAST = opcode.opmap['STORE_FAST']
def fix_js_args(func):
    '''Make sure func accepts ``this`` and ``arguments``.

    Use this when unsure whether func takes this and arguments as its last
    2 args. func is returned unchanged when its last two arguments are
    (this, arguments) or (arguments, var); otherwise a new function with the
    two arguments appended is returned.'''
    fcode = six.get_function_code(func)
    last_two_args = fcode.co_varnames[fcode.co_argcount - 2:fcode.co_argcount]
    if last_two_args in (('this', 'arguments'), ('arguments', 'var')):
        return func
    patched_code = append_arguments(six.get_function_code(func), ('this', 'arguments'))
    return types.FunctionType(
        patched_code,
        six.get_function_globals(func),
        func.__name__,
        closure=six.get_function_closure(func),
    )
# Build a new code object from code_obj in which the names in new_locals
# (a tuple of strings) become extra positional arguments placed right after
# the existing arguments. Every LOAD_GLOBAL of such a name is rewritten to a
# LOAD_FAST of the new argument; all other name/local indices are remapped.
# Raises ValueError when a LOAD_GLOBAL refers to a name that cannot be mapped.
# NOTE(review): instructions()/write_instruction() work on an opcode followed
# by a 2-byte argument; this looks like the pre-3.6 CPython bytecode layout,
# and the CodeType argument tuples below are fixed-length -- confirm which
# interpreter versions this is expected to run on.
def append_arguments(code_obj, new_locals):
co_varnames = code_obj.co_varnames # Old locals
co_names = code_obj.co_names # Old globals
# make sure every new local has an index in co_names (needed below)
co_names+=tuple(e for e in new_locals if e not in co_names)
co_argcount = code_obj.co_argcount # Argument count
co_code = code_obj.co_code # The actual bytecode as a string
# Make one pass over the bytecode to identify names that should be
# left in code_obj.co_names.
not_removed = set(opcode.hasname) - set([LOAD_GLOBAL])
saved_names = set()
for inst in instructions(co_code):
if inst[0] in not_removed:
saved_names.add(co_names[inst[1]])
# Build co_names for the new code object. This should consist of
# globals that were only accessed via LOAD_GLOBAL
names = tuple(name for name in co_names
if name not in set(new_locals) - saved_names)
# Build a dictionary that maps the indices of the entries in co_names
# to their entry in the new co_names
name_translations = dict((co_names.index(name), i)
for i, name in enumerate(names))
# Build co_varnames for the new code object. This should consist of
# the entirety of co_varnames with new_locals spliced in after the
# arguments
new_locals_len = len(new_locals)
varnames = (co_varnames[:co_argcount] + new_locals +
co_varnames[co_argcount:])
# Build the dictionary that maps indices of entries in the old co_varnames
# to their indices in the new co_varnames
range1, range2 = xrange(co_argcount), xrange(co_argcount, len(co_varnames))
varname_translations = dict((i, i) for i in range1)
varname_translations.update((i, i + new_locals_len) for i in range2)
# Build the dictionary that maps indices of deleted entries of co_names
# to their indices in the new co_varnames
names_to_varnames = dict((co_names.index(name), varnames.index(name))
for name in new_locals)
# Now we modify the actual bytecode
modified = []
for inst in instructions(code_obj.co_code):
# If the instruction is a LOAD_GLOBAL, we have to check to see if
# it's one of the globals that we are replacing. Either way,
# update its arg using the appropriate dict.
if inst[0] == LOAD_GLOBAL:
if inst[1] in names_to_varnames:
inst[0] = LOAD_FAST
inst[1] = names_to_varnames[inst[1]]
elif inst[1] in name_translations:
inst[1] = name_translations[inst[1]]
else:
raise ValueError("a name was lost in translation")
# If it accesses co_varnames or co_names then update its argument.
elif inst[0] in opcode.haslocal:
inst[1] = varname_translations[inst[1]]
elif inst[0] in opcode.hasname:
inst[1] = name_translations[inst[1]]
modified.extend(write_instruction(inst))
# Assemble the constructor arguments; the bytecode is a str on Python 2 and
# bytes on Python 3.
if six.PY2:
code = ''.join(modified)
args = (co_argcount + new_locals_len,
code_obj.co_nlocals + new_locals_len,
code_obj.co_stacksize,
code_obj.co_flags,
code,
code_obj.co_consts,
names,
varnames,
code_obj.co_filename,
code_obj.co_name,
code_obj.co_firstlineno,
code_obj.co_lnotab,
code_obj.co_freevars,
code_obj.co_cellvars)
else:
#print(modified)
code = bytes(modified)
#print(code)
# the extra 0 after the argument count is presumably kwonlyargcount -- TODO confirm
args = (co_argcount + new_locals_len,
0,
code_obj.co_nlocals + new_locals_len,
code_obj.co_stacksize,
code_obj.co_flags,
code,
code_obj.co_consts,
names,
varnames,
code_obj.co_filename,
code_obj.co_name,
code_obj.co_firstlineno,
code_obj.co_lnotab,
code_obj.co_freevars,
code_obj.co_cellvars)
# Done modifying codestring - make the code object
return types.CodeType(*args)
def instructions(code):
    """Yield ``[opcode, argument]`` lists decoded from the bytecode ``code``.

    The argument is None for opcodes below opcode.HAVE_ARGUMENT. An
    EXTENDED_ARG instruction is not yielded itself; its value is shifted
    left by 16 bits and added to the argument of the following instruction.
    """
    if six.PY2:
        code = map(ord, code)
    pos, end = 0, len(code)
    pending_extension = 0
    while pos < end:
        op = code[pos]
        pos += 1
        if op >= opcode.HAVE_ARGUMENT:
            # two-byte little-endian argument plus any pending extension
            oparg = code[pos] + (code[pos + 1] << 8) + pending_extension
            pending_extension = 0
            pos += 2
            if op == opcode.EXTENDED_ARG:
                pending_extension = oparg << 16
            else:
                yield [op, oparg]
        else:
            yield [op, None]
def write_instruction(inst):
    """Encode one ``[opcode, argument]`` pair as a list of chr() results.

    - argument None: just the opcode;
    - argument below 2**16: opcode + two little-endian argument bytes;
    - argument below 2**32: an EXTENDED_ARG instruction carrying the upper
      16 bits, followed by the opcode with the lower 16 bits.

    Raises ValueError when the argument does not fit in 32 bits.
    """
    op, oparg = inst
    if oparg is None:
        return [chr(op)]
    # Strict bounds: 65536 does not fit in two bytes and 4294967296 does not
    # fit in four, so they must not be silently truncated.
    elif oparg < 65536:
        return [chr(op), chr(oparg & 255), chr((oparg >> 8) & 255)]
    elif oparg < 4294967296:
        return [chr(opcode.EXTENDED_ARG),
                chr((oparg >> 16) & 255),
                chr((oparg >> 24) & 255),
                chr(op),
                chr(oparg & 255),
                chr((oparg >> 8) & 255)]
    else:
        raise ValueError("Invalid oparg: {0} is too large".format(oparg))
# Self-test: turn the globals x, y and z read by func into extra positional
# arguments and check that the rewritten function returns what was passed in.
if __name__=='__main__':
# module-level x must be shadowed by the injected argument
x = 'Wrong'
# a global that is NOT injected and therefore must keep working as a global
dick = 3000
def func(a):
# y and z are never defined as globals; they only exist once injected
print(x,y,z, a)
print(dick)
d = (x,)
for e in (e for e in x):
print(e)
return x, y, z
func2 =types.FunctionType(append_arguments(six.get_function_code(func), ('x', 'y', 'z')), six.get_function_globals(func), func.__name__, closure=six.get_function_closure(func))
args = (2,2,3,4),3,4
# a=1, then x, y, z are taken from args
assert func2(1, *args) == args

View File

@ -242,6 +242,7 @@ def GetComicInfo(comicid, dom, safechk=None):
try:
names = len(dom.getElementsByTagName('name'))
n = 0
comic['ComicPublisher'] = 'Unknown' #set this to a default value here so that it will carry through properly
while (n < names):
if dom.getElementsByTagName('name')[n].parentNode.nodeName == 'results':
try:

View File

@ -972,7 +972,7 @@ class FileChecker(object):
#Ignoring MAC OS Finder directory of cached files (/.AppleDouble/<name of file(s)>)
continue
if fname.endswith(comic_ext):
if os.path.splitext(fname)[1].lower().endswith(comic_ext):
if direc is None:
comicsize = os.path.getsize(os.path.join(dir, fname))
else:

View File

@ -1349,8 +1349,8 @@ def filesafe(comic):
import unicodedata
u_comic = unicodedata.normalize('NFKD', comic).encode('ASCII', 'ignore').strip()
comicname_filesafe = re.sub('[\:\'\,\?\!\\\]', '', u_comic)
comicname_filesafe = re.sub('[\/]', '-', comicname_filesafe)
comicname_filesafe = re.sub('[\:\'\"\,\?\!\\\]', '', u_comic)
comicname_filesafe = re.sub('[\/\*]', '-', comicname_filesafe)
return comicname_filesafe

View File

@ -1,3 +1,4 @@
# -*- coding: utf-8 -*-
# This file is part of Mylar.
#
# Mylar is free software: you can redistribute it and/or modify
@ -136,7 +137,6 @@ def libraryScan(dir=None, append=False, ComicID=None, ComicName=None, cron=None,
logger.info('However, ' + str(cbz_retry) + ' files are in a cbz format, which may contain metadata.')
mylar.IMPORT_STATUS = 'Successfully parsed ' + str(comiccnt) + ' files'
#return queue.put(valreturn)
myDB = db.DBConnection()
@ -638,19 +638,30 @@ def scanLibrary(scan=None, queue=None):
else:
theissuenumber = ss['issuenumber']
thefilename = ss['comfilename']
thelocation = ss['comlocation']
if type(ss['comfilename']) != unicode:
thefilename = thefilename.decode('utf-8')
if type(ss['comlocation']) != unicode:
thelocation = thelocation.decode('utf-8')
nspace_dynamicname = re.sub('[\|\s]', '', ss['dynamicname'].lower()).strip()
if type(nspace_dynamicname) != unicode:
nspace_dynamicname = nspace_dynamicname.decode('utf-8')
controlValue = {"impID": ss['impid']}
newValue = {"ComicYear": ss['comicyear'],
"Status": "Not Imported",
"ComicName": ss['comicname'], #.encode('utf-8'),
"DisplayName": ss['displayname'], #.encode('utf-8'),
"ComicName": ss['comicname'].decode('utf-8'),
"DisplayName": ss['displayname'].decode('utf-8'),
"DynamicName": nspace_dynamicname,
"ComicID": ss['comicid'], #if it's been scanned in for cvinfo, this will be the CID - otherwise it's None
"IssueID": None,
"Volume": ss['volume'],
"IssueNumber": theissuenumber,
"ComicFilename": ss['comfilename'].decode('utf-8'), #ss['comfilename'].encode('utf-8'),
"ComicLocation": ss['comlocation'],
"ComicFilename": thefilename,#.decode('utf-8'), #ss['comfilename'].encode('utf-8'),
"ComicLocation": thelocation,
"ImportDate": helpers.today(),
"WatchMatch": ss['watchmatch']}
myDB.upsert("importresults", newValue, controlValue)

View File

@ -4,6 +4,7 @@ import os, sys
import re
import lib.feedparser as feedparser
import lib.requests as requests
import lib.cfscrape as cfscrape
import urlparse
import ftpsshup
import datetime
@ -143,9 +144,10 @@ def torrents(pickfeed=None, seriesname=None, issue=None, feedinfo=None):
if all([pickfeed != '4', pickfeed != '3', pickfeed != '5', pickfeed != '999']):
payload = None
try:
r = requests.get(feed, params=payload, verify=verify)
scraper = cfscrape.create_scraper()
r = scraper.get(feed, verify=verify)#requests.get(feed, params=payload, verify=verify)
except Exception, e:
logger.warn('Error fetching RSS Feed Data from %s: %s' % (picksite, e))
return
@ -253,38 +255,7 @@ def torrents(pickfeed=None, seriesname=None, issue=None, feedinfo=None):
issue = feedme.entries[i].title[iss_st +3:].strip()
#logger.fdebug('issue # : ' + str(issue))
#break it down to get the Size since it's available on THIS 32P feed only so far.
#when it becomes available in the new feeds, this will be working, for now it just nulls out.
sizestart = tmpdesc.find('Size:')
justdigits = 0
if sizestart >= 0:
sizeend = tmpdesc.find('Leechers:')
sizestart +=5 # to get to the end of the word 'Size:'
tmpsize = tmpdesc[sizestart:sizeend].strip()
fdigits = re.sub("[^0123456789\.]", "", tmpsize).strip()
if '.' in fdigits:
decfind = fdigits.find('.')
wholenum = fdigits[:decfind]
decnum = fdigits[decfind +1:]
else:
wholenum = fdigits
decnum = 0
if 'MB' in tmpsize:
wholebytes = int(wholenum) * 1048576
wholedecimal = (int(decnum) * 1048576) / 100
justdigits = wholebytes + wholedecimal
else:
#it's 'GB' then
wholebytes = (int(wholenum) * 1024) * 1048576
wholedecimal = ((int(decnum) * 1024) * 1048576) / 100
justdigits = wholebytes + wholedecimal
#this is not currently working for 32p
#Get the # of seeders.
#seedstart = tmpdesc.find('Seeders:')
#seedend = tmpdesc.find('Added:')
#seedstart +=8 # to get to the end of the word 'Seeders:'
#tmpseed = tmpdesc[seedstart:seedend].strip()
#seeddigits = re.sub("[^0123456789\.]", "", tmpseed).strip()
justdigits = feedme.entries[i].torrent_contentlength
seeddigits = 0
if int(mylar.MINSEEDS) >= int(seeddigits):
@ -799,6 +770,7 @@ def torsend2client(seriesname, issue, seriesyear, linkit, site):
logger.error('No Local Watch Directory or Seedbox Watch Directory specified. Set it and try again.')
return "fail"
cf_cookievalue = None
if site == '32P':
url = 'https://32pag.es/torrents.php'
@ -854,9 +826,14 @@ def torsend2client(seriesname, issue, seriesyear, linkit, site):
else:
tpse_referrer = 'http://torrentproject.se/'
headers = {'Accept-encoding': 'gzip',
'User-Agent': str(mylar.USER_AGENT),
'Referer': tpse_referrer}
try:
scraper = cfscrape.create_scraper()
cf_cookievalue, cf_user_agent = cfscrape.get_tokens(url)
headers = {'Accept-encoding': 'gzip',
'User-Agent': cf_user_agent}
except Exception, e:
return "fail"
logger.fdebug('Grabbing torrent from url:' + str(url))
@ -924,7 +901,12 @@ def torsend2client(seriesname, issue, seriesyear, linkit, site):
return "fail"
try:
r = requests.get(url, params=payload, verify=verify, stream=True, headers=headers)
scraper = cfscrape.create_scraper()
if cf_cookievalue:
r = scraper.get(url, params=payload, cookies=cf_cookievalue, verify=verify, stream=True, headers=headers)
else:
r = scraper.get(url, params=payload, verify=verify, stream=True, headers=headers)
#r = requests.get(url, params=payload, verify=verify, stream=True, headers=headers)
except Exception, e:
logger.warn('Error fetching data from %s: %s' % (site, e))
@ -949,16 +931,28 @@ def torsend2client(seriesname, issue, seriesyear, linkit, site):
logger.info('blah: ' + str(r.status_code))
return "fail"
if any([site == 'TPSE', site == 'DEM', site == 'WWT']) and any([str(r.status_code) == '403', str(r.status_code) == '404']):
logger.warn('Unable to download from ' + site + ' [' + str(r.status_code) + ']')
#retry with the alternate torrent link.
url = helpers.torrent_create(site, linkit, True)
logger.fdebug('Trying alternate url: ' + str(url))
try:
r = requests.get(url, params=payload, verify=verify, stream=True, headers=headers)
if any([site == 'TPSE', site == 'DEM', site == 'WWT']) and any([str(r.status_code) == '403', str(r.status_code) == '404', str(r.status_code) == '503']):
if str(r.status_code) != '503':
logger.warn('Unable to download from ' + site + ' [' + str(r.status_code) + ']')
#retry with the alternate torrent link.
url = helpers.torrent_create(site, linkit, True)
logger.fdebug('Trying alternate url: ' + str(url))
try:
r = requests.get(url, params=payload, verify=verify, stream=True, headers=headers)
except Exception, e:
return "fail"
except Exception, e:
return "fail"
else:
logger.warn('Cloudflare protection online for ' + site + '. Attempting to bypass...')
try:
scraper = cfscrape.create_scraper()
cf_cookievalue, cf_user_agent = cfscrape.get_cookie_string(url)
headers = {'Accept-encoding': 'gzip',
'User-Agent': cf_user_agent}
r = scraper.get(url, verify=verify, cookies=cf_cookievalue, stream=True, headers=headers)
except Exception, e:
return "fail"
if str(r.status_code) != '200':
logger.warn('Unable to download torrent from ' + site + ' [Status Code returned: ' + str(r.status_code) + ']')

View File

@ -705,7 +705,11 @@ def NZB_SEARCH(ComicName, IssueNumber, ComicYear, SeriesYear, Publisher, IssueDa
#rss for experimental doesn't have the size constraints embedded. So we do it here.
if RSS == "yes":
if nzbprov == '32P':
comsize_b = None #entry['length']
try:
#newer rss feeds will now return the filesize from 32p. Safeguard it in case it's an older result
comsize_b = entry['length']
except:
comsize_b = None
elif nzbprov == 'TPSE':
comsize_b = entry['length']
else:
@ -1845,7 +1849,7 @@ def nzbname_create(provider, title=None, info=None):
logger.fdebug("nzb name to be used for post-processing is : " + str(nzbname))
elif provider == '32P' or provider == 'TPSE':
elif any([provider == '32P', provider == 'TPSE', provider == 'WWT', provider == 'DEM']):
#filesafe the name cause people are idiots when they post sometimes.
nzbname = re.sub('\s{2,}', ' ', helpers.filesafe(title)).strip()
#let's change all space to decimals for simplicity

View File

@ -1,5 +1,3 @@
# This file is part of Mylar.
# -*- coding: utf-8 -*-
#
@ -2174,12 +2172,14 @@ class WebInterface(object):
def markComics(self, action=None, **args):
myDB = db.DBConnection()
comicsToAdd = []
logger.info(args)
for ComicID in args:
logger.info(ComicID)
if ComicID == 'manage_comic_length':
continue
else:
for k,v in args.items():
if k == 'manage_comic_length':
break
#k = Comicname[ComicYear]
#v = ComicID
comyr = k.find('[')
@ -2188,26 +2188,24 @@ class WebInterface(object):
ComicID = v
#cid = ComicName.decode('utf-8', 'replace')
if action == 'delete':
logger.info('[MANAGE COMICS][DELETION] Now deleting ' + ComicName + ' (' + str(ComicYear) + ') [' + str(ComicID) + '] form the DB.')
myDB.action('DELETE from comics WHERE ComicID=?', [ComicID])
myDB.action('DELETE from issues WHERE ComicID=?', [ComicID])
logger.info('[MANAGE COMICS][DELETION] Successfully deleted ' + ComicName + '(' + str(ComicYear) + ')')
elif action == 'pause':
controlValueDict = {'ComicID': ComicID}
newValueDict = {'Status': 'Paused'}
myDB.upsert("comics", newValueDict, controlValueDict)
logger.info('[MANAGE COMICS][PAUSE] ' + ComicName + ' has now been put into a Paused State.')
elif action == 'resume':
controlValueDict = {'ComicID': ComicID}
newValueDict = {'Status': 'Active'}
myDB.upsert("comics", newValueDict, controlValueDict)
logger.info('[MANAGE COMICS][RESUME] ' + ComicName + ' has now been put into a Resumed State.')
else:
logger.info('appending ' + str(ComicID) + ' to refresh list.')
comicsToAdd.append(ComicID)
logger.info(comicsToAdd)
if action == 'delete':
logger.info('[MANAGE COMICS][DELETION] Now deleting ' + ComicName + ' (' + str(ComicYear) + ') [' + str(ComicID) + '] form the DB.')
myDB.action('DELETE from comics WHERE ComicID=?', [ComicID])
myDB.action('DELETE from issues WHERE ComicID=?', [ComicID])
logger.info('[MANAGE COMICS][DELETION] Successfully deleted ' + ComicName + '(' + str(ComicYear) + ')')
elif action == 'pause':
controlValueDict = {'ComicID': ComicID}
newValueDict = {'Status': 'Paused'}
myDB.upsert("comics", newValueDict, controlValueDict)
logger.info('[MANAGE COMICS][PAUSE] ' + ComicName + ' has now been put into a Paused State.')
elif action == 'resume':
controlValueDict = {'ComicID': ComicID}
newValueDict = {'Status': 'Active'}
myDB.upsert("comics", newValueDict, controlValueDict)
logger.info('[MANAGE COMICS][RESUME] ' + ComicName + ' has now been put into a Resumed State.')
else:
logger.info('appending ' + str(ComicID) + ' to refresh list.')
comicsToAdd.append(ComicID)
if len(comicsToAdd) > 0:
logger.info('[MANAGE COMICS][REFRESH] Refreshing ' + str(len(comicsToAdd)) + ' series')
@ -3236,6 +3234,10 @@ class WebInterface(object):
importResults.exposed = True
def ImportFilelisting(self, comicname, dynamicname, volume):
if type(comicname) != unicode:
comicname = urllib.unquote(comicname).decode('utf-8')
if type(dynamicname) != unicode:
dynamicname = urllib.unquote(dynamicname).decode('utf-8')
myDB = db.DBConnection()
if volume is None or volume == 'None':
results = myDB.select("SELECT * FROM importresults WHERE (WatchMatch is Null OR WatchMatch LIKE 'C%') AND DynamicName=? AND Volume IS NULL",[dynamicname])

View File

@ -666,8 +666,8 @@ def pullitcheck(comic1off_name=None, comic1off_id=None, forcecheck=None, futurep
logger.fdebug("comparing" + comicnm + "..to.." + unlines[cnt].upper())
watchcomic = unlines[cnt]
logger.fdebug("watchcomic : " + str(watchcomic)) # / mod :" + str(modwatchcomic))
logger.fdebug("comicnm : " + str(comicnm)) # / mod :" + str(modcomicnm))
logger.fdebug("watchcomic : " + watchcomic) # / mod :" + str(modwatchcomic))
logger.fdebug("comicnm : " + comicnm) # / mod :" + str(modcomicnm))
if dyn_comicnm == dyn_watchnm:
if mylar.ANNUALS_ON:
@ -1121,8 +1121,14 @@ def new_pullcheck(weeknumber, pullyear, comic1off_name=None, comic1off_id=None,
else:
logger.fdebug('issue exists in db already: ' + str(issueid))
pass
#make sure the status is Wanted if auto-upcoming is enabled.
if isschk['Status'] == newValue['Status']:
pass
else:
if all([isschk['Status'] != 'Downloaded', isschk['Status'] != 'Snatched', isschk['Status'] != 'Archived', isschk['Status'] != 'Ignored']) and newValue['Status'] == 'Wanted':
#make sure the status is Wanted and that the issue status is identical if not.
newStat = {'Status': 'Wanted'}
ctrlStat = {'IssueID': issueid}
myDB.upsert("issues", newStat, ctrlStat)
else:
continue
# else:
@ -1395,6 +1401,7 @@ def future_check():
if str(sr['comicyear']) == str(theissdate):
logger.fdebug('Matched to : ' + str(theissdate))
matches.append(sr)
if len(matches) == 1:
logger.info('Narrowed down to one series as a direct match: ' + matches[0]['name'] + '[' + str(matches[0]['comicid']) + ']')
cid = matches[0]['comicid']
@ -1426,16 +1433,21 @@ def future_check():
matchword = split_match[i].lower()
except:
break
if split_match.index(ss) == split_series.index(ss):
#will return word position in string.
#logger.fdebug('word match to position found in both strings at position : ' + str(split_match.index(ss)))
word_match+=1
elif any([x == matchword for x in catch_words]):
if any([x == matchword for x in catch_words]):
#logger.fdebug('[MW] common word detected of : ' + matchword)
word_match+=.5
elif any([cw == ss for cw in catch_words]):
#logger.fdebug('[CW] common word detected of : ' + matchword)
word_match+=.5
else:
try:
#will return word position in string.
#logger.fdebug('word match to position found in both strings at position : ' + str(split_match.index(ss)))
if split_match.index(ss) == split_series.index(ss):
word_match+=1
except ValueError:
break
i+=1
logger.fdebug('word match score of : ' + str(word_match) + ' / ' + str(len(split_series)))
if word_match == len(split_series) or (word_match / len(split_series)) > 80: