-
Notifications
You must be signed in to change notification settings - Fork 20
Open
Description
I tested how fast the platform can process a large .csv file (532 MB, 3,235,282 lines). The program below takes about 25 minutes to run.
The program should just print the current line number with a very simple comment.
main.py
from scramjet.streams import Stream
# Module-level counter of how many items have passed through count().
lines_number = 0


def count(x):
    """Increment the module-level ``lines_number`` counter and return it.

    Args:
        x: The item being counted. Its value is ignored; only the number
            of calls matters.

    Returns:
        The updated value of ``lines_number`` after the increment.
    """
    global lines_number
    lines_number += 1
    return lines_number
def show_line_number(x):
    """Format the current value of the module-level ``lines_number`` counter.

    Args:
        x: The item being mapped. Its value is ignored; the output depends
            only on ``lines_number``.

    Returns:
        The counter followed by a space and a newline when the counter is
        below 1000; the counter followed by " bigger than 2000 " and a
        newline when it is above 2000; ``None`` for every value from 1000
        to 2000 inclusive.
    """
    # The counter is only read here; the declaration is kept from the original.
    global lines_number
    if lines_number < 1000:
        return f"{lines_number} \n"
    elif lines_number > 2000:
        return f"{lines_number} bigger than 2000 \n"
    # 1000 <= lines_number <= 2000: nothing is produced for these values.
    return None
def run(context, input):
    """Build the line-counting pipeline over ``input`` and return it.

    The pipeline reads from ``input`` with ``Stream.read_from``, passes every
    item to ``count`` via ``.each`` and then transforms every item with
    ``show_line_number`` via ``.map``.

    Args:
        context: Not used by this function.
        input: The source handed to ``Stream.read_from``. The name shadows
            the ``input`` builtin but is kept so the signature is unchanged.

    Returns:
        The object produced by the chained ``Stream`` calls.

    NOTE(review): presumably this is the entry point the Scramjet host
    calls for ``main.py`` -- confirm against the platform documentation.
    """
    x = (
        Stream
        .read_from(input)
        .each(count)
        .map(show_line_number)
    )
    return x
# Module-level counter of how many items have passed through count().
lines_number = 0


def count(x):
    """Increment the module-level ``lines_number`` counter and return it.

    Args:
        x: The item being counted. Its value is ignored; only the number
            of calls matters.

    Returns:
        The updated value of ``lines_number`` after the increment.
    """
    global lines_number
    lines_number += 1
    return lines_number
def show_line_number(x):
    """Format the current value of the module-level ``lines_number`` counter.

    Args:
        x: The item being mapped. Its value is ignored; the output depends
            only on ``lines_number``.

    Returns:
        The counter followed by a space and a newline when the counter is
        below 1000; the counter followed by " bigger than 2000 " and a
        newline when it is above 2000; ``None`` for every value from 1000
        to 2000 inclusive.
    """
    # The counter is only read here; the declaration is kept from the original.
    global lines_number
    if lines_number < 1000:
        return f"{lines_number} \n"
    elif lines_number > 2000:
        return f"{lines_number} bigger than 2000 \n"
    # 1000 <= lines_number <= 2000: nothing is produced for these values.
    return None
def run(context, input):
    """Build the line-counting pipeline over ``input`` and return it.

    The pipeline reads from ``input`` with ``Stream.read_from``, passes every
    item to ``count`` via ``.each`` and then transforms every item with
    ``show_line_number`` via ``.map``.

    Args:
        context: Not used by this function.
        input: The source handed to ``Stream.read_from``. The name shadows
            the ``input`` builtin but is kept so the signature is unchanged.

    Returns:
        The object produced by the chained ``Stream`` calls.

    NOTE(review): presumably this is the entry point the Scramjet host
    calls for ``main.py`` -- confirm against the platform documentation.
    """
    x = (
        Stream
        .read_from(input)
        .each(count)
        .map(show_line_number)
    )
    return x
package.json
{
"name": "@scramjet/python-big-files",
"version": "0.22.0",
"lang": "python",
"main": "./main.py",
"author": "XYZ",
"license": "GPL-3.0",
"engines": {
"python3": "3.9.0"
},
"scripts": {
"build:refapps": "yarn build:refapps:only",
"build:refapps:only": "mkdir -p dist/__pypackages__/ && cp *.py dist/ && pip3 install -t dist/__pypackages__/ -r requirements.txt",
"postbuild:refapps": "yarn prepack && yarn packseq",
"packseq": "PACKAGES_DIR=python node ../../scripts/packsequence.js",
"prepack": "PACKAGES_DIR=python node ../../scripts/publish.js",
"clean": "rm -rf ./dist"
}
}
requirements.txt
scramjet-framework-py
Metadata
Metadata
Assignees
Labels
No labels