Apache Airflow 1.10.12 - No module named 'httplib2'

I am getting the error below for a sample DAG I am trying to write.
My Airflow installation is configured as follows:
pip install apache-airflow[crypto,celery,postgres,hive,jdbc,mysql,ssh,docker,hdfs,redis,slack,webhdfs,httplib2]==1.10.12 \
    --constraint /requirements-python3.7.txt
Error:
[2020-12-19 22:41:19,342] {dagbag.py:259} ERROR - Failed to import: /usr/local/airflow/dags/alert_dag.py
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/airflow/models/dagbag.py", line 256, in process_file
m = imp.load_source(mod_name, filepath)
File "/usr/lib/python3.7/imp.py", line 171, in load_source
module = _load(spec)
File "<frozen importlib._bootstrap>", line 696, in _load
File "<frozen importlib._bootstrap>", line 677, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 728, in exec_module
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/usr/local/airflow/dags/alert_dag.py", line 6, in <module>
from httplib2 import Http
ModuleNotFoundError: No module named 'httplib2'
Code:
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from datetime import datetime, timedelta
from json import dumps
from httplib2 import Http
default_args = {
    'start_date': datetime(2020, 12, 19, 17, 0, 0),
    'owner': 'Airflow'
}

def on_success(dict):
    print('on_success_call_back function')
    print(dict)

def on_failure(dict):
    print('on_failure_call_back function')
    # """Hangouts Chat incoming webhook quickstart."""
    # url = 'https://chat.googleapis.com/v1/spaces/XXXX'
    # bot_message = {'text': 'alert_dag Failed'}
    # message_headers = {'Content-Type': 'application/json; charset=UTF-8'}
    # http_obj = Http()
    # response = http_obj.request(
    #     uri=url,
    #     method='POST',
    #     headers=message_headers,
    #     body=dumps(bot_message),
    # )

# on_success_call_back=on_success
with DAG(dag_id='alert_dag', schedule_interval="*/5 * * * *", default_args=default_args,
         catchup=True, dagrun_timeout=timedelta(seconds=25), on_failure_callback=on_failure) as dag:
    # Task 1
    t1 = BashOperator(task_id='t1', bash_command="exit 0")
    # Task 2
    t2 = BashOperator(task_id='t2', bash_command="echo 'second task'")

    t1 >> t2
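Likely cause: httplib2 is not one of the extras that apache-airflow 1.10.12 provides, so pip only warns about the unknown extra and never installs the library. Installing it directly into the same environment should make the import resolvable (a sketch, assuming the scheduler and webserver run from this environment; reusing the same constraint file keeps versions consistent):
pip install httplib2 --constraint /requirements-python3.7.txt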

Related

Gremlin/Python: run query as string

I have the following code, which runs as expected. But with it I have to use the "g" traversal object to manipulate the graph.
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
g = traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin','g'))
g.V().drop().iterate()
g.addV('my-label').property('k', 'v').next()
print(g.V().toList())
Instead of the "g" object, I want to run a string query to modify the graph, but the following doesn't work.
from gremlin_python.driver import client
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
ws_conn = DriverRemoteConnection('ws://localhost:8182/gremlin','g')
gremlin_conn = client.Client(ws_conn, "g")
query = "g.V().groupCount().by(label).unfold().project('label','count').by(keys).by(values)"
response = gremlin_conn.submit(query)
print(response)
This gives the following error:
(venv) sh-3.2$ python /Users/demo-prj/tests/tools/neptune/local.py
[v[4280]]
Traceback (most recent call last):
File "/Users/demo-prj/tests/tools/neptune/local.py", line 24, in <module>
response = gremlin_conn.submit(query)
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/client.py", line 127, in submit
return self.submitAsync(message, bindings=bindings, request_options=request_options).result()
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/client.py", line 148, in submitAsync
return conn.write(message)
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/connection.py", line 55, in write
self.connect()
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/connection.py", line 45, in connect
self._transport.connect(self._url, self._headers)
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/tornado/transport.py", line 40, in connect
self._ws = self._loop.run_sync(
File "/Users/demo-prj/venv/lib/python3.8/site-packages/tornado/ioloop.py", line 576, in run_sync
return future_cell[0].result()
File "/Users/demo-prj/venv/lib/python3.8/site-packages/tornado/ioloop.py", line 547, in run
result = func()
File "/Users/demo-prj/venv/lib/python3.8/site-packages/gremlin_python/driver/tornado/transport.py", line 41, in <lambda>
lambda: websocket.websocket_connect(url, compression_options=self._compression_options))
File "/Users/demo-prj/venv/lib/python3.8/site-packages/tornado/websocket.py", line 1333, in websocket_connect
conn = WebSocketClientConnection(request,
File "/Users/demo-prj/venv/lib/python3.8/site-packages/tornado/websocket.py", line 1122, in __init__
scheme, sep, rest = request.url.partition(':')
AttributeError: 'DriverRemoteConnection' object has no attribute 'partition'
This works, because client.Client expects a websocket URL (or a tornado HTTPRequest object), not a DriverRemoteConnection, as the failing request.url.partition(':') call in the traceback shows:
from gremlin_python.driver import client
from tornado import httpclient
ws_url = 'ws://localhost:8182/gremlin'
ws_conn = httpclient.HTTPRequest(ws_url)
gremlin_conn = client.Client(ws_conn, "g")
query = "g.V().groupCount().by(label).unfold().project('label','count').by(keys).by(values)"
response = gremlin_conn.submit(query)
print(response)
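Note that submit() returns a ResultSet rather than the rows themselves, so printing the response only shows the object. A short sketch of materializing the results with the same client as above (all() and close() are standard gremlin_python client methods):
results = response.all().result()  # block until the server has sent all rows
print(results)
gremlin_conn.close()  # release the websocket connection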

Cloud Composer scheduler error when adding first dag

I have a DAG running on my local Airflow.
I launched Cloud Composer and wanted to move my DAGs there.
When I added the first DAG file, the scheduler showed this error:
Traceback (most recent call last):
File "/usr/local/lib/airflow/airflow/models.py", line 363, in process_file
m = imp.load_source(mod_name, filepath)
File "/usr/local/lib/python3.6/imp.py", line 172, in load_source
module = _load(spec)
File "<frozen importlib._bootstrap>", line 684, in _load
File "<frozen importlib._bootstrap>", line 665, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 674, in exec_module
File "<frozen importlib._bootstrap_external>", line 781, in get_code
File "<frozen importlib._bootstrap_external>", line 741, in source_to_code
File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
File "/home/airflow/gcs/dags/testdag.py", line 95
'start_date': datetime(2018, 12, 05),
This is line 95:
args = {
    'owner': 'Airflow',
    'start_date': datetime(2018, 12, 05),
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
}
Never encountered this error before.
If you want to run the DAGs and do catchup from historical dates, then give past dates as the start_date.
Try giving:
from datetime import datetime, timedelta

args = {
    'owner': 'Airflow',
    'provide_context': True,
    'depends_on_past': False,
    'start_date': datetime.combine(datetime.today(), datetime.min.time()),
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
}
Maybe it's the date value you gave in start_date: in Python 3, an integer literal with a leading zero (05) is a syntax error. Try providing just 5, i.e. datetime(2018, 12, 5), and update the DAG folder again.
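For reference, a sketch of the corrected args block from the question (only the day literal changes):
from datetime import datetime, timedelta

args = {
    'owner': 'Airflow',
    'start_date': datetime(2018, 12, 5),  # 5, not 05: leading-zero literals are invalid in Python 3
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
}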

PySpark map datetime to DoW

I'm trying to map a column 'eventtimestamp' to its day of week with the following function:
from datetime import datetime
import calendar
from pyspark.sql.functions import UserDefinedFunction as udf

def toWeekDay(x):
    # '%w' gives the weekday as 0 (Sunday) .. 6 (Saturday); shift to Monday-based
    v = int(datetime.strptime(str(x), '%Y-%m-%d %H:%M:%S').strftime('%w'))
    if v == 0:
        v = 6
    else:
        v = v - 1
    return calendar.day_name[v]
and for my df I am trying to create a new column dow with the UDF.
from pyspark.sql.types import StringType  # needed for the UDF return type

udf_toWeekDay = udf(lambda x: toWeekDay(x), StringType())
df = df.withColumn("dow", udf_toWeekDay('eventtimestamp'))
Yet I'm getting an error I do not understand at all. At first it complained about passing datetime.datetime into strptime instead of a string, so I cast to str, and now I don't have a clue what's wrong.
Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 267, in <module>
raise Exception(traceback.format_exc())
Exception: Traceback (most recent call last):
File "/tmp/zeppelin_pyspark-9040214714346906648.py", line 260, in <module>
exec(code)
File "<stdin>", line 10, in <module>
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 429, in take
return self.limit(num).collect()
File "/usr/lib/spark/python/pyspark/sql/dataframe.py", line 391, in collect
port = self._jdf.collectToPython()
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "/usr/lib/spark/python/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/usr/lib/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o6250.collectToPython.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1107.0 failed 4 times, most recent failure: Lost task 0.3 in stage 1107.0 (TID 63757, ip-172-31-27-113.eu-west-1.compute.internal, executor 819): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
Thanks a lot for clues!
We can use date_format to get the day of week:
from pyspark.sql.functions import date_format

df = df.withColumn("dow", date_format(df['eventtimestamp'], 'EEEE'))
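For reference, a minimal sketch of what this produces, using a hypothetical one-row DataFrame (spark is assumed to be an active SparkSession; 'EEEE' is the Java date pattern for the full day name):
from pyspark.sql.functions import date_format

# Hypothetical example data; 2018-01-01 was a Monday.
df = spark.createDataFrame([('2018-01-01 10:00:00',)], ['eventtimestamp'])
df.withColumn('dow', date_format(df['eventtimestamp'], 'EEEE')).show()
# +-------------------+------+
# |     eventtimestamp|   dow|
# +-------------------+------+
# |2018-01-01 10:00:00|Monday|
# +-------------------+------+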

After installing R Recommended package, kernel shows error when I use Jupyter Notebook

Package page: https://anaconda.org/r/r-recommended
After installing the package, Jupyter Notebook shows a kernel error when I start a notebook:
Traceback (most recent call last):
File "C:\Users\LIU\Anaconda3\lib\site-packages\notebook\base\handlers.py", line 457, in wrapper
result = yield gen.maybe_future(method(self, *args, **kwargs))
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1008, in run
value = future.result()
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\concurrent.py", line 232, in result
raise_exc_info(self._exc_info)
File "<string>", line 3, in raise_exc_info
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1014, in run
yielded = self.gen.throw(*exc_info)
File "C:\Users\LIU\Anaconda3\lib\site-packages\notebook\services\sessions\handlers.py", line 62, in post
kernel_id=kernel_id))
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1008, in run
value = future.result()
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\concurrent.py", line 232, in result
raise_exc_info(self._exc_info)
File "<string>", line 3, in raise_exc_info
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1014, in run
yielded = self.gen.throw(*exc_info)
File "C:\Users\LIU\Anaconda3\lib\site-packages\notebook\services\sessions\sessionmanager.py", line 79, in create_session
kernel_name)
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1008, in run
value = future.result()
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\concurrent.py", line 232, in result
raise_exc_info(self._exc_info)
File "<string>", line 3, in raise_exc_info
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1014, in run
yielded = self.gen.throw(*exc_info)
File "C:\Users\LIU\Anaconda3\lib\site-packages\notebook\services\sessions\sessionmanager.py", line 92, in start_kernel_for_session
self.kernel_manager.start_kernel(path=kernel_path, kernel_name=kernel_name)
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 1008, in run
value = future.result()
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\concurrent.py", line 232, in result
raise_exc_info(self._exc_info)
File "<string>", line 3, in raise_exc_info
File "C:\Users\LIU\Anaconda3\lib\site-packages\tornado\gen.py", line 282, in wrapper
yielded = next(result)
File "C:\Users\LIU\Anaconda3\lib\site-packages\notebook\services\kernels\kernelmanager.py", line 87, in start_kernel
super(MappingKernelManager, self).start_kernel(**kwargs)
File "C:\Users\LIU\Anaconda3\lib\site-packages\jupyter_client\multikernelmanager.py", line 110, in start_kernel
km.start_kernel(**kwargs)
File "C:\Users\LIU\Anaconda3\lib\site-packages\jupyter_client\manager.py", line 243, in start_kernel
**kw)
File "C:\Users\LIU\Anaconda3\lib\site-packages\jupyter_client\manager.py", line 189, in _launch_kernel
return launch_kernel(kernel_cmd, **kw)
File "C:\Users\LIU\Anaconda3\lib\site-packages\jupyter_client\launcher.py", line 123, in launch_kernel
proc = Popen(cmd, **kwargs)
File "C:\Users\LIU\Anaconda3\lib\subprocess.py", line 947, in __init__
restore_signals, start_new_session)
File "C:\Users\LIU\Anaconda3\lib\subprocess.py", line 1224, in _execute_child
startupinfo)
FileNotFoundError: [WinError 2]
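The traceback ends in Popen raising WinError 2 (file not found), which means the kernelspec's launch command points at an executable that does not exist on this machine; r-recommended alone likely does not register an R kernel for Jupyter. Listing the registered kernelspecs shows which command Jupyter is trying to run (jupyter kernelspec list is a standard Jupyter CLI command):
jupyter kernelspec list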

Error in sage tutorial coding theory

I have just installed Sage 6.3 on Ubuntu 14.04 and tried the tutorial on coding theory as follows:
MS = MatrixSpace(GF(2),4,7)
G = MS([[1,1,1,0,0,0,0], [1,0,0,1,1,0,0], [0,1,0,1,0,1,0], [1,1,0,1,0,0,1]])
C = LinearCode(G)
On the third evaluation, Sage produced the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "_sage_input_4.py", line 10, in <module>
exec compile(u'open("___code___.py","w").write("# -*- coding: utf-8 -*-\\n" + _support_.preparse_worksheet_cell(base64.b64decode("QyA9IExpbmVhckNvZGUoRyk="),globals())+"\\n"); execfile(os.path.abspath("___code___.py"))
File "", line 1, in <module>
File "/tmp/tmpXyxNvC/___code___.py", line 2, in <module>
exec compile(u'C = LinearCode(G)
File "", line 1, in <module>
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/coding/linear_code.py", line 785, in __init__
facade_for = gen_mat.row(0).parent()
File "matrix_mod2_dense.pyx", line 576, in sage.matrix.matrix_mod2_dense.Matrix_mod2_dense.row (build/cythonized/sage/matrix/matrix_mod2_dense.c:5387)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 432, in VectorSpace
return FreeModule(K, rank=dimension, sparse=sparse, inner_product_matrix=inner_product_matrix)
File "factory.pyx", line 366, in sage.structure.factory.UniqueFactory.__call__ (build/cythonized/sage/structure/factory.c:1327)
File "factory.pyx", line 410, in sage.structure.factory.UniqueFactory.get_object (build/cythonized/sage/structure/factory.c:1679)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 380, in create_object
return FreeModule_ambient_field(base_ring, rank, sparse=sparse)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 4972, in __init__
FreeModule_ambient_pid.__init__(self, base_field, dimension, sparse=sparse)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 4893, in __init__
FreeModule_ambient_domain.__init__(self, base_ring=base_ring, rank=rank, sparse=sparse)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 4709, in __init__
FreeModule_ambient.__init__(self, base_ring, rank, sparse)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 4184, in __init__
FreeModule_generic.__init__(self, base_ring, rank=rank, degree=rank, sparse=sparse)
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 714, in __init__
self.element_class()
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 896, in element_class
C = element_class(self.base_ring(), self.is_sparse())
File "/usr/local/sage/local/lib/python2.7/site-packages/sage/modules/free_module.py", line 6721, in element_class
import sage.modules.vector_real_double_dense
File "vector_real_double_dense.pyx", line 1, in init sage.modules.vector_real_double_dense (build/cythonized/sage/modules/vector_real_double_dense.c:5611)
File "__init__.pxd", line 155, in init sage.modules.vector_double_dense (build/cythonized/sage/modules/vector_double_dense.c:11813)
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/__init__.py", line 153, in <module>
from . import add_newdocs
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/add_newdocs.py", line 13, in <module>
from numpy.lib import add_newdoc
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/lib/__init__.py", line 18, in <module>
from .polynomial import *
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/lib/polynomial.py", line 19, in <module>
from numpy.linalg import eigvals, lstsq, inv
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/linalg/__init__.py", line 50, in <module>
from .linalg import *
File "/usr/local/sage/local/lib/python2.7/site-packages/numpy/linalg/linalg.py", line 29, in <module>
from numpy.linalg import lapack_lite, _umath_linalg
ImportError: libgfortran.so.3: cannot open shared object file: No such file or directory
How can I solve this problem? I am new to Sage!
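The import chain bottoms out in NumPy failing to load libgfortran.so.3, so the missing piece is a system library rather than anything in the tutorial code. On Ubuntu 14.04 that library comes from the libgfortran3 package, so installing it should let the import succeed (a sketch, assuming sudo rights):
sudo apt-get install libgfortran3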
